# File: predictV1/test_models_accuracy.py (Python, 235 lines, 7.7 KiB)

import sys
sys.path.insert(0, '.')
import psycopg2
import pandas as pd
import numpy as np
from routes.predict import build_long_format_input, modelPro
from routes.predict_bag_of_heroes import build_bag_of_heroes_features, modelBagOfHeroes
from routes.predict_with_players import build_player_features, modelWithPlayers
from routes.predict_stacking import predict_stacking
# Connect to the database.
# NOTE(review): the credentials are hard-coded here; consider moving them to
# configuration or environment variables.
conn = psycopg2.connect(
    host="localhost",
    port=5432,
    database="korobka_db",
    user="postgres",
    password="postgres",
)
try:
    # Sample random matches that have at least one known (non-zero) player.
    query = """
        SELECT
            m.id as match_id,
            m.radiant_win,
            m.leagueid
        FROM matches m
        WHERE EXISTS (
            SELECT 1
            FROM details_match dm
            WHERE dm.match_id = m.id
            AND dm.players_id IS NOT NULL
            AND dm.players_id != 0
        )
        ORDER BY RANDOM()
        LIMIT 100
    """
    matches_df = pd.read_sql(query, conn)
    print(f"Загружено {len(matches_df)} случайных матчей")

    # Fetch the per-pick details of the sampled matches, one %s placeholder
    # per match id so the ids are passed as query parameters.
    match_ids = matches_df['match_id'].tolist()
    placeholders = ','.join(['%s'] * len(match_ids))
    query_details = f"""
        SELECT
            dm.match_id,
            dm.hero_id,
            dm.team,
            dm.players_id,
            dm.pos,
            dm."order"
        FROM details_match dm
        WHERE dm.match_id IN ({placeholders})
        ORDER BY dm.match_id, dm."order"
    """
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cursor:
        if match_ids:
            cursor.execute(query_details, match_ids)
            details_rows = cursor.fetchall()
        else:
            # An empty sample would render "IN ()", which is invalid SQL,
            # so skip the query and report no detail rows instead.
            details_rows = []
finally:
    # Always release the connection, even when one of the queries fails.
    conn.close()
# Group the detail rows into per-match lists keyed by match_id.
# Each row carries six columns, in the order selected by the details query:
# match_id, hero_id, team, players_id, pos, order.
details_by_match = {}
for row_match, row_hero, row_team, row_player, row_pos, row_order in details_rows:
    pick = {
        'hero_id': row_hero,
        'team': row_team,
        'players_id': row_player,
        'pos': row_pos,
        'order': row_order,
    }
    # setdefault creates the per-match list on first sight of a match id.
    details_by_match.setdefault(row_match, []).append(pick)
print(f"Загружено деталей для {len(details_by_match)} матчей\n")
# Per-model tally of correct predictions, all starting at zero.
correct = dict.fromkeys(('model1', 'model2', 'model3', 'stacking'), 0)
# Per-model predicted probabilities, kept for the AUC calculation.
# Each model gets its own independent list.
predictions = {model_key: [] for model_key in correct}
# Actual match outcomes, filled in by the evaluation loop.
actuals = []
print("Тестирование моделей...")
print("=" * 80)
# Display labels for the per-match report, keyed like `correct`/`predictions`.
_MODEL_LABELS = {
    'model1': "Модель 1",
    'model2': "Модель 2",
    'model3': "Модель 3",
    'stacking': "Стекинг",
}


def _side_payload(picks, prefix):
    """Build the payload entries for one team's five draft slots.

    Args:
        picks: that team's pick dicts (keys 'hero_id', 'players_id', 'pos'),
            already sorted by pick order.
        prefix: 'r' for Radiant or 'd' for Dire; produces the keys
            '<prefix>_h<n>', '<prefix>_p<n>' and '<prefix>p_h<n>'.

    Returns:
        A dict with three entries per slot for slots 1..5. Missing slots,
        and falsy player/position values, are encoded as -1.
    """
    entries = {}
    for slot in range(1, 6):
        pick = picks[slot - 1] if slot <= len(picks) else None
        if pick is None:
            hero, player, position = -1, -1, -1
        else:
            hero = pick['hero_id']
            player = pick['players_id'] or -1
            position = pick['pos'] or -1
        entries[f'{prefix}_h{slot}'] = hero
        entries[f'{prefix}_p{slot}'] = player
        entries[f'{prefix}p_h{slot}'] = position
    return entries


# Evaluate every sampled match.
for idx, match_row in matches_df.iterrows():
    match_id = match_row['match_id']
    radiant_win = match_row['radiant_win']
    details = details_by_match.get(match_id)
    if details is None:
        # No pick details were loaded for this match; nothing to predict on.
        continue

    # Split the picks into Radiant (team 0) and Dire (team 1), by pick order.
    radiant_picks = sorted((d for d in details if d['team'] == 0), key=lambda d: d['order'])
    dire_picks = sorted((d for d in details if d['team'] == 1), key=lambda d: d['order'])
    payload = {**_side_payload(radiant_picks, 'r'), **_side_payload(dire_picks, 'd')}

    try:
        # Convert the outcome first: a missing value raises here, before
        # anything is recorded, so `predictions` and `actuals` stay aligned.
        actual = int(radiant_win)
        radiant_won = bool(actual)

        # Probability of a Radiant win according to each model.
        probabilities = {
            # Model 1: heroes + positions.
            'model1': float(modelPro.predict_proba(build_long_format_input(payload))[0, 1]),
            # Model 2: bag of heroes.
            'model2': float(modelBagOfHeroes.predict_proba(build_bag_of_heroes_features(payload))[0, 1]),
            # Model 3: with players.
            'model3': float(modelWithPlayers.predict_proba(build_player_features(payload))[0, 1]),
            # Stacking: the 'radiant_win' field is treated as a percentage.
            'stacking': predict_stacking(payload)['radiant_win'] / 100.0,
        }
        # A model is right when its >= 0.5 decision matches the real outcome.
        hits = {key: (proba >= 0.5) == radiant_won for key, proba in probabilities.items()}

        # Record results only after every model has produced a prediction.
        for key, proba in probabilities.items():
            predictions[key].append(proba)
            if hits[key]:
                correct[key] += 1
        actuals.append(actual)

        # Show a detailed report for the first 10 matches.
        if idx < 10:
            actual_str = "Radiant" if radiant_won else "Dire"
            print(f"\nМатч #{idx+1} (ID: {match_id}):")
            print(f" Реальный результат: {actual_str} win")
            for key, label in _MODEL_LABELS.items():
                mark = '✓' if hits[key] else '✗'
                print(f" {label}: {probabilities[key]*100:.1f}% Radiant ({mark})")
    except Exception as e:
        # Best-effort evaluation: report the failure and move on.
        print(f"Ошибка при обработке матча {match_id}: {e}")
# Final statistics: accuracy of every model over the evaluated matches.
total = len(actuals)
print("\n" + "=" * 80)
print("ИТОГОВАЯ СТАТИСТИКА")
print("=" * 80)
print(f"Всего проверено матчей: {total}\n")
print("Точность (Accuracy):")
for model_key, label in (
    ('model1', " Модель 1 (Heroes + Positions)"),
    ('model2', " Модель 2 (Bag of Heroes)"),
    ('model3', " Модель 3 (With Players)"),
    ('stacking', " Мета-модель (Stacking)"),
):
    hit_count = correct[model_key]
    # With no evaluated matches the ratio is undefined; print N/A instead of
    # failing with ZeroDivisionError.
    share = f"{hit_count / total * 100:.2f}%" if total else "N/A"
    print(f"{label}: {hit_count}/{total} = {share}")
# AUC of every model over the evaluated matches.
# The import stays here, next to its only use in this script section.
from sklearn.metrics import roc_auc_score
print("\nAUC (Area Under ROC Curve):")
for model_key, label in (
    ('model1', " Модель 1 (Heroes + Positions)"),
    ('model2', " Модель 2 (Bag of Heroes)"),
    ('model3', " Модель 3 (With Players)"),
    ('stacking', " Мета-модель (Stacking)"),
):
    try:
        auc = roc_auc_score(actuals, predictions[model_key])
    except ValueError:
        # Raised when the score cannot be computed from the collected data
        # (e.g. no samples); other errors are real bugs and should propagate.
        print(f"{label}: N/A")
    else:
        print(f"{label}: {auc:.4f}")
print("\n" + "=" * 80)