Initial commit: добавление проекта predictV1
Включает модели ML для предсказаний, API маршруты, скрипты обучения и данные. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
94
educationML/build_dataset_pro.py
Normal file
94
educationML/build_dataset_pro.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import pandas as pd
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
def get_db_connection():
|
||||
return psycopg2.connect(
|
||||
host="localhost",
|
||||
port=5432,
|
||||
database="korobka_db",
|
||||
user="postgres",
|
||||
password="postgres"
|
||||
)
|
||||
|
||||
print("Загрузка данных из БД...")
|
||||
|
||||
conn = get_db_connection()
|
||||
|
||||
# Загружаем матчи
|
||||
matches_df = pd.read_sql_query("""
|
||||
SELECT id as match_id, radiant_team_id, dire_team_id, radiant_win
|
||||
FROM matches
|
||||
WHERE "source" = 'pro'
|
||||
ORDER BY id
|
||||
""", conn)
|
||||
|
||||
print(f"Загружено {len(matches_df)} матчей")
|
||||
|
||||
# Загружаем детали героев
|
||||
details_df = pd.read_sql_query("""
|
||||
SELECT match_id, hero_id, team, players_id, pos
|
||||
FROM details_match
|
||||
WHERE "source" = 'pro'
|
||||
ORDER BY match_id
|
||||
""", conn)
|
||||
|
||||
print(f"Загружено {len(details_df)} записей героев")
|
||||
|
||||
conn.close()
|
||||
|
||||
# Формируем слоты героев для каждого матча
|
||||
def slots_from_picks(group):
|
||||
# --- Radiant (team = 0) ---
|
||||
r_heroes = group[group['team'] == 0]['hero_id'].tolist()[:5]
|
||||
r_players = group[group['team'] == 0]['players_id'].tolist()[:5]
|
||||
r_pos = group[group['team'] == 0]['pos'].tolist()[:5]
|
||||
|
||||
# --- Dire (team = 1) ---
|
||||
d_heroes = group[group['team'] == 1]['hero_id'].tolist()[:5]
|
||||
d_players = group[group['team'] == 1]['players_id'].tolist()[:5]
|
||||
d_pos = group[group['team'] == 1]['pos'].tolist()[:5]
|
||||
|
||||
row = {}
|
||||
# --- Добавляем 5 слотов для каждой стороны ---
|
||||
for i in range(5):
|
||||
# Герои Radiant / Dire
|
||||
row[f"r_h{i+1}"] = r_heroes[i] if i < len(r_heroes) else -1
|
||||
row[f"d_h{i+1}"] = d_heroes[i] if i < len(d_heroes) else -1
|
||||
|
||||
# Позиции героев
|
||||
row[f"rp_h{i+1}"] = r_pos[i] if i < len(r_pos) else -1
|
||||
row[f"dp_h{i+1}"] = d_pos[i] if i < len(d_pos) else -1
|
||||
|
||||
# Игроки Radiant / Dire
|
||||
row[f"r_p{i+1}"] = r_players[i] if i < len(r_players) else -1
|
||||
row[f"d_p{i+1}"] = d_players[i] if i < len(d_players) else -1
|
||||
|
||||
# Определяем, кто пикал первым (команда 0 = radiant)
|
||||
fp_team = group.iloc[0]['team'] if len(group) > 0 else 0
|
||||
row["is_first_pick_radiant"] = 1 if fp_team == 0 else 0
|
||||
|
||||
return pd.Series(row)
|
||||
|
||||
slots_df = details_df.groupby("match_id").apply(slots_from_picks).reset_index()
|
||||
|
||||
# Объединяем с информацией о матчах
|
||||
dataset = matches_df.merge(slots_df, on="match_id", how="inner")
|
||||
|
||||
# Добавляем целевую переменную
|
||||
dataset['y'] = dataset['radiant_win'].astype(int)
|
||||
|
||||
# Выбираем нужные колонки в правильном порядке
|
||||
final_df = dataset[['match_id', 'is_first_pick_radiant',
|
||||
'r_h1', 'r_h2', 'r_h3', 'r_h4', 'r_h5',
|
||||
'd_h1', 'd_h2', 'd_h3', 'd_h4', 'd_h5',
|
||||
'r_p1', 'r_p2', 'r_p3', 'r_p4', 'r_p5',
|
||||
'd_p1', 'd_p2', 'd_p3', 'd_p4', 'd_p5',
|
||||
'rp_h1', 'rp_h2', 'rp_h3', 'rp_h4', 'rp_h5',
|
||||
'dp_h1', 'dp_h2', 'dp_h3', 'dp_h4', 'dp_h5',
|
||||
'y']]
|
||||
|
||||
# Сохраняем
|
||||
final_df.to_parquet("data/dataset_from_db.parquet", index=False)
|
||||
print(f"Сохранено {len(final_df)} записей в data/dataset_from_db.parquet")
|
||||
print(f"Radiant wins: {final_df['y'].sum()}, Dire wins: {len(final_df) - final_df['y'].sum()}")
|
||||
Reference in New Issue
Block a user