Initial commit: добавление проекта predictV1

Включает модели ML для предсказаний, API маршруты, скрипты обучения и данные.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 17:22:58 +03:00
commit 8a134239d7
42 changed files with 12831 additions and 0 deletions

0
routes/__init__.py Normal file
View File

24
routes/heroes.py Normal file
View File

@@ -0,0 +1,24 @@
from fastapi import APIRouter
import psycopg2
from psycopg2.extras import RealDictCursor
router = APIRouter()
def get_db_connection():
    """Open and return a new psycopg2 connection to the local ``korobka_db`` database."""
    connection_params = {
        "host": "localhost",
        "port": 5432,
        "database": "korobka_db",
        "user": "postgres",
        "password": "postgres",
    }
    return psycopg2.connect(**connection_params)
@router.get("/heroes")
def get_heroes():
    """
    Return every row of the ``hero`` table, ordered by id.

    Each row is a mapping with the keys ``id`` and ``name`` (rows are fetched
    through ``RealDictCursor``).
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        try:
            cursor.execute("SELECT id, name FROM hero ORDER BY id")
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the query fails, so an error
        # does not leave the connection open.
        conn.close()

62
routes/match.py Normal file
View File

@@ -0,0 +1,62 @@
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import List
import psycopg2
from psycopg2.extras import RealDictCursor
router = APIRouter()
def get_db_connection():
    """Create a fresh psycopg2 connection to ``korobka_db`` on localhost:5432."""
    settings = dict(
        host="localhost",
        port=5432,
        database="korobka_db",
        user="postgres",
        password="postgres",
    )
    return psycopg2.connect(**settings)
class HeroDetail(BaseModel):
    """One hero entry of a match payload (element of ``MatchData.heroes``)."""
    # The three fields are written by ``add_pro_match`` to the ``hero_id``,
    # ``team`` and ``"order"`` columns of ``pro_details_match``.
    hero_id: int
    team: int
    order: int
class MatchData(BaseModel):
    """Request body of ``POST /match/pro/add``: one pro match plus its hero entries."""
    # The scalar fields are written to the columns of the same name in ``pro_matches``.
    id: int
    start_time: int
    leagueid: int
    radiant_team_id: int
    dire_team_id: int
    radiant_win: bool
    # One entry per hero; each becomes a row of ``pro_details_match``.
    heroes: List[HeroDetail]
@router.post("/match/pro/add")
def add_pro_match(match: MatchData):
    """
    Store a pro match and its hero entries in one transaction.

    The match row goes to ``pro_matches`` (an existing id is left untouched via
    ``ON CONFLICT (id) DO NOTHING``); every element of ``match.heroes`` is
    inserted into ``pro_details_match``.

    Returns:
        ``{"status": "success", "message": ...}`` after a successful commit.

    Raises:
        HTTPException: status 500 with the error text when any statement or
            the commit fails; the transaction is rolled back first.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            # Insert the match into pro_matches.
            cursor.execute("""
                INSERT INTO pro_matches (id, start_time, leagueid, radiant_team_id, dire_team_id, radiant_win)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (id) DO NOTHING
            """, (match.id, match.start_time, match.leagueid, match.radiant_team_id, match.dire_team_id, match.radiant_win))
            # Insert the hero details into pro_details_match.
            # NOTE(review): when the match id already exists the INSERT above is a
            # no-op, but the hero rows below are still inserted — confirm whether
            # repeated posts of the same match should skip them.
            for hero in match.heroes:
                cursor.execute("""
                    INSERT INTO pro_details_match (match_id, hero_id, team, "order")
                    VALUES (%s, %s, %s, %s)
                """, (match.id, hero.hero_id, hero.team, hero.order))
            conn.commit()
        except Exception as e:
            conn.rollback()
            raise HTTPException(status_code=500, detail=str(e)) from e
        finally:
            cursor.close()
    finally:
        # Runs on success, on failure, and when rollback itself fails,
        # so the connection is never left open.
        conn.close()
    return {"status": "success", "message": f"Match {match.id} added successfully"}

24
routes/players.py Normal file
View File

@@ -0,0 +1,24 @@
from fastapi import APIRouter
import psycopg2
from psycopg2.extras import RealDictCursor
router = APIRouter()
def get_db_connection():
    """Return a new psycopg2 connection to the ``korobka_db`` database on localhost."""
    db_config = {
        "host": "localhost",
        "port": 5432,
        "database": "korobka_db",
        "user": "postgres",
        "password": "postgres",
    }
    return psycopg2.connect(**db_config)
@router.get("/pro-players")
def get_pro_players():
    """
    Return every row of the ``pro_players`` table, ordered by id.

    Each row is a mapping with the keys ``id``, ``name`` and ``team_id``
    (rows are fetched through ``RealDictCursor``).
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        try:
            cursor.execute("SELECT id, name, team_id FROM pro_players ORDER BY id")
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the query fails, so an error
        # does not leave the connection open.
        conn.close()

199
routes/predict.py Normal file
View File

@@ -0,0 +1,199 @@
from fastapi import APIRouter, Request
from pydantic import BaseModel, Field
from typing import Optional, Dict, Any
from catboost import CatBoostClassifier
import pandas as pd
import numpy as np
from routes.predict_bag_of_heroes import predict_bag_of_heroes
from routes.predict_with_players import predict_with_players
router = APIRouter()
# =========================
# Model loading
# =========================
# The CatBoost classifier is loaded from the serialized artifact once, when
# this module is imported.
modelPro = CatBoostClassifier()
modelPro.load_model("artifacts/model_from_db_pro_v3.cbm")
# =========================
# Feature order loading
# =========================
def load_feature_order(path: str) -> list[str]:
    """Read the CSV at ``path`` and return the values of its first column as a list."""
    return pd.read_csv(path).iloc[:, 0].tolist()
# Feature names taken from the first column of the CSV; used as the column
# order of the model input frame.
FEATURE_ORDER_PRO: list[str] = load_feature_order("artifacts/feature_order_db.csv")
# =========================
# Defaults for missing features
# =========================
DEFAULTS: Dict[str, Any] = {
    "is_first_pick_radiant": 0,
    # Radiant heroes
    "r_h1": -1, "r_h2": -1, "r_h3": -1, "r_h4": -1, "r_h5": -1,
    # Dire heroes
    "d_h1": -1, "d_h2": -1, "d_h3": -1, "d_h4": -1, "d_h5": -1,
    # Radiant players
    "r_p1": -1, "r_p2": -1, "r_p3": -1, "r_p4": -1, "r_p5": -1,
    # Dire players
    "d_p1": -1, "d_p2": -1, "d_p3": -1, "d_p4": -1, "d_p5": -1,
    # Radiant positions
    "rp_h1": -1, "rp_h2": -1, "rp_h3": -1, "rp_h4": -1, "rp_h5": -1,
    # Dire positions
    "dp_h1": -1, "dp_h2": -1, "dp_h3": -1, "dp_h4": -1, "dp_h5": -1,
}
# =========================
# Input schema
# =========================
class DraftPayload(BaseModel):
    """
    Schema of a draft: first-pick flag plus hero, player and position ids per slot.

    Every field is optional and defaults to the matching entry of ``DEFAULTS``.
    """
    # first-pick flag (0 — Dire first pick / unknown, 1 — Radiant first pick)
    is_first_pick_radiant: Optional[int] = Field(default=DEFAULTS["is_first_pick_radiant"])
    # heroes (IDs)
    r_h1: Optional[int] = Field(default=DEFAULTS["r_h1"])
    r_h2: Optional[int] = Field(default=DEFAULTS["r_h2"])
    r_h3: Optional[int] = Field(default=DEFAULTS["r_h3"])
    r_h4: Optional[int] = Field(default=DEFAULTS["r_h4"])
    r_h5: Optional[int] = Field(default=DEFAULTS["r_h5"])
    d_h1: Optional[int] = Field(default=DEFAULTS["d_h1"])
    d_h2: Optional[int] = Field(default=DEFAULTS["d_h2"])
    d_h3: Optional[int] = Field(default=DEFAULTS["d_h3"])
    d_h4: Optional[int] = Field(default=DEFAULTS["d_h4"])
    d_h5: Optional[int] = Field(default=DEFAULTS["d_h5"])
    # players (IDs)
    r_p1: Optional[int] = Field(default=DEFAULTS["r_p1"])
    r_p2: Optional[int] = Field(default=DEFAULTS["r_p2"])
    r_p3: Optional[int] = Field(default=DEFAULTS["r_p3"])
    r_p4: Optional[int] = Field(default=DEFAULTS["r_p4"])
    r_p5: Optional[int] = Field(default=DEFAULTS["r_p5"])
    d_p1: Optional[int] = Field(default=DEFAULTS["d_p1"])
    d_p2: Optional[int] = Field(default=DEFAULTS["d_p2"])
    d_p3: Optional[int] = Field(default=DEFAULTS["d_p3"])
    d_p4: Optional[int] = Field(default=DEFAULTS["d_p4"])
    d_p5: Optional[int] = Field(default=DEFAULTS["d_p5"])
    # hero positions (1-5)
    rp_h1: Optional[int] = Field(default=DEFAULTS["rp_h1"])
    rp_h2: Optional[int] = Field(default=DEFAULTS["rp_h2"])
    rp_h3: Optional[int] = Field(default=DEFAULTS["rp_h3"])
    rp_h4: Optional[int] = Field(default=DEFAULTS["rp_h4"])
    rp_h5: Optional[int] = Field(default=DEFAULTS["rp_h5"])
    dp_h1: Optional[int] = Field(default=DEFAULTS["dp_h1"])
    dp_h2: Optional[int] = Field(default=DEFAULTS["dp_h2"])
    dp_h3: Optional[int] = Field(default=DEFAULTS["dp_h3"])
    dp_h4: Optional[int] = Field(default=DEFAULTS["dp_h4"])
    dp_h5: Optional[int] = Field(default=DEFAULTS["dp_h5"])
# =========================
# Helpers
# =========================
def build_long_format_input(payload: dict) -> pd.DataFrame:
    """
    Build the one-row hero+position feature frame for the pro model.

    For each draft slot 1..5 the hero id is read from ``r_h{i}`` / ``d_h{i}``
    and the position from ``rp_h{i}`` / ``dp_h{i}``. When both are
    non-negative, the binary column ``radiant_h{hero_id}_p{position}`` /
    ``dire_h{hero_id}_p{position}`` is set to 1, provided that column is
    listed in ``FEATURE_ORDER_PRO``. All other columns stay 0.

    Missing, ``None`` or non-numeric values count as -1 (empty slot) instead
    of raising.

    Returns:
        A single-row DataFrame whose columns follow ``FEATURE_ORDER_PRO``.
    """

    def as_int(value: Any) -> int:
        # A draft with an unfilled slot must not crash the prediction.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return -1

    # Start with every known feature switched off.
    features = dict.fromkeys(FEATURE_ORDER_PRO, 0)
    sides = (("radiant", "r_h", "rp_h"), ("dire", "d_h", "dp_h"))
    for side, hero_prefix, position_prefix in sides:
        for slot in range(1, 6):
            hero_id = as_int(payload.get(f"{hero_prefix}{slot}", -1))
            position = as_int(payload.get(f"{position_prefix}{slot}", -1))
            if hero_id < 0 or position < 0:
                continue
            feature_name = f"{side}_h{hero_id}_p{position}"
            # Combinations absent from the feature list are ignored.
            if feature_name in features:
                features[feature_name] = 1
    # One row, columns in the order given by FEATURE_ORDER_PRO.
    return pd.DataFrame([features], columns=FEATURE_ORDER_PRO)
def proba_percent(p: float) -> float:
    """Scale a probability to percent, clamp it to 0..100 and round to a whole number."""
    clamped_percent = np.clip(p * 100.0, 0.0, 100.0)
    return round(float(clamped_percent))
# =========================
# Route
# =========================
@router.post("/draft/predict")
async def predict(request: Request):
    """
    Return Radiant/Dire win percentages for a draft from every available model.

    The JSON body is normalised in place before it is handed to the models:

    * hero ids (``r_h*``, ``d_h*``), player ids (``r_p*``, ``d_p*``) and
      ``is_first_pick_radiant`` are converted to ``int``; ``None``, ``""`` and
      anything non-numeric become -1;
    * positions (``rp_h*``, ``dp_h*``) are converted the same way, and 0 is
      mapped to -1;
    * any other key is left untouched.

    Returns:
        A dict with the keys ``"pro-with-pos"``, ``"pro"``, ``"with-players"``
        and ``"stacking"``, each holding ``radiant_win`` and ``dire_win``.

    Raises:
        HTTPException: 400 when the body is not valid JSON, 422 when it is
            not a JSON object.
    """
    # Function-scope imports: the module top only imports APIRouter and
    # Request from fastapi.
    import logging
    from fastapi import HTTPException

    try:
        body = await request.json()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Request body is not valid JSON") from exc
    if not isinstance(body, dict):
        raise HTTPException(status_code=422, detail="Request body must be a JSON object")

    def to_int(value: Any) -> int:
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return -1

    position_prefixes = ("rp_h", "dp_h")
    id_prefixes = ("r_h", "d_h", "r_p", "d_p", "is_first_pick_radiant")
    # Convert hero, player and position values to int. Only existing keys are
    # reassigned, so mutating the dict while iterating it is safe.
    for key in body:
        if key.startswith(position_prefixes):
            position = to_int(body[key])
            # Position 0 is treated as "not set", like -1.
            body[key] = -1 if position == 0 else position
        elif key.startswith(id_prefixes):
            body[key] = to_int(body[key])

    # Hero+position combination prediction.
    X_pro = build_long_format_input(body)
    # Probability of the positive class for the single input row.
    radiant_pro = float(modelPro.predict_proba(X_pro)[0, 1])
    rp = proba_percent(radiant_pro)
    rd = 100.0 - rp
    # Bag-of-heroes model prediction.
    bag_prediction = predict_bag_of_heroes(body)
    # Prediction of the model that also uses players.
    players_prediction = predict_with_players(body)
    # Stacking model prediction (imported lazily to avoid a circular import).
    try:
        from routes.predict_stacking import predict_stacking
        stacking_prediction = predict_stacking(body)
    except Exception:
        # Best effort: keep serving the other models, but record the failure.
        logging.getLogger(__name__).exception(
            "Stacking prediction failed; falling back to 50/50"
        )
        stacking_prediction = {"radiant_win": 50, "dire_win": 50}
    return {
        "pro-with-pos": {
            "radiant_win": rp,
            "dire_win": rd
        },
        "pro": {
            "radiant_win": bag_prediction["radiant_win"],
            "dire_win": bag_prediction["dire_win"]
        },
        "with-players": {
            "radiant_win": players_prediction["radiant_win"],
            "dire_win": players_prediction["dire_win"]
        },
        "stacking": {
            "radiant_win": stacking_prediction["radiant_win"],
            "dire_win": stacking_prediction["dire_win"]
        }
    }

View File

@@ -0,0 +1,85 @@
from catboost import CatBoostClassifier
import pandas as pd
import numpy as np
from typing import Dict, Any
# Model loading: the CatBoost classifier is read from the serialized artifact
# once, when this module is imported.
modelBagOfHeroes = CatBoostClassifier()
modelBagOfHeroes.load_model("artifacts/model_bag_of_heroes.cbm")
# Feature order loading
def load_feature_order(path: str) -> list[str]:
    """Return the first column of the CSV file at ``path`` as a plain list."""
    table = pd.read_csv(path)
    return table.iloc[:, 0].to_list()
# Feature names taken from the first column of the CSV; used as the column
# order of the bag-of-heroes input frame.
FEATURE_ORDER_BAG: list[str] = load_feature_order("artifacts/feature_order_bag_of_heroes.csv")
def build_bag_of_heroes_features(payload: Dict[str, Any]) -> pd.DataFrame:
    """
    Convert a draft payload into the bag-of-heroes feature row.

    Keys read from ``payload``:
    - is_first_pick_radiant
    - r_h1 .. r_h5
    - d_h1 .. d_h5

    The result is a single-row DataFrame with the columns of
    ``FEATURE_ORDER_BAG``: ``is_first_pick_radiant`` plus the binary columns
    ``radiant_hero_{id}`` / ``dire_hero_{id}`` set to 1 for picked heroes.
    """
    picked = {"radiant": [], "dire": []}
    for slot in range(1, 6):
        for side, key_prefix in (("radiant", "r_h"), ("dire", "d_h")):
            raw_hero = payload.get(f"{key_prefix}{slot}", -1)
            # Falsy values and the -1 placeholder mean the slot is empty.
            if not raw_hero or raw_hero == -1:
                continue
            picked[side].append(int(raw_hero))

    # Every known feature starts at 0.
    row = dict.fromkeys(FEATURE_ORDER_BAG, 0)
    row["is_first_pick_radiant"] = int(payload.get("is_first_pick_radiant", 0))

    # Switch on the column of each picked hero, if the feature list knows it.
    for side, hero_ids in picked.items():
        for hero_id in hero_ids:
            column = f"{side}_hero_{hero_id}"
            if column in row:
                row[column] = 1

    # Single row with the columns in the order given by FEATURE_ORDER_BAG.
    return pd.DataFrame([row], columns=FEATURE_ORDER_BAG)
def predict_bag_of_heroes(payload: Dict[str, Any]) -> Dict[str, float]:
    """
    Predict the match outcome with the bag-of-heroes model.

    Returns a dict of the form:
        {
            "radiant_win": Radiant win percentage (0-100),
            "dire_win": Dire win percentage (0-100)
        }
    """
    feature_frame = build_bag_of_heroes_features(payload)
    radiant_probability = modelBagOfHeroes.predict_proba(feature_frame)[0, 1]
    radiant_percent = round(float(np.clip(radiant_probability * 100.0, 0.0, 100.0)))
    return {
        "radiant_win": radiant_percent,
        "dire_win": 100.0 - radiant_percent,
    }

View File

@@ -0,0 +1,53 @@
import pandas as pd
import numpy as np
import pickle
from typing import Dict, Any
from routes.predict import build_long_format_input, modelPro
from routes.predict_bag_of_heroes import build_bag_of_heroes_features, modelBagOfHeroes
from routes.predict_with_players import build_player_features, modelWithPlayers
# Load the meta-model (Logistic Regression) from its pickled artifact once,
# when this module is imported.
# NOTE(review): unpickling can execute arbitrary code — the artifact file must
# come from a trusted source.
with open("artifacts/model_stacking.pkl", 'rb') as f:
    modelStacking = pickle.load(f)
def predict_stacking(payload: Dict[str, Any]) -> Dict[str, float]:
    """
    Predict the match outcome with the stacking model.

    Each base model is evaluated on ``payload`` first; their Radiant
    probabilities become the features of the meta-model.

    Returns a dict of the form:
        {
            "radiant_win": Radiant win percentage (0-100),
            "dire_win": Dire win percentage (0-100)
        }
    """
    # (meta-feature name, feature builder, base model), in meta-model column order:
    # heroes + positions, bag of heroes, with players.
    base_models = (
        ("pred_with_positions", build_long_format_input, modelPro),
        ("pred_bag_of_heroes", build_bag_of_heroes_features, modelBagOfHeroes),
        ("pred_with_players", build_player_features, modelWithPlayers),
    )
    meta_row = {
        column: float(model.predict_proba(build_features(payload))[0, 1])
        for column, build_features, model in base_models
    }

    # Meta-model on top of the base predictions.
    meta_frame = pd.DataFrame([meta_row])
    radiant_probability = modelStacking.predict_proba(meta_frame)[0, 1]
    radiant_percent = round(float(np.clip(radiant_probability * 100.0, 0.0, 100.0)))
    return {
        "radiant_win": radiant_percent,
        "dire_win": 100.0 - radiant_percent,
    }

View File

@@ -0,0 +1,123 @@
from catboost import CatBoostClassifier
import pandas as pd
import numpy as np
from typing import Dict, Any
# Loading the model that uses players: the CatBoost classifier is read from
# the serialized artifact once, when this module is imported.
modelWithPlayers = CatBoostClassifier()
modelWithPlayers.load_model("artifacts/model_with_players.cbm")
# Feature order loading
def load_feature_order(path: str) -> list:
    """Load the CSV at ``path`` and return the entries of its first column as a list."""
    csv_table = pd.read_csv(path)
    return csv_table.iloc[:, 0].tolist()
# Feature names taken from the first column of the CSV; used as the column
# order of the with-players input frame.
FEATURE_ORDER_WITH_PLAYERS = load_feature_order("artifacts/feature_order_with_players.csv")
def build_player_features(payload: Dict[str, Any]) -> pd.DataFrame:
    """
    Build the one-row binary feature frame for the model that uses players.

    For each slot 1..5 of each side, up to three columns are set to 1
    (only if they are listed in ``FEATURE_ORDER_WITH_PLAYERS``):
    - {side}_p{player_id}_h{hero_id}_pos{position}
    - {side}_p{player_id}_h{hero_id}
    - {side}_p{player_id}_pos{position}
    where ``side`` is ``radiant`` or ``dire``. A slot contributes only when
    its player id is positive; hero id and position must be non-negative to
    take part in a column name.

    Missing, ``None`` or non-numeric values count as -1 (empty slot) instead
    of raising.

    Returns:
        A single-row DataFrame whose columns follow ``FEATURE_ORDER_WITH_PLAYERS``.
    """

    def as_int(value: Any) -> int:
        # A draft with an unfilled slot must not crash the prediction.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return -1

    # Start with every known feature switched off.
    features = dict.fromkeys(FEATURE_ORDER_WITH_PLAYERS, 0)

    def mark(feature_name: str) -> None:
        # Combinations absent from the feature list are ignored.
        if feature_name in features:
            features[feature_name] = 1

    sides = (
        ("radiant", "r_h", "r_p", "rp_h"),
        ("dire", "d_h", "d_p", "dp_h"),
    )
    for side, hero_prefix, player_prefix, position_prefix in sides:
        for slot in range(1, 6):
            hero_id = as_int(payload.get(f"{hero_prefix}{slot}", -1))
            player_id = as_int(payload.get(f"{player_prefix}{slot}", -1))
            position = as_int(payload.get(f"{position_prefix}{slot}", -1))
            if player_id <= 0:
                continue
            has_hero = hero_id >= 0
            has_position = position >= 0
            # Feature: player + hero + position.
            if has_hero and has_position:
                mark(f"{side}_p{player_id}_h{hero_id}_pos{position}")
            # Feature: player + hero only.
            if has_hero:
                mark(f"{side}_p{player_id}_h{hero_id}")
            # Feature: player + position only.
            if has_position:
                mark(f"{side}_p{player_id}_pos{position}")
    # One row, columns in the order given by FEATURE_ORDER_WITH_PLAYERS.
    return pd.DataFrame([features], columns=FEATURE_ORDER_WITH_PLAYERS)
def predict_with_players(payload: Dict[str, Any]) -> Dict[str, float]:
    """
    Predict the match outcome with the model that uses players.

    When no ``r_p{i}`` / ``d_p{i}`` entry holds a positive player id, the model
    is not called and 50/50 is returned. Player ids that are ``None`` or
    non-numeric count as "no player" instead of raising.

    Returns a dict of the form:
        {
            "radiant_win": Radiant win percentage (0-100),
            "dire_win": Dire win percentage (0-100)
        }
    """

    def has_player(key: str) -> bool:
        # Raw payload values may be None or strings; comparing those with 0
        # directly would raise TypeError.
        try:
            return int(payload.get(key, -1)) > 0
        except (TypeError, ValueError, OverflowError):
            return False

    player_keys = (f"{side}_p{slot}" for slot in range(1, 6) for side in ("r", "d"))
    # Without a single player there is nothing for this model to use: return 50/50.
    if not any(has_player(key) for key in player_keys):
        return {
            "radiant_win": 50,
            "dire_win": 50
        }
    # Build the features and predict.
    X = build_player_features(payload)
    proba = modelWithPlayers.predict_proba(X)[0, 1]
    radiant_win = round(float(np.clip(proba * 100.0, 0.0, 100.0)))
    dire_win = 100 - radiant_win
    return {
        "radiant_win": radiant_win,
        "dire_win": dire_win
    }

24
routes/teams.py Normal file
View File

@@ -0,0 +1,24 @@
from fastapi import APIRouter
import psycopg2
from psycopg2.extras import RealDictCursor
router = APIRouter()
def get_db_connection():
    """Open a new psycopg2 connection to ``korobka_db`` on localhost and return it."""
    connect_kwargs = {
        "host": "localhost",
        "port": 5432,
        "database": "korobka_db",
        "user": "postgres",
        "password": "postgres",
    }
    return psycopg2.connect(**connect_kwargs)
@router.get("/teams")
def get_teams():
    """
    Return every row of the ``teams`` table, ordered by id.

    Each row is a mapping with the keys ``id`` and ``name`` (rows are fetched
    through ``RealDictCursor``).
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        try:
            cursor.execute("SELECT id, name FROM teams ORDER BY id")
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the query fails, so an error
        # does not leave the connection open.
        conn.close()