import logging
import re
import sqlite3
from pathlib import Path
from typing import List, Optional

import pandas as pd
from fuzzywuzzy import process

# --- Logger Setup ---
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s [%(levelname)s] :%(lineno)d %(funcName)s() - %(message)s")
logger = logging.getLogger(__name__)

COUNTRY_CODE_MAP = {
    'uk': 'England',
    'spain': 'Spain',
    'germany': 'Germany',
    'france': 'France',
    'cn': 'China'
}


def clean_name(name: str) -> str:
    if not name:
        return ''
    return re.sub(r'^\d+', '', str(name)).strip()


def find_best_match(name: str, choices: List[str], threshold: int = 70) -> Optional[str]:
    if not name or not choices:
        return None
    name = str(name)  # <-- 这里保证传入的是字符串
    result = process.extractOne(name, choices)
    if result and result[1] >= threshold:
        return result[0]
    logger.debug(f'No match for "{name}" in {choices} (best: "{result[0]}" score: {result[1]})')
    return None


def to_int(val, default=0):
    try:
        if val is None:
            return default
        val = str(val).split('(')[0].replace('-', '').strip()
        return int(val) if val else default
    except (ValueError, TypeError):
        return default


def to_float(val, default=0.0):
    try:
        if val is None:
            return default
        val = str(val).replace('-', '').strip()
        return float(val) if val else default
    except (ValueError, TypeError):
        return default


class FootvizImporter:
    def __init__(self, db_path: Path, base_path: Path):
        self.db_path = db_path
        self.base_path = base_path
        self.conn = sqlite3.connect(self.db_path)
        self.cursor = self.conn.cursor()
        self.team_to_country = {}  # team_name -> country_code

    def execute_schema(self, schema_file: Path):
        with open(schema_file, 'r', encoding='utf-8') as f:
            self.cursor.executescript(f.read())
        self.conn.commit()
        logger.info("Executed schema.sql")

    def import_countries(self):
        for code, name in COUNTRY_CODE_MAP.items():
            self.cursor.execute('INSERT OR IGNORE INTO Countries (name, code) VALUES (?, ?)', (name, code))
        self.conn.commit()
        logger.info(f"Imported {len(COUNTRY_CODE_MAP)} countries")

    def import_teams(self):
        scores_dir = self.base_path / 'footviz' / 'data' / 'scores'
        for csv_file in scores_dir.glob('*.csv'):
            country_code = csv_file.stem
            df = pd.read_csv(csv_file, encoding='utf-8-sig')
            for _, row in df.iterrows():
                team_name = row.get('球队') or row.get('Team')
                if team_name and team_name not in self.team_to_country:
                    self.team_to_country[team_name] = country_code
                    self.cursor.execute('INSERT OR IGNORE INTO Teams (name, country_code) VALUES (?, ?)',
                                        (team_name, country_code))
        self.conn.commit()
        logger.info(f"Imported {len(self.team_to_country)} teams")
        return list(self.team_to_country.keys())

    def import_competitions(self):
        competitions = [
            ('uk', 'Premier League'),
            ('spain', 'La Liga'),
            ('germany', 'Bundesliga'),
            ('france', 'Ligue 1'),
        ]
        for code, league_name in competitions:
            self.cursor.execute(
                'UPDATE Countries SET league_name=? WHERE code=?',
                (league_name, code)
            )
        self.conn.commit()
        logger.info(f"Updated league names for {len(competitions)} countries")
        return competitions

    def import_league_standings(self):
        scores_dir = self.base_path / 'footviz' / 'data' / 'scores'
        count = 0
        for csv_file in scores_dir.glob('*.csv'):
            df = pd.read_csv(csv_file, encoding='utf-8-sig')
            country_code = csv_file.stem
            for _, row in df.iterrows():
                team_name = row.get('球队') or row.get('Team')
                if not team_name or team_name not in self.team_to_country:
                    continue
                team_id = self.cursor.execute('SELECT id FROM Teams WHERE name=?', (team_name,)).fetchone()[0]
                self.cursor.execute(
                    'INSERT INTO LeagueStandings VALUES (NULL,?,?,?,?,?,?,?,?,?,?,?,?)',
                    (
                        team_id,
                        country_code,
                        to_int(row.get('排名')),
                        to_int(row.get('场次')),
                        to_int(row.get('胜')),
                        to_int(row.get('平')),
                        to_int(row.get('负')),
                        to_int(row.get('进球')),
                        to_int(row.get('失球')),
                        to_int(row.get('净胜球')),
                        to_int(row.get('积分')),
                        row.get('最近比赛')
                    )
                )
                count += 1
        self.conn.commit()
        logger.info(f"Imported {count} league standings")

    def import_matches(self):
        all_teams = {name: tid for tid, name in self.cursor.execute('SELECT id, name FROM Teams')}
        matches_dir = self.base_path / 'footviz' / 'data' / 'matches'
        count = 0
        for country_folder in matches_dir.iterdir():
            if not country_folder.is_dir():
                continue
            for csv_file in country_folder.glob('*.csv'):
                df = pd.read_csv(csv_file, encoding='utf-8-sig')
                for _, row in df.iterrows():
                    team_a_name = clean_name(row['TeamA'])
                    team_b_name = clean_name(row['TeamB'])
                    best_a_name = find_best_match(team_a_name, all_teams.keys())
                    best_b_name = find_best_match(team_b_name, all_teams.keys())
                    if not best_a_name or not best_b_name:
                        continue
                    team_a_id = all_teams[best_a_name]
                    team_b_id = all_teams[best_b_name]
                    try:
                        home_goals, away_goals = map(int, row['Goals'].split(':'))
                    except ValueError:
                        continue
                    self.cursor.execute(
                        'INSERT INTO MatchResults (country_code, home_team_id, away_team_id, match_date, home_goals, away_goals, result) '
                        'VALUES (?, ?, ?, ?, ?, ?, ?)',
                        (country_folder.name, team_a_id, team_b_id, row['Time'], home_goals, away_goals, row['Result'])
                    )
                    count += 1
        self.conn.commit()
        logger.info(f"Imported {count} matches")

    def import_players(self):
        data_path = self.base_path / 'footviz' / 'data'
        canonical_teams = list(self.team_to_country.keys())
        player_files = list((data_path / 'player_data_backup').glob('*.csv'))
        player_count = stats_processed = stats_skipped = 0

        for player_csv in player_files:
            try:
                df = pd.read_csv(player_csv, encoding='utf-8-sig')
            except Exception as e:
                continue
            first_row = df.iloc[0]
            player_name = clean_name(first_row.get('Name'))
            if not player_name:
                continue
            age_match = re.search(r'(\d+)', str(first_row.get('Age')))
            age = int(age_match.group(1)) if age_match else None
            height_match = re.search(r'(\d+)', str(first_row.get('Height')))
            height = int(height_match.group(1)) if height_match else None
            nationality = first_row.get('Nationality')
            positions = first_row.get('Positions')
            # 插入 player
            self.cursor.execute(
                'INSERT OR IGNORE INTO Players (name, age, height, nationality, positions) VALUES (?, ?, ?, ?, ?)',
                (player_name, age, height, nationality, positions)
            )
            player_id = self.cursor.execute('SELECT id FROM Players WHERE name=?', (player_name,)).fetchone()[0]
            player_count += 1

            for idx, row in df.iterrows():
                tournament = row.get('Tournament')
                if not tournament or 'Total' in str(tournament):
                    continue
                team_name = row.get('Current Team') or first_row.get('Current Team')
                team_matched = find_best_match(team_name, canonical_teams)
                if not team_matched:
                    stats_skipped += 1
                    continue
                team_id = self.cursor.execute('SELECT id FROM Teams WHERE name=?', (team_matched,)).fetchone()[0]
                self.cursor.execute(
                    'INSERT INTO PlayerStats (player_id, team_id, appearances, minutes_played, goals, assists, '
                    'tackles, interceptions, fouls_committed, pass_success_rate, shots_per_game, dribbles_per_game, rating) '
                    'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (
                        player_id,
                        team_id,
                        to_int(row.get('Apps')),
                        to_int(row.get('Mins')),
                        to_int(row.get('Goals')),
                        to_int(row.get('Assists')),
                        to_float(row.get('Tackles')),
                        to_float(row.get('Interceptions')),
                        to_float(row.get('Fouls')),
                        to_float(row.get('PS%')),
                        to_float(row.get('SpG')),
                        to_float(row.get('Dribbles')),
                        to_float(row.get('Rating'))
                    )
                )
                stats_processed += 1
        self.conn.commit()
        logger.info(f"Imported {player_count} players, {stats_processed} stats, {stats_skipped} skipped")

    def close(self):
        table_names = [row[0] for row in self.cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")]
        counts = {}
        for table in table_names:
            count = self.cursor.execute(f'SELECT COUNT(*) FROM {table}').fetchone()[0]
            counts[table] = count
            logger.info(f"Table {table} has {count} rows")
            fks = self.cursor.execute(f'PRAGMA foreign_key_list({table})').fetchall()
            for fk in fks:
                parent_table, child_col, parent_col = fk[2], fk[3], fk[4]
                join_count = self.cursor.execute(
                    f'SELECT COUNT(*) FROM {table} AS child JOIN {parent_table} AS parent ON child.{child_col}=parent.{parent_col}'
                ).fetchone()[0]
                logger.info(f"Table {table} has {join_count} valid references to {parent_table}")
        self.conn.close()
        return counts


if __name__ == '__main__':
    BASE_PATH = Path('.')
    DB_PATH = BASE_PATH / 'footviz.db'
    SCHEMA_PATH = BASE_PATH / 'footviz' / 'schema.sql'

    importer = FootvizImporter(DB_PATH, BASE_PATH)
    importer.execute_schema(SCHEMA_PATH)
    importer.import_countries()
    importer.import_teams()
    importer.import_competitions()
    importer.import_league_standings()
    importer.import_matches()
    importer.import_players()
    print(importer.close())
