import os
from collections import Counter

# Define paths
matches_dir = r'C:\Users\jerry\PyCharmProjects\footviz\footviz\data\matches'
team_data_dir = r'C:\Users\jerry\PyCharmProjects\footviz\footviz\data\team_data'

# Extension of the data files this script inspects.
CSV_SUFFIX = '.csv'


def strip_csv_suffix(filename):
    """Return *filename* without its trailing ``.csv`` extension.

    Only the final extension is removed, so a ``.csv`` that appears in the
    middle of the name (e.g. ``a.csv.backup.csv``) is left untouched.
    Names that do not end in ``.csv`` are returned unchanged.
    """
    if filename.endswith(CSV_SUFFIX):
        return filename[:-len(CSV_SUFFIX)]
    return filename


def extract_team_name(filename):
    """Return the team name encoded in a team_data file name.

    The ``.csv`` extension is dropped, and a leading numeric prefix of the
    form ``"12. "`` is removed when present. The prefix is only stripped
    when the text before the first ``". "`` consists solely of digits, so
    names that merely contain ``". "`` (``"St. Pauli.csv"``) stay intact.
    """
    stem = strip_csv_suffix(filename)
    prefix, separator, remainder = stem.partition('. ')
    if separator and prefix.isdigit():
        return remainder
    return stem


def list_csv_files(directory):
    """Return the sorted names of the ``.csv`` entries directly in *directory*.

    ``os.listdir`` yields entries in arbitrary order; sorting keeps the
    printed report stable between runs and machines.
    """
    return sorted(
        entry for entry in os.listdir(directory) if entry.endswith(CSV_SUFFIX)
    )


def find_duplicate_names(names):
    """Return ``(lowercased_name, count)`` pairs for names occurring more than once.

    Names are compared case-insensitively via ``str.lower``. The result is
    sorted by name so the report order is deterministic.
    """
    counts = Counter(name.lower() for name in names)
    return sorted((name, count) for name, count in counts.items() if count > 1)


# Check for team name inconsistencies in matches directory
print("检查 matches 目录中的球队名称:")

if os.path.isdir(matches_dir):
    for country in sorted(os.listdir(matches_dir)):
        country_path = os.path.join(matches_dir, country)
        # Only sub-directories (one per country) hold match files.
        if not os.path.isdir(country_path):
            continue
        print(f"\n{country}:")
        for csv_file in list_csv_files(country_path):
            print(f"  {strip_csv_suffix(csv_file)}")
else:
    # Report the missing directory instead of aborting with a traceback,
    # so the remaining checks still run.
    print(f"  目录不存在: {matches_dir}")

# Check for team name inconsistencies in team_data directory
print("\n\n检查 team_data 目录中的球队名称:")
if os.path.isdir(team_data_dir):
    team_files = list_csv_files(team_data_dir)
else:
    # Same best-effort handling as above: report and continue with no files.
    print(f"  目录不存在: {team_data_dir}")
    team_files = []

# Extract team name - either by number prefix or by removing .csv
team_names = [extract_team_name(csv_file) for csv_file in team_files]
for team_name in team_names:
    print(f"  {team_name}")

# Check for duplicates or variations
print("\n\n检查可能的重复或变体名称:")

# Look for potential duplicates based on lowercase comparison
duplicates = find_duplicate_names(team_names)
if duplicates:
    print("发现可能的重复名称:")
    for name, count in duplicates:
        print(f"  '{name}' 出现了 {count} 次")
else:
    print("未发现重复名称")

print("\n检查完成!")