"""Interactive football training-session visualizations served through Gradio."""

import pandas as pd
import plotly.express as px
import gradio as gr

# Session label shown in the UI -> CSV export for that session.
file_paths = {
    "Training Session 1": "./ZED 2005 Session 1-12-2024.csv",
    "Training Session 2": "./ZED 2005 Session 2-12-2024.csv",
    "Training Session 6": "./ZED 2005 Session 6-12-2024.csv",
}

# Every column that a chart below uses as a numeric axis or marker size.
NUMERIC_COLUMNS = [
    "Played Time (min)",
    "Top Speed (km/h)",
    "Dist. Covered (m)",
    "Dribbling Count (#)",
    "Kick Power (km/h)",
    "Session Intensity",
    "High Intensity Run (#)",
    "High Intensity Run (m)",
    "Max Acceleration (m/s\u00b2)",
    "Max Deceleration (m/s\u00b2)",
]

# Rows lacking any of these key metrics are dropped during cleaning.
REQUIRED_COLUMNS = [
    "Played Time (min)",
    "Top Speed (km/h)",
    "Dist. Covered (m)",
    "Session Intensity",
    "High Intensity Run (#)",
]


def load_and_clean_data(file_path: str) -> pd.DataFrame:
    """Read one session CSV and return it with numeric metrics cleaned.

    Each column listed in ``NUMERIC_COLUMNS`` that exists in the file is
    converted with ``pd.to_numeric(..., errors='coerce')``, so values that
    cannot be parsed become NaN. Rows with NaN in any present column of
    ``REQUIRED_COLUMNS`` are then dropped.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        The cleaned DataFrame.
    """
    df = pd.read_csv(file_path)

    for col in NUMERIC_COLUMNS:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Only filter on columns the file actually has: dropna raises KeyError
    # for an absent column, whereas the coercion loop above tolerates one.
    required_present = [col for col in REQUIRED_COLUMNS if col in df.columns]
    if required_present:
        df = df.dropna(subset=required_present)
    return df


dataframes = {
    session: load_and_clean_data(path) for session, path in file_paths.items()
}


def generate_visualizations(session: str, metric: str):
    """Build the Plotly figure for one session and one metric.

    Args:
        session: Key of ``dataframes`` identifying the session to plot.
        metric: One of the names in ``metrics``. Any other value falls back
            to the "Max Intensity: Acceleration vs Deceleration" scatter.

    Returns:
        The Plotly Express figure for the requested chart.

    Raises:
        KeyError: If ``session`` is not a key of ``dataframes``.
    """
    df = dataframes[session]

    if metric == "Played Time Distribution":
        fig = px.histogram(
            df,
            x="Played Time (min)",
            nbins=20,
            title="Played Time Distribution",
            labels={"Played Time (min)": "Played Time (min)"},
            color_discrete_sequence=["skyblue"],
        )
    elif metric == "Top Speed Boxplot":
        fig = px.box(
            df,
            y="Top Speed (km/h)",
            title="Top Speed of Players",
            labels={"Top Speed (km/h)": "Top Speed (km/h)"},
            color_discrete_sequence=["lightgreen"],
        )
    elif metric == "Distance Covered vs Played Time":
        fig = px.scatter(
            df,
            x="Played Time (min)",
            y="Dist. Covered (m)",
            color="Main Possition",
            title="Distance Covered vs Played Time",
            labels={
                "Played Time (min)": "Played Time (min)",
                "Dist. Covered (m)": "Distance Covered (m)",
            },
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
    elif metric == "Dribbling Contribution by Position":
        fig = px.bar(
            df,
            x="Main Possition",
            y="Dribbling Count (#)",
            title="Dribbling Contribution by Position",
            labels={
                "Main Possition": "Position",
                "Dribbling Count (#)": "Dribbling Count (#)",
            },
            color="Main Possition",
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
    elif metric == "Kick Power Distribution":
        fig = px.histogram(
            df,
            x="Kick Power (km/h)",
            nbins=10,
            title="Kick Power Distribution",
            labels={"Kick Power (km/h)": "Kick Power (km/h)"},
            color_discrete_sequence=["orange"],
        )
    elif metric == "Session Intensity by Position":
        fig = px.box(
            df,
            x="Main Possition",
            y="Session Intensity",
            title="Session Intensity by Position",
            labels={
                "Main Possition": "Position",
                "Session Intensity": "Session Intensity",
            },
            color="Main Possition",
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
    elif metric == "High-Intensity Runs":
        fig = px.scatter(
            df,
            x="High Intensity Run (#)",
            y="High Intensity Run (m)",
            color="Main Possition",
            title="High-Intensity Runs: Distance vs Frequency",
            labels={
                "High Intensity Run (#)": "High Intensity Runs (#)",
                "High Intensity Run (m)": "High Intensity Distance (m)",
            },
            size="Session Intensity",
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
    else:
        # Fallback branch: also serves the last entry of ``metrics``,
        # "Max Intensity: Acceleration vs Deceleration".
        fig = px.scatter(
            df,
            x="Max Acceleration (m/s\u00b2)",
            y="Max Deceleration (m/s\u00b2)",
            color="Main Possition",
            title="Max Intensity: Acceleration vs Deceleration",
            labels={
                "Max Acceleration (m/s\u00b2)": "Max Acceleration (m/s\u00b2)",
                "Max Deceleration (m/s\u00b2)": "Max Deceleration (m/s\u00b2)",
            },
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
    return fig


def visualize_data(session: str, metric: str):
    """Gradio callback: return the figure for the selected session and metric."""
    return generate_visualizations(session, metric)


sessions = list(file_paths.keys())
metrics = [
    "Played Time Distribution",
    "Top Speed Boxplot",
    "Distance Covered vs Played Time",
    "Dribbling Contribution by Position",
    "Kick Power Distribution",
    "Session Intensity by Position",
    "High-Intensity Runs",
    "Max Intensity: Acceleration vs Deceleration",
]

# Explicit default selections so the callback never receives None for an
# untouched dropdown (dataframes[None] would raise KeyError).
demo = gr.Interface(
    fn=visualize_data,
    inputs=[
        gr.Dropdown(choices=sessions, value=sessions[0], label="Select Session"),
        gr.Dropdown(choices=metrics, value=metrics[0], label="Select Metric"),
    ],
    outputs=gr.Plot(label="Visualization"),
    title="Football Analytics: Interactive Visualizations",
)

# Launch the server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()