"""Plotly chart builders for the game leaderboard.

Every ``create_*`` function returns a ``plotly.graph_objects.Figure`` (or
``None`` for an unknown game in ``create_horizontal_bar_chart``).  Scores
that are shown together on one chart are first normalized per game with
``normalize_values`` so that games with different score scales share the
0-100 axis.
"""

import json

import numpy as np
import pandas as pd
import plotly.graph_objects as go

from leaderboard_utils import (
    get_organization,
    get_mario_leaderboard,
    get_sokoban_leaderboard,
    get_2048_leaderboard,
    get_candy_leaderboard,
    get_tetris_leaderboard,
    get_tetris_planning_leaderboard,
    get_combined_leaderboard,
    GAME_ORDER
)

# Load model colors
with open('assets/model_color.json', 'r', encoding='utf-8') as f:
    MODEL_COLORS = json.load(f)

# Color used for any player that has no entry in MODEL_COLORS.
_FALLBACK_COLOR = '#808080'

# Leaderboard column holding the per-game score for each supported game.
GAME_SCORE_COLUMNS = {
    "Super Mario Bros": "Score",
    "Sokoban": "Levels Cracked",
    "2048": "Score",
    "Candy Crush": "Average Score",
    "Tetris (complete)": "Score",
    "Tetris (planning only)": "Score",
    "Ace Attorney": "Score"
}


def get_model_prefix(name):
    """Return the part of a model name before its first ``-``."""
    return name.split('-')[0]


def normalize_values(values, mean, std):
    """
    Normalize values using z-score and scale to 0-100 range

    Each value is converted to ``z = (v - mean) / std`` and mapped to
    ``z * 30 + 35``, clamped to ``[0, 100]``; a value equal to ``mean``
    therefore maps to 35.

    Args:
        values (list): List of values to normalize
        mean (float): Mean value for normalization
        std (float): Standard deviation for normalization

    Returns:
        list: Normalized values scaled to 0-100 range.  When ``std`` is 0
        every positive value maps to 50 and every other value to 0.
    """
    if std == 0:
        # All values are identical: z-scores are undefined, so use a flat bar.
        return [50 if v > 0 else 0 for v in values]
    z_scores = [(v - mean) / std for v in values]
    return [max(0, min(100, (z * 30) + 35)) for z in z_scores]


def simplify_model_name(name):
    """Shorten a model name to at most four ``-``-separated parts.

    Names with more than four parts are truncated and suffixed with
    ``-...``; the thinking variant of claude-3-7-sonnet gets a fixed alias.
    """
    if name == "claude-3-7-sonnet-20250219(thinking)":
        name = "claude-3-7-thinking"
    parts = name.split('-')
    return '-'.join(parts[:4]) + '-...' if len(parts) > 4 else name


def _scores_as_float(series, missing):
    """Return ``series`` as floats with the ``"n/a"`` marker replaced by ``missing``."""
    return series.replace("n/a", missing).infer_objects(copy=False).astype(float)


def _add_normalized_columns(df, game_cols, reference=None):
    """Add a ``norm_<col>`` column to ``df`` (in place) for every game column.

    Mean and standard deviation come from ``reference`` when given (so one
    player can be scored against the whole leaderboard), else from ``df``.
    """
    for col in game_cols:
        vals = _scores_as_float(df[col], 0)
        basis = vals if reference is None else _scores_as_float(reference[col], 0)
        df[f"norm_{col}"] = normalize_values(vals, basis.mean(), basis.std())


def _closed(points):
    """Return ``points`` with its first element repeated at the end.

    Radar traces need the first point again to close the polygon.  An empty
    input stays empty instead of raising.
    """
    points = list(points)
    return points + points[:1]


def _side_legend(font_size, x):
    """Build the legend settings shared by the charts with a right-hand legend."""
    return dict(
        font=dict(size=font_size),
        itemsizing='trace',
        x=x,
        y=1,
        xanchor='left',
        yanchor='top',
        bgcolor='rgba(255,255,255,0.6)',
        bordercolor='gray',
        borderwidth=1
    )


def _apply_radar_layout(fig, title_text):
    """Apply the common 800x600 radar layout with a 0-100 radial axis."""
    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        margin=dict(l=80, r=150, t=20, b=20),
        title=dict(text=title_text, pad=dict(t=10)),
        polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
        legend=_side_legend(9, 1.4)
    )
    return fig


def _add_plain_radar_traces(fig, df, game_cols, categories, name_col):
    """Add one default-colored radar trace per row of ``df``, named by ``name_col``."""
    for _, row in df.iterrows():
        r = [row[f"norm_{col}"] for col in game_cols]
        fig.add_trace(go.Scatterpolar(
            r=_closed(r),
            theta=_closed(categories),
            mode='lines+markers',
            fill='toself',
            name=row[name_col]
        ))


def _max_sokoban_level(levels_str):
    """Return the highest level in a ``;``-separated level string, or 0.

    Anything that cannot be parsed (non-string cell, non-integer token,
    empty string) counts as 0 so one bad cell does not break the chart.
    """
    try:
        levels = [int(x.strip()) for x in levels_str.split(";") if x.strip()]
    except (AttributeError, TypeError, ValueError):
        return 0
    return max(levels) if levels else 0


def _wrap_game_label(game):
    """Split a long multi-word game name over two lines for axis labels."""
    if len(game) > 10 and ' ' in game:
        parts = game.split(' ')
        midpoint = len(parts) // 2
        # Plotly renders "<br>" (not "\n") as a line break in tick labels.
        return ' '.join(parts[:midpoint]) + '<br>' + ' '.join(parts[midpoint:])
    return game


def create_horizontal_bar_chart(df, game_name):
    """Build a horizontal bar chart of one game's scores, lowest first.

    Args:
        df: Leaderboard frame with ``Player``, ``Organization`` and the
            game's score column (see ``GAME_SCORE_COLUMNS``).
        game_name: One of the keys of ``GAME_SCORE_COLUMNS``.

    Returns:
        The figure, or ``None`` when ``game_name`` is not a known game.
        ``df`` itself is not modified.
    """
    score_col = GAME_SCORE_COLUMNS.get(game_name)
    if score_col is None:
        return None

    if game_name == "Sokoban":
        # Sokoban stores a ";"-separated list of levels; rank by the highest.
        # assign() works on a new frame so the caller's df is left untouched.
        df = df.assign(**{'Max Level': df['Levels Cracked'].apply(_max_sokoban_level)})
        score_col = 'Max Level'

    df_sorted = df.sort_values(by=score_col, ascending=True)

    x = df_sorted[score_col]
    y = [
        f"{player} [{org}]"
        for player, org in zip(df_sorted['Player'], df_sorted['Organization'])
    ]
    colors = [MODEL_COLORS.get(player, _FALLBACK_COLOR) for player in df_sorted['Player']]
    # Candy Crush is an average, so keep one decimal; other games are integers.
    texts = [f"{v:.1f}" if game_name == "Candy Crush" else f"{int(v)}" for v in x]

    fig = go.Figure(go.Bar(
        x=x,
        y=y,
        orientation='h',
        marker_color=colors,
        text=texts,
        textposition='auto',
        hovertemplate='%{y}<br>Score: %{x}'
    ))

    fig.update_layout(
        autosize=False,
        width=1000,
        height=600,
        margin=dict(l=200, r=200, t=20, b=20),
        title=dict(
            text=f"{game_name} Performance",
            pad=dict(t=10),
            font=dict(size=20)
        ),
        yaxis=dict(automargin=True),
        legend=_side_legend(12, 1.1)
    )
    return fig


def create_radar_charts(df):
    """Build a radar chart with one trace per player over all ``* Score`` columns.

    Scores are normalized per game; ``"n/a"`` counts as 0.  ``df`` is copied,
    so the caller's frame is not modified.
    """
    df = df.copy()
    # Skip any pre-existing "norm_<game> Score" columns: they also end in " Score".
    game_cols = [
        c for c in df.columns
        if c.endswith(" Score") and not c.startswith("norm_")
    ]
    categories = [c.replace(" Score", "") for c in game_cols]

    _add_normalized_columns(df, game_cols)

    fig = go.Figure()
    for _, row in df.iterrows():
        player = row["Player"]
        r = [row[f"norm_{c}"] for c in game_cols]
        color = MODEL_COLORS.get(player, _FALLBACK_COLOR)  # fallback to gray
        fig.add_trace(go.Scatterpolar(
            r=_closed(r),
            theta=_closed(categories),
            mode='lines+markers',
            fill='toself',
            name=player,
            line=dict(color=color, width=2),
            marker=dict(color=color),
            fillcolor=color + '33',  # add transparency to fill (33 = ~20% opacity)
            opacity=0.8
        ))

    return _apply_radar_layout(fig, "Radar Chart of AI Performance (Normalized)")


def get_combined_leaderboard_with_radar(rank_data, selected_games):
    """Return the combined leaderboard frame and its per-player radar chart."""
    df = get_combined_leaderboard(rank_data, selected_games)
    # Create a copy for visualization to avoid modifying the original
    df_viz = df.copy()
    return df, create_radar_charts(df_viz)


def create_group_bar_chart(df):
    """Build a grouped bar chart: games on the x-axis, one bar per player.

    Scores are normalized per game over the players that have a score for
    that game; a missing score is drawn as 0.  Players with no score at all
    are left out.  ``df`` is copied, so the caller's frame is not modified.
    """
    df = df.copy()

    game_cols = {}
    for game in GAME_ORDER:
        col = f"{game} Score"
        if col in df.columns:
            df[col] = _scores_as_float(df[col], np.nan)
            if df[col].notna().any():
                game_cols[game] = col

    if not game_cols:
        return go.Figure().update_layout(title="No data available")

    # Drop players with no data; copy so the column assignments below are safe.
    df = df.dropna(subset=list(game_cols.values()), how='all').copy()

    # Normalize scores per game
    for col in game_cols.values():
        valid = df[col].dropna()
        norm_col = f"norm_{col}"
        df[norm_col] = np.nan
        if not valid.empty:
            df.loc[valid.index, norm_col] = normalize_values(
                valid, valid.mean(), valid.std()
            )

    # Build consistent game order (X-axis)
    sorted_games = [game for game in GAME_ORDER if f"norm_{game} Score" in df.columns]
    # Mapping from original to display names (long names get a line break)
    game_display_map = {game: _wrap_game_label(game) for game in sorted_games}
    x_labels = [game_display_map[game] for game in sorted_games]

    # Group models by prefix, then sort alphabetically
    model_groups = {}
    for player in df["Player"].unique():
        model_groups.setdefault(get_model_prefix(player), []).append(player)
    ordered_players = []
    for prefix in sorted(model_groups):
        ordered_players.extend(sorted(model_groups[prefix]))

    # Create one trace per player
    fig = go.Figure()
    for player in ordered_players:
        rows = df[df["Player"] == player]
        if rows.empty:
            continue
        row = rows.iloc[0]

        raw_vals = [row.get(f"norm_{game} Score", np.nan) for game in sorted_games]
        if all(pd.isna(val) for val in raw_vals):
            continue
        y_vals = [0 if pd.isna(val) else val for val in raw_vals]

        fig.add_trace(go.Bar(
            name=row["Player"],
            x=x_labels,
            y=y_vals,
            marker_color=MODEL_COLORS.get(player, _FALLBACK_COLOR),
            hovertemplate="%{fullData.name}<br>Score: %{y:.1f}"
        ))

    fig.update_layout(
        autosize=False,
        width=1000,
        height=800,
        margin=dict(l=200, r=200, t=20, b=20),
        title=dict(
            text="Grouped Bar Chart of AI Models (Consistent Trace Grouping)",
            pad=dict(t=10)
        ),
        xaxis_title="Games",
        yaxis_title="Normalized Score",
        xaxis=dict(
            categoryorder='array',
            categoryarray=x_labels,
            tickangle=0  # Keep text horizontal since we're using line breaks
        ),
        barmode='group',
        bargap=0.2,  # Gap between game categories
        bargroupgap=0.05,  # Gap between bars in a group
        uniformtext=dict(mode='hide', minsize=8),  # Hide text that doesn't fit
        legend=_side_legend(12, 1.1)
    )
    return fig


def get_combined_leaderboard_with_group_bar(rank_data, selected_games):
    """Return the combined leaderboard frame and its grouped bar chart."""
    df = get_combined_leaderboard(rank_data, selected_games)
    # Create a copy for visualization to avoid modifying the original
    df_viz = df.copy()
    return df, create_group_bar_chart(df_viz)


def hex_to_rgba(hex_color, alpha=0.2):
    """Convert a ``#rrggbb`` color to an ``rgba(r, g, b, alpha)`` CSS string."""
    hex_color = hex_color.lstrip('#')
    r = int(hex_color[0:2], 16)
    g = int(hex_color[2:4], 16)
    b = int(hex_color[4:6], 16)
    return f'rgba({r}, {g}, {b}, {alpha})'


def create_single_radar_chart(df, selected_games=None, highlight_models=None):
    """Build the main radar chart with one trace per player.

    Args:
        df: Combined leaderboard with ``Player`` and a ``<game> Score``
            column for every selected game.  It is copied, not modified.
        selected_games: Game names to use as axes; defaults to a built-in
            five-game list.
        highlight_models: Optional collection of player names to draw in red
            with a thicker line.

    Returns:
        The figure.  Clicking a legend entry isolates that model;
        double-clicking toggles it.
    """
    if selected_games is None:
        selected_games = ['Super Mario Bros', '2048', 'Candy Crush', 'Sokoban', 'Ace Attorney']

    df = df.copy()

    # Axis labels: only Super Mario Bros gets a simplified name.
    categories = [
        'Super Mario' if game == 'Super Mario Bros' else game
        for game in selected_games
    ]
    game_cols = [f"{game} Score" for game in selected_games]

    # Normalize
    _add_normalized_columns(df, game_cols)

    # Group players by prefix; groups and members sorted case-insensitively
    model_groups = {}
    for player in df["Player"]:
        model_groups.setdefault(get_model_prefix(player), []).append(player)
    grouped_players = []
    for prefix in sorted(model_groups, key=str.lower):
        grouped_players.extend(sorted(model_groups[prefix], key=str.lower))

    fig = go.Figure()
    for player in grouped_players:
        rows = df[df["Player"] == player]
        if rows.empty:
            continue
        row = rows.iloc[0]

        is_highlighted = bool(highlight_models) and player in highlight_models
        if is_highlighted:
            color = 'red'
            fillcolor = 'rgba(255, 0, 0, 0.4)'
        else:
            color = MODEL_COLORS.get(player, _FALLBACK_COLOR)
            fillcolor = hex_to_rgba(color, 0.2)

        r = [row[f"norm_{col}"] for col in game_cols]

        fig.add_trace(go.Scatterpolar(
            r=_closed(r),
            theta=_closed(categories),
            mode='lines+markers',
            fill='toself',
            name=player.lower(),  # Use lowercase name in legend
            line=dict(color=color, width=6 if is_highlighted else 2),
            marker=dict(color=color, size=10 if is_highlighted else 6),
            fillcolor=fillcolor,
            opacity=1.0 if is_highlighted else 0.7,
            hovertemplate='%{fullData.name}<br>Game: %{theta}<br>Score: %{r:.1f}'
        ))

    fig.update_layout(
        autosize=False,
        width=1000,
        height=620,  # Increased height to accommodate legend
        margin=dict(l=400, r=200, t=20, b=20),
        title=dict(
            text="AI Normalized Performance Across Games",
            x=0.5,
            xanchor='center',
            yanchor='top',
            y=0.95,
            font=dict(size=20),
            pad=dict(b=20)
        ),
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],
                tickangle=45,
                tickfont=dict(size=12),
                gridcolor='lightgray',
                gridwidth=1,
                angle=45
            ),
            angularaxis=dict(
                tickfont=dict(size=14, weight='bold'),
                tickangle=0
            )
        ),
        legend=dict(
            font=dict(size=12),
            title="Choose your model 💡 (click / double-click)",
            itemsizing='trace',
            x=-1.4,  # Moved further left
            y=0.8,  # Moved to top
            yanchor='top',
            xanchor='left',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1,
            itemclick="toggleothers",  # This will make clicked item the only visible one
            itemdoubleclick="toggle"  # Double click toggles visibility
        )
    )
    return fig


def get_combined_leaderboard_with_single_radar(rank_data, selected_games, highlight_models=None):
    """Return the combined leaderboard frame and the main radar chart for it.

    ``selected_games`` maps game name to a truthy flag; only the flagged
    games become radar axes.
    """
    df = get_combined_leaderboard(rank_data, selected_games)
    selected_game_names = [g for g, sel in selected_games.items() if sel]
    # Create a copy for visualization to avoid modifying the original
    df_viz = df.copy()
    return df, create_single_radar_chart(df_viz, selected_game_names, highlight_models)


def create_organization_radar_chart(rank_data):
    """Build a radar chart of each organization's mean score per game.

    Per-organization means (``"n/a"`` counted as 0) are normalized across
    organizations before plotting.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    orgs = df["Organization"].unique()
    game_cols = [f"{g} Score" for g in GAME_ORDER if f"{g} Score" in df.columns]
    categories = [g.replace(" Score", "") for g in game_cols]

    rows = []
    for org in orgs:
        org_df = df[df["Organization"] == org]
        record = {col: _scores_as_float(org_df[col], 0).mean() for col in game_cols}
        record["Organization"] = org
        rows.append(record)
    # Explicit columns keep the frame well-formed when there are no organizations.
    avg_df = pd.DataFrame(rows, columns=[*game_cols, "Organization"])

    _add_normalized_columns(avg_df, game_cols)

    fig = go.Figure()
    _add_plain_radar_traces(fig, avg_df, game_cols, categories, "Organization")
    return _apply_radar_layout(fig, "Radar Chart: Organization Performance (Normalized)")


def create_top_players_radar_chart(rank_data, n=5):
    """Build a radar chart of the first ``n`` players of the combined leaderboard.

    Scores are normalized among those players only; ``"n/a"`` counts as 0.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    top_players = df.head(n)["Player"].tolist()
    # Copy: columns are added below, which must not target a slice of df.
    top_df = df[df["Player"].isin(top_players)].copy()

    game_cols = [f"{g} Score" for g in GAME_ORDER if f"{g} Score" in df.columns]
    categories = [g.replace(" Score", "") for g in game_cols]

    _add_normalized_columns(top_df, game_cols)

    fig = go.Figure()
    _add_plain_radar_traces(fig, top_df, game_cols, categories, "Player")
    return _apply_radar_layout(fig, f"Top {n} Players Radar Chart (Normalized)")


def create_player_radar_chart(rank_data, player_name):
    """Build a radar chart for one player, scored against the whole leaderboard.

    Returns a small placeholder figure titled "Player not found" when
    ``player_name`` is not in the combined leaderboard.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    # Copy: columns are added below, which must not target a slice of df.
    player_df = df[df["Player"] == player_name].copy()

    if player_df.empty:
        return go.Figure().update_layout(
            title=dict(text="Player not found", pad=dict(t=10)),
            autosize=False,
            width=800,
            height=400
        )

    game_cols = [f"{g} Score" for g in GAME_ORDER if f"{g} Score" in df.columns]
    categories = [g.replace(" Score", "") for g in game_cols]

    # Mean/std come from the full leaderboard, not just this player's row.
    _add_normalized_columns(player_df, game_cols, reference=df)

    fig = go.Figure()
    _add_plain_radar_traces(fig, player_df, game_cols, categories, "Player")
    return _apply_radar_layout(fig, f"{player_name} Radar Chart (Normalized)")


def save_visualization(fig, filename):
    """Write ``fig`` to ``filename`` as a static image via ``fig.write_image``."""
    fig.write_image(filename)