Yuxuan-Zhang-Dexter committed on
Commit
f589e51
·
1 Parent(s): 93c11f0

update ace attorney game in the gradio app

Browse files
app.py CHANGED
@@ -16,6 +16,7 @@ from leaderboard_utils import (
16
  get_candy_leaderboard,
17
  get_tetris_leaderboard,
18
  get_tetris_planning_leaderboard,
 
19
  get_combined_leaderboard,
20
  GAME_ORDER
21
  )
@@ -54,7 +55,8 @@ leaderboard_state = {
54
  "2048": True,
55
  "Candy Crash": True,
56
  "Tetris (complete)": True,
57
- "Tetris (planning only)": True
 
58
  },
59
  "previous_details": {
60
  "Super Mario Bros": False,
@@ -62,7 +64,8 @@ leaderboard_state = {
62
  "2048": False,
63
  "Candy Crash": False,
64
  "Tetris (complete)": False,
65
- "Tetris (planning only)": False
 
66
  }
67
  }
68
 
@@ -160,7 +163,8 @@ def update_leaderboard(mario_overall, mario_details,
160
  _2048_overall, _2048_details,
161
  candy_overall, candy_details,
162
  tetris_overall, tetris_details,
163
- tetris_plan_overall, tetris_plan_details):
 
164
  global leaderboard_state
165
 
166
  # Convert current checkbox states to dictionary for easier comparison
@@ -170,7 +174,8 @@ def update_leaderboard(mario_overall, mario_details,
170
  "2048": _2048_overall,
171
  "Candy Crash": candy_overall,
172
  "Tetris (complete)": tetris_overall,
173
- "Tetris (planning only)": tetris_plan_overall
 
174
  }
175
 
176
  current_details = {
@@ -179,7 +184,8 @@ def update_leaderboard(mario_overall, mario_details,
179
  "2048": _2048_details,
180
  "Candy Crash": candy_details,
181
  "Tetris (complete)": tetris_details,
182
- "Tetris (planning only)": tetris_plan_details
 
183
  }
184
 
185
  # Find which game's state changed
@@ -235,12 +241,11 @@ def update_leaderboard(mario_overall, mario_details,
235
  leaderboard_state["previous_details"][changed_game] = False
236
  if leaderboard_state["current_game"] == changed_game:
237
  leaderboard_state["current_game"] = None
238
- # When exiting details view, reset to show all games
239
- for game in current_overall.keys():
240
- current_overall[game] = True
241
- current_details[game] = False
242
- leaderboard_state["previous_overall"][game] = True
243
- leaderboard_state["previous_details"][game] = False
244
 
245
  # Special case: If all games are selected and we're trying to view details
246
  all_games_selected = all(current_overall.values()) and not any(current_details.values())
@@ -266,7 +271,8 @@ def update_leaderboard(mario_overall, mario_details,
266
  "2048": current_overall["2048"],
267
  "Candy Crash": current_overall["Candy Crash"],
268
  "Tetris (complete)": current_overall["Tetris (complete)"],
269
- "Tetris (planning only)": current_overall["Tetris (planning only)"]
 
270
  }
271
 
272
  # Get the appropriate DataFrame and charts based on current state
@@ -282,8 +288,10 @@ def update_leaderboard(mario_overall, mario_details,
282
  df = get_candy_leaderboard(rank_data)
283
  elif leaderboard_state["current_game"] == "Tetris (complete)":
284
  df = get_tetris_leaderboard(rank_data)
285
- else: # Tetris (planning only)
286
  df = get_tetris_planning_leaderboard(rank_data)
 
 
287
 
288
  # Format the DataFrame for display
289
  display_df = prepare_dataframe_for_display(df, leaderboard_state["current_game"])
@@ -303,21 +311,23 @@ def update_leaderboard(mario_overall, mario_details,
303
  chart = radar_chart
304
  group_bar_chart = radar_chart # Use radar chart instead of bar chart
305
 
306
- # Return exactly 16 values to match the expected outputs
307
  return (update_df_with_height(display_df), chart, radar_chart, radar_chart,
308
  current_overall["Super Mario Bros"], current_details["Super Mario Bros"],
309
  current_overall["Sokoban"], current_details["Sokoban"],
310
  current_overall["2048"], current_details["2048"],
311
  current_overall["Candy Crash"], current_details["Candy Crash"],
312
  current_overall["Tetris (complete)"], current_details["Tetris (complete)"],
313
- current_overall["Tetris (planning only)"], current_details["Tetris (planning only)"])
 
314
 
315
  def update_leaderboard_with_time(time_point, mario_overall, mario_details,
316
  sokoban_overall, sokoban_details,
317
  _2048_overall, _2048_details,
318
  candy_overall, candy_details,
319
  tetris_overall, tetris_details,
320
- tetris_plan_overall, tetris_plan_details):
 
321
  # Load rank data for the selected time point
322
  global rank_data
323
  new_rank_data = load_rank_data(time_point)
@@ -330,7 +340,8 @@ def update_leaderboard_with_time(time_point, mario_overall, mario_details,
330
  _2048_overall, _2048_details,
331
  candy_overall, candy_details,
332
  tetris_overall, tetris_details,
333
- tetris_plan_overall, tetris_plan_details)
 
334
 
335
  def get_initial_state():
336
  """Get the initial state for the leaderboard"""
@@ -342,7 +353,8 @@ def get_initial_state():
342
  "2048": True,
343
  "Candy Crash": True,
344
  "Tetris (complete)": True,
345
- "Tetris (planning only)": True
 
346
  },
347
  "previous_details": {
348
  "Super Mario Bros": False,
@@ -350,7 +362,8 @@ def get_initial_state():
350
  "2048": False,
351
  "Candy Crash": False,
352
  "Tetris (complete)": False,
353
- "Tetris (planning only)": False
 
354
  }
355
  }
356
 
@@ -364,7 +377,8 @@ def clear_filters():
364
  "2048": True,
365
  "Candy Crash": True,
366
  "Tetris (complete)": True,
367
- "Tetris (planning only)": True
 
368
  }
369
 
370
  # Get the combined leaderboard and group bar chart
@@ -386,7 +400,8 @@ def clear_filters():
386
  True, False, # 2048
387
  True, False, # candy
388
  True, False, # tetris
389
- True, False) # tetris plan
 
390
 
391
  def create_timeline_slider():
392
  """Create a custom timeline slider component"""
@@ -874,6 +889,10 @@ def build_app():
874
  gr.Markdown("**📋 Tetris (planning)**")
875
  tetris_plan_overall = gr.Checkbox(label="Tetris (planning) Score", value=True)
876
  tetris_plan_details = gr.Checkbox(label="Tetris (planning) Details", value=False)
 
 
 
 
877
 
878
  # Controls
879
  with gr.Row():
@@ -899,7 +918,8 @@ def build_app():
899
  "2048": True,
900
  "Candy Crash": True,
901
  "Tetris (complete)": True,
902
- "Tetris (planning only)": True
 
903
  })
904
 
905
  # Format the DataFrame for display
@@ -940,7 +960,8 @@ def build_app():
940
  _2048_overall, _2048_details,
941
  candy_overall, candy_details,
942
  tetris_overall, tetris_details,
943
- tetris_plan_overall, tetris_plan_details
 
944
  ]
945
 
946
  # Update visualizations when checkboxes change
@@ -948,7 +969,8 @@ def build_app():
948
  # Check if any details checkbox is selected
949
  is_details_view = any([
950
  checkbox_states[1], checkbox_states[3], checkbox_states[5],
951
- checkbox_states[7], checkbox_states[9], checkbox_states[11]
 
952
  ])
953
 
954
  # Update visibility of visualization blocks
 
16
  get_candy_leaderboard,
17
  get_tetris_leaderboard,
18
  get_tetris_planning_leaderboard,
19
+ get_ace_attorney_leaderboard,
20
  get_combined_leaderboard,
21
  GAME_ORDER
22
  )
 
55
  "2048": True,
56
  "Candy Crash": True,
57
  "Tetris (complete)": True,
58
+ "Tetris (planning only)": True,
59
+ "Ace Attorney": True
60
  },
61
  "previous_details": {
62
  "Super Mario Bros": False,
 
64
  "2048": False,
65
  "Candy Crash": False,
66
  "Tetris (complete)": False,
67
+ "Tetris (planning only)": False,
68
+ "Ace Attorney": False
69
  }
70
  }
71
 
 
163
  _2048_overall, _2048_details,
164
  candy_overall, candy_details,
165
  tetris_overall, tetris_details,
166
+ tetris_plan_overall, tetris_plan_details,
167
+ ace_attorney_overall, ace_attorney_details):
168
  global leaderboard_state
169
 
170
  # Convert current checkbox states to dictionary for easier comparison
 
174
  "2048": _2048_overall,
175
  "Candy Crash": candy_overall,
176
  "Tetris (complete)": tetris_overall,
177
+ "Tetris (planning only)": tetris_plan_overall,
178
+ "Ace Attorney": ace_attorney_overall
179
  }
180
 
181
  current_details = {
 
184
  "2048": _2048_details,
185
  "Candy Crash": candy_details,
186
  "Tetris (complete)": tetris_details,
187
+ "Tetris (planning only)": tetris_plan_details,
188
+ "Ace Attorney": ace_attorney_details
189
  }
190
 
191
  # Find which game's state changed
 
241
  leaderboard_state["previous_details"][changed_game] = False
242
  if leaderboard_state["current_game"] == changed_game:
243
  leaderboard_state["current_game"] = None
244
+ # When exiting details view, only reset the current game's state
245
+ current_overall[changed_game] = True
246
+ current_details[changed_game] = False
247
+ leaderboard_state["previous_overall"][changed_game] = True
248
+ leaderboard_state["previous_details"][changed_game] = False
 
249
 
250
  # Special case: If all games are selected and we're trying to view details
251
  all_games_selected = all(current_overall.values()) and not any(current_details.values())
 
271
  "2048": current_overall["2048"],
272
  "Candy Crash": current_overall["Candy Crash"],
273
  "Tetris (complete)": current_overall["Tetris (complete)"],
274
+ "Tetris (planning only)": current_overall["Tetris (planning only)"],
275
+ "Ace Attorney": current_overall["Ace Attorney"]
276
  }
277
 
278
  # Get the appropriate DataFrame and charts based on current state
 
288
  df = get_candy_leaderboard(rank_data)
289
  elif leaderboard_state["current_game"] == "Tetris (complete)":
290
  df = get_tetris_leaderboard(rank_data)
291
+ elif leaderboard_state["current_game"] == "Tetris (planning only)":
292
  df = get_tetris_planning_leaderboard(rank_data)
293
+ elif leaderboard_state["current_game"] == "Ace Attorney":
294
+ df = get_ace_attorney_leaderboard(rank_data)
295
 
296
  # Format the DataFrame for display
297
  display_df = prepare_dataframe_for_display(df, leaderboard_state["current_game"])
 
311
  chart = radar_chart
312
  group_bar_chart = radar_chart # Use radar chart instead of bar chart
313
 
314
+ # Return exactly 18 values to match the expected outputs
315
  return (update_df_with_height(display_df), chart, radar_chart, radar_chart,
316
  current_overall["Super Mario Bros"], current_details["Super Mario Bros"],
317
  current_overall["Sokoban"], current_details["Sokoban"],
318
  current_overall["2048"], current_details["2048"],
319
  current_overall["Candy Crash"], current_details["Candy Crash"],
320
  current_overall["Tetris (complete)"], current_details["Tetris (complete)"],
321
+ current_overall["Tetris (planning only)"], current_details["Tetris (planning only)"],
322
+ current_overall["Ace Attorney"], current_details["Ace Attorney"])
323
 
324
  def update_leaderboard_with_time(time_point, mario_overall, mario_details,
325
  sokoban_overall, sokoban_details,
326
  _2048_overall, _2048_details,
327
  candy_overall, candy_details,
328
  tetris_overall, tetris_details,
329
+ tetris_plan_overall, tetris_plan_details,
330
+ ace_attorney_overall, ace_attorney_details):
331
  # Load rank data for the selected time point
332
  global rank_data
333
  new_rank_data = load_rank_data(time_point)
 
340
  _2048_overall, _2048_details,
341
  candy_overall, candy_details,
342
  tetris_overall, tetris_details,
343
+ tetris_plan_overall, tetris_plan_details,
344
+ ace_attorney_overall, ace_attorney_details)
345
 
346
  def get_initial_state():
347
  """Get the initial state for the leaderboard"""
 
353
  "2048": True,
354
  "Candy Crash": True,
355
  "Tetris (complete)": True,
356
+ "Tetris (planning only)": True,
357
+ "Ace Attorney": True
358
  },
359
  "previous_details": {
360
  "Super Mario Bros": False,
 
362
  "2048": False,
363
  "Candy Crash": False,
364
  "Tetris (complete)": False,
365
+ "Tetris (planning only)": False,
366
+ "Ace Attorney": False
367
  }
368
  }
369
 
 
377
  "2048": True,
378
  "Candy Crash": True,
379
  "Tetris (complete)": True,
380
+ "Tetris (planning only)": True,
381
+ "Ace Attorney": True
382
  }
383
 
384
  # Get the combined leaderboard and group bar chart
 
400
  True, False, # 2048
401
  True, False, # candy
402
  True, False, # tetris
403
+ True, False, # tetris plan
404
+ True, False) # ace attorney
405
 
406
  def create_timeline_slider():
407
  """Create a custom timeline slider component"""
 
889
  gr.Markdown("**📋 Tetris (planning)**")
890
  tetris_plan_overall = gr.Checkbox(label="Tetris (planning) Score", value=True)
891
  tetris_plan_details = gr.Checkbox(label="Tetris (planning) Details", value=False)
892
+ with gr.Column():
893
+ gr.Markdown("**⚖️ Ace Attorney**")
894
+ ace_attorney_overall = gr.Checkbox(label="Ace Attorney Score", value=True)
895
+ ace_attorney_details = gr.Checkbox(label="Ace Attorney Details", value=False)
896
 
897
  # Controls
898
  with gr.Row():
 
918
  "2048": True,
919
  "Candy Crash": True,
920
  "Tetris (complete)": True,
921
+ "Tetris (planning only)": True,
922
+ "Ace Attorney": True
923
  })
924
 
925
  # Format the DataFrame for display
 
960
  _2048_overall, _2048_details,
961
  candy_overall, candy_details,
962
  tetris_overall, tetris_details,
963
+ tetris_plan_overall, tetris_plan_details,
964
+ ace_attorney_overall, ace_attorney_details
965
  ]
966
 
967
  # Update visualizations when checkboxes change
 
969
  # Check if any details checkbox is selected
970
  is_details_view = any([
971
  checkbox_states[1], checkbox_states[3], checkbox_states[5],
972
+ checkbox_states[7], checkbox_states[9], checkbox_states[11],
973
+ checkbox_states[13] # Ace Attorney details checkbox
974
  ])
975
 
976
  # Update visibility of visualization blocks
assets/game_video_link.json CHANGED
@@ -1,6 +1,7 @@
1
- {
2
  "sokoban": "https://www.youtube.com/watch?v=59enV32MBUE",
3
  "super_mario": "https://www.youtube.com/watch?v=nixMIJZYAgg",
4
  "2048": "https://www.youtube.com/watch?v=3aYDCSa3AWI",
5
- "candy": "https://www.youtube.com/watch?v=b-Uyz3W4yIg"
 
6
  }
 
1
+ {
2
  "sokoban": "https://www.youtube.com/watch?v=59enV32MBUE",
3
  "super_mario": "https://www.youtube.com/watch?v=nixMIJZYAgg",
4
  "2048": "https://www.youtube.com/watch?v=3aYDCSa3AWI",
5
+ "candy": "https://www.youtube.com/watch?v=b-Uyz3W4yIg",
6
+ "ace_attorney": "https://www.youtube.com/watch?v=q8PMW870yp8"
7
  }
assets/model_color.json CHANGED
@@ -1,17 +1,18 @@
1
  {
2
- "claude-3-7-sonnet-20250219": "#4A90E2",
3
  "claude-3-7-sonnet-20250219(thinking)": "#2E5C8A",
4
  "claude-3-5-haiku-20241022": "#7FB5E6",
5
- "claude-3-5-sonnet-20241022": "#1A4C7C",
6
  "gemini-2.0-flash": "#FF4081",
7
  "gemini-2.0-flash-thinking-exp-1219": "#C2185B",
8
- "gemini-2.5-pro-exp-03-25": "#FF80AB",
9
- "gpt-4o-2024-11-20": "#00BFA5",
10
- "gpt-4.5-preview-2025-02-27": "#00796B",
 
11
  "o1-2024-12-17": "#4DB6AC",
12
- "o1-mini-2024-09-12": "#26A69A",
13
  "o3-mini-2025-01-31(medium)": "#80CBC4",
14
  "deepseek-v3": "#FFC107",
15
- "deepseek-r1": "#FFA000",
16
  "Llama-4-Maverick-17B-128E-Instruct-FP8": "#8E24AA"
17
  }
 
1
  {
2
+ "claude-3-7-sonnet-20250219": "#4A90E2",
3
  "claude-3-7-sonnet-20250219(thinking)": "#2E5C8A",
4
  "claude-3-5-haiku-20241022": "#7FB5E6",
5
+ "claude-3-5-sonnet-20241022": "#1A4C7C",
6
  "gemini-2.0-flash": "#FF4081",
7
  "gemini-2.0-flash-thinking-exp-1219": "#C2185B",
8
+ "gemini-2.5-pro-exp-03-25": "#FF80AB",
9
+ "gpt-4o-2024-11-20": "#00BFA5",
10
+ "gpt-4.5-preview-2025-02-27": "#00796B",
11
+ "gpt-4.1-2025-04-14": "#00897B",
12
  "o1-2024-12-17": "#4DB6AC",
13
+ "o1-mini-2024-09-12": "#26A69A",
14
  "o3-mini-2025-01-31(medium)": "#80CBC4",
15
  "deepseek-v3": "#FFC107",
16
+ "deepseek-r1": "#FFA000",
17
  "Llama-4-Maverick-17B-128E-Instruct-FP8": "#8E24AA"
18
  }
assets/news.json CHANGED
@@ -1,5 +1,11 @@
1
  {
2
  "news": [
 
 
 
 
 
 
3
  {
4
  "date": "2025-04-08",
5
  "video_link": "https://www.youtube.com/watch?v=yoEo2Bk7PGA",
 
1
  {
2
  "news": [
3
+ {
4
+ "date": "2025-04-15",
5
+ "video_link": "https://www.youtube.com/watch?v=q8PMW870yp8",
6
+ "twitter_text": "Ace Attorney AI Revolution: O1 & Gemini 2.5 Pro lead in courtroom reasoning, while GPT-4.1 matches older models. Cost analysis reveals Gemini 2.5 Pro's 6-15x efficiency over O1.",
7
+ "twitter_link": "https://x.com/haoailab"
8
+ },
9
  {
10
  "date": "2025-04-08",
11
  "video_link": "https://www.youtube.com/watch?v=yoEo2Bk7PGA",
data_visualization.py CHANGED
@@ -24,7 +24,8 @@ GAME_SCORE_COLUMNS = {
24
  "2048": "Score",
25
  "Candy Crash": "Average Score",
26
  "Tetris (complete)": "Score",
27
- "Tetris (planning only)": "Score"
 
28
  }
29
  def get_model_prefix(name):
30
  return name.split('-')[0]
@@ -81,6 +82,9 @@ def create_horizontal_bar_chart(df, game_name):
81
  elif game_name in ["Tetris (complete)", "Tetris (planning only)"]:
82
  score_col = "Score"
83
  df_sorted = df.sort_values(by=score_col, ascending=True)
 
 
 
84
  else:
85
  return None
86
 
@@ -315,7 +319,7 @@ def hex_to_rgba(hex_color, alpha=0.2):
315
 
316
  def create_single_radar_chart(df, selected_games=None, highlight_models=None):
317
  if selected_games is None:
318
- selected_games = ['Super Mario Bros', '2048', 'Candy Crash', 'Sokoban']
319
 
320
  # Format game names
321
  formatted_games = []
 
24
  "2048": "Score",
25
  "Candy Crash": "Average Score",
26
  "Tetris (complete)": "Score",
27
+ "Tetris (planning only)": "Score",
28
+ "Ace Attorney": "Score"
29
  }
30
  def get_model_prefix(name):
31
  return name.split('-')[0]
 
82
  elif game_name in ["Tetris (complete)", "Tetris (planning only)"]:
83
  score_col = "Score"
84
  df_sorted = df.sort_values(by=score_col, ascending=True)
85
+ elif game_name == "Ace Attorney":
86
+ score_col = "Score"
87
+ df_sorted = df.sort_values(by=score_col, ascending=True)
88
  else:
89
  return None
90
 
 
319
 
320
  def create_single_radar_chart(df, selected_games=None, highlight_models=None):
321
  if selected_games is None:
322
+ selected_games = ['Super Mario Bros', '2048', 'Candy Crash', 'Sokoban', 'Ace Attorney']
323
 
324
  # Format game names
325
  formatted_games = []
leaderboard_utils.py CHANGED
@@ -9,7 +9,8 @@ GAME_ORDER = [
9
  "2048",
10
  "Candy Crash",
11
  "Tetris (complete)",
12
- "Tetris (planning only)"
 
13
  ]
14
 
15
  def get_organization(model_name):
@@ -102,6 +103,21 @@ def get_tetris_planning_leaderboard(rank_data):
102
  df = df[["Player", "Organization", "Score", "Steps"]]
103
  return df
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def calculate_rank_and_completeness(rank_data, selected_games):
106
  # Dictionary to store DataFrames for each game
107
  game_dfs = {}
@@ -119,6 +135,8 @@ def calculate_rank_and_completeness(rank_data, selected_games):
119
  game_dfs["Tetris (complete)"] = get_tetris_leaderboard(rank_data)
120
  if selected_games.get("Tetris (planning only)"):
121
  game_dfs["Tetris (planning only)"] = get_tetris_planning_leaderboard(rank_data)
 
 
122
 
123
  # Get all unique players
124
  all_players = set()
@@ -165,10 +183,10 @@ def calculate_rank_and_completeness(rank_data, selected_games):
165
  elif game == "Candy Crash":
166
  player_score = df[df["Player"] == player]["Average Score"].iloc[0]
167
  rank = len(df[df["Average Score"] > player_score]) + 1
168
- elif game == "Tetris (complete)":
169
  player_score = df[df["Player"] == player]["Score"].iloc[0]
170
  rank = len(df[df["Score"] > player_score]) + 1
171
- elif game == "Tetris (planning only)":
172
  player_score = df[df["Player"] == player]["Score"].iloc[0]
173
  rank = len(df[df["Score"] > player_score]) + 1
174
 
@@ -227,6 +245,8 @@ def get_combined_leaderboard(rank_data, selected_games):
227
  game_dfs["Tetris (complete)"] = get_tetris_leaderboard(rank_data)
228
  if selected_games.get("Tetris (planning only)"):
229
  game_dfs["Tetris (planning only)"] = get_tetris_planning_leaderboard(rank_data)
 
 
230
 
231
  # Get all unique players
232
  all_players = set()
@@ -263,6 +283,8 @@ def get_combined_leaderboard(rank_data, selected_games):
263
  player_data[f"{game} Score"] = df[df["Player"] == player]["Average Score"].iloc[0]
264
  elif game in ["Tetris (complete)", "Tetris (planning only)"]:
265
  player_data[f"{game} Score"] = df[df["Player"] == player]["Score"].iloc[0]
 
 
266
  else:
267
  player_data[f"{game} Score"] = 'n/a'
268
 
 
9
  "2048",
10
  "Candy Crash",
11
  "Tetris (complete)",
12
+ "Tetris (planning only)",
13
+ "Ace Attorney"
14
  ]
15
 
16
  def get_organization(model_name):
 
103
  df = df[["Player", "Organization", "Score", "Steps"]]
104
  return df
105
 
106
+ def get_ace_attorney_leaderboard(rank_data):
107
+ data = rank_data.get("Ace Attorney", {}).get("results", [])
108
+ df = pd.DataFrame(data)
109
+ df = df.rename(columns={
110
+ "model": "Player",
111
+ "levels_cracked": "Levels Cracked",
112
+ "lives_left": "Lives Left",
113
+ "cracked_details": "Progress",
114
+ "score": "Score",
115
+ "note": "Notes"
116
+ })
117
+ df["Organization"] = df["Player"].apply(get_organization)
118
+ df = df[["Player", "Organization", "Levels Cracked", "Lives Left", "Progress", "Score", "Notes"]]
119
+ return df
120
+
121
  def calculate_rank_and_completeness(rank_data, selected_games):
122
  # Dictionary to store DataFrames for each game
123
  game_dfs = {}
 
135
  game_dfs["Tetris (complete)"] = get_tetris_leaderboard(rank_data)
136
  if selected_games.get("Tetris (planning only)"):
137
  game_dfs["Tetris (planning only)"] = get_tetris_planning_leaderboard(rank_data)
138
+ if selected_games.get("Ace Attorney"):
139
+ game_dfs["Ace Attorney"] = get_ace_attorney_leaderboard(rank_data)
140
 
141
  # Get all unique players
142
  all_players = set()
 
183
  elif game == "Candy Crash":
184
  player_score = df[df["Player"] == player]["Average Score"].iloc[0]
185
  rank = len(df[df["Average Score"] > player_score]) + 1
186
+ elif game in ["Tetris (complete)", "Tetris (planning only)"]:
187
  player_score = df[df["Player"] == player]["Score"].iloc[0]
188
  rank = len(df[df["Score"] > player_score]) + 1
189
+ elif game == "Ace Attorney":
190
  player_score = df[df["Player"] == player]["Score"].iloc[0]
191
  rank = len(df[df["Score"] > player_score]) + 1
192
 
 
245
  game_dfs["Tetris (complete)"] = get_tetris_leaderboard(rank_data)
246
  if selected_games.get("Tetris (planning only)"):
247
  game_dfs["Tetris (planning only)"] = get_tetris_planning_leaderboard(rank_data)
248
+ if selected_games.get("Ace Attorney"):
249
+ game_dfs["Ace Attorney"] = get_ace_attorney_leaderboard(rank_data)
250
 
251
  # Get all unique players
252
  all_players = set()
 
283
  player_data[f"{game} Score"] = df[df["Player"] == player]["Average Score"].iloc[0]
284
  elif game in ["Tetris (complete)", "Tetris (planning only)"]:
285
  player_data[f"{game} Score"] = df[df["Player"] == player]["Score"].iloc[0]
286
+ elif game == "Ace Attorney":
287
+ player_data[f"{game} Score"] = df[df["Player"] == player]["Score"].iloc[0]
288
  else:
289
  player_data[f"{game} Score"] = 'n/a'
290
 
rank_data_03_25_2025.json CHANGED
@@ -236,7 +236,7 @@
236
  "score_runs": "0;0;0",
237
  "average_score": 0,
238
  "steps": 25,
239
- "rank":9
240
  },
241
  {
242
  "model": "Llama-4-Maverick-17B-128E-Instruct-FP8",
@@ -320,5 +320,82 @@
320
  "rank": 11
321
  }
322
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  }
324
  }
 
236
  "score_runs": "0;0;0",
237
  "average_score": 0,
238
  "steps": 25,
239
+ "rank": 9
240
  },
241
  {
242
  "model": "Llama-4-Maverick-17B-128E-Instruct-FP8",
 
320
  "rank": 11
321
  }
322
  ]
323
+ },
324
+ "Ace Attorney": {
325
+ "runs": 2,
326
+ "results": [
327
+ {
328
+ "model": "o1-2024-12-17",
329
+ "levels_cracked": "3; 3",
330
+ "lives_left": "[5, 3, 3, 0],[4, 5, 3, 0]",
331
+ "cracked_details": "4: 7/8",
332
+ "rank": 1,
333
+ "score": 26,
334
+ "note": "stuck at the end not present evidence"
335
+ },
336
+ {
337
+ "model": "gemini-2.5-pro-exp-03-25",
338
+ "levels_cracked": "2; 3",
339
+ "lives_left": "[5,5,0]; [5, 5, 4, 0]",
340
+ "cracked_details": "4: 0/8",
341
+ "rank": 2,
342
+ "score": 20,
343
+ "note": "failed to present evidence"
344
+ },
345
+ {
346
+ "model": "claude-3-7-sonnet-20250219(thinking)",
347
+ "levels_cracked": "1; 1",
348
+ "lives_left": "[3,0]; [5,0]",
349
+ "cracked_details": "2: 3/9",
350
+ "rank": 3,
351
+ "score": 8,
352
+ "note": "failed to present evidence"
353
+ },
354
+ {
355
+ "model": "claude-3-5-sonnet-20241022",
356
+ "levels_cracked": "1",
357
+ "lives_left": "5, 5",
358
+ "cracked_details": "1:1/8",
359
+ "rank": 4,
360
+ "score": 6,
361
+ "note": "stuck in loop"
362
+ },
363
+ {
364
+ "model": "gpt-4.1-2025-04-14",
365
+ "levels_cracked": "1",
366
+ "lives_left": "[4,5]",
367
+ "cracked_details": "1: 1/8",
368
+ "rank": 5,
369
+ "score": 6,
370
+ "note": "stuck in loop"
371
+ },
372
+ {
373
+ "model": "gemini-2.0-flash-thinking-exp-1219",
374
+ "levels_cracked": "0",
375
+ "lives_left": "0",
376
+ "cracked_details": "1: 4/5",
377
+ "rank": 6,
378
+ "score": 4,
379
+ "note": "stuck in the last option section"
380
+ },
381
+ {
382
+ "model": "deepseek-r1",
383
+ "levels_cracked": "0",
384
+ "lives_left": "0",
385
+ "cracked_details": "1: 4/5",
386
+ "rank": 7,
387
+ "score": 4,
388
+ "note": "stuck in the 3rd evidence present"
389
+ },
390
+ {
391
+ "model": "Llama-4-Maverick-17B-128E-Instruct-FP8",
392
+ "levels_cracked": "0",
393
+ "lives_left": "0",
394
+ "cracked_details": "0:0/5",
395
+ "rank": 8,
396
+ "score": 0,
397
+ "note": "failed to present evidence"
398
+ }
399
+ ]
400
  }
401
  }
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio
2
  pandas>=2.0.0
3
  matplotlib>=3.7.0
4
  seaborn>=0.12.0
 
1
+ gradio==5.23.3
2
  pandas>=2.0.0
3
  matplotlib>=3.7.0
4
  seaborn>=0.12.0