import json
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from datasets import load_dataset

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    TASK_TEXT,
    SUBMIT_TEMPLATE,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision,
)
from src.envs import API, EVAL_RESULTS_PATH, GOLDEN_REPO, REPO_ID, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval
from src.evaluation import evaluate
import pdb
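
# Restart this Space through the Hub API; used as a fallback when initialisation
# fails and by the background scheduler at the bottom of the file.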
def restart_space():
    API.restart_space(repo_id=REPO_ID)
### Space initialisation
# try:
#     print(EVAL_REQUESTS_PATH)
#     snapshot_download(
#         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
#     )
# except Exception:
#     restart_space()
# try:
#     print(EVAL_RESULTS_PATH)
#     snapshot_download(
#         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
#     )
# except Exception:
#     restart_space()

try:
    golden = load_dataset(GOLDEN_REPO, token=TOKEN)
    print(golden)
except Exception:
    restart_space()
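
# Leaderboard tabs: one pre-computed DataFrame per task, plus an overall view.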
task = ['Overall', 'Crossword', 'Acrostic', 'Logic_Puzzle', 'Cryptogram', 'Sudoku', 'Drop_Quote']
leaderboard_dict = {}
for t in task:
    leaderboard_dict[t] = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, task=t)
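
# Render a leaderboard DataFrame as a read-only Gradio Dataframe, formatting
# float columns to one decimal place and bolding the best score in each column.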
def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    # pdb.set_trace()

    def highlight_max_bold(s):
        return ['font-weight: bold' if v == s.max() and v != s.min() else '' for v in s]

    num_cols = dataframe.select_dtypes(include=['float']).columns
    styler = dataframe.style.format({col: "{:.1f}" for col in num_cols})
    styler = styler.apply(highlight_max_bold, subset=num_cols)
    return gr.components.Dataframe(
        value=styler,
        headers=[c.name for c in fields(AutoEvalColumn)],
        datatype=[c.type for c in fields(AutoEvalColumn)],
        row_count=10,
        interactive=False,
        column_widths=[180, 60, 80, 80, 80, 80, 60],
    )
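
# Score an uploaded submission against the golden references and return the
# per-task results as a JSON string. The upload is expected to follow
# submission_template.json; judging from the lookups below it is roughly
# {"results": {"crossword": [...], "acrostic": [...], ...}} (inferred, not
# taken from the template itself).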
def eval_json(file):
    try:
        with open(file.name, 'r', encoding='utf-8') as f:
            data = json.load(f)
        tasks = ["crossword", "acrostic", "logic", "cryptogram", "sudoku", "drop"]
        eval_dict = {}
        for task in tasks:
            data_list = data["results"][task]
            golden_list = golden[task]
            result = evaluate(data_list, golden_list, task)
            eval_dict[task] = result
        return json.dumps(eval_dict, indent=4)
    except Exception as e:
        return str(e)
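
# Gradio UI: one leaderboard tab per task, a submission tab wired to eval_json,
# and a copyable citation box.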
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_id="main-tabs", elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            # leaderboard = init_leaderboard(LEADERBOARD_DF)
            with gr.Tabs():
                for i, t in enumerate(task):
                    with gr.TabItem(t.replace("_", " "), elem_id=f"llm-benchmark-tab-table-{t}", id=i):
                        if TASK_TEXT.get(t, None):
                            gr.Markdown(TASK_TEXT[t], elem_classes="markdown-text")
                        leaderboard = init_leaderboard(leaderboard_dict[t])

        # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
        #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your results here!", elem_classes="markdown-text")

            gr.Markdown("## Submission Template", elem_classes="markdown-text")
            gr.Markdown("See [submission_template.json](https://github.com/Ultramarine-spec/LR2Bench/blob/main/submission_template.json) for details. The following is an example of the JSON structure.", elem_classes="markdown-text")
            gr.Markdown(SUBMIT_TEMPLATE, elem_classes="markdown-text", height=250)

            file_input = gr.File(label="Upload JSON File", file_types=[".json"], height=150)
            json_output = gr.JSON(label="Your Model Performance")  # JSON output with the evaluated scores
            submit_button = gr.Button("Submit")
            submit_button.click(fn=eval_json, inputs=file_input, outputs=json_output)

    with gr.Row():
        # gr.Markdown()
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )
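
# Periodically restart the Space (every 30 minutes), presumably to pick up
# newly published results.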
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

demo.queue(default_concurrency_limit=40).launch()