Check in all
- app.py +3 -2
- src/about.py +13 -5
- src/envs.py +4 -5
- src/leaderboard/read_evals.py +4 -5
app.py CHANGED
@@ -57,6 +57,7 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
+
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -76,12 +77,12 @@ def init_leaderboard(dataframe):
             ColumnFilter(
                 AutoEvalColumn.params.name,
                 type="slider",
-                min=0.
+                min=0.00,
                 max=150,
                 label="Select the number of parameters (B)",
             ),
             ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=
+                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=False
             ),
         ],
         bool_checkboxgroup_label="Hide models",
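For context, these ColumnFilter entries live in the filter_columns list that init_leaderboard hands to the gradio_leaderboard Leaderboard component. Below is a minimal sketch of that surrounding function as it appears in the stock leaderboard template, with this commit's values filled in; the AutoEvalColumn/fields helpers and the exact keyword arguments are assumptions based on the template, not part of this diff.

# Sketch only: surrounding init_leaderboard from the stock template, with the
# values changed in this commit (min=0.00, default=False) filled in.
from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns

from src.display.utils import AutoEvalColumn, fields  # assumed template helpers


def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(AutoEvalColumn)],
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        filter_columns=[
            ColumnFilter(
                AutoEvalColumn.params.name,
                type="slider",
                min=0.00,
                max=150,
                label="Select the number of parameters (B)",
            ),
            ColumnFilter(
                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=False
            ),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )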
src/about.py CHANGED
@@ -12,26 +12,34 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("
-    task1 = Task("
+    task0 = Task("aiera_speaker_assign", "accuracy,none", "SPEAKER_ID")
+    task1 = Task("aiera_transcript_sentiment", "accuracy,none","SENT")
+    task2 = Task("bbh_zeroshot_causal_judgement", "exact_match,flexible-extract","BBH0")
+    task3 = Task("flare_ectsum", "recall,none","ECTSUM")
+    task4 = Task("flare_edtsum", "rougeLsum,none","EDTSUM")
+    task5 = Task("flare_finqa", "exact_match_manual,none","FINQA")
+    task6 = Task("flare_fiqasa", "accuracy,none","FIQASA")
+    task7 = Task("flare_ner", "accuracy,none","NER")
+
 
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
 
 
-
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">
+TITLE = """<h1 align="center" id="space-title">Aiera Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-
+The Aiera Financial Leaderboard evaluates the performance of LLMs on a variety of tasks tailored to financial services.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 
+Proof something happenind
+
 ## Reproducibility
 To reproduce our results, here is the commands you can run:
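Each entry ties an lm-evaluation-harness task key and metric key to a leaderboard column. A minimal sketch of the Task container these lines rely on (defined just above this hunk in the template) and of how the enum is typically consumed follows; BENCHMARK_COLS is an illustrative name assumed from the template, not shown in this commit.

# Sketch only: the Task container assumed by the entries above, and one way the
# template derives the benchmark columns from the Tasks enum.
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # task key in the results json, e.g. "flare_finqa"
    metric: str     # metric key in the results json, e.g. "exact_match_manual,none"
    col_name: str   # column name displayed on the leaderboard, e.g. "FINQA"


class Tasks(Enum):
    task5 = Task("flare_finqa", "exact_match_manual,none", "FINQA")
    task6 = Task("flare_fiqasa", "accuracy,none", "FIQASA")


# Adding task2..task7 is enough to surface the new benchmarks, because the
# display code iterates the enum rather than hard-coding columns:
BENCHMARK_COLS = [t.value.col_name for t in Tasks]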
src/envs.py CHANGED
@@ -5,13 +5,12 @@ from huggingface_hub import HfApi
 # Info to change for your repository
 # ----------------------------------
 TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
-
-OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "Aiera" # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 
-REPO_ID = f"{OWNER}/leaderboard"
-QUEUE_REPO = f"{OWNER}/
-RESULTS_REPO = f"{OWNER}/results"
+REPO_ID = f"{OWNER}/aiera-finance-leaderboard"
+QUEUE_REPO = f"{OWNER}/aiera-leaderboard-queue"
+RESULTS_REPO = f"{OWNER}/aiera-leaderboard-results"
 
 # If you setup a cache later, just change HF_HOME
 CACHE_PATH=os.getenv("HF_HOME", ".")
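These repository IDs are what the Space syncs from at startup, so the queue and results datasets must exist under the Aiera org with the expected layout. A hedged sketch of how the template's app.py typically pulls them; the EVAL_REQUESTS_PATH and EVAL_RESULTS_PATH constants are assumed to come from the same envs module.

# Sketch only: startup sync as done in the stock template's app.py.
from huggingface_hub import snapshot_download

from src.envs import EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, RESULTS_REPO, TOKEN

# Pull the pending-request queue and the published results datasets locally.
snapshot_download(
    repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
    etag_timeout=30, token=TOKEN,
)
snapshot_download(
    repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
    etag_timeout=30, token=TOKEN,
)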
src/leaderboard/read_evals.py CHANGED
@@ -93,7 +93,7 @@ class EvalResult:
 
     def update_with_request_file(self, requests_path):
         """Finds the relevant request file for the current model and updates info with it"""
-        request_file = get_request_file_for_model(requests_path, self.full_model
+        request_file = get_request_file_for_model(requests_path, self.full_model)
 
         try:
             with open(request_file, "r") as f:
@@ -105,7 +105,7 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(f"Could not find request file for {self.org}/{self.model}
+            print(f"Could not find request file for {self.org}/{self.model}")
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -132,11 +132,11 @@ class EvalResult:
         return data_dict
 
 
-def get_request_file_for_model(requests_path, model_name
+def get_request_file_for_model(requests_path, model_name):
     """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
     request_files = os.path.join(
         requests_path,
-        f"{model_name}
+        f"{model_name}*.json",
     )
     request_files = glob.glob(request_files)
 
@@ -148,7 +148,6 @@ def get_request_file_for_model(requests_path, model_name, precision):
             req_content = json.load(f)
             if (
                 req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
             ):
                 request_file = tmp_request_file
     return request_file
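Net effect: request files are now matched on model name alone and the precision check is dropped. A self-contained sketch of the simplified lookup, assuming request JSONs still carry a "status" field as in the template; the tie-breaking order among multiple matches is an assumption for illustration.

# Sketch only: the simplified request-file lookup after this commit
# (match on f"{model_name}*.json", keep a FINISHED run, no precision filter).
import glob
import json
import os


def get_request_file_for_model(requests_path, model_name):
    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED."""
    pattern = os.path.join(requests_path, f"{model_name}*.json")
    request_file = ""
    for tmp_request_file in sorted(glob.glob(pattern)):
        with open(tmp_request_file, "r") as f:
            req_content = json.load(f)
        if req_content["status"] in ["FINISHED"]:
            # Later files overwrite earlier ones; the last matching FINISHED run wins.
            request_file = tmp_request_file
    return request_file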