{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "1100\n",
      "1200\n",
      "1300\n",
      "1400\n",
      "1500\n",
      "1600\n",
      "1700\n",
      "1800\n",
      "full_model_names\n",
      "1889\n",
      "organization_names\n",
      "12\n",
      "['Parameters', 'drop|3', 'gsm8k', 'MMLU_average', 'winogrande', 'all', 'arc:challenge|25', 'hellaswag|10', 'MMLU_abstract_algebra', 'MMLU_anatomy', 'MMLU_astronomy', 'MMLU_business_ethics', 'MMLU_clinical_knowledge', 'MMLU_college_biology', 'MMLU_college_chemistry', 'MMLU_college_computer_science', 'MMLU_college_mathematics', 'MMLU_college_medicine', 'MMLU_college_physics', 'MMLU_computer_security', 'MMLU_conceptual_physics', 'MMLU_econometrics', 'MMLU_electrical_engineering', 'MMLU_elementary_mathematics', 'MMLU_formal_logic', 'MMLU_global_facts', 'MMLU_high_school_biology', 'MMLU_high_school_chemistry', 'MMLU_high_school_computer_science', 'MMLU_high_school_european_history', 'MMLU_high_school_geography', 'MMLU_high_school_government_and_politics', 'MMLU_high_school_macroeconomics', 'MMLU_high_school_mathematics', 'MMLU_high_school_microeconomics', 'MMLU_high_school_physics', 'MMLU_high_school_psychology', 'MMLU_high_school_statistics', 'MMLU_high_school_us_history', 'MMLU_high_school_world_history', 'MMLU_human_aging', 'MMLU_human_sexuality', 'MMLU_international_law', 'MMLU_jurisprudence', 'MMLU_logical_fallacies', 'MMLU_machine_learning', 'MMLU_management', 'MMLU_marketing', 'MMLU_medical_genetics', 'MMLU_miscellaneous', 'MMLU_moral_disputes', 'MMLU_moral_scenarios', 'MMLU_nutrition', 'MMLU_philosophy', 'MMLU_prehistory', 'MMLU_professional_accounting', 'MMLU_professional_law', 'MMLU_professional_medicine', 'MMLU_professional_psychology', 'MMLU_public_relations', 'MMLU_security_studies', 'MMLU_sociology', 'MMLU_us_foreign_policy', 'MMLU_virology', 'MMLU_world_religions', 'truthfulqa:mc|0', 'full_model_name']\n"
     ]
    }
   ],
   "source": [
    "from result_data_processor import ResultDataProcessor\n",
    "result = ResultDataProcessor()"
   ]
  },
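  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sanity check, sketched here rather than taken from `ResultDataProcessor`'s API (only the `.data` attribute, used again in the next cell, is assumed): `full_model_name` follows an `org/model` pattern, so the set of organizations can be re-derived from its prefix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical check, not part of ResultDataProcessor: re-derive the\n",
    "# organization names from the org/model prefix of full_model_name.\n",
    "orgs = result.data['full_model_name'].str.split('/').str[0]\n",
    "print('organizations represented:', orgs.nunique())"
   ]
  },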
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>URL</th>\n",
       "      <th>full_model_name</th>\n",
       "      <th>Parameters</th>\n",
       "      <th>MMLU_average</th>\n",
       "      <th>arc:challenge|25</th>\n",
       "      <th>hellaswag|10</th>\n",
       "      <th>MMLU_abstract_algebra</th>\n",
       "      <th>MMLU_anatomy</th>\n",
       "      <th>MMLU_astronomy</th>\n",
       "      <th>MMLU_business_ethics</th>\n",
       "      <th>...</th>\n",
       "      <th>MMLU_professional_accounting</th>\n",
       "      <th>MMLU_professional_law</th>\n",
       "      <th>MMLU_professional_medicine</th>\n",
       "      <th>MMLU_professional_psychology</th>\n",
       "      <th>MMLU_public_relations</th>\n",
       "      <th>MMLU_security_studies</th>\n",
       "      <th>MMLU_sociology</th>\n",
       "      <th>MMLU_us_foreign_policy</th>\n",
       "      <th>MMLU_virology</th>\n",
       "      <th>MMLU_world_religions</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>SparseOPT-1.3B</th>\n",
       "      <td>https://huggingface.co/shaohang/SparseOPT-1.3B</td>\n",
       "      <td>shaohang/SparseOPT-1.3B</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.255963</td>\n",
       "      <td>0.240614</td>\n",
       "      <td>0.383689</td>\n",
       "      <td>0.22</td>\n",
       "      <td>0.214815</td>\n",
       "      <td>0.157895</td>\n",
       "      <td>0.20</td>\n",
       "      <td>...</td>\n",
       "      <td>0.262411</td>\n",
       "      <td>0.238592</td>\n",
       "      <td>0.448529</td>\n",
       "      <td>0.254902</td>\n",
       "      <td>0.236364</td>\n",
       "      <td>0.171429</td>\n",
       "      <td>0.228856</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.283133</td>\n",
       "      <td>0.216374</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Athena-v1</th>\n",
       "      <td>https://huggingface.co/IkariDev/Athena-v1</td>\n",
       "      <td>IkariDev/Athena-v1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.556052</td>\n",
       "      <td>0.560580</td>\n",
       "      <td>0.631548</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.496296</td>\n",
       "      <td>0.526316</td>\n",
       "      <td>0.58</td>\n",
       "      <td>...</td>\n",
       "      <td>0.404255</td>\n",
       "      <td>0.392438</td>\n",
       "      <td>0.525735</td>\n",
       "      <td>0.540850</td>\n",
       "      <td>0.645455</td>\n",
       "      <td>0.640816</td>\n",
       "      <td>0.751244</td>\n",
       "      <td>0.83</td>\n",
       "      <td>0.493976</td>\n",
       "      <td>0.725146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Athena-tmp</th>\n",
       "      <td>https://huggingface.co/IkariDev/Athena-tmp</td>\n",
       "      <td>IkariDev/Athena-tmp</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.588685</td>\n",
       "      <td>0.567406</td>\n",
       "      <td>0.621888</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.518519</td>\n",
       "      <td>0.638158</td>\n",
       "      <td>0.62</td>\n",
       "      <td>...</td>\n",
       "      <td>0.450355</td>\n",
       "      <td>0.462842</td>\n",
       "      <td>0.569853</td>\n",
       "      <td>0.588235</td>\n",
       "      <td>0.645455</td>\n",
       "      <td>0.653061</td>\n",
       "      <td>0.721393</td>\n",
       "      <td>0.81</td>\n",
       "      <td>0.463855</td>\n",
       "      <td>0.801170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13B-Legerdemain-L2</th>\n",
       "      <td>https://huggingface.co/CalderaAI/13B-Legerdema...</td>\n",
       "      <td>CalderaAI/13B-Legerdemain-L2</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.560030</td>\n",
       "      <td>0.573379</td>\n",
       "      <td>0.635431</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.525926</td>\n",
       "      <td>0.572368</td>\n",
       "      <td>0.53</td>\n",
       "      <td>...</td>\n",
       "      <td>0.429078</td>\n",
       "      <td>0.424381</td>\n",
       "      <td>0.522059</td>\n",
       "      <td>0.532680</td>\n",
       "      <td>0.609091</td>\n",
       "      <td>0.636735</td>\n",
       "      <td>0.766169</td>\n",
       "      <td>0.87</td>\n",
       "      <td>0.427711</td>\n",
       "      <td>0.777778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13B-Ouroboros</th>\n",
       "      <td>https://huggingface.co/CalderaAI/13B-Ouroboros</td>\n",
       "      <td>CalderaAI/13B-Ouroboros</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.514311</td>\n",
       "      <td>0.560580</td>\n",
       "      <td>0.624378</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.466667</td>\n",
       "      <td>0.506579</td>\n",
       "      <td>0.52</td>\n",
       "      <td>...</td>\n",
       "      <td>0.365248</td>\n",
       "      <td>0.405476</td>\n",
       "      <td>0.481618</td>\n",
       "      <td>0.524510</td>\n",
       "      <td>0.609091</td>\n",
       "      <td>0.538776</td>\n",
       "      <td>0.691542</td>\n",
       "      <td>0.83</td>\n",
       "      <td>0.457831</td>\n",
       "      <td>0.760234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Robin-v2</th>\n",
       "      <td>https://huggingface.co/HanningZhang/Robin-v2</td>\n",
       "      <td>HanningZhang/Robin-v2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.392680</td>\n",
       "      <td>0.435154</td>\n",
       "      <td>0.545310</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.437037</td>\n",
       "      <td>0.335526</td>\n",
       "      <td>0.46</td>\n",
       "      <td>...</td>\n",
       "      <td>0.290780</td>\n",
       "      <td>0.302477</td>\n",
       "      <td>0.382353</td>\n",
       "      <td>0.374183</td>\n",
       "      <td>0.445455</td>\n",
       "      <td>0.326531</td>\n",
       "      <td>0.457711</td>\n",
       "      <td>0.59</td>\n",
       "      <td>0.379518</td>\n",
       "      <td>0.590643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CodeUp-Llama-2-13b-chat-hf</th>\n",
       "      <td>https://huggingface.co/deepse/CodeUp-Llama-2-1...</td>\n",
       "      <td>deepse/CodeUp-Llama-2-13b-chat-hf</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.546262</td>\n",
       "      <td>0.558020</td>\n",
       "      <td>0.629257</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.474074</td>\n",
       "      <td>0.546053</td>\n",
       "      <td>0.53</td>\n",
       "      <td>...</td>\n",
       "      <td>0.390071</td>\n",
       "      <td>0.391786</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.544118</td>\n",
       "      <td>0.663636</td>\n",
       "      <td>0.636735</td>\n",
       "      <td>0.751244</td>\n",
       "      <td>0.81</td>\n",
       "      <td>0.481928</td>\n",
       "      <td>0.730994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hermes-Platypus2-mini-7B</th>\n",
       "      <td>https://huggingface.co/edor/Hermes-Platypus2-m...</td>\n",
       "      <td>edor/Hermes-Platypus2-mini-7B</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.470828</td>\n",
       "      <td>0.523038</td>\n",
       "      <td>0.601573</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.488889</td>\n",
       "      <td>0.421053</td>\n",
       "      <td>0.48</td>\n",
       "      <td>...</td>\n",
       "      <td>0.390071</td>\n",
       "      <td>0.353977</td>\n",
       "      <td>0.470588</td>\n",
       "      <td>0.446078</td>\n",
       "      <td>0.518182</td>\n",
       "      <td>0.563265</td>\n",
       "      <td>0.621891</td>\n",
       "      <td>0.68</td>\n",
       "      <td>0.421687</td>\n",
       "      <td>0.637427</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Stable-Platypus2-mini-7B</th>\n",
       "      <td>https://huggingface.co/edor/Stable-Platypus2-m...</td>\n",
       "      <td>edor/Stable-Platypus2-mini-7B</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.517800</td>\n",
       "      <td>0.523891</td>\n",
       "      <td>0.596594</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.488889</td>\n",
       "      <td>0.407895</td>\n",
       "      <td>0.50</td>\n",
       "      <td>...</td>\n",
       "      <td>0.390071</td>\n",
       "      <td>0.391786</td>\n",
       "      <td>0.518382</td>\n",
       "      <td>0.509804</td>\n",
       "      <td>0.618182</td>\n",
       "      <td>0.657143</td>\n",
       "      <td>0.631841</td>\n",
       "      <td>0.73</td>\n",
       "      <td>0.427711</td>\n",
       "      <td>0.695906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>llava-v1.5-13b-hf</th>\n",
       "      <td>https://huggingface.co/Community-LM/llava-v1.5...</td>\n",
       "      <td>Community-LM/llava-v1.5-13b-hf</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.568868</td>\n",
       "      <td>0.532423</td>\n",
       "      <td>0.601175</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.496296</td>\n",
       "      <td>0.585526</td>\n",
       "      <td>0.67</td>\n",
       "      <td>...</td>\n",
       "      <td>0.407801</td>\n",
       "      <td>0.415906</td>\n",
       "      <td>0.547794</td>\n",
       "      <td>0.578431</td>\n",
       "      <td>0.600000</td>\n",
       "      <td>0.653061</td>\n",
       "      <td>0.761194</td>\n",
       "      <td>0.81</td>\n",
       "      <td>0.506024</td>\n",
       "      <td>0.795322</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1121 rows × 63 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                          URL  \\\n",
       "SparseOPT-1.3B                 https://huggingface.co/shaohang/SparseOPT-1.3B   \n",
       "Athena-v1                           https://huggingface.co/IkariDev/Athena-v1   \n",
       "Athena-tmp                         https://huggingface.co/IkariDev/Athena-tmp   \n",
       "13B-Legerdemain-L2          https://huggingface.co/CalderaAI/13B-Legerdema...   \n",
       "13B-Ouroboros                  https://huggingface.co/CalderaAI/13B-Ouroboros   \n",
       "...                                                                       ...   \n",
       "Robin-v2                         https://huggingface.co/HanningZhang/Robin-v2   \n",
       "CodeUp-Llama-2-13b-chat-hf  https://huggingface.co/deepse/CodeUp-Llama-2-1...   \n",
       "Hermes-Platypus2-mini-7B    https://huggingface.co/edor/Hermes-Platypus2-m...   \n",
       "Stable-Platypus2-mini-7B    https://huggingface.co/edor/Stable-Platypus2-m...   \n",
       "llava-v1.5-13b-hf           https://huggingface.co/Community-LM/llava-v1.5...   \n",
       "\n",
       "                                              full_model_name  Parameters  \\\n",
       "SparseOPT-1.3B                        shaohang/SparseOPT-1.3B         1.3   \n",
       "Athena-v1                                  IkariDev/Athena-v1         NaN   \n",
       "Athena-tmp                                IkariDev/Athena-tmp         NaN   \n",
       "13B-Legerdemain-L2               CalderaAI/13B-Legerdemain-L2        13.0   \n",
       "13B-Ouroboros                         CalderaAI/13B-Ouroboros        13.0   \n",
       "...                                                       ...         ...   \n",
       "Robin-v2                                HanningZhang/Robin-v2         NaN   \n",
       "CodeUp-Llama-2-13b-chat-hf  deepse/CodeUp-Llama-2-13b-chat-hf        13.0   \n",
       "Hermes-Platypus2-mini-7B        edor/Hermes-Platypus2-mini-7B         7.0   \n",
       "Stable-Platypus2-mini-7B        edor/Stable-Platypus2-mini-7B         7.0   \n",
       "llava-v1.5-13b-hf              Community-LM/llava-v1.5-13b-hf        13.0   \n",
       "\n",
       "                            MMLU_average  arc:challenge|25  hellaswag|10  \\\n",
       "SparseOPT-1.3B                  0.255963          0.240614      0.383689   \n",
       "Athena-v1                       0.556052          0.560580      0.631548   \n",
       "Athena-tmp                      0.588685          0.567406      0.621888   \n",
       "13B-Legerdemain-L2              0.560030          0.573379      0.635431   \n",
       "13B-Ouroboros                   0.514311          0.560580      0.624378   \n",
       "...                                  ...               ...           ...   \n",
       "Robin-v2                        0.392680          0.435154      0.545310   \n",
       "CodeUp-Llama-2-13b-chat-hf      0.546262          0.558020      0.629257   \n",
       "Hermes-Platypus2-mini-7B        0.470828          0.523038      0.601573   \n",
       "Stable-Platypus2-mini-7B        0.517800          0.523891      0.596594   \n",
       "llava-v1.5-13b-hf               0.568868          0.532423      0.601175   \n",
       "\n",
       "                            MMLU_abstract_algebra  MMLU_anatomy  \\\n",
       "SparseOPT-1.3B                               0.22      0.214815   \n",
       "Athena-v1                                    0.31      0.496296   \n",
       "Athena-tmp                                   0.29      0.518519   \n",
       "13B-Legerdemain-L2                           0.36      0.525926   \n",
       "13B-Ouroboros                                0.31      0.466667   \n",
       "...                                           ...           ...   \n",
       "Robin-v2                                     0.32      0.437037   \n",
       "CodeUp-Llama-2-13b-chat-hf                   0.31      0.474074   \n",
       "Hermes-Platypus2-mini-7B                     0.33      0.488889   \n",
       "Stable-Platypus2-mini-7B                     0.37      0.488889   \n",
       "llava-v1.5-13b-hf                            0.30      0.496296   \n",
       "\n",
       "                            MMLU_astronomy  MMLU_business_ethics  ...  \\\n",
       "SparseOPT-1.3B                    0.157895                  0.20  ...   \n",
       "Athena-v1                         0.526316                  0.58  ...   \n",
       "Athena-tmp                        0.638158                  0.62  ...   \n",
       "13B-Legerdemain-L2                0.572368                  0.53  ...   \n",
       "13B-Ouroboros                     0.506579                  0.52  ...   \n",
       "...                                    ...                   ...  ...   \n",
       "Robin-v2                          0.335526                  0.46  ...   \n",
       "CodeUp-Llama-2-13b-chat-hf        0.546053                  0.53  ...   \n",
       "Hermes-Platypus2-mini-7B          0.421053                  0.48  ...   \n",
       "Stable-Platypus2-mini-7B          0.407895                  0.50  ...   \n",
       "llava-v1.5-13b-hf                 0.585526                  0.67  ...   \n",
       "\n",
       "                            MMLU_professional_accounting  \\\n",
       "SparseOPT-1.3B                                  0.262411   \n",
       "Athena-v1                                       0.404255   \n",
       "Athena-tmp                                      0.450355   \n",
       "13B-Legerdemain-L2                              0.429078   \n",
       "13B-Ouroboros                                   0.365248   \n",
       "...                                                  ...   \n",
       "Robin-v2                                        0.290780   \n",
       "CodeUp-Llama-2-13b-chat-hf                      0.390071   \n",
       "Hermes-Platypus2-mini-7B                        0.390071   \n",
       "Stable-Platypus2-mini-7B                        0.390071   \n",
       "llava-v1.5-13b-hf                               0.407801   \n",
       "\n",
       "                            MMLU_professional_law  MMLU_professional_medicine  \\\n",
       "SparseOPT-1.3B                           0.238592                    0.448529   \n",
       "Athena-v1                                0.392438                    0.525735   \n",
       "Athena-tmp                               0.462842                    0.569853   \n",
       "13B-Legerdemain-L2                       0.424381                    0.522059   \n",
       "13B-Ouroboros                            0.405476                    0.481618   \n",
       "...                                           ...                         ...   \n",
       "Robin-v2                                 0.302477                    0.382353   \n",
       "CodeUp-Llama-2-13b-chat-hf               0.391786                    0.500000   \n",
       "Hermes-Platypus2-mini-7B                 0.353977                    0.470588   \n",
       "Stable-Platypus2-mini-7B                 0.391786                    0.518382   \n",
       "llava-v1.5-13b-hf                        0.415906                    0.547794   \n",
       "\n",
       "                            MMLU_professional_psychology  \\\n",
       "SparseOPT-1.3B                                  0.254902   \n",
       "Athena-v1                                       0.540850   \n",
       "Athena-tmp                                      0.588235   \n",
       "13B-Legerdemain-L2                              0.532680   \n",
       "13B-Ouroboros                                   0.524510   \n",
       "...                                                  ...   \n",
       "Robin-v2                                        0.374183   \n",
       "CodeUp-Llama-2-13b-chat-hf                      0.544118   \n",
       "Hermes-Platypus2-mini-7B                        0.446078   \n",
       "Stable-Platypus2-mini-7B                        0.509804   \n",
       "llava-v1.5-13b-hf                               0.578431   \n",
       "\n",
       "                            MMLU_public_relations  MMLU_security_studies  \\\n",
       "SparseOPT-1.3B                           0.236364               0.171429   \n",
       "Athena-v1                                0.645455               0.640816   \n",
       "Athena-tmp                               0.645455               0.653061   \n",
       "13B-Legerdemain-L2                       0.609091               0.636735   \n",
       "13B-Ouroboros                            0.609091               0.538776   \n",
       "...                                           ...                    ...   \n",
       "Robin-v2                                 0.445455               0.326531   \n",
       "CodeUp-Llama-2-13b-chat-hf               0.663636               0.636735   \n",
       "Hermes-Platypus2-mini-7B                 0.518182               0.563265   \n",
       "Stable-Platypus2-mini-7B                 0.618182               0.657143   \n",
       "llava-v1.5-13b-hf                        0.600000               0.653061   \n",
       "\n",
       "                            MMLU_sociology  MMLU_us_foreign_policy  \\\n",
       "SparseOPT-1.3B                    0.228856                    0.27   \n",
       "Athena-v1                         0.751244                    0.83   \n",
       "Athena-tmp                        0.721393                    0.81   \n",
       "13B-Legerdemain-L2                0.766169                    0.87   \n",
       "13B-Ouroboros                     0.691542                    0.83   \n",
       "...                                    ...                     ...   \n",
       "Robin-v2                          0.457711                    0.59   \n",
       "CodeUp-Llama-2-13b-chat-hf        0.751244                    0.81   \n",
       "Hermes-Platypus2-mini-7B          0.621891                    0.68   \n",
       "Stable-Platypus2-mini-7B          0.631841                    0.73   \n",
       "llava-v1.5-13b-hf                 0.761194                    0.81   \n",
       "\n",
       "                            MMLU_virology  MMLU_world_religions  \n",
       "SparseOPT-1.3B                   0.283133              0.216374  \n",
       "Athena-v1                        0.493976              0.725146  \n",
       "Athena-tmp                       0.463855              0.801170  \n",
       "13B-Legerdemain-L2               0.427711              0.777778  \n",
       "13B-Ouroboros                    0.457831              0.760234  \n",
       "...                                   ...                   ...  \n",
       "Robin-v2                         0.379518              0.590643  \n",
       "CodeUp-Llama-2-13b-chat-hf       0.481928              0.730994  \n",
       "Hermes-Platypus2-mini-7B         0.421687              0.637427  \n",
       "Stable-Platypus2-mini-7B         0.427711              0.695906  \n",
       "llava-v1.5-13b-hf                0.506024              0.795322  \n",
       "\n",
       "[1121 rows x 63 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = result.data\n",
    "df"
   ]
  },
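  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch of how the frame can be explored, assuming only the columns visible above (`MMLU_average`, `Parameters`, `arc:challenge|25`, `hellaswag|10`): rank the models by their MMLU average and count the rows where no parameter size was parsed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical exploration, not part of ResultDataProcessor: rank the\n",
    "# models by their MMLU average.\n",
    "cols = ['Parameters', 'MMLU_average', 'arc:challenge|25', 'hellaswag|10']\n",
    "print(df.sort_values('MMLU_average', ascending=False)[cols].head(10))\n",
    "\n",
    "# Parameters is NaN where the model size could not be determined.\n",
    "print('models missing a parameter count:', df['Parameters'].isna().sum())"
   ]
  }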
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mmlu",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}