LR2Bench / results /OpenAI-gpt-4o.json
UltraRonin's picture
add
b866cfe
{
"config": {
"model_name": "OpenAI-gpt-4o"
},
"results": {
"Overall": {
"CR": "99.8",
"S-Acc": "43.7",
"EM": "3.2",
"PM-0.5": "41.7",
"Tokens": "1486"
},
"Acrostic": {
"CR": "100.0",
"S-Acc": "56.0",
"EM": "0.0",
"PM-0.5": "67.0",
"Tokens": "3229"
},
"Crossword": {
"CR": "100.0",
"S-Acc": "66.0",
"EM": "1.3",
"PM-0.5": "86.7",
"Tokens": "1726"
},
"Cryptogram": {
"CR": "100.0",
"S-Acc": "20.7",
"EM": "0.0",
"PM-0.5": "5.0",
"Tokens": "740"
},
"Logic_Puzzle": {
"CR": "100.0",
"S-Acc": "39.3",
"EM": "3.5",
"PM-0.5": "29.5",
"Tokens": "953"
},
"Sudoku": {
"CR": "100.0",
"S-Acc": "52.2",
"EM": "14.5",
"PM-0.5": "48.0",
"Tokens": "1104"
},
"Drop_Quote": {
"CR": "99.0",
"S-Acc": "31.1",
"EM": "0.0",
"PM-0.5": "14.0",
"Tokens": "1165"
}
}
}