LR2Bench / results /OpenAI-o1-mini.json
UltraRonin's picture
add
b866cfe
{
"config": {
"model_name": "OpenAI-o1-mini"
},
"results": {
"Overall": {
"CR": "97.7",
"S-Acc": "41.3",
"EM": "9.1",
"PM-0.5": "32.8",
"Tokens": "9576"
},
"Acrostic": {
"CR": "97.0",
"S-Acc": "34.7",
"EM": "0.0",
"PM-0.5": "12.0",
"Tokens": "10952"
},
"Crossword": {
"CR": "95.3",
"S-Acc": "45.5",
"EM": "1.3",
"PM-0.5": "54.0",
"Tokens": "7840"
},
"Cryptogram": {
"CR": "100.0",
"S-Acc": "22.7",
"EM": "1.0",
"PM-0.5": "13.0",
"Tokens": "11208"
},
"Logic_Puzzle": {
"CR": "99.0",
"S-Acc": "57.2",
"EM": "23.5",
"PM-0.5": "53.5",
"Tokens": "10242"
},
"Sudoku": {
"CR": "99.0",
"S-Acc": "53.4",
"EM": "27.0",
"PM-0.5": "43.0",
"Tokens": "3961"
},
"Drop_Quote": {
"CR": "96.0",
"S-Acc": "34.3",
"EM": "2.0",
"PM-0.5": "21.0",
"Tokens": "13255"
}
}
}