LR2Bench / results /OpenAI-o1-preview.json
UltraRonin's picture
add
b866cfe
{
"config": {
"model_name": "OpenAI-o1-preview"
},
"results": {
"Overall": {
"CR": "96.3",
"S-Acc": "58.7",
"EM": "23.6",
"PM-0.5": "61.7",
"Tokens": "11436"
},
"Acrostic": {
"CR": "100.0",
"S-Acc": "67.2",
"EM": "0.0",
"PM-0.5": "90.0",
"Tokens": "14847"
},
"Crossword": {
"CR": "98.0",
"S-Acc": "77.7",
"EM": "24.7",
"PM-0.5": "89.3",
"Tokens": "10098"
},
"Cryptogram": {
"CR": "92.0",
"S-Acc": "34.8",
"EM": "13.0",
"PM-0.5": "29.0",
"Tokens": "12567"
},
"Logic_Puzzle": {
"CR": "99.0",
"S-Acc": "68.8",
"EM": "41.0",
"PM-0.5": "68.5",
"Tokens": "9449"
},
"Sudoku": {
"CR": "91.5",
"S-Acc": "65.1",
"EM": "50.0",
"PM-0.5": "55.5",
"Tokens": "8062"
},
"Drop_Quote": {
"CR": "97.0",
"S-Acc": "38.8",
"EM": "13.0",
"PM-0.5": "38.0",
"Tokens": "13595"
}
}
}