LR2Bench / results /Gemini-2.0-flash-thinking.json
UltraRonin's picture
add
b866cfe
{
"config": {
"model_name": "Gemini-2.0-flash-thinking"
},
"results": {
"Overall": {
"CR": "88.2",
"S-Acc": "39.4",
"EM": "4.3",
"PM-0.5": "35.0",
"Tokens": "3725"
},
"Acrostic": {
"CR": "92.0",
"S-Acc": "40.7",
"EM": "0.0",
"PM-0.5": "27.0",
"Tokens": "4257"
},
"Crossword": {
"CR": "94.7",
"S-Acc": "57.7",
"EM": "1.3",
"PM-0.5": "79.3",
"Tokens": "2648"
},
"Cryptogram": {
"CR": "68.0",
"S-Acc": "11.2",
"EM": "0.0",
"PM-0.5": "2.0",
"Tokens": "4167"
},
"Logic_Puzzle": {
"CR": "99.0",
"S-Acc": "45.9",
"EM": "8.0",
"PM-0.5": "37.5",
"Tokens": "4038"
},
"Sudoku": {
"CR": "79.5",
"S-Acc": "46.5",
"EM": "16.5",
"PM-0.5": "41.0",
"Tokens": "3853"
},
"Drop_Quote": {
"CR": "96.0",
"S-Acc": "34.4",
"EM": "0.0",
"PM-0.5": "23.0",
"Tokens": "3386"
}
}
}