OctoThinker-8B-Short-Base / evaluation_results.json
koalazf99's picture
Upload folder using huggingface_hub
b952489 verified
{
"amc-cot": {
"cot": {
"accuracy": 0.05,
"n_samples": 40
},
"tool": {
"n_samples": 0
}
},
"asdiv-cot": {
"cot": {
"accuracy": 0.8446952595936794,
"n_samples": 2215
},
"tool": {
"n_samples": 0
}
},
"gsm8k-cot": {
"cot": {
"accuracy": 0.7740712661106899,
"n_samples": 1319
},
"tool": {
"n_samples": 0
}
},
"math-500-cot": {
"cot": {
"accuracy": 0.426,
"n_samples": 500
},
"tool": {
"n_samples": 0
}
},
"math-cot": {
"cot": {
"accuracy": 0.4482,
"n_samples": 5000
},
"tool": {
"n_samples": 0
}
},
"math_sat-cot": {
"cot": {
"accuracy": 0.875,
"n_samples": 32
},
"tool": {
"n_samples": 0
}
},
"mathqa-cot": {
"cot": {
"accuracy": 0.605,
"n_samples": 1000
},
"tool": {
"n_samples": 0
}
},
"mawps-cot": {
"cot": {
"accuracy": 0.9598062953995158,
"n_samples": 2065
},
"tool": {
"n_samples": 0
}
},
"mmlu-stem-cot": {
"cot": {
"accuracy": 0.6408217362491716,
"n_samples": 3018
},
"tool": {
"n_samples": 0
}
},
"ocw-courses-cot": {
"cot": {
"accuracy": 0.15073529411764705,
"n_samples": 272
},
"tool": {
"n_samples": 0
}
},
"olympiad-bench-cot": {
"cot": {
"accuracy": 0.047407407407407405,
"n_samples": 675
},
"tool": {
"n_samples": 0
}
},
"svamp-cot": {
"cot": {
"accuracy": 0.861,
"n_samples": 1000
},
"tool": {
"n_samples": 0
}
},
"tabmwp-cot": {
"cot": {
"accuracy": 0.689,
"n_samples": 1000
},
"tool": {
"n_samples": 0
}
}
}