koalazf99 commited on
Commit
b952489
·
verified ·
1 Parent(s): 1d57d33

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. evaluation_results.json +119 -0
evaluation_results.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "amc-cot": {
3
+ "cot": {
4
+ "accuracy": 0.05,
5
+ "n_samples": 40
6
+ },
7
+ "tool": {
8
+ "n_samples": 0
9
+ }
10
+ },
11
+ "asdiv-cot": {
12
+ "cot": {
13
+ "accuracy": 0.8446952595936794,
14
+ "n_samples": 2215
15
+ },
16
+ "tool": {
17
+ "n_samples": 0
18
+ }
19
+ },
20
+ "gsm8k-cot": {
21
+ "cot": {
22
+ "accuracy": 0.7740712661106899,
23
+ "n_samples": 1319
24
+ },
25
+ "tool": {
26
+ "n_samples": 0
27
+ }
28
+ },
29
+ "math-500-cot": {
30
+ "cot": {
31
+ "accuracy": 0.426,
32
+ "n_samples": 500
33
+ },
34
+ "tool": {
35
+ "n_samples": 0
36
+ }
37
+ },
38
+ "math-cot": {
39
+ "cot": {
40
+ "accuracy": 0.4482,
41
+ "n_samples": 5000
42
+ },
43
+ "tool": {
44
+ "n_samples": 0
45
+ }
46
+ },
47
+ "math_sat-cot": {
48
+ "cot": {
49
+ "accuracy": 0.875,
50
+ "n_samples": 32
51
+ },
52
+ "tool": {
53
+ "n_samples": 0
54
+ }
55
+ },
56
+ "mathqa-cot": {
57
+ "cot": {
58
+ "accuracy": 0.605,
59
+ "n_samples": 1000
60
+ },
61
+ "tool": {
62
+ "n_samples": 0
63
+ }
64
+ },
65
+ "mawps-cot": {
66
+ "cot": {
67
+ "accuracy": 0.9598062953995158,
68
+ "n_samples": 2065
69
+ },
70
+ "tool": {
71
+ "n_samples": 0
72
+ }
73
+ },
74
+ "mmlu-stem-cot": {
75
+ "cot": {
76
+ "accuracy": 0.6408217362491716,
77
+ "n_samples": 3018
78
+ },
79
+ "tool": {
80
+ "n_samples": 0
81
+ }
82
+ },
83
+ "ocw-courses-cot": {
84
+ "cot": {
85
+ "accuracy": 0.15073529411764705,
86
+ "n_samples": 272
87
+ },
88
+ "tool": {
89
+ "n_samples": 0
90
+ }
91
+ },
92
+ "olympiad-bench-cot": {
93
+ "cot": {
94
+ "accuracy": 0.047407407407407405,
95
+ "n_samples": 675
96
+ },
97
+ "tool": {
98
+ "n_samples": 0
99
+ }
100
+ },
101
+ "svamp-cot": {
102
+ "cot": {
103
+ "accuracy": 0.861,
104
+ "n_samples": 1000
105
+ },
106
+ "tool": {
107
+ "n_samples": 0
108
+ }
109
+ },
110
+ "tabmwp-cot": {
111
+ "cot": {
112
+ "accuracy": 0.689,
113
+ "n_samples": 1000
114
+ },
115
+ "tool": {
116
+ "n_samples": 0
117
+ }
118
+ }
119
+ }