Update README.md
Browse files
README.md
CHANGED
@@ -132,8 +132,16 @@ Average: 42.25%
|
|
132 |
| | |acc_norm|82.59|± | 0.88|
|
133 |
|winogrande | 0|acc |77.19|± | 1.18|
|
134 |
|
|
|
135 |
Average: 73.45%
|
136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
### TruthfulQA
|
138 |
| Task |Version|Metric|Value| |Stderr|
|
139 |
|-------------|------:|------|----:|---|-----:|
|
|
|
132 |
| | |acc_norm|82.59|± | 0.88|
|
133 |
|winogrande | 0|acc |77.19|± | 1.18|
|
134 |
|
135 |
+
|
136 |
Average: 73.45%
|
137 |
|
138 |
+
### GSM8K
|
139 |
+
|Task |Version| Metric |Value| |Stderr|
|
140 |
+
|-----|------:|-----------------------------|----:|---|-----:|
|
141 |
+
|gsm8k| 2|exact_match,get-answer | 0.75| | |
|
142 |
+
| | |exact_match_stderr,get-answer| 0.01| | |
|
143 |
+
| | |alias |gsm8k| | |
|
144 |
+
|
145 |
### TruthfulQA
|
146 |
| Task |Version|Metric|Value| |Stderr|
|
147 |
|-------------|------:|------|----:|---|-----:|
|