alexmarques
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -150,9 +150,9 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
150 |
</td>
|
151 |
<td>69.43
|
152 |
</td>
|
153 |
-
<td>
|
154 |
</td>
|
155 |
-
<td>99.
|
156 |
</td>
|
157 |
</tr>
|
158 |
<tr>
|
@@ -160,9 +160,9 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
160 |
</td>
|
161 |
<td>72.56
|
162 |
</td>
|
163 |
-
<td>72.
|
164 |
</td>
|
165 |
-
<td>99.
|
166 |
</td>
|
167 |
</tr>
|
168 |
<tr>
|
@@ -170,9 +170,9 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
170 |
</td>
|
171 |
<td>81.57
|
172 |
</td>
|
173 |
-
<td>81.
|
174 |
</td>
|
175 |
-
<td>99.
|
176 |
</td>
|
177 |
</tr>
|
178 |
<tr>
|
@@ -220,9 +220,9 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
220 |
</td>
|
221 |
<td><strong>74.04</strong>
|
222 |
</td>
|
223 |
-
<td><strong>73.
|
224 |
</td>
|
225 |
-
<td><strong>99.
|
226 |
</td>
|
227 |
</tr>
|
228 |
</table>
|
|
|
150 |
</td>
|
151 |
<td>69.43
|
152 |
</td>
|
153 |
+
<td>68.78
|
154 |
</td>
|
155 |
+
<td>99.1%
|
156 |
</td>
|
157 |
</tr>
|
158 |
<tr>
|
|
|
160 |
</td>
|
161 |
<td>72.56
|
162 |
</td>
|
163 |
+
<td>72.20
|
164 |
</td>
|
165 |
+
<td>99.5%
|
166 |
</td>
|
167 |
</tr>
|
168 |
<tr>
|
|
|
170 |
</td>
|
171 |
<td>81.57
|
172 |
</td>
|
173 |
+
<td>81.06
|
174 |
</td>
|
175 |
+
<td>99.4%
|
176 |
</td>
|
177 |
</tr>
|
178 |
<tr>
|
|
|
220 |
</td>
|
221 |
<td><strong>74.04</strong>
|
222 |
</td>
|
223 |
+
<td><strong>73.59</strong>
|
224 |
</td>
|
225 |
+
<td><strong>99.4%</strong>
|
226 |
</td>
|
227 |
</tr>
|
228 |
</table>
|