alexmarques committed
Commit 41fa77c · verified · 1 Parent(s): e191b8d

Update README.md

Files changed (1): README.md (+35 -12)
README.md CHANGED
@@ -148,9 +148,19 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
  <tr>
  <td>MMLU (5-shot)
  </td>
- <td>67.94
+ <td>69.43
  </td>
- <td>67.58
+ <td>69.37
+ </td>
+ <td>99.9%
+ </td>
+ </tr>
+ <tr>
+ <td>MMLU (CoT, 0-shot)
+ </td>
+ <td>72.56
+ </td>
+ <td>72.14
  </td>
  <td>99.4%
  </td>
@@ -158,11 +168,11 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
  <tr>
  <td>ARC Challenge (0-shot)
  </td>
- <td>83.19
+ <td>81.57
  </td>
- <td>82.08
+ <td>81.48
  </td>
- <td>98.7%
+ <td>99.9%
  </td>
  </tr>
  <tr>
@@ -208,11 +218,11 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
  <tr>
  <td><strong>Average</strong>
  </td>
- <td><strong>74.31</strong>
+ <td><strong>74.04</strong>
  </td>
- <td><strong>73.79</strong>
+ <td><strong>73.89</strong>
  </td>
- <td><strong>99.3%</strong>
+ <td><strong>99.8%</strong>
  </td>
  </tr>
  </table>
@@ -225,17 +235,30 @@ The results were obtained using the following commands:
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
- --tasks mmlu \
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --tasks mmlu_llama_3.1_instruct \
+ --fewshot_as_multiturn \
+ --apply_chat_template \
  --num_fewshot 5 \
  --batch_size auto
  ```
 
+ #### MMLU-CoT
+ ```
+ lm_eval \
+ --model vllm \
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4064,max_gen_toks=1024,tensor_parallel_size=1 \
+ --tasks mmlu_cot_0shot_llama_3.1_instruct \
+ --apply_chat_template \
+ --num_fewshot 0 \
+ --batch_size auto
+ ```
+
  #### ARC-Challenge
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=3940,max_gen_toks=100,tensor_parallel_size=1 \
  --tasks arc_challenge_llama_3.1_instruct \
  --apply_chat_template \
  --num_fewshot 0 \
@@ -246,7 +269,7 @@ lm_eval \
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,max_gen_toks=1024,tensor_parallel_size=1 \
  --tasks gsm8k_cot_llama_3.1_instruct \
  --fewshot_as_multiturn \
  --apply_chat_template \