daochf commited on
Commit
ce923aa
·
verified ·
1 Parent(s): 72e927c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +607 -1
README.md CHANGED
@@ -7,7 +7,613 @@ base_model: meta-llama/Llama-2-13b-chat-hf
7
 
8
  <!-- Provide a quick summary of what the model is/does. -->
9
 
10
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  ## Model Details
13
 
 
7
 
8
  <!-- Provide a quick summary of what the model is/does. -->
9
 
10
+ ```text
11
+ {
12
+ "alpha_pattern": {},
13
+ "auto_mapping": null,
14
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-chat-hf",
15
+ "bias": "none",
16
+ "fan_in_fan_out": false,
17
+ "inference_mode": true,
18
+ "init_lora_weights": true,
19
+ "layers_pattern": null,
20
+ "layers_to_transform": null,
21
+ "lora_alpha": 16,
22
+ "lora_dropout": 0.05,
23
+ "modules_to_save": null,
24
+ "peft_type": "LORA",
25
+ "r": 8,
26
+ "rank_pattern": {},
27
+ "revision": null,
28
+ "target_modules": [
29
+ "q_proj",
30
+ "v_proj"
31
+ ],
32
+ "task_type": "CAUSAL_LM"
33
+ }
34
+ ```
35
+
36
+ ```text
37
+ {
38
+ "best_metric": 0.05725787207484245,
39
+ "best_model_checkpoint": "./Lora-Meta-Llama2-13b-chat-hf-QandA_2g_v01-v07\\checkpoint-80440",
40
+ "epoch": 40.0,
41
+ "eval_steps": 500,
42
+ "global_step": 80440,
43
+ "is_hyper_param_search": false,
44
+ "is_local_process_zero": true,
45
+ "is_world_process_zero": true,
46
+ "log_history": [
47
+ {
48
+ "epoch": 1.0,
49
+ "learning_rate": 4.875e-05,
50
+ "loss": 0.7946,
51
+ "step": 2011
52
+ },
53
+ {
54
+ "epoch": 1.0,
55
+ "eval_loss": 0.6483868360519409,
56
+ "eval_runtime": 69.9926,
57
+ "eval_samples_per_second": 5.758,
58
+ "eval_steps_per_second": 0.729,
59
+ "step": 2011
60
+ },
61
+ {
62
+ "epoch": 2.0,
63
+ "learning_rate": 4.75e-05,
64
+ "loss": 0.5829,
65
+ "step": 4022
66
+ },
67
+ {
68
+ "epoch": 2.0,
69
+ "eval_loss": 0.5402843356132507,
70
+ "eval_runtime": 70.0086,
71
+ "eval_samples_per_second": 5.756,
72
+ "eval_steps_per_second": 0.728,
73
+ "step": 4022
74
+ },
75
+ {
76
+ "epoch": 3.0,
77
+ "learning_rate": 4.6250000000000006e-05,
78
+ "loss": 0.4918,
79
+ "step": 6033
80
+ },
81
+ {
82
+ "epoch": 3.0,
83
+ "eval_loss": 0.45011985301971436,
84
+ "eval_runtime": 70.1392,
85
+ "eval_samples_per_second": 5.746,
86
+ "eval_steps_per_second": 0.727,
87
+ "step": 6033
88
+ },
89
+ {
90
+ "epoch": 4.0,
91
+ "learning_rate": 4.5e-05,
92
+ "loss": 0.419,
93
+ "step": 8044
94
+ },
95
+ {
96
+ "epoch": 4.0,
97
+ "eval_loss": 0.38660600781440735,
98
+ "eval_runtime": 70.1321,
99
+ "eval_samples_per_second": 5.746,
100
+ "eval_steps_per_second": 0.727,
101
+ "step": 8044
102
+ },
103
+ {
104
+ "epoch": 5.0,
105
+ "learning_rate": 4.375e-05,
106
+ "loss": 0.3644,
107
+ "step": 10055
108
+ },
109
+ {
110
+ "epoch": 5.0,
111
+ "eval_loss": 0.33983489871025085,
112
+ "eval_runtime": 69.9916,
113
+ "eval_samples_per_second": 5.758,
114
+ "eval_steps_per_second": 0.729,
115
+ "step": 10055
116
+ },
117
+ {
118
+ "epoch": 6.0,
119
+ "learning_rate": 4.25e-05,
120
+ "loss": 0.3211,
121
+ "step": 12066
122
+ },
123
+ {
124
+ "epoch": 6.0,
125
+ "eval_loss": 0.3004438281059265,
126
+ "eval_runtime": 70.0768,
127
+ "eval_samples_per_second": 5.751,
128
+ "eval_steps_per_second": 0.728,
129
+ "step": 12066
130
+ },
131
+ {
132
+ "epoch": 7.0,
133
+ "learning_rate": 4.125e-05,
134
+ "loss": 0.2854,
135
+ "step": 14077
136
+ },
137
+ {
138
+ "epoch": 7.0,
139
+ "eval_loss": 0.2634061574935913,
140
+ "eval_runtime": 70.2069,
141
+ "eval_samples_per_second": 5.74,
142
+ "eval_steps_per_second": 0.726,
143
+ "step": 14077
144
+ },
145
+ {
146
+ "epoch": 8.0,
147
+ "learning_rate": 4e-05,
148
+ "loss": 0.255,
149
+ "step": 16088
150
+ },
151
+ {
152
+ "epoch": 8.0,
153
+ "eval_loss": 0.23876915872097015,
154
+ "eval_runtime": 70.1721,
155
+ "eval_samples_per_second": 5.743,
156
+ "eval_steps_per_second": 0.727,
157
+ "step": 16088
158
+ },
159
+ {
160
+ "epoch": 9.0,
161
+ "learning_rate": 3.875e-05,
162
+ "loss": 0.2281,
163
+ "step": 18099
164
+ },
165
+ {
166
+ "epoch": 9.0,
167
+ "eval_loss": 0.21539266407489777,
168
+ "eval_runtime": 70.1355,
169
+ "eval_samples_per_second": 5.746,
170
+ "eval_steps_per_second": 0.727,
171
+ "step": 18099
172
+ },
173
+ {
174
+ "epoch": 10.0,
175
+ "learning_rate": 3.7500000000000003e-05,
176
+ "loss": 0.2052,
177
+ "step": 20110
178
+ },
179
+ {
180
+ "epoch": 10.0,
181
+ "eval_loss": 0.18904653191566467,
182
+ "eval_runtime": 70.1349,
183
+ "eval_samples_per_second": 5.746,
184
+ "eval_steps_per_second": 0.727,
185
+ "step": 20110
186
+ },
187
+ {
188
+ "epoch": 11.0,
189
+ "learning_rate": 3.625e-05,
190
+ "loss": 0.1853,
191
+ "step": 22121
192
+ },
193
+ {
194
+ "epoch": 11.0,
195
+ "eval_loss": 0.17202098667621613,
196
+ "eval_runtime": 69.9829,
197
+ "eval_samples_per_second": 5.759,
198
+ "eval_steps_per_second": 0.729,
199
+ "step": 22121
200
+ },
201
+ {
202
+ "epoch": 12.0,
203
+ "learning_rate": 3.5e-05,
204
+ "loss": 0.1673,
205
+ "step": 24132
206
+ },
207
+ {
208
+ "epoch": 12.0,
209
+ "eval_loss": 0.15875761210918427,
210
+ "eval_runtime": 70.1596,
211
+ "eval_samples_per_second": 5.744,
212
+ "eval_steps_per_second": 0.727,
213
+ "step": 24132
214
+ },
215
+ {
216
+ "epoch": 13.0,
217
+ "learning_rate": 3.375000000000001e-05,
218
+ "loss": 0.1526,
219
+ "step": 26143
220
+ },
221
+ {
222
+ "epoch": 13.0,
223
+ "eval_loss": 0.14447805285453796,
224
+ "eval_runtime": 70.1252,
225
+ "eval_samples_per_second": 5.747,
226
+ "eval_steps_per_second": 0.727,
227
+ "step": 26143
228
+ },
229
+ {
230
+ "epoch": 14.0,
231
+ "learning_rate": 3.2500000000000004e-05,
232
+ "loss": 0.1398,
233
+ "step": 28154
234
+ },
235
+ {
236
+ "epoch": 14.0,
237
+ "eval_loss": 0.13342420756816864,
238
+ "eval_runtime": 70.1196,
239
+ "eval_samples_per_second": 5.747,
240
+ "eval_steps_per_second": 0.727,
241
+ "step": 28154
242
+ },
243
+ {
244
+ "epoch": 15.0,
245
+ "learning_rate": 3.125e-05,
246
+ "loss": 0.1285,
247
+ "step": 30165
248
+ },
249
+ {
250
+ "epoch": 15.0,
251
+ "eval_loss": 0.12114470452070236,
252
+ "eval_runtime": 70.2112,
253
+ "eval_samples_per_second": 5.74,
254
+ "eval_steps_per_second": 0.726,
255
+ "step": 30165
256
+ },
257
+ {
258
+ "epoch": 16.0,
259
+ "learning_rate": 3e-05,
260
+ "loss": 0.1187,
261
+ "step": 32176
262
+ },
263
+ {
264
+ "epoch": 16.0,
265
+ "eval_loss": 0.11447372287511826,
266
+ "eval_runtime": 70.1257,
267
+ "eval_samples_per_second": 5.747,
268
+ "eval_steps_per_second": 0.727,
269
+ "step": 32176
270
+ },
271
+ {
272
+ "epoch": 17.0,
273
+ "learning_rate": 2.8749999999999997e-05,
274
+ "loss": 0.1104,
275
+ "step": 34187
276
+ },
277
+ {
278
+ "epoch": 17.0,
279
+ "eval_loss": 0.10539893060922623,
280
+ "eval_runtime": 70.1826,
281
+ "eval_samples_per_second": 5.742,
282
+ "eval_steps_per_second": 0.727,
283
+ "step": 34187
284
+ },
285
+ {
286
+ "epoch": 18.0,
287
+ "learning_rate": 2.7500000000000004e-05,
288
+ "loss": 0.1038,
289
+ "step": 36198
290
+ },
291
+ {
292
+ "epoch": 18.0,
293
+ "eval_loss": 0.09906744956970215,
294
+ "eval_runtime": 70.117,
295
+ "eval_samples_per_second": 5.748,
296
+ "eval_steps_per_second": 0.727,
297
+ "step": 36198
298
+ },
299
+ {
300
+ "epoch": 19.0,
301
+ "learning_rate": 2.625e-05,
302
+ "loss": 0.0974,
303
+ "step": 38209
304
+ },
305
+ {
306
+ "epoch": 19.0,
307
+ "eval_loss": 0.09452048689126968,
308
+ "eval_runtime": 70.1925,
309
+ "eval_samples_per_second": 5.741,
310
+ "eval_steps_per_second": 0.727,
311
+ "step": 38209
312
+ },
313
+ {
314
+ "epoch": 20.0,
315
+ "learning_rate": 2.5e-05,
316
+ "loss": 0.0927,
317
+ "step": 40220
318
+ },
319
+ {
320
+ "epoch": 20.0,
321
+ "eval_loss": 0.09014962613582611,
322
+ "eval_runtime": 69.9849,
323
+ "eval_samples_per_second": 5.758,
324
+ "eval_steps_per_second": 0.729,
325
+ "step": 40220
326
+ },
327
+ {
328
+ "epoch": 21.0,
329
+ "learning_rate": 2.375e-05,
330
+ "loss": 0.0878,
331
+ "step": 42231
332
+ },
333
+ {
334
+ "epoch": 21.0,
335
+ "eval_loss": 0.08503083884716034,
336
+ "eval_runtime": 70.1728,
337
+ "eval_samples_per_second": 5.743,
338
+ "eval_steps_per_second": 0.727,
339
+ "step": 42231
340
+ },
341
+ {
342
+ "epoch": 22.0,
343
+ "learning_rate": 2.25e-05,
344
+ "loss": 0.0838,
345
+ "step": 44242
346
+ },
347
+ {
348
+ "epoch": 22.0,
349
+ "eval_loss": 0.0820975974202156,
350
+ "eval_runtime": 70.0791,
351
+ "eval_samples_per_second": 5.751,
352
+ "eval_steps_per_second": 0.728,
353
+ "step": 44242
354
+ },
355
+ {
356
+ "epoch": 23.0,
357
+ "learning_rate": 2.125e-05,
358
+ "loss": 0.0801,
359
+ "step": 46253
360
+ },
361
+ {
362
+ "epoch": 23.0,
363
+ "eval_loss": 0.0777197927236557,
364
+ "eval_runtime": 69.9961,
365
+ "eval_samples_per_second": 5.757,
366
+ "eval_steps_per_second": 0.729,
367
+ "step": 46253
368
+ },
369
+ {
370
+ "epoch": 24.0,
371
+ "learning_rate": 2e-05,
372
+ "loss": 0.0775,
373
+ "step": 48264
374
+ },
375
+ {
376
+ "epoch": 24.0,
377
+ "eval_loss": 0.0748789981007576,
378
+ "eval_runtime": 69.905,
379
+ "eval_samples_per_second": 5.765,
380
+ "eval_steps_per_second": 0.73,
381
+ "step": 48264
382
+ },
383
+ {
384
+ "epoch": 25.0,
385
+ "learning_rate": 1.8750000000000002e-05,
386
+ "loss": 0.0751,
387
+ "step": 50275
388
+ },
389
+ {
390
+ "epoch": 25.0,
391
+ "eval_loss": 0.0729849636554718,
392
+ "eval_runtime": 70.0915,
393
+ "eval_samples_per_second": 5.75,
394
+ "eval_steps_per_second": 0.728,
395
+ "step": 50275
396
+ },
397
+ {
398
+ "epoch": 26.0,
399
+ "learning_rate": 1.75e-05,
400
+ "loss": 0.0727,
401
+ "step": 52286
402
+ },
403
+ {
404
+ "epoch": 26.0,
405
+ "eval_loss": 0.0698952004313469,
406
+ "eval_runtime": 70.0781,
407
+ "eval_samples_per_second": 5.751,
408
+ "eval_steps_per_second": 0.728,
409
+ "step": 52286
410
+ },
411
+ {
412
+ "epoch": 27.0,
413
+ "learning_rate": 1.6250000000000002e-05,
414
+ "loss": 0.0706,
415
+ "step": 54297
416
+ },
417
+ {
418
+ "epoch": 27.0,
419
+ "eval_loss": 0.06760543584823608,
420
+ "eval_runtime": 69.9618,
421
+ "eval_samples_per_second": 5.76,
422
+ "eval_steps_per_second": 0.729,
423
+ "step": 54297
424
+ },
425
+ {
426
+ "epoch": 28.0,
427
+ "learning_rate": 1.5e-05,
428
+ "loss": 0.0691,
429
+ "step": 56308
430
+ },
431
+ {
432
+ "epoch": 28.0,
433
+ "eval_loss": 0.06610006093978882,
434
+ "eval_runtime": 70.1085,
435
+ "eval_samples_per_second": 5.748,
436
+ "eval_steps_per_second": 0.727,
437
+ "step": 56308
438
+ },
439
+ {
440
+ "epoch": 29.0,
441
+ "learning_rate": 1.3750000000000002e-05,
442
+ "loss": 0.0678,
443
+ "step": 58319
444
+ },
445
+ {
446
+ "epoch": 29.0,
447
+ "eval_loss": 0.06433883309364319,
448
+ "eval_runtime": 70.1363,
449
+ "eval_samples_per_second": 5.746,
450
+ "eval_steps_per_second": 0.727,
451
+ "step": 58319
452
+ },
453
+ {
454
+ "epoch": 30.0,
455
+ "learning_rate": 1.25e-05,
456
+ "loss": 0.0666,
457
+ "step": 60330
458
+ },
459
+ {
460
+ "epoch": 30.0,
461
+ "eval_loss": 0.06277326494455338,
462
+ "eval_runtime": 70.0925,
463
+ "eval_samples_per_second": 5.75,
464
+ "eval_steps_per_second": 0.728,
465
+ "step": 60330
466
+ },
467
+ {
468
+ "epoch": 31.0,
469
+ "learning_rate": 1.125e-05,
470
+ "loss": 0.0652,
471
+ "step": 62341
472
+ },
473
+ {
474
+ "epoch": 31.0,
475
+ "eval_loss": 0.06192418932914734,
476
+ "eval_runtime": 69.9357,
477
+ "eval_samples_per_second": 5.762,
478
+ "eval_steps_per_second": 0.729,
479
+ "step": 62341
480
+ },
481
+ {
482
+ "epoch": 32.0,
483
+ "learning_rate": 1e-05,
484
+ "loss": 0.0644,
485
+ "step": 64352
486
+ },
487
+ {
488
+ "epoch": 32.0,
489
+ "eval_loss": 0.0610126368701458,
490
+ "eval_runtime": 70.061,
491
+ "eval_samples_per_second": 5.752,
492
+ "eval_steps_per_second": 0.728,
493
+ "step": 64352
494
+ },
495
+ {
496
+ "epoch": 33.0,
497
+ "learning_rate": 8.75e-06,
498
+ "loss": 0.0635,
499
+ "step": 66363
500
+ },
501
+ {
502
+ "epoch": 33.0,
503
+ "eval_loss": 0.060028236359357834,
504
+ "eval_runtime": 69.9253,
505
+ "eval_samples_per_second": 5.763,
506
+ "eval_steps_per_second": 0.729,
507
+ "step": 66363
508
+ },
509
+ {
510
+ "epoch": 34.0,
511
+ "learning_rate": 7.5e-06,
512
+ "loss": 0.0629,
513
+ "step": 68374
514
+ },
515
+ {
516
+ "epoch": 34.0,
517
+ "eval_loss": 0.05925382673740387,
518
+ "eval_runtime": 69.9042,
519
+ "eval_samples_per_second": 5.765,
520
+ "eval_steps_per_second": 0.73,
521
+ "step": 68374
522
+ },
523
+ {
524
+ "epoch": 35.0,
525
+ "learning_rate": 6.25e-06,
526
+ "loss": 0.0622,
527
+ "step": 70385
528
+ },
529
+ {
530
+ "epoch": 35.0,
531
+ "eval_loss": 0.05860263481736183,
532
+ "eval_runtime": 69.8706,
533
+ "eval_samples_per_second": 5.768,
534
+ "eval_steps_per_second": 0.73,
535
+ "step": 70385
536
+ },
537
+ {
538
+ "epoch": 36.0,
539
+ "learning_rate": 5e-06,
540
+ "loss": 0.0616,
541
+ "step": 72396
542
+ },
543
+ {
544
+ "epoch": 36.0,
545
+ "eval_loss": 0.05808304622769356,
546
+ "eval_runtime": 69.9999,
547
+ "eval_samples_per_second": 5.757,
548
+ "eval_steps_per_second": 0.729,
549
+ "step": 72396
550
+ },
551
+ {
552
+ "epoch": 37.0,
553
+ "learning_rate": 3.75e-06,
554
+ "loss": 0.061,
555
+ "step": 74407
556
+ },
557
+ {
558
+ "epoch": 37.0,
559
+ "eval_loss": 0.057825859636068344,
560
+ "eval_runtime": 69.9835,
561
+ "eval_samples_per_second": 5.758,
562
+ "eval_steps_per_second": 0.729,
563
+ "step": 74407
564
+ },
565
+ {
566
+ "epoch": 38.0,
567
+ "learning_rate": 2.5e-06,
568
+ "loss": 0.0605,
569
+ "step": 76418
570
+ },
571
+ {
572
+ "epoch": 38.0,
573
+ "eval_loss": 0.057523321360349655,
574
+ "eval_runtime": 69.9943,
575
+ "eval_samples_per_second": 5.758,
576
+ "eval_steps_per_second": 0.729,
577
+ "step": 76418
578
+ },
579
+ {
580
+ "epoch": 39.0,
581
+ "learning_rate": 1.25e-06,
582
+ "loss": 0.06,
583
+ "step": 78429
584
+ },
585
+ {
586
+ "epoch": 39.0,
587
+ "eval_loss": 0.05731285735964775,
588
+ "eval_runtime": 70.0036,
589
+ "eval_samples_per_second": 5.757,
590
+ "eval_steps_per_second": 0.729,
591
+ "step": 78429
592
+ },
593
+ {
594
+ "epoch": 40.0,
595
+ "learning_rate": 0.0,
596
+ "loss": 0.0595,
597
+ "step": 80440
598
+ },
599
+ {
600
+ "epoch": 40.0,
601
+ "eval_loss": 0.05725787207484245,
602
+ "eval_runtime": 69.9176,
603
+ "eval_samples_per_second": 5.764,
604
+ "eval_steps_per_second": 0.729,
605
+ "step": 80440
606
+ }
607
+ ],
608
+ "logging_steps": 500,
609
+ "max_steps": 80440,
610
+ "num_train_epochs": 40,
611
+ "save_steps": 500,
612
+ "total_flos": 9.118285061492736e+17,
613
+ "trial_name": null,
614
+ "trial_params": null
615
+ }
616
+ ```
617
 
618
  ## Model Details
619