farmery commited on
Commit
95e40fd
·
verified ·
1 Parent(s): cfc4bb2

Training in progress, step 342, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76ddad9fbeff874dda8ca1839e7bb1197b79f011a28582bf37a51641b4be011a
3
  size 1521616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368e00a32e5e67904cdc68fad7b0d3ebbf6aa48a43908471978f2a2f044c71d6
3
  size 1521616
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae8d18d29e4ff91cb5bdd564154a39689a95358bc39817fda9038cf84430b7ea
3
  size 3108666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13f99e35e1cd72dd67928139d82e8f270376e333cc058eeeca058766e79fd6da
3
  size 3108666
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38556c732afc59cb1a699286598dbc47ed9ce8c5e433b5847ecb635b371d31ad
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b725e55dc7e3cfcd2470e0b973a56ed83568003bb225faa55cd6d2bf9770b30b
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fccdf7b83d8154c1ea0818dc3f4385f34041c8c4464b2b411fb0c6aa67d41b4
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3322560e39a902081358f17ff4055da3283ebe3e2fa1e6b657dab85c86236f7
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c77677d38c53addb49d40cbf8a8c4cd2947a26f613e7431ff240294c5c6aea18
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea66678ca299dd2b3d155e3c39e632e66dc28be0c18e9664e7c6b137c3f76689
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f16ecca0e5c37740bc12b846283c8173114d94f01e5134b62290fc0f5663c8a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f314b3eeeb0bd949fe539d551bea83f6ef4f1ddb092c681433bf0279c6b7fe91
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96480ebcfed4000c6ecf039795063cb715ce6511f645a848a58f5db20e8ed45b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa55a29e9da8bc1ccf97b1a76ccd03130ba0539481d5a33300527a24e5c40c2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 6.472244739532471,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.07734388217948615,
5
  "eval_steps": 25,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2211,6 +2211,308 @@
2211
  "eval_samples_per_second": 249.929,
2212
  "eval_steps_per_second": 64.982,
2213
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2214
  }
2215
  ],
2216
  "logging_steps": 1,
@@ -2234,12 +2536,12 @@
2234
  "should_evaluate": false,
2235
  "should_log": false,
2236
  "should_save": true,
2237
- "should_training_stop": false
2238
  },
2239
  "attributes": {}
2240
  }
2241
  },
2242
- "total_flos": 2787247007989760.0,
2243
  "train_batch_size": 1,
2244
  "trial_name": null,
2245
  "trial_params": null
 
1
  {
2
  "best_metric": 6.472244739532471,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.08817202568461421,
5
  "eval_steps": 25,
6
+ "global_step": 342,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2211
  "eval_samples_per_second": 249.929,
2212
  "eval_steps_per_second": 64.982,
2213
  "step": 300
2214
+ },
2215
+ {
2216
+ "epoch": 0.07760169512008443,
2217
+ "grad_norm": 1.2554875612258911,
2218
+ "learning_rate": 1.3404895720259053e-05,
2219
+ "loss": 7.3701,
2220
+ "step": 301
2221
+ },
2222
+ {
2223
+ "epoch": 0.07785950806068272,
2224
+ "grad_norm": 0.8463560342788696,
2225
+ "learning_rate": 1.3242829026768597e-05,
2226
+ "loss": 6.4277,
2227
+ "step": 302
2228
+ },
2229
+ {
2230
+ "epoch": 0.078117321001281,
2231
+ "grad_norm": 0.644782304763794,
2232
+ "learning_rate": 1.3084569796502682e-05,
2233
+ "loss": 6.356,
2234
+ "step": 303
2235
+ },
2236
+ {
2237
+ "epoch": 0.0783751339418793,
2238
+ "grad_norm": 0.7398347854614258,
2239
+ "learning_rate": 1.293013245970609e-05,
2240
+ "loss": 6.2324,
2241
+ "step": 304
2242
+ },
2243
+ {
2244
+ "epoch": 0.07863294688247759,
2245
+ "grad_norm": 0.5900807976722717,
2246
+ "learning_rate": 1.2779531098139333e-05,
2247
+ "loss": 6.1167,
2248
+ "step": 305
2249
+ },
2250
+ {
2251
+ "epoch": 0.07889075982307588,
2252
+ "grad_norm": 0.6693830490112305,
2253
+ "learning_rate": 1.263277944379459e-05,
2254
+ "loss": 6.2377,
2255
+ "step": 306
2256
+ },
2257
+ {
2258
+ "epoch": 0.07914857276367415,
2259
+ "grad_norm": 0.6465635895729065,
2260
+ "learning_rate": 1.248989087764366e-05,
2261
+ "loss": 6.1853,
2262
+ "step": 307
2263
+ },
2264
+ {
2265
+ "epoch": 0.07940638570427244,
2266
+ "grad_norm": 0.6336793303489685,
2267
+ "learning_rate": 1.2350878428417839e-05,
2268
+ "loss": 5.9979,
2269
+ "step": 308
2270
+ },
2271
+ {
2272
+ "epoch": 0.07966419864487073,
2273
+ "grad_norm": 0.6897872090339661,
2274
+ "learning_rate": 1.2215754771419997e-05,
2275
+ "loss": 6.0263,
2276
+ "step": 309
2277
+ },
2278
+ {
2279
+ "epoch": 0.07992201158546902,
2280
+ "grad_norm": 0.7997320294380188,
2281
+ "learning_rate": 1.2084532227368761e-05,
2282
+ "loss": 6.3959,
2283
+ "step": 310
2284
+ },
2285
+ {
2286
+ "epoch": 0.08017982452606731,
2287
+ "grad_norm": 1.0460010766983032,
2288
+ "learning_rate": 1.1957222761275149e-05,
2289
+ "loss": 6.7064,
2290
+ "step": 311
2291
+ },
2292
+ {
2293
+ "epoch": 0.08043763746666559,
2294
+ "grad_norm": 1.4831907749176025,
2295
+ "learning_rate": 1.183383798135157e-05,
2296
+ "loss": 6.8091,
2297
+ "step": 312
2298
+ },
2299
+ {
2300
+ "epoch": 0.08069545040726388,
2301
+ "grad_norm": 2.0493836402893066,
2302
+ "learning_rate": 1.171438913795338e-05,
2303
+ "loss": 7.5173,
2304
+ "step": 313
2305
+ },
2306
+ {
2307
+ "epoch": 0.08095326334786217,
2308
+ "grad_norm": 1.0544261932373047,
2309
+ "learning_rate": 1.1598887122553061e-05,
2310
+ "loss": 6.7366,
2311
+ "step": 314
2312
+ },
2313
+ {
2314
+ "epoch": 0.08121107628846046,
2315
+ "grad_norm": 0.7413960099220276,
2316
+ "learning_rate": 1.1487342466747112e-05,
2317
+ "loss": 6.2798,
2318
+ "step": 315
2319
+ },
2320
+ {
2321
+ "epoch": 0.08146888922905875,
2322
+ "grad_norm": 0.6017177700996399,
2323
+ "learning_rate": 1.137976534129579e-05,
2324
+ "loss": 6.3163,
2325
+ "step": 316
2326
+ },
2327
+ {
2328
+ "epoch": 0.08172670216965702,
2329
+ "grad_norm": 0.6235430240631104,
2330
+ "learning_rate": 1.127616555519573e-05,
2331
+ "loss": 6.2398,
2332
+ "step": 317
2333
+ },
2334
+ {
2335
+ "epoch": 0.08198451511025531,
2336
+ "grad_norm": 0.6497268676757812,
2337
+ "learning_rate": 1.1176552554785504e-05,
2338
+ "loss": 6.2297,
2339
+ "step": 318
2340
+ },
2341
+ {
2342
+ "epoch": 0.0822423280508536,
2343
+ "grad_norm": 0.6185296177864075,
2344
+ "learning_rate": 1.1080935422884358e-05,
2345
+ "loss": 6.0099,
2346
+ "step": 319
2347
+ },
2348
+ {
2349
+ "epoch": 0.08250014099145189,
2350
+ "grad_norm": 0.7389028668403625,
2351
+ "learning_rate": 1.0989322877963985e-05,
2352
+ "loss": 5.8818,
2353
+ "step": 320
2354
+ },
2355
+ {
2356
+ "epoch": 0.08275795393205018,
2357
+ "grad_norm": 0.7540916204452515,
2358
+ "learning_rate": 1.0901723273353597e-05,
2359
+ "loss": 6.1902,
2360
+ "step": 321
2361
+ },
2362
+ {
2363
+ "epoch": 0.08301576687264846,
2364
+ "grad_norm": 0.7044777274131775,
2365
+ "learning_rate": 1.0818144596478224e-05,
2366
+ "loss": 6.1747,
2367
+ "step": 322
2368
+ },
2369
+ {
2370
+ "epoch": 0.08327357981324675,
2371
+ "grad_norm": 0.9389222264289856,
2372
+ "learning_rate": 1.0738594468130452e-05,
2373
+ "loss": 6.479,
2374
+ "step": 323
2375
+ },
2376
+ {
2377
+ "epoch": 0.08353139275384504,
2378
+ "grad_norm": 1.1994636058807373,
2379
+ "learning_rate": 1.0663080141775504e-05,
2380
+ "loss": 6.6693,
2381
+ "step": 324
2382
+ },
2383
+ {
2384
+ "epoch": 0.08378920569444333,
2385
+ "grad_norm": 3.2903223037719727,
2386
+ "learning_rate": 1.0591608502889928e-05,
2387
+ "loss": 6.1876,
2388
+ "step": 325
2389
+ },
2390
+ {
2391
+ "epoch": 0.08378920569444333,
2392
+ "eval_loss": 6.461916446685791,
2393
+ "eval_runtime": 0.1956,
2394
+ "eval_samples_per_second": 255.68,
2395
+ "eval_steps_per_second": 66.477,
2396
+ "step": 325
2397
+ },
2398
+ {
2399
+ "epoch": 0.08404701863504162,
2400
+ "grad_norm": 1.2868388891220093,
2401
+ "learning_rate": 1.0524186068333692e-05,
2402
+ "loss": 7.4553,
2403
+ "step": 326
2404
+ },
2405
+ {
2406
+ "epoch": 0.0843048315756399,
2407
+ "grad_norm": 0.8954585194587708,
2408
+ "learning_rate": 1.046081898575604e-05,
2409
+ "loss": 6.399,
2410
+ "step": 327
2411
+ },
2412
+ {
2413
+ "epoch": 0.08456264451623818,
2414
+ "grad_norm": 0.612129807472229,
2415
+ "learning_rate": 1.04015130330349e-05,
2416
+ "loss": 6.4022,
2417
+ "step": 328
2418
+ },
2419
+ {
2420
+ "epoch": 0.08482045745683647,
2421
+ "grad_norm": 0.6739881634712219,
2422
+ "learning_rate": 1.0346273617750057e-05,
2423
+ "loss": 6.0706,
2424
+ "step": 329
2425
+ },
2426
+ {
2427
+ "epoch": 0.08507827039743476,
2428
+ "grad_norm": 0.7707552909851074,
2429
+ "learning_rate": 1.0295105776690108e-05,
2430
+ "loss": 6.0031,
2431
+ "step": 330
2432
+ },
2433
+ {
2434
+ "epoch": 0.08533608333803305,
2435
+ "grad_norm": 0.7329960465431213,
2436
+ "learning_rate": 1.0248014175393177e-05,
2437
+ "loss": 6.1073,
2438
+ "step": 331
2439
+ },
2440
+ {
2441
+ "epoch": 0.08559389627863133,
2442
+ "grad_norm": 0.7465705275535583,
2443
+ "learning_rate": 1.0205003107721506e-05,
2444
+ "loss": 6.0385,
2445
+ "step": 332
2446
+ },
2447
+ {
2448
+ "epoch": 0.08585170921922962,
2449
+ "grad_norm": 0.6375886797904968,
2450
+ "learning_rate": 1.0166076495469963e-05,
2451
+ "loss": 6.0305,
2452
+ "step": 333
2453
+ },
2454
+ {
2455
+ "epoch": 0.08610952215982791,
2456
+ "grad_norm": 0.7056543231010437,
2457
+ "learning_rate": 1.0131237888008412e-05,
2458
+ "loss": 6.1335,
2459
+ "step": 334
2460
+ },
2461
+ {
2462
+ "epoch": 0.0863673351004262,
2463
+ "grad_norm": 0.8683858513832092,
2464
+ "learning_rate": 1.0100490461958109e-05,
2465
+ "loss": 6.3272,
2466
+ "step": 335
2467
+ },
2468
+ {
2469
+ "epoch": 0.08662514804102449,
2470
+ "grad_norm": 0.9683743119239807,
2471
+ "learning_rate": 1.0073837020902033e-05,
2472
+ "loss": 6.5318,
2473
+ "step": 336
2474
+ },
2475
+ {
2476
+ "epoch": 0.08688296098162278,
2477
+ "grad_norm": 1.4957187175750732,
2478
+ "learning_rate": 1.0051279995129273e-05,
2479
+ "loss": 6.8366,
2480
+ "step": 337
2481
+ },
2482
+ {
2483
+ "epoch": 0.08714077392222105,
2484
+ "grad_norm": 1.5700278282165527,
2485
+ "learning_rate": 1.0032821441413394e-05,
2486
+ "loss": 7.7112,
2487
+ "step": 338
2488
+ },
2489
+ {
2490
+ "epoch": 0.08739858686281934,
2491
+ "grad_norm": 1.0813219547271729,
2492
+ "learning_rate": 1.0018463042824957e-05,
2493
+ "loss": 6.923,
2494
+ "step": 339
2495
+ },
2496
+ {
2497
+ "epoch": 0.08765639980341763,
2498
+ "grad_norm": 0.6960993409156799,
2499
+ "learning_rate": 1.0008206108577992e-05,
2500
+ "loss": 6.4607,
2501
+ "step": 340
2502
+ },
2503
+ {
2504
+ "epoch": 0.08791421274401592,
2505
+ "grad_norm": 0.6256797313690186,
2506
+ "learning_rate": 1.0002051573910671e-05,
2507
+ "loss": 6.3162,
2508
+ "step": 341
2509
+ },
2510
+ {
2511
+ "epoch": 0.08817202568461421,
2512
+ "grad_norm": 0.5889110565185547,
2513
+ "learning_rate": 1e-05,
2514
+ "loss": 6.0812,
2515
+ "step": 342
2516
  }
2517
  ],
2518
  "logging_steps": 1,
 
2536
  "should_evaluate": false,
2537
  "should_log": false,
2538
  "should_save": true,
2539
+ "should_training_stop": true
2540
  },
2541
  "attributes": {}
2542
  }
2543
  },
2544
+ "total_flos": 3177646474133504.0,
2545
  "train_batch_size": 1,
2546
  "trial_name": null,
2547
  "trial_params": null