kmnis committed
Commit 0ab6468 · 1 Parent(s): 795f3a0

Training in progress, step 4500, checkpoint

last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3b074dc89146bd028c0664b132fc46bfc59201185bda06775c72139e7c815a15
+ oid sha256:7d78c12c945e5fcb9c639c770a4e4f00430ece4b7fb6cb44feb0d796c2c4df01
  size 19744138
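
The file above is a Git LFS pointer: only the sha256 oid changes when the adapter weights are overwritten at the new step, while the recorded size stays at 19744138 bytes. A minimal sketch, assuming the actual blob has been pulled locally to last-checkpoint/adapter_model.bin, for checking it against the pointer's oid and size (the path and variable names are illustrative, not part of the commit):

import hashlib
from pathlib import Path

# Assumed local path to the downloaded LFS blob.
path = Path("last-checkpoint/adapter_model.bin")
expected_oid = "7d78c12c945e5fcb9c639c770a4e4f00430ece4b7fb6cb44feb0d796c2c4df01"
expected_size = 19744138

data = path.read_bytes()
assert len(data) == expected_size, "size does not match the LFS pointer"
assert hashlib.sha256(data).hexdigest() == expected_oid, "sha256 does not match the LFS pointer"
print("blob matches the LFS pointer")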
last-checkpoint/global_step4500/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6a3e1fa8080e0d3dd9adb7d1dd460a1627c85c0ea1e1f2ace8944dc1d5f80f1
+ size 6508458036
last-checkpoint/global_step4500/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:482006f522673e84c1d45409be61b4367b41184b60e0d8d618e80d6d75b8336d
+ size 29495149
last-checkpoint/global_step4500/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c8fe1443b48066be579e8c25114715a4eb2a1c5db5d352ed1bac83763e0928c0
+ size 6508458036
last-checkpoint/global_step4500/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f6c0d34c5bf6998556977483a273efe996a01f39142fdb28834075b3e56e329
+ size 29495149
last-checkpoint/global_step4500/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d586e7c566560f5821dc52156ab57d93c7512a96ed40a143f9e0684eaf1291a1
+ size 6508458036
last-checkpoint/global_step4500/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f829ce953cc45b3b18fc69256ea4777634df78f3df62d891beb7f460738739a6
+ size 29495149
last-checkpoint/global_step4500/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:543ac969055a4f583015697c0421202311447ef4ad3c43f13526aee308b7ef60
+ size 6508458036
last-checkpoint/global_step4500/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f12aceac015ed44c82674f0d8ce445df03745332037e673abe12ca5cf05f99eb
+ size 29495149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step4000
+ global_step4500
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3cda7acc40557204710df648d13c6c64dd3bee9e11d98ca8ec6bf9765f6fd55b
+ oid sha256:94222b0d851841afa61b6aafcea0bff04917dffca319ac60be1b7888470e4549
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:df659af25f38deebdca58158c68718a93017141cfb7b33e8079633d427d6debf
+ oid sha256:6eb7e27f4429243bc102257eec60825acb13cbb47a2667ad973c02b4b2dc57aa
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f89a12d795a7d9929749bfcc711935eda3c929f167285a61a0defe0e6815157d
+ oid sha256:0caa8a8996d3cb7ae233c1902bff89eb356bf1170533c424be993eaf5748b7e9
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:595bcf6bb327a594b78f46fc654a60578f254399a5961cafc50cf97fe1934fba
+ oid sha256:cafc79dc7941f6eb3a98f0b8b14f36db6f3be67545040d9683537528c3545459
  size 15024
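
The four rng_state_N.pth files hold the per-rank random-number-generator states, so a resumed run reproduces the same data ordering and dropout draws. A small sketch for peeking at one of them, assuming it is an ordinary torch-serialized dictionary of RNG states:

import torch

# Assumption: the file is a plain serialized dict of RNG states saved by the trainer.
rng = torch.load("last-checkpoint/rng_state_0.pth", map_location="cpu")
print(type(rng))
if isinstance(rng, dict):
    print(sorted(rng.keys()))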
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.6863406408094435,
+ "epoch": 1.897133220910624,
  "eval_steps": 500,
- "global_step": 4000,
+ "global_step": 4500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2407,13 +2407,313 @@
  "learning_rate": 1e-05,
  "loss": 0.6745,
  "step": 4000
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 1e-05,
+ "loss": 0.6371,
+ "step": 4010
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 1e-05,
+ "loss": 0.6437,
+ "step": 4020
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 1e-05,
+ "loss": 0.6366,
+ "step": 4030
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 1e-05,
+ "loss": 0.6695,
+ "step": 4040
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 1e-05,
+ "loss": 0.6926,
+ "step": 4050
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 1e-05,
+ "loss": 0.6313,
+ "step": 4060
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 1e-05,
+ "loss": 0.6514,
+ "step": 4070
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 1e-05,
+ "loss": 0.6575,
+ "step": 4080
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 1e-05,
+ "loss": 0.635,
+ "step": 4090
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 1e-05,
+ "loss": 0.6485,
+ "step": 4100
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 1e-05,
+ "loss": 0.6238,
+ "step": 4110
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 1e-05,
+ "loss": 0.69,
+ "step": 4120
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 1e-05,
+ "loss": 0.6913,
+ "step": 4130
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 1e-05,
+ "loss": 0.6279,
+ "step": 4140
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 1e-05,
+ "loss": 0.6726,
+ "step": 4150
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 1e-05,
+ "loss": 0.6559,
+ "step": 4160
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 1e-05,
+ "loss": 0.6728,
+ "step": 4170
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 1e-05,
+ "loss": 0.6205,
+ "step": 4180
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 1e-05,
+ "loss": 0.6565,
+ "step": 4190
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 1e-05,
+ "loss": 0.6342,
+ "step": 4200
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 1e-05,
+ "loss": 0.6828,
+ "step": 4210
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 1e-05,
+ "loss": 0.6469,
+ "step": 4220
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 1e-05,
+ "loss": 0.6843,
+ "step": 4230
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 1e-05,
+ "loss": 0.6444,
+ "step": 4240
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 1e-05,
+ "loss": 0.6292,
+ "step": 4250
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 1e-05,
+ "loss": 0.6352,
+ "step": 4260
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 1e-05,
+ "loss": 0.6106,
+ "step": 4270
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 1e-05,
+ "loss": 0.6002,
+ "step": 4280
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 1e-05,
+ "loss": 0.6659,
+ "step": 4290
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 1e-05,
+ "loss": 0.6178,
+ "step": 4300
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 1e-05,
+ "loss": 0.6645,
+ "step": 4310
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 1e-05,
+ "loss": 0.6427,
+ "step": 4320
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 1e-05,
+ "loss": 0.692,
+ "step": 4330
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 1e-05,
+ "loss": 0.6473,
+ "step": 4340
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 1e-05,
+ "loss": 0.6061,
+ "step": 4350
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 1e-05,
+ "loss": 0.6521,
+ "step": 4360
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 1e-05,
+ "loss": 0.6438,
+ "step": 4370
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 1e-05,
+ "loss": 0.6158,
+ "step": 4380
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 1e-05,
+ "loss": 0.6613,
+ "step": 4390
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 1e-05,
+ "loss": 0.5958,
+ "step": 4400
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 1e-05,
+ "loss": 0.6013,
+ "step": 4410
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 1e-05,
+ "loss": 0.6591,
+ "step": 4420
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 1e-05,
+ "loss": 0.6364,
+ "step": 4430
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 1e-05,
+ "loss": 0.6247,
+ "step": 4440
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 1e-05,
+ "loss": 0.6,
+ "step": 4450
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 1e-05,
+ "loss": 0.7082,
+ "step": 4460
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 1e-05,
+ "loss": 0.6476,
+ "step": 4470
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 1e-05,
+ "loss": 0.6046,
+ "step": 4480
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 1e-05,
+ "loss": 0.7318,
+ "step": 4490
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 1e-05,
+ "loss": 0.6637,
+ "step": 4500
  }
  ],
  "logging_steps": 10,
  "max_steps": 5000,
  "num_train_epochs": 3,
  "save_steps": 500,
- "total_flos": 1005625572065280.0,
+ "total_flos": 1131517606625280.0,
  "trial_name": null,
  "trial_params": null
  }
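
The trainer_state.json diff adds the 50 log entries between steps 4010 and 4500 (one every logging_steps=10), advances the epoch from roughly 1.686 to 1.897, and bumps total_flos accordingly. A hedged sketch of the Trainer settings these values imply and of resuming from this checkpoint; only the numeric hyperparameters come from the file, everything else (output_dir, model, data) is a placeholder:

from transformers import TrainingArguments

# Numeric values taken from trainer_state.json in this commit; output_dir is a placeholder.
args = TrainingArguments(
    output_dir="outputs",
    learning_rate=1e-5,
    max_steps=5000,
    num_train_epochs=3,
    logging_steps=10,
    eval_steps=500,
    save_steps=500,
)

# With a Trainer built from these args plus the (unrecorded) model and dataset,
# training would pick up the optimizer, scheduler, RNG, and ZeRO states saved here via:
#   trainer.train(resume_from_checkpoint="last-checkpoint")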