DuongTrongChi commited on
Commit
9fcd483
1 Parent(s): b5e47ca

Training in progress, step 385, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8c0d97e659bc4443647ca4bb5214a3fd172de8cb97b76926060afaa1ce02b68
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:636cede6b3e50ff1861e2dc1273f6134e7156308fc6d82d873c1502b6d314e41
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86c43689ac0cec244a4d403549cfe7f198290515d01fcc3fc5c91fcfe4ca6318
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd537c19ebae2b08dbd0494d960639a61ecf609956482e67a546d6c169289698
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd284836fe01fb8da0367ba9e958c4168cceec88a256cdb9badb53073d3392b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2475b14d80337b5e82132cfdcc36b578188577f583a180c04ac0c29d7bf259cc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5289014701853711,
5
  "eval_steps": 500,
6
- "global_step": 362,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2541,6 +2541,167 @@
2541
  "learning_rate": 1.1027397260273974e-05,
2542
  "loss": 1.1608,
2543
  "step": 362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2544
  }
2545
  ],
2546
  "logging_steps": 1,
@@ -2560,7 +2721,7 @@
2560
  "attributes": {}
2561
  }
2562
  },
2563
- "total_flos": 4.077009720080179e+17,
2564
  "train_batch_size": 4,
2565
  "trial_name": null,
2566
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5625057072413479,
5
  "eval_steps": 500,
6
+ "global_step": 385,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2541
  "learning_rate": 1.1027397260273974e-05,
2542
  "loss": 1.1608,
2543
  "step": 362
2544
+ },
2545
+ {
2546
+ "epoch": 0.5303625239704136,
2547
+ "grad_norm": 0.1049429252743721,
2548
+ "learning_rate": 1.0993150684931509e-05,
2549
+ "loss": 1.2217,
2550
+ "step": 363
2551
+ },
2552
+ {
2553
+ "epoch": 0.5318235777554561,
2554
+ "grad_norm": 0.10289900004863739,
2555
+ "learning_rate": 1.0958904109589042e-05,
2556
+ "loss": 1.1854,
2557
+ "step": 364
2558
+ },
2559
+ {
2560
+ "epoch": 0.5332846315404985,
2561
+ "grad_norm": 0.105230912566185,
2562
+ "learning_rate": 1.0924657534246576e-05,
2563
+ "loss": 1.2872,
2564
+ "step": 365
2565
+ },
2566
+ {
2567
+ "epoch": 0.534745685325541,
2568
+ "grad_norm": 0.10501307249069214,
2569
+ "learning_rate": 1.089041095890411e-05,
2570
+ "loss": 1.2098,
2571
+ "step": 366
2572
+ },
2573
+ {
2574
+ "epoch": 0.5362067391105835,
2575
+ "grad_norm": 0.11315510421991348,
2576
+ "learning_rate": 1.0856164383561644e-05,
2577
+ "loss": 1.2473,
2578
+ "step": 367
2579
+ },
2580
+ {
2581
+ "epoch": 0.5376677928956259,
2582
+ "grad_norm": 0.10925040394067764,
2583
+ "learning_rate": 1.082191780821918e-05,
2584
+ "loss": 1.2147,
2585
+ "step": 368
2586
+ },
2587
+ {
2588
+ "epoch": 0.5391288466806684,
2589
+ "grad_norm": 0.11574160307645798,
2590
+ "learning_rate": 1.0787671232876714e-05,
2591
+ "loss": 1.1954,
2592
+ "step": 369
2593
+ },
2594
+ {
2595
+ "epoch": 0.5405899004657109,
2596
+ "grad_norm": 0.09681655466556549,
2597
+ "learning_rate": 1.0753424657534248e-05,
2598
+ "loss": 1.2026,
2599
+ "step": 370
2600
+ },
2601
+ {
2602
+ "epoch": 0.5420509542507533,
2603
+ "grad_norm": 0.10630439221858978,
2604
+ "learning_rate": 1.0719178082191782e-05,
2605
+ "loss": 1.1083,
2606
+ "step": 371
2607
+ },
2608
+ {
2609
+ "epoch": 0.5435120080357958,
2610
+ "grad_norm": 0.1086338609457016,
2611
+ "learning_rate": 1.0684931506849316e-05,
2612
+ "loss": 1.2337,
2613
+ "step": 372
2614
+ },
2615
+ {
2616
+ "epoch": 0.5449730618208383,
2617
+ "grad_norm": 0.10121461749076843,
2618
+ "learning_rate": 1.065068493150685e-05,
2619
+ "loss": 1.2983,
2620
+ "step": 373
2621
+ },
2622
+ {
2623
+ "epoch": 0.5464341156058807,
2624
+ "grad_norm": 0.10418357700109482,
2625
+ "learning_rate": 1.0616438356164384e-05,
2626
+ "loss": 1.1552,
2627
+ "step": 374
2628
+ },
2629
+ {
2630
+ "epoch": 0.5478951693909232,
2631
+ "grad_norm": 0.09971540421247482,
2632
+ "learning_rate": 1.0582191780821917e-05,
2633
+ "loss": 1.131,
2634
+ "step": 375
2635
+ },
2636
+ {
2637
+ "epoch": 0.5493562231759657,
2638
+ "grad_norm": 0.09615826606750488,
2639
+ "learning_rate": 1.0547945205479453e-05,
2640
+ "loss": 1.2724,
2641
+ "step": 376
2642
+ },
2643
+ {
2644
+ "epoch": 0.5508172769610081,
2645
+ "grad_norm": 0.11235067993402481,
2646
+ "learning_rate": 1.0513698630136988e-05,
2647
+ "loss": 1.2207,
2648
+ "step": 377
2649
+ },
2650
+ {
2651
+ "epoch": 0.5522783307460506,
2652
+ "grad_norm": 0.12269837409257889,
2653
+ "learning_rate": 1.0479452054794521e-05,
2654
+ "loss": 1.1767,
2655
+ "step": 378
2656
+ },
2657
+ {
2658
+ "epoch": 0.5537393845310931,
2659
+ "grad_norm": 0.11360511928796768,
2660
+ "learning_rate": 1.0445205479452056e-05,
2661
+ "loss": 1.1903,
2662
+ "step": 379
2663
+ },
2664
+ {
2665
+ "epoch": 0.5552004383161355,
2666
+ "grad_norm": 0.11293426156044006,
2667
+ "learning_rate": 1.0410958904109589e-05,
2668
+ "loss": 1.231,
2669
+ "step": 380
2670
+ },
2671
+ {
2672
+ "epoch": 0.556661492101178,
2673
+ "grad_norm": 0.10496404767036438,
2674
+ "learning_rate": 1.0376712328767124e-05,
2675
+ "loss": 1.2697,
2676
+ "step": 381
2677
+ },
2678
+ {
2679
+ "epoch": 0.5581225458862205,
2680
+ "grad_norm": 0.09859599173069,
2681
+ "learning_rate": 1.0342465753424657e-05,
2682
+ "loss": 1.3125,
2683
+ "step": 382
2684
+ },
2685
+ {
2686
+ "epoch": 0.5595835996712629,
2687
+ "grad_norm": 0.10170820355415344,
2688
+ "learning_rate": 1.0308219178082193e-05,
2689
+ "loss": 1.1881,
2690
+ "step": 383
2691
+ },
2692
+ {
2693
+ "epoch": 0.5610446534563054,
2694
+ "grad_norm": 0.11982686072587967,
2695
+ "learning_rate": 1.0273972602739728e-05,
2696
+ "loss": 1.1198,
2697
+ "step": 384
2698
+ },
2699
+ {
2700
+ "epoch": 0.5625057072413479,
2701
+ "grad_norm": 0.10333485156297684,
2702
+ "learning_rate": 1.0239726027397261e-05,
2703
+ "loss": 1.2105,
2704
+ "step": 385
2705
  }
2706
  ],
2707
  "logging_steps": 1,
 
2721
  "attributes": {}
2722
  }
2723
  },
2724
+ "total_flos": 4.340642918557778e+17,
2725
  "train_batch_size": 4,
2726
  "trial_name": null,
2727
  "trial_params": null