DuongTrongChi committed
Commit 9fcd483
Parent(s): b5e47ca
Training in progress, step 385, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:636cede6b3e50ff1861e2dc1273f6134e7156308fc6d82d873c1502b6d314e41
 size 100198584
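Only the Git LFS pointer (version, oid, size) is versioned here; the ~100 MB safetensors blob lives in LFS storage. A minimal sketch of checking a locally pulled copy against the new pointer above, assuming the file sits at last-checkpoint/adapter_model.safetensors (path and usage are illustrative, not part of this commit):

```python
import hashlib
from pathlib import Path

# Hypothetical local path; assumes the LFS object itself has been pulled,
# not just the three-line pointer file.
path = Path("last-checkpoint/adapter_model.safetensors")

# oid and size recorded in the new LFS pointer above.
expected_oid = "636cede6b3e50ff1861e2dc1273f6134e7156308fc6d82d873c1502b6d314e41"
expected_size = 100198584

sha = hashlib.sha256()
with path.open("rb") as f:
    # Hash in 1 MiB chunks so the whole file is never held in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print("size matches:", path.stat().st_size == expected_size)
print("oid matches:", sha.hexdigest() == expected_oid)
```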
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dd537c19ebae2b08dbd0494d960639a61ecf609956482e67a546d6c169289698
 size 50675604
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2475b14d80337b5e82132cfdcc36b578188577f583a180c04ac0c29d7bf259cc
 size 1064
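optimizer.pt and scheduler.pt are the plain torch.save() payloads that transformers.Trainer writes next to a checkpoint. A rough sketch of peeking at them locally, assuming the LFS objects have been pulled into last-checkpoint/ (depending on the torch version, torch.load may additionally need weights_only=False for these pickled state dicts):

```python
import torch

# Hypothetical local paths inside a clone of this repository.
opt_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")

# Standard state_dict layouts: the optimizer carries per-parameter moments,
# the scheduler carries the step counter and last learning rate(s).
print(sorted(opt_state.keys()))   # typically ["param_groups", "state"]
print(sched_state)
```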
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.5625057072413479,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 385,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2541,6 +2541,167 @@
       "learning_rate": 1.1027397260273974e-05,
       "loss": 1.1608,
       "step": 362
+    },
+    {
+      "epoch": 0.5303625239704136,
+      "grad_norm": 0.1049429252743721,
+      "learning_rate": 1.0993150684931509e-05,
+      "loss": 1.2217,
+      "step": 363
+    },
+    {
+      "epoch": 0.5318235777554561,
+      "grad_norm": 0.10289900004863739,
+      "learning_rate": 1.0958904109589042e-05,
+      "loss": 1.1854,
+      "step": 364
+    },
+    {
+      "epoch": 0.5332846315404985,
+      "grad_norm": 0.105230912566185,
+      "learning_rate": 1.0924657534246576e-05,
+      "loss": 1.2872,
+      "step": 365
+    },
+    {
+      "epoch": 0.534745685325541,
+      "grad_norm": 0.10501307249069214,
+      "learning_rate": 1.089041095890411e-05,
+      "loss": 1.2098,
+      "step": 366
+    },
+    {
+      "epoch": 0.5362067391105835,
+      "grad_norm": 0.11315510421991348,
+      "learning_rate": 1.0856164383561644e-05,
+      "loss": 1.2473,
+      "step": 367
+    },
+    {
+      "epoch": 0.5376677928956259,
+      "grad_norm": 0.10925040394067764,
+      "learning_rate": 1.082191780821918e-05,
+      "loss": 1.2147,
+      "step": 368
+    },
+    {
+      "epoch": 0.5391288466806684,
+      "grad_norm": 0.11574160307645798,
+      "learning_rate": 1.0787671232876714e-05,
+      "loss": 1.1954,
+      "step": 369
+    },
+    {
+      "epoch": 0.5405899004657109,
+      "grad_norm": 0.09681655466556549,
+      "learning_rate": 1.0753424657534248e-05,
+      "loss": 1.2026,
+      "step": 370
+    },
+    {
+      "epoch": 0.5420509542507533,
+      "grad_norm": 0.10630439221858978,
+      "learning_rate": 1.0719178082191782e-05,
+      "loss": 1.1083,
+      "step": 371
+    },
+    {
+      "epoch": 0.5435120080357958,
+      "grad_norm": 0.1086338609457016,
+      "learning_rate": 1.0684931506849316e-05,
+      "loss": 1.2337,
+      "step": 372
+    },
+    {
+      "epoch": 0.5449730618208383,
+      "grad_norm": 0.10121461749076843,
+      "learning_rate": 1.065068493150685e-05,
+      "loss": 1.2983,
+      "step": 373
+    },
+    {
+      "epoch": 0.5464341156058807,
+      "grad_norm": 0.10418357700109482,
+      "learning_rate": 1.0616438356164384e-05,
+      "loss": 1.1552,
+      "step": 374
+    },
+    {
+      "epoch": 0.5478951693909232,
+      "grad_norm": 0.09971540421247482,
+      "learning_rate": 1.0582191780821917e-05,
+      "loss": 1.131,
+      "step": 375
+    },
+    {
+      "epoch": 0.5493562231759657,
+      "grad_norm": 0.09615826606750488,
+      "learning_rate": 1.0547945205479453e-05,
+      "loss": 1.2724,
+      "step": 376
+    },
+    {
+      "epoch": 0.5508172769610081,
+      "grad_norm": 0.11235067993402481,
+      "learning_rate": 1.0513698630136988e-05,
+      "loss": 1.2207,
+      "step": 377
+    },
+    {
+      "epoch": 0.5522783307460506,
+      "grad_norm": 0.12269837409257889,
+      "learning_rate": 1.0479452054794521e-05,
+      "loss": 1.1767,
+      "step": 378
+    },
+    {
+      "epoch": 0.5537393845310931,
+      "grad_norm": 0.11360511928796768,
+      "learning_rate": 1.0445205479452056e-05,
+      "loss": 1.1903,
+      "step": 379
+    },
+    {
+      "epoch": 0.5552004383161355,
+      "grad_norm": 0.11293426156044006,
+      "learning_rate": 1.0410958904109589e-05,
+      "loss": 1.231,
+      "step": 380
+    },
+    {
+      "epoch": 0.556661492101178,
+      "grad_norm": 0.10496404767036438,
+      "learning_rate": 1.0376712328767124e-05,
+      "loss": 1.2697,
+      "step": 381
+    },
+    {
+      "epoch": 0.5581225458862205,
+      "grad_norm": 0.09859599173069,
+      "learning_rate": 1.0342465753424657e-05,
+      "loss": 1.3125,
+      "step": 382
+    },
+    {
+      "epoch": 0.5595835996712629,
+      "grad_norm": 0.10170820355415344,
+      "learning_rate": 1.0308219178082193e-05,
+      "loss": 1.1881,
+      "step": 383
+    },
+    {
+      "epoch": 0.5610446534563054,
+      "grad_norm": 0.11982686072587967,
+      "learning_rate": 1.0273972602739728e-05,
+      "loss": 1.1198,
+      "step": 384
+    },
+    {
+      "epoch": 0.5625057072413479,
+      "grad_norm": 0.10333485156297684,
+      "learning_rate": 1.0239726027397261e-05,
+      "loss": 1.2105,
+      "step": 385
     }
   ],
   "logging_steps": 1,
@@ -2560,7 +2721,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.340642918557778e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
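trainer_state.json is plain JSON, so the 23 log entries this checkpoint adds (steps 363-385) can be inspected directly. A minimal sketch, assuming a local copy at last-checkpoint/trainer_state.json:

```python
import json
from pathlib import Path

state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())
print(state["global_step"], state["epoch"])   # 385, 0.5625057072413479

# Entries added in this commit cover steps 363 through 385.
new_logs = [e for e in state["log_history"] if 363 <= e.get("step", 0) <= 385]
mean_loss = sum(e["loss"] for e in new_logs) / len(new_logs)
print(f"{len(new_logs)} new entries, mean training loss {mean_loss:.4f}")
```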