KevinKibe committed (verified)
Commit b8cf3a0 · 1 Parent(s): 5a2a796

Training in progress, step 700, checkpoint

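This commit overwrites the rolling last-checkpoint/ folder with the step-700 training state, so that exact state can be fetched from the Hub at this revision. A minimal sketch with huggingface_hub, assuming the repository id KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned (inferred from the best_model_checkpoint path in trainer_state.json below; adjust if the actual repo id differs):

# Sketch: pull only the step-700 checkpoint folder at this commit.
from huggingface_hub import snapshot_download

local_path = snapshot_download(
    repo_id="KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned",  # assumed, inferred from trainer_state.json
    revision="b8cf3a0",                    # commit shown above (the full hash also works)
    allow_patterns=["last-checkpoint/*"],  # skip files outside the checkpoint folder
)
print(local_path)

Pinning the download to the commit hash keeps it pointing at step 700 even after later checkpoints overwrite last-checkpoint/.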
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db740509f5a95eecba7872e2a1e17b037034a23a73f00bb1d1061af31b9b5e44
+oid sha256:67fb82930389b24ba0208a7a08b33f2abbd2323860716962cf881cf5a16bc6ed
 size 2751040864
last-checkpoint/pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:398cfd12e51c24f5864a31c205e44db9a404ab7717a0075a6d153b914fcef1f0
+oid sha256:c84f8cc0dcd4bb5980ea5c4e462292f7fdcfa76bf833ecfba7b6e2fcdd96d3c0
 size 5000078781
last-checkpoint/pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1606b9f5785865abbc70c805039dc367e7a957d7c8b8bdc7b28a1ae2899b5d75
+oid sha256:13a55fd318dc6a5d1963163df52119c23c1dac0f939b79c076170d494afe47c8
 size 482838574
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae687f4500411ca2e1ef17be88ce145b3ea9cb18005002027164c0c3ab6645d9
+oid sha256:ac1b10d6f7dd9cdf9b5b89778eacbed4b4a8f31412820b85b64a56b3a491d825
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50f0e1d62e75d3dedce865615f4c070df947c2befbd956dcb7172fce5c716479
+oid sha256:a573157b5af49c7fd752beb625308fa4d3184af6323528856e81787deb4e252b
 size 1064
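Every file above is stored through Git LFS, so each diff only touches the three-line pointer (spec version, sha256 oid, byte size); the oid changes while the size stays fixed because the tensors keep the same shapes between steps. A small sketch for checking a locally downloaded file against the pointer shown in the optimizer.pt hunk (the local path is a hypothetical example):

# Sketch: verify a downloaded checkpoint file against its LFS pointer.
import hashlib, os

path = "last-checkpoint/optimizer.pt"  # hypothetical local path
expected_oid = "67fb82930389b24ba0208a7a08b33f2abbd2323860716962cf881cf5a16bc6ed"
expected_size = 2751040864

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

print("size ok:", os.path.getsize(path) == expected_size)
print("oid ok:", h.hexdigest() == expected_oid)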
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.453127384185791,
-  "best_model_checkpoint": "../KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned/checkpoint-400",
-  "epoch": 400.0,
+  "best_metric": 0.8545865416526794,
+  "best_model_checkpoint": "../KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned/checkpoint-700",
+  "epoch": 700.0,
   "eval_steps": 100,
-  "global_step": 400,
+  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -75,6 +75,57 @@
       "eval_samples_per_second": 1.224,
       "eval_steps_per_second": 0.612,
       "step": 400
+    },
+    {
+      "epoch": 500.0,
+      "grad_norm": 11.111770629882812,
+      "learning_rate": 1.006e-05,
+      "loss": 1.4112,
+      "step": 500
+    },
+    {
+      "epoch": 500.0,
+      "eval_gen_len": 70.0,
+      "eval_loss": 1.6681925058364868,
+      "eval_rouge": 0.1899,
+      "eval_runtime": 6.0247,
+      "eval_samples_per_second": 0.332,
+      "eval_steps_per_second": 0.166,
+      "step": 500
+    },
+    {
+      "epoch": 600.0,
+      "grad_norm": 6.4994120597839355,
+      "learning_rate": 8.06e-06,
+      "loss": 0.6738,
+      "step": 600
+    },
+    {
+      "epoch": 600.0,
+      "eval_gen_len": 68.0,
+      "eval_loss": 1.1857625246047974,
+      "eval_rouge": 0.1907,
+      "eval_runtime": 6.0626,
+      "eval_samples_per_second": 0.33,
+      "eval_steps_per_second": 0.165,
+      "step": 600
+    },
+    {
+      "epoch": 700.0,
+      "grad_norm": 3.2670979499816895,
+      "learning_rate": 6.0600000000000004e-06,
+      "loss": 0.2921,
+      "step": 700
+    },
+    {
+      "epoch": 700.0,
+      "eval_gen_len": 28.0,
+      "eval_loss": 0.8545865416526794,
+      "eval_rouge": 0.2776,
+      "eval_runtime": 1.6454,
+      "eval_samples_per_second": 1.215,
+      "eval_steps_per_second": 0.608,
+      "step": 700
     }
   ],
   "logging_steps": 100,
@@ -82,7 +133,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1000,
   "save_steps": 100,
-  "total_flos": 1000114932940800.0,
+  "total_flos": 1750201132646400.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null