mgh6 committed on
Commit
d5e98b5
·
verified ·
1 Parent(s): 7ebdceb

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47630d4c3006b50921d7e36c744d50be38e8879aea194e330ca9b15f1daaafd3
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:192259f2c1b4d88d789b5c489ca9003298f60ecbe0b848a8d37411dcd693b958
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:697d38dc27773b03d627eaea47fe209ad4c1e9a4073dc7b3d8e4eec46920a338
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a386ca0bda826444b25bbba60d0adc8beec6ba09f5bcc57ab30665a76e50f853
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d27635aa3bf8c9513bde838cce12dd861db1a571c559e6e3d3706dff45fc7a8d
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bf3f3c24bb80a8453d4f69a54971bc84e22f4d6c02bfcca11e053397356566a
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c95b96648dba4a14572860ec64f84946a5310ebc9249f9d3352c1018256aeef8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9b67c97ec0b0a1b79a6330badd5da865b550616c82ba334622fd4f95186829
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,132 +1,27 @@
1
  {
2
- "best_metric": 1.1703433990478516,
3
- "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-800",
4
- "epoch": 0.23222060957910015,
5
  "eval_steps": 100,
6
- "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.02902757619738752,
13
- "grad_norm": 0.5972162485122681,
14
- "learning_rate": 0.0009970972423802612,
15
- "loss": 3.1833,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.02902757619738752,
20
- "eval_loss": 1.3059791326522827,
21
- "eval_runtime": 213.3119,
22
- "eval_samples_per_second": 213.298,
23
- "eval_steps_per_second": 3.333,
24
  "step": 100
25
- },
26
- {
27
- "epoch": 0.05805515239477504,
28
- "grad_norm": 0.615159809589386,
29
- "learning_rate": 0.0009941944847605226,
30
- "loss": 2.7965,
31
- "step": 200
32
- },
33
- {
34
- "epoch": 0.05805515239477504,
35
- "eval_loss": 1.2634466886520386,
36
- "eval_runtime": 213.6629,
37
- "eval_samples_per_second": 212.948,
38
- "eval_steps_per_second": 3.328,
39
- "step": 200
40
- },
41
- {
42
- "epoch": 0.08708272859216255,
43
- "grad_norm": 0.534396767616272,
44
- "learning_rate": 0.0009912917271407838,
45
- "loss": 2.6855,
46
- "step": 300
47
- },
48
- {
49
- "epoch": 0.08708272859216255,
50
- "eval_loss": 1.230128526687622,
51
- "eval_runtime": 213.6816,
52
- "eval_samples_per_second": 212.929,
53
- "eval_steps_per_second": 3.327,
54
- "step": 300
55
- },
56
- {
57
- "epoch": 0.11611030478955008,
58
- "grad_norm": 0.5327989459037781,
59
- "learning_rate": 0.000988388969521045,
60
- "loss": 2.618,
61
- "step": 400
62
- },
63
- {
64
- "epoch": 0.11611030478955008,
65
- "eval_loss": 1.216036319732666,
66
- "eval_runtime": 213.4593,
67
- "eval_samples_per_second": 213.151,
68
- "eval_steps_per_second": 3.331,
69
- "step": 400
70
- },
71
- {
72
- "epoch": 0.14513788098693758,
73
- "grad_norm": 0.5518757700920105,
74
- "learning_rate": 0.0009854862119013062,
75
- "loss": 2.5759,
76
- "step": 500
77
- },
78
- {
79
- "epoch": 0.14513788098693758,
80
- "eval_loss": 1.1977550983428955,
81
- "eval_runtime": 213.2072,
82
- "eval_samples_per_second": 213.403,
83
- "eval_steps_per_second": 3.335,
84
- "step": 500
85
- },
86
- {
87
- "epoch": 0.1741654571843251,
88
- "grad_norm": 0.5023015141487122,
89
- "learning_rate": 0.0009825834542815674,
90
- "loss": 2.526,
91
- "step": 600
92
- },
93
- {
94
- "epoch": 0.1741654571843251,
95
- "eval_loss": 1.1928865909576416,
96
- "eval_runtime": 213.533,
97
- "eval_samples_per_second": 213.077,
98
- "eval_steps_per_second": 3.33,
99
- "step": 600
100
- },
101
- {
102
- "epoch": 0.20319303338171263,
103
- "grad_norm": 0.5164414048194885,
104
- "learning_rate": 0.0009796806966618288,
105
- "loss": 2.5121,
106
- "step": 700
107
- },
108
- {
109
- "epoch": 0.20319303338171263,
110
- "eval_loss": 1.181251049041748,
111
- "eval_runtime": 213.4132,
112
- "eval_samples_per_second": 213.197,
113
- "eval_steps_per_second": 3.332,
114
- "step": 700
115
- },
116
- {
117
- "epoch": 0.23222060957910015,
118
- "grad_norm": 0.535000205039978,
119
- "learning_rate": 0.00097677793904209,
120
- "loss": 2.456,
121
- "step": 800
122
- },
123
- {
124
- "epoch": 0.23222060957910015,
125
- "eval_loss": 1.1703433990478516,
126
- "eval_runtime": 213.4281,
127
- "eval_samples_per_second": 213.182,
128
- "eval_steps_per_second": 3.331,
129
- "step": 800
130
  }
131
  ],
132
  "logging_steps": 100,
@@ -155,7 +50,7 @@
155
  "attributes": {}
156
  }
157
  },
158
- "total_flos": 8003100862316544.0,
159
  "train_batch_size": 64,
160
  "trial_name": null,
161
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.2662084102630615,
3
+ "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-100",
4
+ "epoch": 0.02902757619738752,
5
  "eval_steps": 100,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.02902757619738752,
13
+ "grad_norm": 1.131932258605957,
14
+ "learning_rate": 9.970972423802612e-05,
15
+ "loss": 2.8244,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.02902757619738752,
20
+ "eval_loss": 1.2662084102630615,
21
+ "eval_runtime": 213.5614,
22
+ "eval_samples_per_second": 213.049,
23
+ "eval_steps_per_second": 3.329,
24
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "logging_steps": 100,
 
50
  "attributes": {}
51
  }
52
  },
53
+ "total_flos": 1000387607789568.0,
54
  "train_batch_size": 64,
55
  "trial_name": null,
56
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee0ef74173b2c9a49dca197269030629fd4de03cd43419ed74c4742b9a5dba90
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:106cd64593a78067217b619a1bb4288f6aff3cb8411c9fafd726f7129f1b9be1
3
  size 5368