jssky committed on
Commit
6860940
·
verified ·
1 Parent(s): 2029b60

Training in progress, step 9, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4482d7256d18df484f99d9344cd6c73f2c6be890e580f3dacff2935ac4574db5
3
  size 161533192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d5c66b6ffda021237c9a3115ef48c563bdc76349c79fb2c7c5241e4c0491b2f
3
  size 161533192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:397cbf3fd95b8441e13a80d3458d1d5118939022ab9c9f740a39f40dcfc6302f
3
  size 82460660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5109a92eb438ee2a74a28631740ab42f422667d66326971776fad9bb5fd6c70c
3
  size 82460660
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66d7bd5bd26fb498fac45d7a49833d11a7c8053ebb24166db6968f1081bb4cc7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0524491b2cbb00c2c8bf92935b27c0c8415926c182a2ae29784bff8683c2ca22
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f665697f616128f42731916c7044720dea1d302d4144b429951163834ef9e5d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01fcfc9c89ee2c5890f040dad1ce0c6fe540609b6cee7e0d21d7f980a79b13cb
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:984ed304ee2572c476b1d7a45e6238796d5af9a4e90fc90ca75172730556a741
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38d4ba61cffcb299e48489e74353d2e61936c89e4c8eac1c79c7544323e29350
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21af059a95787b7103d10be3dd2a11ad053234532829088ec510f2255efcaac2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c32abb5bec1e6ac1fe8165766ea3654be86d90351042bf3409dd4368f62c2c4e
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d9244d7c44bd09796fa40aa056d4d8f84a0e62a0760ef9fb058e3b6f83f559b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dfa554528bc82d959dabfbd96640f6e380a6603f98f4e438b3be297113362bc
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5e3d03497958e23e4f9a788b468b828dda534d6fb5b783596c62c2fcee0cda2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b9f9c45856e3431088b9f1af4ddf561dc01f39dd2787aea79ed362bb2ba073
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:377df699a816e08c783752ebd9f711a4239e23640fb988b4f539150862736011
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4ad072a9dd5444ff20683781218febde95b0b2273aea074f8d461d2d22f2cc
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56e479b5fc35fe4d057156da47d86e23a8eec615fb7c45d4d529249205f42d59
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc8abdb4abb1ced175224941679bc40048e3666374c008c2d593b10a0268603
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:917587e2c0a52fd7c3d6680e02e388879c84c249a215f319208d6a9951b36ba9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bdda546a1390be3b85a90dd4ef31050dfb7b691765ffc12ed691b6786ed6e3d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.36923076923076925,
5
  "eval_steps": 3,
6
- "global_step": 6,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -73,6 +73,35 @@
73
  "eval_samples_per_second": 21.423,
74
  "eval_steps_per_second": 3.174,
75
  "step": 6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  }
77
  ],
78
  "logging_steps": 1,
@@ -92,7 +121,7 @@
92
  "attributes": {}
93
  }
94
  },
95
- "total_flos": 3.3553856474382336e+16,
96
  "train_batch_size": 1,
97
  "trial_name": null,
98
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5538461538461539,
5
  "eval_steps": 3,
6
+ "global_step": 9,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
73
  "eval_samples_per_second": 21.423,
74
  "eval_steps_per_second": 3.174,
75
  "step": 6
76
+ },
77
+ {
78
+ "epoch": 0.4307692307692308,
79
+ "grad_norm": 2.464343547821045,
80
+ "learning_rate": 0.0001,
81
+ "loss": 4.1546,
82
+ "step": 7
83
+ },
84
+ {
85
+ "epoch": 0.49230769230769234,
86
+ "grad_norm": 2.381273031234741,
87
+ "learning_rate": 0.00012,
88
+ "loss": 3.6829,
89
+ "step": 8
90
+ },
91
+ {
92
+ "epoch": 0.5538461538461539,
93
+ "grad_norm": 3.1305465698242188,
94
+ "learning_rate": 0.00014,
95
+ "loss": 3.8566,
96
+ "step": 9
97
+ },
98
+ {
99
+ "epoch": 0.5538461538461539,
100
+ "eval_loss": 3.2923731803894043,
101
+ "eval_runtime": 1.2727,
102
+ "eval_samples_per_second": 21.214,
103
+ "eval_steps_per_second": 3.143,
104
+ "step": 9
105
  }
106
  ],
107
  "logging_steps": 1,
 
121
  "attributes": {}
122
  }
123
  },
124
+ "total_flos": 5.03307847115735e+16,
125
  "train_batch_size": 1,
126
  "trial_name": null,
127
  "trial_params": null