ccore commited on
Commit
d0a9627
·
verified ·
1 Parent(s): 05d879a

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a07b7d4a9c1b86f87d485901d5e37ea7b953f9e365829f31dbaea704f076c7bd
3
  size 500979600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3fc925c5996c1a83a5ddb1a6c38c4d8439fac65dad95db78497618749ba9df6
3
  size 500979600
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:868dd0dcf987dbe3fc0f6e805629c90bab980bfee1dec1990f93a52eeb7c144f
3
  size 1002078330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a5ddead50161901fc017bbc88ff23e8b30f8312bf923149cb57ab17f54567c
3
  size 1002078330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86a1ccd9f7066dca55443398dffdd4e76ff6f9b9071f99d8222f75921367ac98
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:243bed054e6ca95566a31f84929c545f9505688d6a662df2d1ba4f4954f9e518
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6602c18a1ebe894c1d51ce5c9cea3744db091c466423f123d4fa8b7754d9378a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57298e368684a7f74b8580ab500ef876b797e959380e8c09da0e2d665556ed4b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,63 +1,34 @@
1
  {
2
- "best_metric": 0.9013135433197021,
3
- "best_model_checkpoint": "./opt_trained3/checkpoint-2720",
4
- "epoch": 1.99898924928788,
5
  "eval_steps": 500,
6
- "global_step": 2720,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.3675457134981163,
13
- "grad_norm": 1.054343342781067,
14
  "learning_rate": 9.189154158168292e-05,
15
- "loss": 0.9192,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.7350914269962326,
20
- "grad_norm": 1.1676549911499023,
21
  "learning_rate": 7.019605024359474e-05,
22
- "loss": 0.9147,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.0,
27
- "eval_loss": 0.9048793911933899,
28
- "eval_runtime": 768.112,
29
- "eval_samples_per_second": 50.377,
30
- "eval_steps_per_second": 12.595,
31
  "step": 1361
32
- },
33
- {
34
- "epoch": 1.1021777083524764,
35
- "grad_norm": 1.1806821823120117,
36
- "learning_rate": 4.195020556092935e-05,
37
- "loss": 0.9113,
38
- "step": 1500
39
- },
40
- {
41
- "epoch": 1.4697234218505928,
42
- "grad_norm": 1.2350996732711792,
43
- "learning_rate": 1.631521781767214e-05,
44
- "loss": 0.907,
45
- "step": 2000
46
- },
47
- {
48
- "epoch": 1.837269135348709,
49
- "grad_norm": 0.9862784147262573,
50
- "learning_rate": 1.6054963006338742e-06,
51
- "loss": 0.9047,
52
- "step": 2500
53
- },
54
- {
55
- "epoch": 1.99898924928788,
56
- "eval_loss": 0.9013135433197021,
57
- "eval_runtime": 765.3687,
58
- "eval_samples_per_second": 50.557,
59
- "eval_steps_per_second": 12.64,
60
- "step": 2720
61
  }
62
  ],
63
  "logging_steps": 500,
@@ -72,12 +43,12 @@
72
  "should_evaluate": false,
73
  "should_log": false,
74
  "should_save": true,
75
- "should_training_stop": true
76
  },
77
  "attributes": {}
78
  }
79
  },
80
- "total_flos": 1.37491227277056e+17,
81
  "train_batch_size": 32,
82
  "trial_name": null,
83
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9017300605773926,
3
+ "best_model_checkpoint": "./opt_trained3/checkpoint-1361",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 1361,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.3675457134981163,
13
+ "grad_norm": 1.3510414361953735,
14
  "learning_rate": 9.189154158168292e-05,
15
+ "loss": 0.9069,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.7350914269962326,
20
+ "grad_norm": 1.1864418983459473,
21
  "learning_rate": 7.019605024359474e-05,
22
+ "loss": 0.9081,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.0,
27
+ "eval_loss": 0.9017300605773926,
28
+ "eval_runtime": 764.7068,
29
+ "eval_samples_per_second": 50.601,
30
+ "eval_steps_per_second": 12.651,
31
  "step": 1361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "logging_steps": 500,
 
43
  "should_evaluate": false,
44
  "should_log": false,
45
  "should_save": true,
46
+ "should_training_stop": false
47
  },
48
  "attributes": {}
49
  }
50
  },
51
+ "total_flos": 6.8780176144128e+16,
52
  "train_batch_size": 32,
53
  "trial_name": null,
54
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a452ae32a30f3ff55dc4f63985dae2c59ba5cf468734128941ae47abda2b6cbd
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f33fb5eb763ecb18f713e3ce572f1cd2a04862eab8b3ede5970af390529cb8ee
3
  size 5368