Joemgu commited on
Commit
44f57ed
·
1 Parent(s): ba6a75f

Training in progress, step 3000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25ee98c0f7b49949d81a28c9754bef9cbad66a8aef1c17fd8167889685e9b857
3
  size 4736616809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6de8b06ed44d4963b25bb376812b173405edc863a23e8867e0f736341d74a8d
3
  size 4736616809
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9aee612a0a1c2847da99ac4385fa982ffe96a11a86e9c19b170bc87c12c873b1
3
  size 2368281769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02c85713dad1dcfeb5c32c03d2b6f448d20d550a87402f9d21be276100bce607
3
  size 2368281769
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7860309a9f924235d948b2eb29b2575e789c9092d1500261ccc3edc2aa2e038b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a2eb101bb6b101af129672a2bea24581932e36cd77fc392a7cda646daab49ff
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b96aa558fbc8a00885c5ed2fa77b045521eaafa17be7361a44ccd54fb7c1f7aa
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cabb275feefad232b9c910702270f8d2a4ae8df759c0f8aeaf1aba436940e944
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.074338674545288,
3
- "best_model_checkpoint": "output/checkpoint-2500",
4
- "epoch": 0.4166666666666667,
5
- "global_step": 2500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -15071,11 +15071,3024 @@
15071
  "eval_samples_per_second": 1.188,
15072
  "eval_steps_per_second": 0.166,
15073
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15074
  }
15075
  ],
15076
  "max_steps": 6000,
15077
  "num_train_epochs": 9223372036854775807,
15078
- "total_flos": 2.37606578201958e+18,
15079
  "trial_name": null,
15080
  "trial_params": null
15081
  }
 
1
  {
2
+ "best_metric": 2.0400795936584473,
3
+ "best_model_checkpoint": "output/checkpoint-3000",
4
+ "epoch": 0.5,
5
+ "global_step": 3000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
15071
  "eval_samples_per_second": 1.188,
15072
  "eval_steps_per_second": 0.166,
15073
  "step": 2500
15074
+ },
15075
+ {
15076
+ "epoch": 0.42,
15077
+ "learning_rate": 0.00047444067796610173,
15078
+ "loss": 2.207,
15079
+ "step": 2501
15080
+ },
15081
+ {
15082
+ "epoch": 0.42,
15083
+ "learning_rate": 0.0004743050847457627,
15084
+ "loss": 2.1369,
15085
+ "step": 2502
15086
+ },
15087
+ {
15088
+ "epoch": 0.42,
15089
+ "learning_rate": 0.0004741694915254238,
15090
+ "loss": 2.1234,
15091
+ "step": 2503
15092
+ },
15093
+ {
15094
+ "epoch": 0.42,
15095
+ "learning_rate": 0.00047403389830508473,
15096
+ "loss": 2.1835,
15097
+ "step": 2504
15098
+ },
15099
+ {
15100
+ "epoch": 0.42,
15101
+ "learning_rate": 0.0004738983050847458,
15102
+ "loss": 2.1962,
15103
+ "step": 2505
15104
+ },
15105
+ {
15106
+ "epoch": 0.42,
15107
+ "learning_rate": 0.0004737627118644068,
15108
+ "loss": 2.1814,
15109
+ "step": 2506
15110
+ },
15111
+ {
15112
+ "epoch": 0.42,
15113
+ "learning_rate": 0.00047362711864406783,
15114
+ "loss": 2.1553,
15115
+ "step": 2507
15116
+ },
15117
+ {
15118
+ "epoch": 0.42,
15119
+ "learning_rate": 0.0004734915254237288,
15120
+ "loss": 2.1694,
15121
+ "step": 2508
15122
+ },
15123
+ {
15124
+ "epoch": 0.42,
15125
+ "learning_rate": 0.0004733559322033899,
15126
+ "loss": 2.2564,
15127
+ "step": 2509
15128
+ },
15129
+ {
15130
+ "epoch": 0.42,
15131
+ "learning_rate": 0.0004732203389830509,
15132
+ "loss": 2.1611,
15133
+ "step": 2510
15134
+ },
15135
+ {
15136
+ "epoch": 0.42,
15137
+ "learning_rate": 0.00047308474576271186,
15138
+ "loss": 2.3156,
15139
+ "step": 2511
15140
+ },
15141
+ {
15142
+ "epoch": 0.42,
15143
+ "learning_rate": 0.0004729491525423729,
15144
+ "loss": 2.1023,
15145
+ "step": 2512
15146
+ },
15147
+ {
15148
+ "epoch": 0.42,
15149
+ "learning_rate": 0.0004728135593220339,
15150
+ "loss": 2.2861,
15151
+ "step": 2513
15152
+ },
15153
+ {
15154
+ "epoch": 0.42,
15155
+ "learning_rate": 0.00047267796610169497,
15156
+ "loss": 2.1733,
15157
+ "step": 2514
15158
+ },
15159
+ {
15160
+ "epoch": 0.42,
15161
+ "learning_rate": 0.0004725423728813559,
15162
+ "loss": 2.1,
15163
+ "step": 2515
15164
+ },
15165
+ {
15166
+ "epoch": 0.42,
15167
+ "learning_rate": 0.000472406779661017,
15168
+ "loss": 2.1805,
15169
+ "step": 2516
15170
+ },
15171
+ {
15172
+ "epoch": 0.42,
15173
+ "learning_rate": 0.00047227118644067796,
15174
+ "loss": 2.1485,
15175
+ "step": 2517
15176
+ },
15177
+ {
15178
+ "epoch": 0.42,
15179
+ "learning_rate": 0.000472135593220339,
15180
+ "loss": 2.2094,
15181
+ "step": 2518
15182
+ },
15183
+ {
15184
+ "epoch": 0.42,
15185
+ "learning_rate": 0.000472,
15186
+ "loss": 2.1898,
15187
+ "step": 2519
15188
+ },
15189
+ {
15190
+ "epoch": 0.42,
15191
+ "learning_rate": 0.00047186440677966107,
15192
+ "loss": 2.2167,
15193
+ "step": 2520
15194
+ },
15195
+ {
15196
+ "epoch": 0.42,
15197
+ "learning_rate": 0.00047172881355932205,
15198
+ "loss": 2.1117,
15199
+ "step": 2521
15200
+ },
15201
+ {
15202
+ "epoch": 0.42,
15203
+ "learning_rate": 0.0004715932203389831,
15204
+ "loss": 2.1604,
15205
+ "step": 2522
15206
+ },
15207
+ {
15208
+ "epoch": 0.42,
15209
+ "learning_rate": 0.00047145762711864407,
15210
+ "loss": 2.1861,
15211
+ "step": 2523
15212
+ },
15213
+ {
15214
+ "epoch": 0.42,
15215
+ "learning_rate": 0.00047132203389830515,
15216
+ "loss": 2.1883,
15217
+ "step": 2524
15218
+ },
15219
+ {
15220
+ "epoch": 0.42,
15221
+ "learning_rate": 0.00047118644067796613,
15222
+ "loss": 2.1772,
15223
+ "step": 2525
15224
+ },
15225
+ {
15226
+ "epoch": 0.42,
15227
+ "learning_rate": 0.00047105084745762717,
15228
+ "loss": 2.1937,
15229
+ "step": 2526
15230
+ },
15231
+ {
15232
+ "epoch": 0.42,
15233
+ "learning_rate": 0.00047091525423728815,
15234
+ "loss": 2.2126,
15235
+ "step": 2527
15236
+ },
15237
+ {
15238
+ "epoch": 0.42,
15239
+ "learning_rate": 0.00047077966101694924,
15240
+ "loss": 2.2554,
15241
+ "step": 2528
15242
+ },
15243
+ {
15244
+ "epoch": 0.42,
15245
+ "learning_rate": 0.00047064406779661017,
15246
+ "loss": 2.1951,
15247
+ "step": 2529
15248
+ },
15249
+ {
15250
+ "epoch": 0.42,
15251
+ "learning_rate": 0.00047050847457627125,
15252
+ "loss": 2.1638,
15253
+ "step": 2530
15254
+ },
15255
+ {
15256
+ "epoch": 0.42,
15257
+ "learning_rate": 0.00047037288135593224,
15258
+ "loss": 2.1608,
15259
+ "step": 2531
15260
+ },
15261
+ {
15262
+ "epoch": 0.42,
15263
+ "learning_rate": 0.0004702372881355932,
15264
+ "loss": 2.092,
15265
+ "step": 2532
15266
+ },
15267
+ {
15268
+ "epoch": 0.42,
15269
+ "learning_rate": 0.00047010169491525425,
15270
+ "loss": 2.1712,
15271
+ "step": 2533
15272
+ },
15273
+ {
15274
+ "epoch": 0.42,
15275
+ "learning_rate": 0.00046996610169491523,
15276
+ "loss": 2.2195,
15277
+ "step": 2534
15278
+ },
15279
+ {
15280
+ "epoch": 0.42,
15281
+ "learning_rate": 0.0004698305084745763,
15282
+ "loss": 2.1822,
15283
+ "step": 2535
15284
+ },
15285
+ {
15286
+ "epoch": 0.42,
15287
+ "learning_rate": 0.0004696949152542373,
15288
+ "loss": 2.1229,
15289
+ "step": 2536
15290
+ },
15291
+ {
15292
+ "epoch": 0.42,
15293
+ "learning_rate": 0.00046955932203389834,
15294
+ "loss": 2.191,
15295
+ "step": 2537
15296
+ },
15297
+ {
15298
+ "epoch": 0.42,
15299
+ "learning_rate": 0.0004694237288135593,
15300
+ "loss": 2.196,
15301
+ "step": 2538
15302
+ },
15303
+ {
15304
+ "epoch": 0.42,
15305
+ "learning_rate": 0.0004692881355932204,
15306
+ "loss": 2.2301,
15307
+ "step": 2539
15308
+ },
15309
+ {
15310
+ "epoch": 0.42,
15311
+ "learning_rate": 0.00046915254237288133,
15312
+ "loss": 2.1972,
15313
+ "step": 2540
15314
+ },
15315
+ {
15316
+ "epoch": 0.42,
15317
+ "learning_rate": 0.0004690169491525424,
15318
+ "loss": 2.2244,
15319
+ "step": 2541
15320
+ },
15321
+ {
15322
+ "epoch": 0.42,
15323
+ "learning_rate": 0.0004688813559322034,
15324
+ "loss": 2.1232,
15325
+ "step": 2542
15326
+ },
15327
+ {
15328
+ "epoch": 0.42,
15329
+ "learning_rate": 0.00046874576271186444,
15330
+ "loss": 2.2249,
15331
+ "step": 2543
15332
+ },
15333
+ {
15334
+ "epoch": 0.42,
15335
+ "learning_rate": 0.0004686101694915254,
15336
+ "loss": 2.1743,
15337
+ "step": 2544
15338
+ },
15339
+ {
15340
+ "epoch": 0.42,
15341
+ "learning_rate": 0.0004684745762711865,
15342
+ "loss": 2.186,
15343
+ "step": 2545
15344
+ },
15345
+ {
15346
+ "epoch": 0.42,
15347
+ "learning_rate": 0.0004683389830508475,
15348
+ "loss": 2.1699,
15349
+ "step": 2546
15350
+ },
15351
+ {
15352
+ "epoch": 0.42,
15353
+ "learning_rate": 0.0004682033898305085,
15354
+ "loss": 2.188,
15355
+ "step": 2547
15356
+ },
15357
+ {
15358
+ "epoch": 0.42,
15359
+ "learning_rate": 0.0004680677966101695,
15360
+ "loss": 2.1757,
15361
+ "step": 2548
15362
+ },
15363
+ {
15364
+ "epoch": 0.42,
15365
+ "learning_rate": 0.0004679322033898306,
15366
+ "loss": 2.1531,
15367
+ "step": 2549
15368
+ },
15369
+ {
15370
+ "epoch": 0.42,
15371
+ "learning_rate": 0.00046779661016949157,
15372
+ "loss": 2.2214,
15373
+ "step": 2550
15374
+ },
15375
+ {
15376
+ "epoch": 0.43,
15377
+ "learning_rate": 0.0004676610169491526,
15378
+ "loss": 2.1826,
15379
+ "step": 2551
15380
+ },
15381
+ {
15382
+ "epoch": 0.43,
15383
+ "learning_rate": 0.0004675254237288136,
15384
+ "loss": 2.2332,
15385
+ "step": 2552
15386
+ },
15387
+ {
15388
+ "epoch": 0.43,
15389
+ "learning_rate": 0.00046738983050847457,
15390
+ "loss": 2.1946,
15391
+ "step": 2553
15392
+ },
15393
+ {
15394
+ "epoch": 0.43,
15395
+ "learning_rate": 0.0004672542372881356,
15396
+ "loss": 2.1449,
15397
+ "step": 2554
15398
+ },
15399
+ {
15400
+ "epoch": 0.43,
15401
+ "learning_rate": 0.0004671186440677966,
15402
+ "loss": 2.1659,
15403
+ "step": 2555
15404
+ },
15405
+ {
15406
+ "epoch": 0.43,
15407
+ "learning_rate": 0.00046698305084745767,
15408
+ "loss": 2.2544,
15409
+ "step": 2556
15410
+ },
15411
+ {
15412
+ "epoch": 0.43,
15413
+ "learning_rate": 0.00046684745762711865,
15414
+ "loss": 2.1633,
15415
+ "step": 2557
15416
+ },
15417
+ {
15418
+ "epoch": 0.43,
15419
+ "learning_rate": 0.0004667118644067797,
15420
+ "loss": 2.169,
15421
+ "step": 2558
15422
+ },
15423
+ {
15424
+ "epoch": 0.43,
15425
+ "learning_rate": 0.00046657627118644067,
15426
+ "loss": 2.2566,
15427
+ "step": 2559
15428
+ },
15429
+ {
15430
+ "epoch": 0.43,
15431
+ "learning_rate": 0.00046644067796610176,
15432
+ "loss": 2.1982,
15433
+ "step": 2560
15434
+ },
15435
+ {
15436
+ "epoch": 0.43,
15437
+ "learning_rate": 0.00046630508474576274,
15438
+ "loss": 2.1657,
15439
+ "step": 2561
15440
+ },
15441
+ {
15442
+ "epoch": 0.43,
15443
+ "learning_rate": 0.00046616949152542377,
15444
+ "loss": 2.2471,
15445
+ "step": 2562
15446
+ },
15447
+ {
15448
+ "epoch": 0.43,
15449
+ "learning_rate": 0.00046603389830508475,
15450
+ "loss": 2.207,
15451
+ "step": 2563
15452
+ },
15453
+ {
15454
+ "epoch": 0.43,
15455
+ "learning_rate": 0.0004658983050847458,
15456
+ "loss": 2.1663,
15457
+ "step": 2564
15458
+ },
15459
+ {
15460
+ "epoch": 0.43,
15461
+ "learning_rate": 0.00046576271186440677,
15462
+ "loss": 2.087,
15463
+ "step": 2565
15464
+ },
15465
+ {
15466
+ "epoch": 0.43,
15467
+ "learning_rate": 0.00046562711864406786,
15468
+ "loss": 2.2147,
15469
+ "step": 2566
15470
+ },
15471
+ {
15472
+ "epoch": 0.43,
15473
+ "learning_rate": 0.00046549152542372884,
15474
+ "loss": 2.22,
15475
+ "step": 2567
15476
+ },
15477
+ {
15478
+ "epoch": 0.43,
15479
+ "learning_rate": 0.00046535593220338987,
15480
+ "loss": 2.2194,
15481
+ "step": 2568
15482
+ },
15483
+ {
15484
+ "epoch": 0.43,
15485
+ "learning_rate": 0.00046522033898305085,
15486
+ "loss": 2.1823,
15487
+ "step": 2569
15488
+ },
15489
+ {
15490
+ "epoch": 0.43,
15491
+ "learning_rate": 0.00046508474576271194,
15492
+ "loss": 2.1742,
15493
+ "step": 2570
15494
+ },
15495
+ {
15496
+ "epoch": 0.43,
15497
+ "learning_rate": 0.0004649491525423729,
15498
+ "loss": 2.2006,
15499
+ "step": 2571
15500
+ },
15501
+ {
15502
+ "epoch": 0.43,
15503
+ "learning_rate": 0.00046481355932203396,
15504
+ "loss": 2.2428,
15505
+ "step": 2572
15506
+ },
15507
+ {
15508
+ "epoch": 0.43,
15509
+ "learning_rate": 0.00046467796610169494,
15510
+ "loss": 2.1535,
15511
+ "step": 2573
15512
+ },
15513
+ {
15514
+ "epoch": 0.43,
15515
+ "learning_rate": 0.0004645423728813559,
15516
+ "loss": 2.1628,
15517
+ "step": 2574
15518
+ },
15519
+ {
15520
+ "epoch": 0.43,
15521
+ "learning_rate": 0.00046440677966101695,
15522
+ "loss": 2.1787,
15523
+ "step": 2575
15524
+ },
15525
+ {
15526
+ "epoch": 0.43,
15527
+ "learning_rate": 0.00046427118644067793,
15528
+ "loss": 2.1241,
15529
+ "step": 2576
15530
+ },
15531
+ {
15532
+ "epoch": 0.43,
15533
+ "learning_rate": 0.000464135593220339,
15534
+ "loss": 2.2536,
15535
+ "step": 2577
15536
+ },
15537
+ {
15538
+ "epoch": 0.43,
15539
+ "learning_rate": 0.000464,
15540
+ "loss": 2.0936,
15541
+ "step": 2578
15542
+ },
15543
+ {
15544
+ "epoch": 0.43,
15545
+ "learning_rate": 0.00046386440677966104,
15546
+ "loss": 2.2309,
15547
+ "step": 2579
15548
+ },
15549
+ {
15550
+ "epoch": 0.43,
15551
+ "learning_rate": 0.000463728813559322,
15552
+ "loss": 2.1871,
15553
+ "step": 2580
15554
+ },
15555
+ {
15556
+ "epoch": 0.43,
15557
+ "learning_rate": 0.0004635932203389831,
15558
+ "loss": 2.2268,
15559
+ "step": 2581
15560
+ },
15561
+ {
15562
+ "epoch": 0.43,
15563
+ "learning_rate": 0.0004634576271186441,
15564
+ "loss": 2.1735,
15565
+ "step": 2582
15566
+ },
15567
+ {
15568
+ "epoch": 0.43,
15569
+ "learning_rate": 0.0004633220338983051,
15570
+ "loss": 2.1171,
15571
+ "step": 2583
15572
+ },
15573
+ {
15574
+ "epoch": 0.43,
15575
+ "learning_rate": 0.0004631864406779661,
15576
+ "loss": 2.2784,
15577
+ "step": 2584
15578
+ },
15579
+ {
15580
+ "epoch": 0.43,
15581
+ "learning_rate": 0.0004630508474576272,
15582
+ "loss": 2.212,
15583
+ "step": 2585
15584
+ },
15585
+ {
15586
+ "epoch": 0.43,
15587
+ "learning_rate": 0.0004629152542372881,
15588
+ "loss": 2.3069,
15589
+ "step": 2586
15590
+ },
15591
+ {
15592
+ "epoch": 0.43,
15593
+ "learning_rate": 0.0004627796610169492,
15594
+ "loss": 2.2547,
15595
+ "step": 2587
15596
+ },
15597
+ {
15598
+ "epoch": 0.43,
15599
+ "learning_rate": 0.0004626440677966102,
15600
+ "loss": 2.1827,
15601
+ "step": 2588
15602
+ },
15603
+ {
15604
+ "epoch": 0.43,
15605
+ "learning_rate": 0.0004625084745762712,
15606
+ "loss": 2.1967,
15607
+ "step": 2589
15608
+ },
15609
+ {
15610
+ "epoch": 0.43,
15611
+ "learning_rate": 0.0004623728813559322,
15612
+ "loss": 2.1982,
15613
+ "step": 2590
15614
+ },
15615
+ {
15616
+ "epoch": 0.43,
15617
+ "learning_rate": 0.0004622372881355933,
15618
+ "loss": 2.2301,
15619
+ "step": 2591
15620
+ },
15621
+ {
15622
+ "epoch": 0.43,
15623
+ "learning_rate": 0.00046210169491525427,
15624
+ "loss": 2.1121,
15625
+ "step": 2592
15626
+ },
15627
+ {
15628
+ "epoch": 0.43,
15629
+ "learning_rate": 0.00046196610169491525,
15630
+ "loss": 2.1594,
15631
+ "step": 2593
15632
+ },
15633
+ {
15634
+ "epoch": 0.43,
15635
+ "learning_rate": 0.0004618305084745763,
15636
+ "loss": 2.2726,
15637
+ "step": 2594
15638
+ },
15639
+ {
15640
+ "epoch": 0.43,
15641
+ "learning_rate": 0.00046169491525423727,
15642
+ "loss": 2.1845,
15643
+ "step": 2595
15644
+ },
15645
+ {
15646
+ "epoch": 0.43,
15647
+ "learning_rate": 0.00046155932203389836,
15648
+ "loss": 2.131,
15649
+ "step": 2596
15650
+ },
15651
+ {
15652
+ "epoch": 0.43,
15653
+ "learning_rate": 0.0004614237288135593,
15654
+ "loss": 2.1921,
15655
+ "step": 2597
15656
+ },
15657
+ {
15658
+ "epoch": 0.43,
15659
+ "learning_rate": 0.0004612881355932204,
15660
+ "loss": 2.1942,
15661
+ "step": 2598
15662
+ },
15663
+ {
15664
+ "epoch": 0.43,
15665
+ "learning_rate": 0.00046115254237288135,
15666
+ "loss": 2.2077,
15667
+ "step": 2599
15668
+ },
15669
+ {
15670
+ "epoch": 0.43,
15671
+ "learning_rate": 0.0004610169491525424,
15672
+ "loss": 2.237,
15673
+ "step": 2600
15674
+ },
15675
+ {
15676
+ "epoch": 0.43,
15677
+ "learning_rate": 0.00046088135593220337,
15678
+ "loss": 2.1849,
15679
+ "step": 2601
15680
+ },
15681
+ {
15682
+ "epoch": 0.43,
15683
+ "learning_rate": 0.00046074576271186446,
15684
+ "loss": 2.2331,
15685
+ "step": 2602
15686
+ },
15687
+ {
15688
+ "epoch": 0.43,
15689
+ "learning_rate": 0.00046061016949152544,
15690
+ "loss": 2.1861,
15691
+ "step": 2603
15692
+ },
15693
+ {
15694
+ "epoch": 0.43,
15695
+ "learning_rate": 0.0004604745762711865,
15696
+ "loss": 2.1723,
15697
+ "step": 2604
15698
+ },
15699
+ {
15700
+ "epoch": 0.43,
15701
+ "learning_rate": 0.00046033898305084745,
15702
+ "loss": 2.2068,
15703
+ "step": 2605
15704
+ },
15705
+ {
15706
+ "epoch": 0.43,
15707
+ "learning_rate": 0.00046020338983050854,
15708
+ "loss": 2.1222,
15709
+ "step": 2606
15710
+ },
15711
+ {
15712
+ "epoch": 0.43,
15713
+ "learning_rate": 0.0004600677966101695,
15714
+ "loss": 2.1435,
15715
+ "step": 2607
15716
+ },
15717
+ {
15718
+ "epoch": 0.43,
15719
+ "learning_rate": 0.00045993220338983056,
15720
+ "loss": 2.2329,
15721
+ "step": 2608
15722
+ },
15723
+ {
15724
+ "epoch": 0.43,
15725
+ "learning_rate": 0.00045979661016949154,
15726
+ "loss": 2.1265,
15727
+ "step": 2609
15728
+ },
15729
+ {
15730
+ "epoch": 0.43,
15731
+ "learning_rate": 0.0004596610169491526,
15732
+ "loss": 2.2636,
15733
+ "step": 2610
15734
+ },
15735
+ {
15736
+ "epoch": 0.44,
15737
+ "learning_rate": 0.00045952542372881355,
15738
+ "loss": 2.2236,
15739
+ "step": 2611
15740
+ },
15741
+ {
15742
+ "epoch": 0.44,
15743
+ "learning_rate": 0.00045938983050847464,
15744
+ "loss": 2.2194,
15745
+ "step": 2612
15746
+ },
15747
+ {
15748
+ "epoch": 0.44,
15749
+ "learning_rate": 0.0004592542372881356,
15750
+ "loss": 2.1611,
15751
+ "step": 2613
15752
+ },
15753
+ {
15754
+ "epoch": 0.44,
15755
+ "learning_rate": 0.0004591186440677966,
15756
+ "loss": 2.1603,
15757
+ "step": 2614
15758
+ },
15759
+ {
15760
+ "epoch": 0.44,
15761
+ "learning_rate": 0.00045898305084745764,
15762
+ "loss": 2.1169,
15763
+ "step": 2615
15764
+ },
15765
+ {
15766
+ "epoch": 0.44,
15767
+ "learning_rate": 0.0004588474576271186,
15768
+ "loss": 2.2098,
15769
+ "step": 2616
15770
+ },
15771
+ {
15772
+ "epoch": 0.44,
15773
+ "learning_rate": 0.0004587118644067797,
15774
+ "loss": 2.237,
15775
+ "step": 2617
15776
+ },
15777
+ {
15778
+ "epoch": 0.44,
15779
+ "learning_rate": 0.0004585762711864407,
15780
+ "loss": 2.1146,
15781
+ "step": 2618
15782
+ },
15783
+ {
15784
+ "epoch": 0.44,
15785
+ "learning_rate": 0.0004584406779661017,
15786
+ "loss": 2.2139,
15787
+ "step": 2619
15788
+ },
15789
+ {
15790
+ "epoch": 0.44,
15791
+ "learning_rate": 0.0004583050847457627,
15792
+ "loss": 2.1968,
15793
+ "step": 2620
15794
+ },
15795
+ {
15796
+ "epoch": 0.44,
15797
+ "learning_rate": 0.00045816949152542374,
15798
+ "loss": 2.1989,
15799
+ "step": 2621
15800
+ },
15801
+ {
15802
+ "epoch": 0.44,
15803
+ "learning_rate": 0.0004580338983050847,
15804
+ "loss": 2.2338,
15805
+ "step": 2622
15806
+ },
15807
+ {
15808
+ "epoch": 0.44,
15809
+ "learning_rate": 0.0004578983050847458,
15810
+ "loss": 2.2344,
15811
+ "step": 2623
15812
+ },
15813
+ {
15814
+ "epoch": 0.44,
15815
+ "learning_rate": 0.0004577627118644068,
15816
+ "loss": 2.1605,
15817
+ "step": 2624
15818
+ },
15819
+ {
15820
+ "epoch": 0.44,
15821
+ "learning_rate": 0.0004576271186440678,
15822
+ "loss": 2.1599,
15823
+ "step": 2625
15824
+ },
15825
+ {
15826
+ "epoch": 0.44,
15827
+ "learning_rate": 0.0004574915254237288,
15828
+ "loss": 2.2193,
15829
+ "step": 2626
15830
+ },
15831
+ {
15832
+ "epoch": 0.44,
15833
+ "learning_rate": 0.0004573559322033899,
15834
+ "loss": 2.199,
15835
+ "step": 2627
15836
+ },
15837
+ {
15838
+ "epoch": 0.44,
15839
+ "learning_rate": 0.0004572203389830509,
15840
+ "loss": 2.1502,
15841
+ "step": 2628
15842
+ },
15843
+ {
15844
+ "epoch": 0.44,
15845
+ "learning_rate": 0.0004570847457627119,
15846
+ "loss": 2.2478,
15847
+ "step": 2629
15848
+ },
15849
+ {
15850
+ "epoch": 0.44,
15851
+ "learning_rate": 0.0004569491525423729,
15852
+ "loss": 2.1512,
15853
+ "step": 2630
15854
+ },
15855
+ {
15856
+ "epoch": 0.44,
15857
+ "learning_rate": 0.000456813559322034,
15858
+ "loss": 2.2605,
15859
+ "step": 2631
15860
+ },
15861
+ {
15862
+ "epoch": 0.44,
15863
+ "learning_rate": 0.0004566779661016949,
15864
+ "loss": 2.258,
15865
+ "step": 2632
15866
+ },
15867
+ {
15868
+ "epoch": 0.44,
15869
+ "learning_rate": 0.000456542372881356,
15870
+ "loss": 2.1609,
15871
+ "step": 2633
15872
+ },
15873
+ {
15874
+ "epoch": 0.44,
15875
+ "learning_rate": 0.000456406779661017,
15876
+ "loss": 2.2546,
15877
+ "step": 2634
15878
+ },
15879
+ {
15880
+ "epoch": 0.44,
15881
+ "learning_rate": 0.00045627118644067796,
15882
+ "loss": 2.1307,
15883
+ "step": 2635
15884
+ },
15885
+ {
15886
+ "epoch": 0.44,
15887
+ "learning_rate": 0.000456135593220339,
15888
+ "loss": 2.1853,
15889
+ "step": 2636
15890
+ },
15891
+ {
15892
+ "epoch": 0.44,
15893
+ "learning_rate": 0.00045599999999999997,
15894
+ "loss": 2.2144,
15895
+ "step": 2637
15896
+ },
15897
+ {
15898
+ "epoch": 0.44,
15899
+ "learning_rate": 0.00045586440677966106,
15900
+ "loss": 2.2212,
15901
+ "step": 2638
15902
+ },
15903
+ {
15904
+ "epoch": 0.44,
15905
+ "learning_rate": 0.00045572881355932204,
15906
+ "loss": 2.1757,
15907
+ "step": 2639
15908
+ },
15909
+ {
15910
+ "epoch": 0.44,
15911
+ "learning_rate": 0.0004555932203389831,
15912
+ "loss": 2.1837,
15913
+ "step": 2640
15914
+ },
15915
+ {
15916
+ "epoch": 0.44,
15917
+ "learning_rate": 0.00045545762711864406,
15918
+ "loss": 2.1359,
15919
+ "step": 2641
15920
+ },
15921
+ {
15922
+ "epoch": 0.44,
15923
+ "learning_rate": 0.00045532203389830514,
15924
+ "loss": 2.2074,
15925
+ "step": 2642
15926
+ },
15927
+ {
15928
+ "epoch": 0.44,
15929
+ "learning_rate": 0.00045518644067796607,
15930
+ "loss": 2.1438,
15931
+ "step": 2643
15932
+ },
15933
+ {
15934
+ "epoch": 0.44,
15935
+ "learning_rate": 0.00045505084745762716,
15936
+ "loss": 2.1324,
15937
+ "step": 2644
15938
+ },
15939
+ {
15940
+ "epoch": 0.44,
15941
+ "learning_rate": 0.00045491525423728814,
15942
+ "loss": 2.225,
15943
+ "step": 2645
15944
+ },
15945
+ {
15946
+ "epoch": 0.44,
15947
+ "learning_rate": 0.0004547796610169492,
15948
+ "loss": 2.1935,
15949
+ "step": 2646
15950
+ },
15951
+ {
15952
+ "epoch": 0.44,
15953
+ "learning_rate": 0.00045464406779661016,
15954
+ "loss": 2.1445,
15955
+ "step": 2647
15956
+ },
15957
+ {
15958
+ "epoch": 0.44,
15959
+ "learning_rate": 0.00045450847457627125,
15960
+ "loss": 2.2461,
15961
+ "step": 2648
15962
+ },
15963
+ {
15964
+ "epoch": 0.44,
15965
+ "learning_rate": 0.0004543728813559322,
15966
+ "loss": 2.256,
15967
+ "step": 2649
15968
+ },
15969
+ {
15970
+ "epoch": 0.44,
15971
+ "learning_rate": 0.00045423728813559326,
15972
+ "loss": 2.1491,
15973
+ "step": 2650
15974
+ },
15975
+ {
15976
+ "epoch": 0.44,
15977
+ "learning_rate": 0.00045410169491525424,
15978
+ "loss": 2.139,
15979
+ "step": 2651
15980
+ },
15981
+ {
15982
+ "epoch": 0.44,
15983
+ "learning_rate": 0.00045396610169491533,
15984
+ "loss": 2.1354,
15985
+ "step": 2652
15986
+ },
15987
+ {
15988
+ "epoch": 0.44,
15989
+ "learning_rate": 0.0004538305084745763,
15990
+ "loss": 2.2622,
15991
+ "step": 2653
15992
+ },
15993
+ {
15994
+ "epoch": 0.44,
15995
+ "learning_rate": 0.00045369491525423735,
15996
+ "loss": 2.2424,
15997
+ "step": 2654
15998
+ },
15999
+ {
16000
+ "epoch": 0.44,
16001
+ "learning_rate": 0.0004535593220338983,
16002
+ "loss": 2.3128,
16003
+ "step": 2655
16004
+ },
16005
+ {
16006
+ "epoch": 0.44,
16007
+ "learning_rate": 0.0004534237288135593,
16008
+ "loss": 2.2169,
16009
+ "step": 2656
16010
+ },
16011
+ {
16012
+ "epoch": 0.44,
16013
+ "learning_rate": 0.00045328813559322034,
16014
+ "loss": 2.2816,
16015
+ "step": 2657
16016
+ },
16017
+ {
16018
+ "epoch": 0.44,
16019
+ "learning_rate": 0.0004531525423728813,
16020
+ "loss": 2.2674,
16021
+ "step": 2658
16022
+ },
16023
+ {
16024
+ "epoch": 0.44,
16025
+ "learning_rate": 0.0004530169491525424,
16026
+ "loss": 2.1776,
16027
+ "step": 2659
16028
+ },
16029
+ {
16030
+ "epoch": 0.44,
16031
+ "learning_rate": 0.0004528813559322034,
16032
+ "loss": 2.2076,
16033
+ "step": 2660
16034
+ },
16035
+ {
16036
+ "epoch": 0.44,
16037
+ "learning_rate": 0.0004527457627118644,
16038
+ "loss": 2.1904,
16039
+ "step": 2661
16040
+ },
16041
+ {
16042
+ "epoch": 0.44,
16043
+ "learning_rate": 0.0004526101694915254,
16044
+ "loss": 2.175,
16045
+ "step": 2662
16046
+ },
16047
+ {
16048
+ "epoch": 0.44,
16049
+ "learning_rate": 0.0004524745762711865,
16050
+ "loss": 2.2094,
16051
+ "step": 2663
16052
+ },
16053
+ {
16054
+ "epoch": 0.44,
16055
+ "learning_rate": 0.0004523389830508475,
16056
+ "loss": 2.0975,
16057
+ "step": 2664
16058
+ },
16059
+ {
16060
+ "epoch": 0.44,
16061
+ "learning_rate": 0.0004522033898305085,
16062
+ "loss": 2.1961,
16063
+ "step": 2665
16064
+ },
16065
+ {
16066
+ "epoch": 0.44,
16067
+ "learning_rate": 0.0004520677966101695,
16068
+ "loss": 2.1559,
16069
+ "step": 2666
16070
+ },
16071
+ {
16072
+ "epoch": 0.44,
16073
+ "learning_rate": 0.0004519322033898306,
16074
+ "loss": 2.1439,
16075
+ "step": 2667
16076
+ },
16077
+ {
16078
+ "epoch": 0.44,
16079
+ "learning_rate": 0.0004517966101694915,
16080
+ "loss": 2.1496,
16081
+ "step": 2668
16082
+ },
16083
+ {
16084
+ "epoch": 0.44,
16085
+ "learning_rate": 0.0004516610169491526,
16086
+ "loss": 2.1898,
16087
+ "step": 2669
16088
+ },
16089
+ {
16090
+ "epoch": 0.45,
16091
+ "learning_rate": 0.0004515254237288136,
16092
+ "loss": 2.1843,
16093
+ "step": 2670
16094
+ },
16095
+ {
16096
+ "epoch": 0.45,
16097
+ "learning_rate": 0.0004513898305084746,
16098
+ "loss": 2.1139,
16099
+ "step": 2671
16100
+ },
16101
+ {
16102
+ "epoch": 0.45,
16103
+ "learning_rate": 0.0004512542372881356,
16104
+ "loss": 2.1933,
16105
+ "step": 2672
16106
+ },
16107
+ {
16108
+ "epoch": 0.45,
16109
+ "learning_rate": 0.0004511186440677967,
16110
+ "loss": 2.1813,
16111
+ "step": 2673
16112
+ },
16113
+ {
16114
+ "epoch": 0.45,
16115
+ "learning_rate": 0.00045098305084745766,
16116
+ "loss": 2.1483,
16117
+ "step": 2674
16118
+ },
16119
+ {
16120
+ "epoch": 0.45,
16121
+ "learning_rate": 0.0004508474576271187,
16122
+ "loss": 2.1978,
16123
+ "step": 2675
16124
+ },
16125
+ {
16126
+ "epoch": 0.45,
16127
+ "learning_rate": 0.0004507118644067797,
16128
+ "loss": 2.1171,
16129
+ "step": 2676
16130
+ },
16131
+ {
16132
+ "epoch": 0.45,
16133
+ "learning_rate": 0.00045057627118644066,
16134
+ "loss": 2.1211,
16135
+ "step": 2677
16136
+ },
16137
+ {
16138
+ "epoch": 0.45,
16139
+ "learning_rate": 0.00045044067796610175,
16140
+ "loss": 2.1748,
16141
+ "step": 2678
16142
+ },
16143
+ {
16144
+ "epoch": 0.45,
16145
+ "learning_rate": 0.0004503050847457627,
16146
+ "loss": 2.1855,
16147
+ "step": 2679
16148
+ },
16149
+ {
16150
+ "epoch": 0.45,
16151
+ "learning_rate": 0.00045016949152542376,
16152
+ "loss": 2.1501,
16153
+ "step": 2680
16154
+ },
16155
+ {
16156
+ "epoch": 0.45,
16157
+ "learning_rate": 0.00045003389830508474,
16158
+ "loss": 2.2428,
16159
+ "step": 2681
16160
+ },
16161
+ {
16162
+ "epoch": 0.45,
16163
+ "learning_rate": 0.0004498983050847458,
16164
+ "loss": 2.1357,
16165
+ "step": 2682
16166
+ },
16167
+ {
16168
+ "epoch": 0.45,
16169
+ "learning_rate": 0.00044976271186440676,
16170
+ "loss": 2.1712,
16171
+ "step": 2683
16172
+ },
16173
+ {
16174
+ "epoch": 0.45,
16175
+ "learning_rate": 0.00044962711864406785,
16176
+ "loss": 2.1691,
16177
+ "step": 2684
16178
+ },
16179
+ {
16180
+ "epoch": 0.45,
16181
+ "learning_rate": 0.00044949152542372883,
16182
+ "loss": 2.2532,
16183
+ "step": 2685
16184
+ },
16185
+ {
16186
+ "epoch": 0.45,
16187
+ "learning_rate": 0.00044935593220338986,
16188
+ "loss": 2.1859,
16189
+ "step": 2686
16190
+ },
16191
+ {
16192
+ "epoch": 0.45,
16193
+ "learning_rate": 0.00044922033898305084,
16194
+ "loss": 2.2379,
16195
+ "step": 2687
16196
+ },
16197
+ {
16198
+ "epoch": 0.45,
16199
+ "learning_rate": 0.00044908474576271193,
16200
+ "loss": 2.2257,
16201
+ "step": 2688
16202
+ },
16203
+ {
16204
+ "epoch": 0.45,
16205
+ "learning_rate": 0.0004489491525423729,
16206
+ "loss": 2.1396,
16207
+ "step": 2689
16208
+ },
16209
+ {
16210
+ "epoch": 0.45,
16211
+ "learning_rate": 0.00044881355932203395,
16212
+ "loss": 2.2046,
16213
+ "step": 2690
16214
+ },
16215
+ {
16216
+ "epoch": 0.45,
16217
+ "learning_rate": 0.00044867796610169493,
16218
+ "loss": 2.1902,
16219
+ "step": 2691
16220
+ },
16221
+ {
16222
+ "epoch": 0.45,
16223
+ "learning_rate": 0.00044854237288135596,
16224
+ "loss": 2.1787,
16225
+ "step": 2692
16226
+ },
16227
+ {
16228
+ "epoch": 0.45,
16229
+ "learning_rate": 0.00044840677966101694,
16230
+ "loss": 2.2116,
16231
+ "step": 2693
16232
+ },
16233
+ {
16234
+ "epoch": 0.45,
16235
+ "learning_rate": 0.00044827118644067803,
16236
+ "loss": 2.196,
16237
+ "step": 2694
16238
+ },
16239
+ {
16240
+ "epoch": 0.45,
16241
+ "learning_rate": 0.000448135593220339,
16242
+ "loss": 2.1512,
16243
+ "step": 2695
16244
+ },
16245
+ {
16246
+ "epoch": 0.45,
16247
+ "learning_rate": 0.00044800000000000005,
16248
+ "loss": 2.2,
16249
+ "step": 2696
16250
+ },
16251
+ {
16252
+ "epoch": 0.45,
16253
+ "learning_rate": 0.00044786440677966103,
16254
+ "loss": 2.2264,
16255
+ "step": 2697
16256
+ },
16257
+ {
16258
+ "epoch": 0.45,
16259
+ "learning_rate": 0.000447728813559322,
16260
+ "loss": 2.2349,
16261
+ "step": 2698
16262
+ },
16263
+ {
16264
+ "epoch": 0.45,
16265
+ "learning_rate": 0.0004475932203389831,
16266
+ "loss": 2.191,
16267
+ "step": 2699
16268
+ },
16269
+ {
16270
+ "epoch": 0.45,
16271
+ "learning_rate": 0.0004474576271186441,
16272
+ "loss": 2.1978,
16273
+ "step": 2700
16274
+ },
16275
+ {
16276
+ "epoch": 0.45,
16277
+ "learning_rate": 0.0004473220338983051,
16278
+ "loss": 2.2675,
16279
+ "step": 2701
16280
+ },
16281
+ {
16282
+ "epoch": 0.45,
16283
+ "learning_rate": 0.0004471864406779661,
16284
+ "loss": 2.178,
16285
+ "step": 2702
16286
+ },
16287
+ {
16288
+ "epoch": 0.45,
16289
+ "learning_rate": 0.00044705084745762713,
16290
+ "loss": 2.2941,
16291
+ "step": 2703
16292
+ },
16293
+ {
16294
+ "epoch": 0.45,
16295
+ "learning_rate": 0.0004469152542372881,
16296
+ "loss": 2.2429,
16297
+ "step": 2704
16298
+ },
16299
+ {
16300
+ "epoch": 0.45,
16301
+ "learning_rate": 0.0004467796610169492,
16302
+ "loss": 2.2242,
16303
+ "step": 2705
16304
+ },
16305
+ {
16306
+ "epoch": 0.45,
16307
+ "learning_rate": 0.0004466440677966102,
16308
+ "loss": 2.2783,
16309
+ "step": 2706
16310
+ },
16311
+ {
16312
+ "epoch": 0.45,
16313
+ "learning_rate": 0.0004465084745762712,
16314
+ "loss": 2.241,
16315
+ "step": 2707
16316
+ },
16317
+ {
16318
+ "epoch": 0.45,
16319
+ "learning_rate": 0.0004463728813559322,
16320
+ "loss": 2.2416,
16321
+ "step": 2708
16322
+ },
16323
+ {
16324
+ "epoch": 0.45,
16325
+ "learning_rate": 0.0004462372881355933,
16326
+ "loss": 2.1677,
16327
+ "step": 2709
16328
+ },
16329
+ {
16330
+ "epoch": 0.45,
16331
+ "learning_rate": 0.00044610169491525426,
16332
+ "loss": 2.2013,
16333
+ "step": 2710
16334
+ },
16335
+ {
16336
+ "epoch": 0.45,
16337
+ "learning_rate": 0.0004459661016949153,
16338
+ "loss": 2.1218,
16339
+ "step": 2711
16340
+ },
16341
+ {
16342
+ "epoch": 0.45,
16343
+ "learning_rate": 0.0004458305084745763,
16344
+ "loss": 2.1403,
16345
+ "step": 2712
16346
+ },
16347
+ {
16348
+ "epoch": 0.45,
16349
+ "learning_rate": 0.00044569491525423737,
16350
+ "loss": 2.2065,
16351
+ "step": 2713
16352
+ },
16353
+ {
16354
+ "epoch": 0.45,
16355
+ "learning_rate": 0.0004455593220338983,
16356
+ "loss": 2.1355,
16357
+ "step": 2714
16358
+ },
16359
+ {
16360
+ "epoch": 0.45,
16361
+ "learning_rate": 0.0004454237288135594,
16362
+ "loss": 2.1722,
16363
+ "step": 2715
16364
+ },
16365
+ {
16366
+ "epoch": 0.45,
16367
+ "learning_rate": 0.00044528813559322036,
16368
+ "loss": 2.2116,
16369
+ "step": 2716
16370
+ },
16371
+ {
16372
+ "epoch": 0.45,
16373
+ "learning_rate": 0.0004451525423728814,
16374
+ "loss": 2.1821,
16375
+ "step": 2717
16376
+ },
16377
+ {
16378
+ "epoch": 0.45,
16379
+ "learning_rate": 0.0004450169491525424,
16380
+ "loss": 2.2321,
16381
+ "step": 2718
16382
+ },
16383
+ {
16384
+ "epoch": 0.45,
16385
+ "learning_rate": 0.00044488135593220336,
16386
+ "loss": 2.1749,
16387
+ "step": 2719
16388
+ },
16389
+ {
16390
+ "epoch": 0.45,
16391
+ "learning_rate": 0.00044474576271186445,
16392
+ "loss": 2.219,
16393
+ "step": 2720
16394
+ },
16395
+ {
16396
+ "epoch": 0.45,
16397
+ "learning_rate": 0.00044461016949152543,
16398
+ "loss": 2.1305,
16399
+ "step": 2721
16400
+ },
16401
+ {
16402
+ "epoch": 0.45,
16403
+ "learning_rate": 0.00044447457627118646,
16404
+ "loss": 2.1545,
16405
+ "step": 2722
16406
+ },
16407
+ {
16408
+ "epoch": 0.45,
16409
+ "learning_rate": 0.00044433898305084744,
16410
+ "loss": 2.2897,
16411
+ "step": 2723
16412
+ },
16413
+ {
16414
+ "epoch": 0.45,
16415
+ "learning_rate": 0.00044420338983050853,
16416
+ "loss": 2.1997,
16417
+ "step": 2724
16418
+ },
16419
+ {
16420
+ "epoch": 0.45,
16421
+ "learning_rate": 0.00044406779661016946,
16422
+ "loss": 2.2596,
16423
+ "step": 2725
16424
+ },
16425
+ {
16426
+ "epoch": 0.45,
16427
+ "learning_rate": 0.00044393220338983055,
16428
+ "loss": 2.2346,
16429
+ "step": 2726
16430
+ },
16431
+ {
16432
+ "epoch": 0.45,
16433
+ "learning_rate": 0.00044379661016949153,
16434
+ "loss": 2.1678,
16435
+ "step": 2727
16436
+ },
16437
+ {
16438
+ "epoch": 0.45,
16439
+ "learning_rate": 0.00044366101694915256,
16440
+ "loss": 2.1903,
16441
+ "step": 2728
16442
+ },
16443
+ {
16444
+ "epoch": 0.45,
16445
+ "learning_rate": 0.00044352542372881354,
16446
+ "loss": 2.1976,
16447
+ "step": 2729
16448
+ },
16449
+ {
16450
+ "epoch": 0.46,
16451
+ "learning_rate": 0.00044338983050847463,
16452
+ "loss": 2.1003,
16453
+ "step": 2730
16454
+ },
16455
+ {
16456
+ "epoch": 0.46,
16457
+ "learning_rate": 0.0004432542372881356,
16458
+ "loss": 2.0941,
16459
+ "step": 2731
16460
+ },
16461
+ {
16462
+ "epoch": 0.46,
16463
+ "learning_rate": 0.00044311864406779665,
16464
+ "loss": 2.2205,
16465
+ "step": 2732
16466
+ },
16467
+ {
16468
+ "epoch": 0.46,
16469
+ "learning_rate": 0.00044298305084745763,
16470
+ "loss": 2.108,
16471
+ "step": 2733
16472
+ },
16473
+ {
16474
+ "epoch": 0.46,
16475
+ "learning_rate": 0.0004428474576271187,
16476
+ "loss": 2.1486,
16477
+ "step": 2734
16478
+ },
16479
+ {
16480
+ "epoch": 0.46,
16481
+ "learning_rate": 0.0004427118644067797,
16482
+ "loss": 2.1967,
16483
+ "step": 2735
16484
+ },
16485
+ {
16486
+ "epoch": 0.46,
16487
+ "learning_rate": 0.00044257627118644073,
16488
+ "loss": 2.2078,
16489
+ "step": 2736
16490
+ },
16491
+ {
16492
+ "epoch": 0.46,
16493
+ "learning_rate": 0.0004424406779661017,
16494
+ "loss": 2.2254,
16495
+ "step": 2737
16496
+ },
16497
+ {
16498
+ "epoch": 0.46,
16499
+ "learning_rate": 0.00044230508474576275,
16500
+ "loss": 2.2598,
16501
+ "step": 2738
16502
+ },
16503
+ {
16504
+ "epoch": 0.46,
16505
+ "learning_rate": 0.00044216949152542373,
16506
+ "loss": 2.2207,
16507
+ "step": 2739
16508
+ },
16509
+ {
16510
+ "epoch": 0.46,
16511
+ "learning_rate": 0.0004420338983050847,
16512
+ "loss": 2.1272,
16513
+ "step": 2740
16514
+ },
16515
+ {
16516
+ "epoch": 0.46,
16517
+ "learning_rate": 0.0004418983050847458,
16518
+ "loss": 2.2438,
16519
+ "step": 2741
16520
+ },
16521
+ {
16522
+ "epoch": 0.46,
16523
+ "learning_rate": 0.0004417627118644068,
16524
+ "loss": 2.1494,
16525
+ "step": 2742
16526
+ },
16527
+ {
16528
+ "epoch": 0.46,
16529
+ "learning_rate": 0.0004416271186440678,
16530
+ "loss": 2.2687,
16531
+ "step": 2743
16532
+ },
16533
+ {
16534
+ "epoch": 0.46,
16535
+ "learning_rate": 0.0004414915254237288,
16536
+ "loss": 2.1845,
16537
+ "step": 2744
16538
+ },
16539
+ {
16540
+ "epoch": 0.46,
16541
+ "learning_rate": 0.0004413559322033899,
16542
+ "loss": 2.1781,
16543
+ "step": 2745
16544
+ },
16545
+ {
16546
+ "epoch": 0.46,
16547
+ "learning_rate": 0.00044122033898305087,
16548
+ "loss": 2.1787,
16549
+ "step": 2746
16550
+ },
16551
+ {
16552
+ "epoch": 0.46,
16553
+ "learning_rate": 0.0004410847457627119,
16554
+ "loss": 2.1107,
16555
+ "step": 2747
16556
+ },
16557
+ {
16558
+ "epoch": 0.46,
16559
+ "learning_rate": 0.0004409491525423729,
16560
+ "loss": 2.1027,
16561
+ "step": 2748
16562
+ },
16563
+ {
16564
+ "epoch": 0.46,
16565
+ "learning_rate": 0.0004408135593220339,
16566
+ "loss": 2.2099,
16567
+ "step": 2749
16568
+ },
16569
+ {
16570
+ "epoch": 0.46,
16571
+ "learning_rate": 0.0004406779661016949,
16572
+ "loss": 2.1507,
16573
+ "step": 2750
16574
+ },
16575
+ {
16576
+ "epoch": 0.46,
16577
+ "learning_rate": 0.000440542372881356,
16578
+ "loss": 2.168,
16579
+ "step": 2751
16580
+ },
16581
+ {
16582
+ "epoch": 0.46,
16583
+ "learning_rate": 0.00044040677966101697,
16584
+ "loss": 2.1331,
16585
+ "step": 2752
16586
+ },
16587
+ {
16588
+ "epoch": 0.46,
16589
+ "learning_rate": 0.000440271186440678,
16590
+ "loss": 2.286,
16591
+ "step": 2753
16592
+ },
16593
+ {
16594
+ "epoch": 0.46,
16595
+ "learning_rate": 0.000440135593220339,
16596
+ "loss": 2.1822,
16597
+ "step": 2754
16598
+ },
16599
+ {
16600
+ "epoch": 0.46,
16601
+ "learning_rate": 0.00044000000000000007,
16602
+ "loss": 2.1692,
16603
+ "step": 2755
16604
+ },
16605
+ {
16606
+ "epoch": 0.46,
16607
+ "learning_rate": 0.00043986440677966105,
16608
+ "loss": 2.1917,
16609
+ "step": 2756
16610
+ },
16611
+ {
16612
+ "epoch": 0.46,
16613
+ "learning_rate": 0.0004397288135593221,
16614
+ "loss": 2.1979,
16615
+ "step": 2757
16616
+ },
16617
+ {
16618
+ "epoch": 0.46,
16619
+ "learning_rate": 0.00043959322033898307,
16620
+ "loss": 2.1971,
16621
+ "step": 2758
16622
+ },
16623
+ {
16624
+ "epoch": 0.46,
16625
+ "learning_rate": 0.00043945762711864415,
16626
+ "loss": 2.1208,
16627
+ "step": 2759
16628
+ },
16629
+ {
16630
+ "epoch": 0.46,
16631
+ "learning_rate": 0.0004393220338983051,
16632
+ "loss": 2.2626,
16633
+ "step": 2760
16634
+ },
16635
+ {
16636
+ "epoch": 0.46,
16637
+ "learning_rate": 0.00043918644067796606,
16638
+ "loss": 2.2041,
16639
+ "step": 2761
16640
+ },
16641
+ {
16642
+ "epoch": 0.46,
16643
+ "learning_rate": 0.00043905084745762715,
16644
+ "loss": 2.1519,
16645
+ "step": 2762
16646
+ },
16647
+ {
16648
+ "epoch": 0.46,
16649
+ "learning_rate": 0.00043891525423728813,
16650
+ "loss": 2.2102,
16651
+ "step": 2763
16652
+ },
16653
+ {
16654
+ "epoch": 0.46,
16655
+ "learning_rate": 0.00043877966101694917,
16656
+ "loss": 2.1933,
16657
+ "step": 2764
16658
+ },
16659
+ {
16660
+ "epoch": 0.46,
16661
+ "learning_rate": 0.00043864406779661015,
16662
+ "loss": 2.124,
16663
+ "step": 2765
16664
+ },
16665
+ {
16666
+ "epoch": 0.46,
16667
+ "learning_rate": 0.00043850847457627124,
16668
+ "loss": 2.1205,
16669
+ "step": 2766
16670
+ },
16671
+ {
16672
+ "epoch": 0.46,
16673
+ "learning_rate": 0.0004383728813559322,
16674
+ "loss": 2.306,
16675
+ "step": 2767
16676
+ },
16677
+ {
16678
+ "epoch": 0.46,
16679
+ "learning_rate": 0.00043823728813559325,
16680
+ "loss": 2.196,
16681
+ "step": 2768
16682
+ },
16683
+ {
16684
+ "epoch": 0.46,
16685
+ "learning_rate": 0.00043810169491525423,
16686
+ "loss": 2.2451,
16687
+ "step": 2769
16688
+ },
16689
+ {
16690
+ "epoch": 0.46,
16691
+ "learning_rate": 0.0004379661016949153,
16692
+ "loss": 2.1794,
16693
+ "step": 2770
16694
+ },
16695
+ {
16696
+ "epoch": 0.46,
16697
+ "learning_rate": 0.00043783050847457625,
16698
+ "loss": 2.1634,
16699
+ "step": 2771
16700
+ },
16701
+ {
16702
+ "epoch": 0.46,
16703
+ "learning_rate": 0.00043769491525423734,
16704
+ "loss": 2.1123,
16705
+ "step": 2772
16706
+ },
16707
+ {
16708
+ "epoch": 0.46,
16709
+ "learning_rate": 0.0004375593220338983,
16710
+ "loss": 2.2193,
16711
+ "step": 2773
16712
+ },
16713
+ {
16714
+ "epoch": 0.46,
16715
+ "learning_rate": 0.00043742372881355935,
16716
+ "loss": 2.1491,
16717
+ "step": 2774
16718
+ },
16719
+ {
16720
+ "epoch": 0.46,
16721
+ "learning_rate": 0.00043728813559322033,
16722
+ "loss": 2.2288,
16723
+ "step": 2775
16724
+ },
16725
+ {
16726
+ "epoch": 0.46,
16727
+ "learning_rate": 0.0004371525423728814,
16728
+ "loss": 2.1587,
16729
+ "step": 2776
16730
+ },
16731
+ {
16732
+ "epoch": 0.46,
16733
+ "learning_rate": 0.0004370169491525424,
16734
+ "loss": 2.2145,
16735
+ "step": 2777
16736
+ },
16737
+ {
16738
+ "epoch": 0.46,
16739
+ "learning_rate": 0.00043688135593220344,
16740
+ "loss": 2.1669,
16741
+ "step": 2778
16742
+ },
16743
+ {
16744
+ "epoch": 0.46,
16745
+ "learning_rate": 0.0004367457627118644,
16746
+ "loss": 2.1546,
16747
+ "step": 2779
16748
+ },
16749
+ {
16750
+ "epoch": 0.46,
16751
+ "learning_rate": 0.0004366101694915254,
16752
+ "loss": 2.1785,
16753
+ "step": 2780
16754
+ },
16755
+ {
16756
+ "epoch": 0.46,
16757
+ "learning_rate": 0.0004364745762711865,
16758
+ "loss": 2.1258,
16759
+ "step": 2781
16760
+ },
16761
+ {
16762
+ "epoch": 0.46,
16763
+ "learning_rate": 0.0004363389830508474,
16764
+ "loss": 2.2338,
16765
+ "step": 2782
16766
+ },
16767
+ {
16768
+ "epoch": 0.46,
16769
+ "learning_rate": 0.0004362033898305085,
16770
+ "loss": 2.1971,
16771
+ "step": 2783
16772
+ },
16773
+ {
16774
+ "epoch": 0.46,
16775
+ "learning_rate": 0.0004360677966101695,
16776
+ "loss": 2.1648,
16777
+ "step": 2784
16778
+ },
16779
+ {
16780
+ "epoch": 0.46,
16781
+ "learning_rate": 0.0004359322033898305,
16782
+ "loss": 2.1698,
16783
+ "step": 2785
16784
+ },
16785
+ {
16786
+ "epoch": 0.46,
16787
+ "learning_rate": 0.0004357966101694915,
16788
+ "loss": 2.1884,
16789
+ "step": 2786
16790
+ },
16791
+ {
16792
+ "epoch": 0.46,
16793
+ "learning_rate": 0.0004356610169491526,
16794
+ "loss": 2.1965,
16795
+ "step": 2787
16796
+ },
16797
+ {
16798
+ "epoch": 0.46,
16799
+ "learning_rate": 0.00043552542372881357,
16800
+ "loss": 2.263,
16801
+ "step": 2788
16802
+ },
16803
+ {
16804
+ "epoch": 0.46,
16805
+ "learning_rate": 0.0004353898305084746,
16806
+ "loss": 2.2093,
16807
+ "step": 2789
16808
+ },
16809
+ {
16810
+ "epoch": 0.47,
16811
+ "learning_rate": 0.0004352542372881356,
16812
+ "loss": 2.1458,
16813
+ "step": 2790
16814
+ },
16815
+ {
16816
+ "epoch": 0.47,
16817
+ "learning_rate": 0.00043511864406779667,
16818
+ "loss": 2.1612,
16819
+ "step": 2791
16820
+ },
16821
+ {
16822
+ "epoch": 0.47,
16823
+ "learning_rate": 0.00043498305084745765,
16824
+ "loss": 2.2038,
16825
+ "step": 2792
16826
+ },
16827
+ {
16828
+ "epoch": 0.47,
16829
+ "learning_rate": 0.0004348474576271187,
16830
+ "loss": 2.1496,
16831
+ "step": 2793
16832
+ },
16833
+ {
16834
+ "epoch": 0.47,
16835
+ "learning_rate": 0.00043471186440677967,
16836
+ "loss": 2.1516,
16837
+ "step": 2794
16838
+ },
16839
+ {
16840
+ "epoch": 0.47,
16841
+ "learning_rate": 0.00043457627118644076,
16842
+ "loss": 2.1516,
16843
+ "step": 2795
16844
+ },
16845
+ {
16846
+ "epoch": 0.47,
16847
+ "learning_rate": 0.0004344406779661017,
16848
+ "loss": 2.2392,
16849
+ "step": 2796
16850
+ },
16851
+ {
16852
+ "epoch": 0.47,
16853
+ "learning_rate": 0.00043430508474576277,
16854
+ "loss": 2.1482,
16855
+ "step": 2797
16856
+ },
16857
+ {
16858
+ "epoch": 0.47,
16859
+ "learning_rate": 0.00043416949152542375,
16860
+ "loss": 2.1994,
16861
+ "step": 2798
16862
+ },
16863
+ {
16864
+ "epoch": 0.47,
16865
+ "learning_rate": 0.0004340338983050848,
16866
+ "loss": 2.1625,
16867
+ "step": 2799
16868
+ },
16869
+ {
16870
+ "epoch": 0.47,
16871
+ "learning_rate": 0.00043389830508474577,
16872
+ "loss": 2.1373,
16873
+ "step": 2800
16874
+ },
16875
+ {
16876
+ "epoch": 0.47,
16877
+ "learning_rate": 0.00043376271186440675,
16878
+ "loss": 2.2308,
16879
+ "step": 2801
16880
+ },
16881
+ {
16882
+ "epoch": 0.47,
16883
+ "learning_rate": 0.00043362711864406784,
16884
+ "loss": 2.132,
16885
+ "step": 2802
16886
+ },
16887
+ {
16888
+ "epoch": 0.47,
16889
+ "learning_rate": 0.0004334915254237288,
16890
+ "loss": 2.1553,
16891
+ "step": 2803
16892
+ },
16893
+ {
16894
+ "epoch": 0.47,
16895
+ "learning_rate": 0.00043335593220338985,
16896
+ "loss": 2.1216,
16897
+ "step": 2804
16898
+ },
16899
+ {
16900
+ "epoch": 0.47,
16901
+ "learning_rate": 0.00043322033898305083,
16902
+ "loss": 2.1978,
16903
+ "step": 2805
16904
+ },
16905
+ {
16906
+ "epoch": 0.47,
16907
+ "learning_rate": 0.0004330847457627119,
16908
+ "loss": 2.2046,
16909
+ "step": 2806
16910
+ },
16911
+ {
16912
+ "epoch": 0.47,
16913
+ "learning_rate": 0.00043294915254237285,
16914
+ "loss": 2.1302,
16915
+ "step": 2807
16916
+ },
16917
+ {
16918
+ "epoch": 0.47,
16919
+ "learning_rate": 0.00043281355932203394,
16920
+ "loss": 2.1334,
16921
+ "step": 2808
16922
+ },
16923
+ {
16924
+ "epoch": 0.47,
16925
+ "learning_rate": 0.0004326779661016949,
16926
+ "loss": 2.1931,
16927
+ "step": 2809
16928
+ },
16929
+ {
16930
+ "epoch": 0.47,
16931
+ "learning_rate": 0.00043254237288135595,
16932
+ "loss": 2.0879,
16933
+ "step": 2810
16934
+ },
16935
+ {
16936
+ "epoch": 0.47,
16937
+ "learning_rate": 0.00043240677966101693,
16938
+ "loss": 2.1784,
16939
+ "step": 2811
16940
+ },
16941
+ {
16942
+ "epoch": 0.47,
16943
+ "learning_rate": 0.000432271186440678,
16944
+ "loss": 2.18,
16945
+ "step": 2812
16946
+ },
16947
+ {
16948
+ "epoch": 0.47,
16949
+ "learning_rate": 0.000432135593220339,
16950
+ "loss": 2.1735,
16951
+ "step": 2813
16952
+ },
16953
+ {
16954
+ "epoch": 0.47,
16955
+ "learning_rate": 0.00043200000000000004,
16956
+ "loss": 2.0985,
16957
+ "step": 2814
16958
+ },
16959
+ {
16960
+ "epoch": 0.47,
16961
+ "learning_rate": 0.000431864406779661,
16962
+ "loss": 2.1894,
16963
+ "step": 2815
16964
+ },
16965
+ {
16966
+ "epoch": 0.47,
16967
+ "learning_rate": 0.0004317288135593221,
16968
+ "loss": 2.1497,
16969
+ "step": 2816
16970
+ },
16971
+ {
16972
+ "epoch": 0.47,
16973
+ "learning_rate": 0.0004315932203389831,
16974
+ "loss": 2.1981,
16975
+ "step": 2817
16976
+ },
16977
+ {
16978
+ "epoch": 0.47,
16979
+ "learning_rate": 0.0004314576271186441,
16980
+ "loss": 2.2008,
16981
+ "step": 2818
16982
+ },
16983
+ {
16984
+ "epoch": 0.47,
16985
+ "learning_rate": 0.0004313220338983051,
16986
+ "loss": 2.1571,
16987
+ "step": 2819
16988
+ },
16989
+ {
16990
+ "epoch": 0.47,
16991
+ "learning_rate": 0.00043118644067796614,
16992
+ "loss": 2.2743,
16993
+ "step": 2820
16994
+ },
16995
+ {
16996
+ "epoch": 0.47,
16997
+ "learning_rate": 0.0004310508474576271,
16998
+ "loss": 2.1833,
16999
+ "step": 2821
17000
+ },
17001
+ {
17002
+ "epoch": 0.47,
17003
+ "learning_rate": 0.0004309152542372881,
17004
+ "loss": 2.1796,
17005
+ "step": 2822
17006
+ },
17007
+ {
17008
+ "epoch": 0.47,
17009
+ "learning_rate": 0.0004307796610169492,
17010
+ "loss": 2.2698,
17011
+ "step": 2823
17012
+ },
17013
+ {
17014
+ "epoch": 0.47,
17015
+ "learning_rate": 0.00043064406779661017,
17016
+ "loss": 2.2112,
17017
+ "step": 2824
17018
+ },
17019
+ {
17020
+ "epoch": 0.47,
17021
+ "learning_rate": 0.0004305084745762712,
17022
+ "loss": 2.1944,
17023
+ "step": 2825
17024
+ },
17025
+ {
17026
+ "epoch": 0.47,
17027
+ "learning_rate": 0.0004303728813559322,
17028
+ "loss": 2.1812,
17029
+ "step": 2826
17030
+ },
17031
+ {
17032
+ "epoch": 0.47,
17033
+ "learning_rate": 0.0004302372881355933,
17034
+ "loss": 2.2353,
17035
+ "step": 2827
17036
+ },
17037
+ {
17038
+ "epoch": 0.47,
17039
+ "learning_rate": 0.00043010169491525425,
17040
+ "loss": 2.2165,
17041
+ "step": 2828
17042
+ },
17043
+ {
17044
+ "epoch": 0.47,
17045
+ "learning_rate": 0.0004299661016949153,
17046
+ "loss": 2.1878,
17047
+ "step": 2829
17048
+ },
17049
+ {
17050
+ "epoch": 0.47,
17051
+ "learning_rate": 0.00042983050847457627,
17052
+ "loss": 2.1921,
17053
+ "step": 2830
17054
+ },
17055
+ {
17056
+ "epoch": 0.47,
17057
+ "learning_rate": 0.0004296949152542373,
17058
+ "loss": 2.1842,
17059
+ "step": 2831
17060
+ },
17061
+ {
17062
+ "epoch": 0.47,
17063
+ "learning_rate": 0.0004295593220338983,
17064
+ "loss": 2.2485,
17065
+ "step": 2832
17066
+ },
17067
+ {
17068
+ "epoch": 0.47,
17069
+ "learning_rate": 0.0004294237288135594,
17070
+ "loss": 2.1801,
17071
+ "step": 2833
17072
+ },
17073
+ {
17074
+ "epoch": 0.47,
17075
+ "learning_rate": 0.00042928813559322035,
17076
+ "loss": 2.2013,
17077
+ "step": 2834
17078
+ },
17079
+ {
17080
+ "epoch": 0.47,
17081
+ "learning_rate": 0.0004291525423728814,
17082
+ "loss": 2.2313,
17083
+ "step": 2835
17084
+ },
17085
+ {
17086
+ "epoch": 0.47,
17087
+ "learning_rate": 0.00042901694915254237,
17088
+ "loss": 2.1955,
17089
+ "step": 2836
17090
+ },
17091
+ {
17092
+ "epoch": 0.47,
17093
+ "learning_rate": 0.00042888135593220346,
17094
+ "loss": 2.1234,
17095
+ "step": 2837
17096
+ },
17097
+ {
17098
+ "epoch": 0.47,
17099
+ "learning_rate": 0.00042874576271186444,
17100
+ "loss": 2.1559,
17101
+ "step": 2838
17102
+ },
17103
+ {
17104
+ "epoch": 0.47,
17105
+ "learning_rate": 0.0004286101694915255,
17106
+ "loss": 2.1444,
17107
+ "step": 2839
17108
+ },
17109
+ {
17110
+ "epoch": 0.47,
17111
+ "learning_rate": 0.00042847457627118645,
17112
+ "loss": 2.1511,
17113
+ "step": 2840
17114
+ },
17115
+ {
17116
+ "epoch": 0.47,
17117
+ "learning_rate": 0.00042833898305084754,
17118
+ "loss": 2.1536,
17119
+ "step": 2841
17120
+ },
17121
+ {
17122
+ "epoch": 0.47,
17123
+ "learning_rate": 0.00042820338983050847,
17124
+ "loss": 2.2474,
17125
+ "step": 2842
17126
+ },
17127
+ {
17128
+ "epoch": 0.47,
17129
+ "learning_rate": 0.00042806779661016945,
17130
+ "loss": 2.1643,
17131
+ "step": 2843
17132
+ },
17133
+ {
17134
+ "epoch": 0.47,
17135
+ "learning_rate": 0.00042793220338983054,
17136
+ "loss": 2.1671,
17137
+ "step": 2844
17138
+ },
17139
+ {
17140
+ "epoch": 0.47,
17141
+ "learning_rate": 0.0004277966101694915,
17142
+ "loss": 2.1588,
17143
+ "step": 2845
17144
+ },
17145
+ {
17146
+ "epoch": 0.47,
17147
+ "learning_rate": 0.00042766101694915255,
17148
+ "loss": 2.1799,
17149
+ "step": 2846
17150
+ },
17151
+ {
17152
+ "epoch": 0.47,
17153
+ "learning_rate": 0.00042752542372881354,
17154
+ "loss": 2.1565,
17155
+ "step": 2847
17156
+ },
17157
+ {
17158
+ "epoch": 0.47,
17159
+ "learning_rate": 0.0004273898305084746,
17160
+ "loss": 2.1751,
17161
+ "step": 2848
17162
+ },
17163
+ {
17164
+ "epoch": 0.47,
17165
+ "learning_rate": 0.0004272542372881356,
17166
+ "loss": 2.1091,
17167
+ "step": 2849
17168
+ },
17169
+ {
17170
+ "epoch": 0.47,
17171
+ "learning_rate": 0.00042711864406779664,
17172
+ "loss": 2.2063,
17173
+ "step": 2850
17174
+ },
17175
+ {
17176
+ "epoch": 0.48,
17177
+ "learning_rate": 0.0004269830508474576,
17178
+ "loss": 2.0999,
17179
+ "step": 2851
17180
+ },
17181
+ {
17182
+ "epoch": 0.48,
17183
+ "learning_rate": 0.0004268474576271187,
17184
+ "loss": 2.1041,
17185
+ "step": 2852
17186
+ },
17187
+ {
17188
+ "epoch": 0.48,
17189
+ "learning_rate": 0.00042671186440677964,
17190
+ "loss": 2.1839,
17191
+ "step": 2853
17192
+ },
17193
+ {
17194
+ "epoch": 0.48,
17195
+ "learning_rate": 0.0004265762711864407,
17196
+ "loss": 2.0895,
17197
+ "step": 2854
17198
+ },
17199
+ {
17200
+ "epoch": 0.48,
17201
+ "learning_rate": 0.0004264406779661017,
17202
+ "loss": 2.1978,
17203
+ "step": 2855
17204
+ },
17205
+ {
17206
+ "epoch": 0.48,
17207
+ "learning_rate": 0.00042630508474576274,
17208
+ "loss": 2.1899,
17209
+ "step": 2856
17210
+ },
17211
+ {
17212
+ "epoch": 0.48,
17213
+ "learning_rate": 0.0004261694915254237,
17214
+ "loss": 2.1461,
17215
+ "step": 2857
17216
+ },
17217
+ {
17218
+ "epoch": 0.48,
17219
+ "learning_rate": 0.0004260338983050848,
17220
+ "loss": 2.2126,
17221
+ "step": 2858
17222
+ },
17223
+ {
17224
+ "epoch": 0.48,
17225
+ "learning_rate": 0.0004258983050847458,
17226
+ "loss": 2.1616,
17227
+ "step": 2859
17228
+ },
17229
+ {
17230
+ "epoch": 0.48,
17231
+ "learning_rate": 0.0004257627118644068,
17232
+ "loss": 2.1526,
17233
+ "step": 2860
17234
+ },
17235
+ {
17236
+ "epoch": 0.48,
17237
+ "learning_rate": 0.0004256271186440678,
17238
+ "loss": 2.143,
17239
+ "step": 2861
17240
+ },
17241
+ {
17242
+ "epoch": 0.48,
17243
+ "learning_rate": 0.0004254915254237289,
17244
+ "loss": 2.1868,
17245
+ "step": 2862
17246
+ },
17247
+ {
17248
+ "epoch": 0.48,
17249
+ "learning_rate": 0.0004253559322033899,
17250
+ "loss": 2.0951,
17251
+ "step": 2863
17252
+ },
17253
+ {
17254
+ "epoch": 0.48,
17255
+ "learning_rate": 0.0004252203389830508,
17256
+ "loss": 2.206,
17257
+ "step": 2864
17258
+ },
17259
+ {
17260
+ "epoch": 0.48,
17261
+ "learning_rate": 0.0004250847457627119,
17262
+ "loss": 2.1904,
17263
+ "step": 2865
17264
+ },
17265
+ {
17266
+ "epoch": 0.48,
17267
+ "learning_rate": 0.00042494915254237287,
17268
+ "loss": 2.2698,
17269
+ "step": 2866
17270
+ },
17271
+ {
17272
+ "epoch": 0.48,
17273
+ "learning_rate": 0.0004248135593220339,
17274
+ "loss": 2.1582,
17275
+ "step": 2867
17276
+ },
17277
+ {
17278
+ "epoch": 0.48,
17279
+ "learning_rate": 0.0004246779661016949,
17280
+ "loss": 2.1616,
17281
+ "step": 2868
17282
+ },
17283
+ {
17284
+ "epoch": 0.48,
17285
+ "learning_rate": 0.000424542372881356,
17286
+ "loss": 2.2028,
17287
+ "step": 2869
17288
+ },
17289
+ {
17290
+ "epoch": 0.48,
17291
+ "learning_rate": 0.00042440677966101696,
17292
+ "loss": 2.2078,
17293
+ "step": 2870
17294
+ },
17295
+ {
17296
+ "epoch": 0.48,
17297
+ "learning_rate": 0.000424271186440678,
17298
+ "loss": 2.1194,
17299
+ "step": 2871
17300
+ },
17301
+ {
17302
+ "epoch": 0.48,
17303
+ "learning_rate": 0.00042413559322033897,
17304
+ "loss": 2.1585,
17305
+ "step": 2872
17306
+ },
17307
+ {
17308
+ "epoch": 0.48,
17309
+ "learning_rate": 0.00042400000000000006,
17310
+ "loss": 2.1722,
17311
+ "step": 2873
17312
+ },
17313
+ {
17314
+ "epoch": 0.48,
17315
+ "learning_rate": 0.00042386440677966104,
17316
+ "loss": 2.1138,
17317
+ "step": 2874
17318
+ },
17319
+ {
17320
+ "epoch": 0.48,
17321
+ "learning_rate": 0.0004237288135593221,
17322
+ "loss": 2.118,
17323
+ "step": 2875
17324
+ },
17325
+ {
17326
+ "epoch": 0.48,
17327
+ "learning_rate": 0.00042359322033898306,
17328
+ "loss": 2.1648,
17329
+ "step": 2876
17330
+ },
17331
+ {
17332
+ "epoch": 0.48,
17333
+ "learning_rate": 0.0004234576271186441,
17334
+ "loss": 2.1642,
17335
+ "step": 2877
17336
+ },
17337
+ {
17338
+ "epoch": 0.48,
17339
+ "learning_rate": 0.00042332203389830507,
17340
+ "loss": 2.1489,
17341
+ "step": 2878
17342
+ },
17343
+ {
17344
+ "epoch": 0.48,
17345
+ "learning_rate": 0.00042318644067796616,
17346
+ "loss": 2.1387,
17347
+ "step": 2879
17348
+ },
17349
+ {
17350
+ "epoch": 0.48,
17351
+ "learning_rate": 0.00042305084745762714,
17352
+ "loss": 2.1737,
17353
+ "step": 2880
17354
+ },
17355
+ {
17356
+ "epoch": 0.48,
17357
+ "learning_rate": 0.0004229152542372882,
17358
+ "loss": 2.1702,
17359
+ "step": 2881
17360
+ },
17361
+ {
17362
+ "epoch": 0.48,
17363
+ "learning_rate": 0.00042277966101694916,
17364
+ "loss": 2.0842,
17365
+ "step": 2882
17366
+ },
17367
+ {
17368
+ "epoch": 0.48,
17369
+ "learning_rate": 0.00042264406779661025,
17370
+ "loss": 2.1522,
17371
+ "step": 2883
17372
+ },
17373
+ {
17374
+ "epoch": 0.48,
17375
+ "learning_rate": 0.0004225084745762712,
17376
+ "loss": 2.2546,
17377
+ "step": 2884
17378
+ },
17379
+ {
17380
+ "epoch": 0.48,
17381
+ "learning_rate": 0.0004223728813559322,
17382
+ "loss": 2.1756,
17383
+ "step": 2885
17384
+ },
17385
+ {
17386
+ "epoch": 0.48,
17387
+ "learning_rate": 0.00042223728813559324,
17388
+ "loss": 2.2049,
17389
+ "step": 2886
17390
+ },
17391
+ {
17392
+ "epoch": 0.48,
17393
+ "learning_rate": 0.0004221016949152542,
17394
+ "loss": 2.1915,
17395
+ "step": 2887
17396
+ },
17397
+ {
17398
+ "epoch": 0.48,
17399
+ "learning_rate": 0.00042196610169491526,
17400
+ "loss": 2.2158,
17401
+ "step": 2888
17402
+ },
17403
+ {
17404
+ "epoch": 0.48,
17405
+ "learning_rate": 0.00042183050847457624,
17406
+ "loss": 2.1971,
17407
+ "step": 2889
17408
+ },
17409
+ {
17410
+ "epoch": 0.48,
17411
+ "learning_rate": 0.0004216949152542373,
17412
+ "loss": 2.1891,
17413
+ "step": 2890
17414
+ },
17415
+ {
17416
+ "epoch": 0.48,
17417
+ "learning_rate": 0.0004215593220338983,
17418
+ "loss": 2.1599,
17419
+ "step": 2891
17420
+ },
17421
+ {
17422
+ "epoch": 0.48,
17423
+ "learning_rate": 0.00042142372881355934,
17424
+ "loss": 2.2192,
17425
+ "step": 2892
17426
+ },
17427
+ {
17428
+ "epoch": 0.48,
17429
+ "learning_rate": 0.0004212881355932203,
17430
+ "loss": 2.1888,
17431
+ "step": 2893
17432
+ },
17433
+ {
17434
+ "epoch": 0.48,
17435
+ "learning_rate": 0.0004211525423728814,
17436
+ "loss": 2.1549,
17437
+ "step": 2894
17438
+ },
17439
+ {
17440
+ "epoch": 0.48,
17441
+ "learning_rate": 0.0004210169491525424,
17442
+ "loss": 2.0905,
17443
+ "step": 2895
17444
+ },
17445
+ {
17446
+ "epoch": 0.48,
17447
+ "learning_rate": 0.0004208813559322034,
17448
+ "loss": 2.1428,
17449
+ "step": 2896
17450
+ },
17451
+ {
17452
+ "epoch": 0.48,
17453
+ "learning_rate": 0.0004207457627118644,
17454
+ "loss": 2.0789,
17455
+ "step": 2897
17456
+ },
17457
+ {
17458
+ "epoch": 0.48,
17459
+ "learning_rate": 0.0004206101694915255,
17460
+ "loss": 2.2274,
17461
+ "step": 2898
17462
+ },
17463
+ {
17464
+ "epoch": 0.48,
17465
+ "learning_rate": 0.0004204745762711864,
17466
+ "loss": 2.2216,
17467
+ "step": 2899
17468
+ },
17469
+ {
17470
+ "epoch": 0.48,
17471
+ "learning_rate": 0.0004203389830508475,
17472
+ "loss": 2.1846,
17473
+ "step": 2900
17474
+ },
17475
+ {
17476
+ "epoch": 0.48,
17477
+ "learning_rate": 0.0004202033898305085,
17478
+ "loss": 2.2441,
17479
+ "step": 2901
17480
+ },
17481
+ {
17482
+ "epoch": 0.48,
17483
+ "learning_rate": 0.00042006779661016953,
17484
+ "loss": 2.1295,
17485
+ "step": 2902
17486
+ },
17487
+ {
17488
+ "epoch": 0.48,
17489
+ "learning_rate": 0.0004199322033898305,
17490
+ "loss": 2.2114,
17491
+ "step": 2903
17492
+ },
17493
+ {
17494
+ "epoch": 0.48,
17495
+ "learning_rate": 0.0004197966101694916,
17496
+ "loss": 2.1641,
17497
+ "step": 2904
17498
+ },
17499
+ {
17500
+ "epoch": 0.48,
17501
+ "learning_rate": 0.0004196610169491526,
17502
+ "loss": 2.1816,
17503
+ "step": 2905
17504
+ },
17505
+ {
17506
+ "epoch": 0.48,
17507
+ "learning_rate": 0.00041952542372881356,
17508
+ "loss": 2.2033,
17509
+ "step": 2906
17510
+ },
17511
+ {
17512
+ "epoch": 0.48,
17513
+ "learning_rate": 0.0004193898305084746,
17514
+ "loss": 2.1715,
17515
+ "step": 2907
17516
+ },
17517
+ {
17518
+ "epoch": 0.48,
17519
+ "learning_rate": 0.0004192542372881356,
17520
+ "loss": 2.1575,
17521
+ "step": 2908
17522
+ },
17523
+ {
17524
+ "epoch": 0.48,
17525
+ "learning_rate": 0.00041911864406779666,
17526
+ "loss": 2.2248,
17527
+ "step": 2909
17528
+ },
17529
+ {
17530
+ "epoch": 0.48,
17531
+ "learning_rate": 0.0004189830508474576,
17532
+ "loss": 2.281,
17533
+ "step": 2910
17534
+ },
17535
+ {
17536
+ "epoch": 0.49,
17537
+ "learning_rate": 0.0004188474576271187,
17538
+ "loss": 2.2173,
17539
+ "step": 2911
17540
+ },
17541
+ {
17542
+ "epoch": 0.49,
17543
+ "learning_rate": 0.00041871186440677966,
17544
+ "loss": 2.1685,
17545
+ "step": 2912
17546
+ },
17547
+ {
17548
+ "epoch": 0.49,
17549
+ "learning_rate": 0.0004185762711864407,
17550
+ "loss": 2.1741,
17551
+ "step": 2913
17552
+ },
17553
+ {
17554
+ "epoch": 0.49,
17555
+ "learning_rate": 0.0004184406779661017,
17556
+ "loss": 2.1727,
17557
+ "step": 2914
17558
+ },
17559
+ {
17560
+ "epoch": 0.49,
17561
+ "learning_rate": 0.00041830508474576276,
17562
+ "loss": 2.1974,
17563
+ "step": 2915
17564
+ },
17565
+ {
17566
+ "epoch": 0.49,
17567
+ "learning_rate": 0.00041816949152542374,
17568
+ "loss": 2.1526,
17569
+ "step": 2916
17570
+ },
17571
+ {
17572
+ "epoch": 0.49,
17573
+ "learning_rate": 0.0004180338983050848,
17574
+ "loss": 2.2312,
17575
+ "step": 2917
17576
+ },
17577
+ {
17578
+ "epoch": 0.49,
17579
+ "learning_rate": 0.00041789830508474576,
17580
+ "loss": 2.1809,
17581
+ "step": 2918
17582
+ },
17583
+ {
17584
+ "epoch": 0.49,
17585
+ "learning_rate": 0.00041776271186440685,
17586
+ "loss": 2.2041,
17587
+ "step": 2919
17588
+ },
17589
+ {
17590
+ "epoch": 0.49,
17591
+ "learning_rate": 0.00041762711864406783,
17592
+ "loss": 2.1479,
17593
+ "step": 2920
17594
+ },
17595
+ {
17596
+ "epoch": 0.49,
17597
+ "learning_rate": 0.00041749152542372886,
17598
+ "loss": 2.1227,
17599
+ "step": 2921
17600
+ },
17601
+ {
17602
+ "epoch": 0.49,
17603
+ "learning_rate": 0.00041735593220338984,
17604
+ "loss": 2.2373,
17605
+ "step": 2922
17606
+ },
17607
+ {
17608
+ "epoch": 0.49,
17609
+ "learning_rate": 0.00041722033898305093,
17610
+ "loss": 2.1932,
17611
+ "step": 2923
17612
+ },
17613
+ {
17614
+ "epoch": 0.49,
17615
+ "learning_rate": 0.00041708474576271186,
17616
+ "loss": 2.1475,
17617
+ "step": 2924
17618
+ },
17619
+ {
17620
+ "epoch": 0.49,
17621
+ "learning_rate": 0.00041694915254237295,
17622
+ "loss": 2.229,
17623
+ "step": 2925
17624
+ },
17625
+ {
17626
+ "epoch": 0.49,
17627
+ "learning_rate": 0.00041681355932203393,
17628
+ "loss": 2.2022,
17629
+ "step": 2926
17630
+ },
17631
+ {
17632
+ "epoch": 0.49,
17633
+ "learning_rate": 0.0004166779661016949,
17634
+ "loss": 2.1027,
17635
+ "step": 2927
17636
+ },
17637
+ {
17638
+ "epoch": 0.49,
17639
+ "learning_rate": 0.00041654237288135594,
17640
+ "loss": 2.1726,
17641
+ "step": 2928
17642
+ },
17643
+ {
17644
+ "epoch": 0.49,
17645
+ "learning_rate": 0.0004164067796610169,
17646
+ "loss": 2.1762,
17647
+ "step": 2929
17648
+ },
17649
+ {
17650
+ "epoch": 0.49,
17651
+ "learning_rate": 0.000416271186440678,
17652
+ "loss": 2.1976,
17653
+ "step": 2930
17654
+ },
17655
+ {
17656
+ "epoch": 0.49,
17657
+ "learning_rate": 0.000416135593220339,
17658
+ "loss": 2.1517,
17659
+ "step": 2931
17660
+ },
17661
+ {
17662
+ "epoch": 0.49,
17663
+ "learning_rate": 0.00041600000000000003,
17664
+ "loss": 2.1961,
17665
+ "step": 2932
17666
+ },
17667
+ {
17668
+ "epoch": 0.49,
17669
+ "learning_rate": 0.000415864406779661,
17670
+ "loss": 2.1523,
17671
+ "step": 2933
17672
+ },
17673
+ {
17674
+ "epoch": 0.49,
17675
+ "learning_rate": 0.0004157288135593221,
17676
+ "loss": 2.1589,
17677
+ "step": 2934
17678
+ },
17679
+ {
17680
+ "epoch": 0.49,
17681
+ "learning_rate": 0.000415593220338983,
17682
+ "loss": 2.2447,
17683
+ "step": 2935
17684
+ },
17685
+ {
17686
+ "epoch": 0.49,
17687
+ "learning_rate": 0.0004154576271186441,
17688
+ "loss": 2.1304,
17689
+ "step": 2936
17690
+ },
17691
+ {
17692
+ "epoch": 0.49,
17693
+ "learning_rate": 0.0004153220338983051,
17694
+ "loss": 2.137,
17695
+ "step": 2937
17696
+ },
17697
+ {
17698
+ "epoch": 0.49,
17699
+ "learning_rate": 0.00041518644067796613,
17700
+ "loss": 2.1495,
17701
+ "step": 2938
17702
+ },
17703
+ {
17704
+ "epoch": 0.49,
17705
+ "learning_rate": 0.0004150508474576271,
17706
+ "loss": 2.1051,
17707
+ "step": 2939
17708
+ },
17709
+ {
17710
+ "epoch": 0.49,
17711
+ "learning_rate": 0.0004149152542372882,
17712
+ "loss": 2.1827,
17713
+ "step": 2940
17714
+ },
17715
+ {
17716
+ "epoch": 0.49,
17717
+ "learning_rate": 0.0004147796610169492,
17718
+ "loss": 2.2329,
17719
+ "step": 2941
17720
+ },
17721
+ {
17722
+ "epoch": 0.49,
17723
+ "learning_rate": 0.0004146440677966102,
17724
+ "loss": 2.1234,
17725
+ "step": 2942
17726
+ },
17727
+ {
17728
+ "epoch": 0.49,
17729
+ "learning_rate": 0.0004145084745762712,
17730
+ "loss": 2.1709,
17731
+ "step": 2943
17732
+ },
17733
+ {
17734
+ "epoch": 0.49,
17735
+ "learning_rate": 0.0004143728813559323,
17736
+ "loss": 2.1335,
17737
+ "step": 2944
17738
+ },
17739
+ {
17740
+ "epoch": 0.49,
17741
+ "learning_rate": 0.00041423728813559326,
17742
+ "loss": 2.1752,
17743
+ "step": 2945
17744
+ },
17745
+ {
17746
+ "epoch": 0.49,
17747
+ "learning_rate": 0.0004141016949152543,
17748
+ "loss": 2.1176,
17749
+ "step": 2946
17750
+ },
17751
+ {
17752
+ "epoch": 0.49,
17753
+ "learning_rate": 0.0004139661016949153,
17754
+ "loss": 2.1184,
17755
+ "step": 2947
17756
+ },
17757
+ {
17758
+ "epoch": 0.49,
17759
+ "learning_rate": 0.00041383050847457626,
17760
+ "loss": 2.0973,
17761
+ "step": 2948
17762
+ },
17763
+ {
17764
+ "epoch": 0.49,
17765
+ "learning_rate": 0.0004136949152542373,
17766
+ "loss": 2.1017,
17767
+ "step": 2949
17768
+ },
17769
+ {
17770
+ "epoch": 0.49,
17771
+ "learning_rate": 0.0004135593220338983,
17772
+ "loss": 2.1777,
17773
+ "step": 2950
17774
+ },
17775
+ {
17776
+ "epoch": 0.49,
17777
+ "learning_rate": 0.00041342372881355936,
17778
+ "loss": 2.1708,
17779
+ "step": 2951
17780
+ },
17781
+ {
17782
+ "epoch": 0.49,
17783
+ "learning_rate": 0.00041328813559322034,
17784
+ "loss": 2.2064,
17785
+ "step": 2952
17786
+ },
17787
+ {
17788
+ "epoch": 0.49,
17789
+ "learning_rate": 0.0004131525423728814,
17790
+ "loss": 2.1337,
17791
+ "step": 2953
17792
+ },
17793
+ {
17794
+ "epoch": 0.49,
17795
+ "learning_rate": 0.00041301694915254236,
17796
+ "loss": 2.1547,
17797
+ "step": 2954
17798
+ },
17799
+ {
17800
+ "epoch": 0.49,
17801
+ "learning_rate": 0.00041288135593220345,
17802
+ "loss": 2.1788,
17803
+ "step": 2955
17804
+ },
17805
+ {
17806
+ "epoch": 0.49,
17807
+ "learning_rate": 0.00041274576271186443,
17808
+ "loss": 2.2068,
17809
+ "step": 2956
17810
+ },
17811
+ {
17812
+ "epoch": 0.49,
17813
+ "learning_rate": 0.00041261016949152546,
17814
+ "loss": 2.1452,
17815
+ "step": 2957
17816
+ },
17817
+ {
17818
+ "epoch": 0.49,
17819
+ "learning_rate": 0.00041247457627118645,
17820
+ "loss": 2.1059,
17821
+ "step": 2958
17822
+ },
17823
+ {
17824
+ "epoch": 0.49,
17825
+ "learning_rate": 0.0004123389830508475,
17826
+ "loss": 2.1151,
17827
+ "step": 2959
17828
+ },
17829
+ {
17830
+ "epoch": 0.49,
17831
+ "learning_rate": 0.00041220338983050846,
17832
+ "loss": 2.2186,
17833
+ "step": 2960
17834
+ },
17835
+ {
17836
+ "epoch": 0.49,
17837
+ "learning_rate": 0.00041206779661016955,
17838
+ "loss": 2.2221,
17839
+ "step": 2961
17840
+ },
17841
+ {
17842
+ "epoch": 0.49,
17843
+ "learning_rate": 0.00041193220338983053,
17844
+ "loss": 2.1348,
17845
+ "step": 2962
17846
+ },
17847
+ {
17848
+ "epoch": 0.49,
17849
+ "learning_rate": 0.00041179661016949156,
17850
+ "loss": 2.1694,
17851
+ "step": 2963
17852
+ },
17853
+ {
17854
+ "epoch": 0.49,
17855
+ "learning_rate": 0.00041166101694915255,
17856
+ "loss": 2.1813,
17857
+ "step": 2964
17858
+ },
17859
+ {
17860
+ "epoch": 0.49,
17861
+ "learning_rate": 0.00041152542372881363,
17862
+ "loss": 2.2421,
17863
+ "step": 2965
17864
+ },
17865
+ {
17866
+ "epoch": 0.49,
17867
+ "learning_rate": 0.0004113898305084746,
17868
+ "loss": 2.2113,
17869
+ "step": 2966
17870
+ },
17871
+ {
17872
+ "epoch": 0.49,
17873
+ "learning_rate": 0.0004112542372881356,
17874
+ "loss": 2.146,
17875
+ "step": 2967
17876
+ },
17877
+ {
17878
+ "epoch": 0.49,
17879
+ "learning_rate": 0.00041111864406779663,
17880
+ "loss": 2.1209,
17881
+ "step": 2968
17882
+ },
17883
+ {
17884
+ "epoch": 0.49,
17885
+ "learning_rate": 0.0004109830508474576,
17886
+ "loss": 2.2126,
17887
+ "step": 2969
17888
+ },
17889
+ {
17890
+ "epoch": 0.49,
17891
+ "learning_rate": 0.00041084745762711865,
17892
+ "loss": 2.1748,
17893
+ "step": 2970
17894
+ },
17895
+ {
17896
+ "epoch": 0.5,
17897
+ "learning_rate": 0.0004107118644067796,
17898
+ "loss": 2.0626,
17899
+ "step": 2971
17900
+ },
17901
+ {
17902
+ "epoch": 0.5,
17903
+ "learning_rate": 0.0004105762711864407,
17904
+ "loss": 2.1513,
17905
+ "step": 2972
17906
+ },
17907
+ {
17908
+ "epoch": 0.5,
17909
+ "learning_rate": 0.0004104406779661017,
17910
+ "loss": 2.1045,
17911
+ "step": 2973
17912
+ },
17913
+ {
17914
+ "epoch": 0.5,
17915
+ "learning_rate": 0.00041030508474576273,
17916
+ "loss": 2.218,
17917
+ "step": 2974
17918
+ },
17919
+ {
17920
+ "epoch": 0.5,
17921
+ "learning_rate": 0.0004101694915254237,
17922
+ "loss": 2.1739,
17923
+ "step": 2975
17924
+ },
17925
+ {
17926
+ "epoch": 0.5,
17927
+ "learning_rate": 0.0004100338983050848,
17928
+ "loss": 2.1967,
17929
+ "step": 2976
17930
+ },
17931
+ {
17932
+ "epoch": 0.5,
17933
+ "learning_rate": 0.0004098983050847458,
17934
+ "loss": 2.1824,
17935
+ "step": 2977
17936
+ },
17937
+ {
17938
+ "epoch": 0.5,
17939
+ "learning_rate": 0.0004097627118644068,
17940
+ "loss": 2.1947,
17941
+ "step": 2978
17942
+ },
17943
+ {
17944
+ "epoch": 0.5,
17945
+ "learning_rate": 0.0004096271186440678,
17946
+ "loss": 2.1325,
17947
+ "step": 2979
17948
+ },
17949
+ {
17950
+ "epoch": 0.5,
17951
+ "learning_rate": 0.0004094915254237289,
17952
+ "loss": 2.2497,
17953
+ "step": 2980
17954
+ },
17955
+ {
17956
+ "epoch": 0.5,
17957
+ "learning_rate": 0.0004093559322033898,
17958
+ "loss": 2.1418,
17959
+ "step": 2981
17960
+ },
17961
+ {
17962
+ "epoch": 0.5,
17963
+ "learning_rate": 0.0004092203389830509,
17964
+ "loss": 2.1621,
17965
+ "step": 2982
17966
+ },
17967
+ {
17968
+ "epoch": 0.5,
17969
+ "learning_rate": 0.0004090847457627119,
17970
+ "loss": 2.1295,
17971
+ "step": 2983
17972
+ },
17973
+ {
17974
+ "epoch": 0.5,
17975
+ "learning_rate": 0.0004089491525423729,
17976
+ "loss": 2.1085,
17977
+ "step": 2984
17978
+ },
17979
+ {
17980
+ "epoch": 0.5,
17981
+ "learning_rate": 0.0004088135593220339,
17982
+ "loss": 2.1285,
17983
+ "step": 2985
17984
+ },
17985
+ {
17986
+ "epoch": 0.5,
17987
+ "learning_rate": 0.000408677966101695,
17988
+ "loss": 2.0908,
17989
+ "step": 2986
17990
+ },
17991
+ {
17992
+ "epoch": 0.5,
17993
+ "learning_rate": 0.00040854237288135597,
17994
+ "loss": 2.151,
17995
+ "step": 2987
17996
+ },
17997
+ {
17998
+ "epoch": 0.5,
17999
+ "learning_rate": 0.00040840677966101695,
18000
+ "loss": 2.2518,
18001
+ "step": 2988
18002
+ },
18003
+ {
18004
+ "epoch": 0.5,
18005
+ "learning_rate": 0.000408271186440678,
18006
+ "loss": 2.0897,
18007
+ "step": 2989
18008
+ },
18009
+ {
18010
+ "epoch": 0.5,
18011
+ "learning_rate": 0.00040813559322033896,
18012
+ "loss": 2.2226,
18013
+ "step": 2990
18014
+ },
18015
+ {
18016
+ "epoch": 0.5,
18017
+ "learning_rate": 0.00040800000000000005,
18018
+ "loss": 2.1714,
18019
+ "step": 2991
18020
+ },
18021
+ {
18022
+ "epoch": 0.5,
18023
+ "learning_rate": 0.000407864406779661,
18024
+ "loss": 2.1256,
18025
+ "step": 2992
18026
+ },
18027
+ {
18028
+ "epoch": 0.5,
18029
+ "learning_rate": 0.00040772881355932207,
18030
+ "loss": 2.2306,
18031
+ "step": 2993
18032
+ },
18033
+ {
18034
+ "epoch": 0.5,
18035
+ "learning_rate": 0.00040759322033898305,
18036
+ "loss": 2.18,
18037
+ "step": 2994
18038
+ },
18039
+ {
18040
+ "epoch": 0.5,
18041
+ "learning_rate": 0.0004074576271186441,
18042
+ "loss": 2.2124,
18043
+ "step": 2995
18044
+ },
18045
+ {
18046
+ "epoch": 0.5,
18047
+ "learning_rate": 0.00040732203389830506,
18048
+ "loss": 2.1893,
18049
+ "step": 2996
18050
+ },
18051
+ {
18052
+ "epoch": 0.5,
18053
+ "learning_rate": 0.00040718644067796615,
18054
+ "loss": 2.1891,
18055
+ "step": 2997
18056
+ },
18057
+ {
18058
+ "epoch": 0.5,
18059
+ "learning_rate": 0.00040705084745762713,
18060
+ "loss": 2.2257,
18061
+ "step": 2998
18062
+ },
18063
+ {
18064
+ "epoch": 0.5,
18065
+ "learning_rate": 0.00040691525423728817,
18066
+ "loss": 2.0973,
18067
+ "step": 2999
18068
+ },
18069
+ {
18070
+ "epoch": 0.5,
18071
+ "learning_rate": 0.00040677966101694915,
18072
+ "loss": 2.221,
18073
+ "step": 3000
18074
+ },
18075
+ {
18076
+ "epoch": 0.5,
18077
+ "eval_gen_len": 19.0,
18078
+ "eval_loss": 2.0400795936584473,
18079
+ "eval_rouge1": 0.2657,
18080
+ "eval_rouge2": 0.1001,
18081
+ "eval_rougeL": 0.2227,
18082
+ "eval_rougeLsum": 0.2226,
18083
+ "eval_runtime": 21.6863,
18084
+ "eval_samples_per_second": 2.306,
18085
+ "eval_steps_per_second": 0.323,
18086
+ "step": 3000
18087
  }
18088
  ],
18089
  "max_steps": 6000,
18090
  "num_train_epochs": 9223372036854775807,
18091
+ "total_flos": 2.8570333609822003e+18,
18092
  "trial_name": null,
18093
  "trial_params": null
18094
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9aee612a0a1c2847da99ac4385fa982ffe96a11a86e9c19b170bc87c12c873b1
3
  size 2368281769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02c85713dad1dcfeb5c32c03d2b6f448d20d550a87402f9d21be276100bce607
3
  size 2368281769