GlycerinLOL committed on
Commit
c590e84
·
verified ·
1 Parent(s): c33e842

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +7 -0
  2. train_results.json +7 -0
  3. trainer_state.json +136 -0
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "train_loss": 1.6477281238721764,
4
+ "train_runtime": 4221.2735,
5
+ "train_samples_per_second": 13.96,
6
+ "train_steps_per_second": 0.872
7
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "train_loss": 1.6477281238721764,
4
+ "train_runtime": 4221.2735,
5
+ "train_samples_per_second": 13.96,
6
+ "train_steps_per_second": 0.872
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.9967417865870214,
5
+ "eval_steps": 500,
6
+ "global_step": 3680,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.54,
13
+ "learning_rate": 1.731521739130435e-05,
14
+ "loss": 1.9542,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_f1": 0.9149,
20
+ "eval_gen_len": 18.56723716381418,
21
+ "eval_loss": 1.534995198249817,
22
+ "eval_precision": 0.9229,
23
+ "eval_recall": 0.9074,
24
+ "eval_rouge1": 0.4928,
25
+ "eval_rouge2": 0.2436,
26
+ "eval_rougeL": 0.4085,
27
+ "eval_rougeLsum": 0.4086,
28
+ "eval_runtime": 246.8012,
29
+ "eval_samples_per_second": 3.314,
30
+ "eval_steps_per_second": 0.831,
31
+ "step": 920
32
+ },
33
+ {
34
+ "epoch": 1.09,
35
+ "learning_rate": 1.4603260869565218e-05,
36
+ "loss": 1.7174,
37
+ "step": 1000
38
+ },
39
+ {
40
+ "epoch": 1.63,
41
+ "learning_rate": 1.1885869565217392e-05,
42
+ "loss": 1.6331,
43
+ "step": 1500
44
+ },
45
+ {
46
+ "epoch": 2.0,
47
+ "eval_f1": 0.9166,
48
+ "eval_gen_len": 18.815403422982886,
49
+ "eval_loss": 1.491409182548523,
50
+ "eval_precision": 0.9246,
51
+ "eval_recall": 0.9092,
52
+ "eval_rouge1": 0.5037,
53
+ "eval_rouge2": 0.257,
54
+ "eval_rougeL": 0.4202,
55
+ "eval_rougeLsum": 0.4206,
56
+ "eval_runtime": 247.8681,
57
+ "eval_samples_per_second": 3.3,
58
+ "eval_steps_per_second": 0.827,
59
+ "step": 1841
60
+ },
61
+ {
62
+ "epoch": 2.17,
63
+ "learning_rate": 9.179347826086958e-06,
64
+ "loss": 1.5996,
65
+ "step": 2000
66
+ },
67
+ {
68
+ "epoch": 2.72,
69
+ "learning_rate": 6.472826086956522e-06,
70
+ "loss": 1.5694,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 3.0,
75
+ "eval_f1": 0.917,
76
+ "eval_gen_len": 19.448655256723715,
77
+ "eval_loss": 1.4760992527008057,
78
+ "eval_precision": 0.9241,
79
+ "eval_recall": 0.9103,
80
+ "eval_rouge1": 0.5071,
81
+ "eval_rouge2": 0.259,
82
+ "eval_rougeL": 0.4212,
83
+ "eval_rougeLsum": 0.4214,
84
+ "eval_runtime": 249.775,
85
+ "eval_samples_per_second": 3.275,
86
+ "eval_steps_per_second": 0.821,
87
+ "step": 2762
88
+ },
89
+ {
90
+ "epoch": 3.26,
91
+ "learning_rate": 3.7554347826086963e-06,
92
+ "loss": 1.5609,
93
+ "step": 3000
94
+ },
95
+ {
96
+ "epoch": 3.8,
97
+ "learning_rate": 1.048913043478261e-06,
98
+ "loss": 1.5374,
99
+ "step": 3500
100
+ },
101
+ {
102
+ "epoch": 4.0,
103
+ "eval_f1": 0.917,
104
+ "eval_gen_len": 19.1479217603912,
105
+ "eval_loss": 1.4708906412124634,
106
+ "eval_precision": 0.9247,
107
+ "eval_recall": 0.9099,
108
+ "eval_rouge1": 0.5072,
109
+ "eval_rouge2": 0.2631,
110
+ "eval_rougeL": 0.4243,
111
+ "eval_rougeLsum": 0.4244,
112
+ "eval_runtime": 248.4801,
113
+ "eval_samples_per_second": 3.292,
114
+ "eval_steps_per_second": 0.825,
115
+ "step": 3680
116
+ },
117
+ {
118
+ "epoch": 4.0,
119
+ "step": 3680,
120
+ "total_flos": 3.997817815616717e+16,
121
+ "train_loss": 1.6477281238721764,
122
+ "train_runtime": 4221.2735,
123
+ "train_samples_per_second": 13.96,
124
+ "train_steps_per_second": 0.872
125
+ }
126
+ ],
127
+ "logging_steps": 500,
128
+ "max_steps": 3680,
129
+ "num_input_tokens_seen": 0,
130
+ "num_train_epochs": 4,
131
+ "save_steps": 500,
132
+ "total_flos": 3.997817815616717e+16,
133
+ "train_batch_size": 4,
134
+ "trial_name": null,
135
+ "trial_params": null
136
+ }