player1537
committed on
Commit
•
2011daf
1
Parent(s):
ed9454a
Training in progress, step 4962
Browse files
- generation_config.json +0 -7
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +75 -3
- last-checkpoint/training_args.bin +2 -2
- pytorch_model.bin +0 -3
generation_config.json
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_from_model_config": true,
|
3 |
-
"bos_token_id": 1,
|
4 |
-
"eos_token_id": 2,
|
5 |
-
"pad_token_id": 3,
|
6 |
-
"transformers_version": "4.31.0"
|
7 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4473888693
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c718401e44301e7b6b0916484ddf1c5bcc01330ee4727d76b71cf2ed469c5ca7
|
3 |
size 4473888693
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2236957537
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:777293302c4d76ea91b8cbdaeeeba26e11bd1803a22107140aafa16914330c6c
|
3 |
size 2236957537
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6369e7a8ae0c3f7b8b98eb916a8737c71a3d74933f972fb7688df71a66b6095
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -72,11 +72,83 @@
|
|
72 |
"learning_rate": 1.429761629252488e-05,
|
73 |
"loss": 2.1086,
|
74 |
"step": 2472
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
}
|
76 |
],
|
77 |
"max_steps": 8642,
|
78 |
"num_train_epochs": 1,
|
79 |
-
"total_flos":
|
80 |
"trial_name": null,
|
81 |
"trial_params": null
|
82 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.5741726452210136,
|
5 |
+
"global_step": 4962,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
72 |
"learning_rate": 1.429761629252488e-05,
|
73 |
"loss": 2.1086,
|
74 |
"step": 2472
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"epoch": 0.31,
|
78 |
+
"learning_rate": 1.382087479750058e-05,
|
79 |
+
"loss": 2.0791,
|
80 |
+
"step": 2678
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"epoch": 0.33,
|
84 |
+
"learning_rate": 1.3344133302476279e-05,
|
85 |
+
"loss": 2.0026,
|
86 |
+
"step": 2884
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.36,
|
90 |
+
"learning_rate": 1.2867391807451979e-05,
|
91 |
+
"loss": 1.9953,
|
92 |
+
"step": 3090
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"epoch": 0.38,
|
96 |
+
"learning_rate": 1.2390650312427679e-05,
|
97 |
+
"loss": 2.0375,
|
98 |
+
"step": 3296
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 0.41,
|
102 |
+
"learning_rate": 1.19185373756075e-05,
|
103 |
+
"loss": 2.0465,
|
104 |
+
"step": 3502
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 0.43,
|
108 |
+
"learning_rate": 1.144411015968526e-05,
|
109 |
+
"loss": 2.0147,
|
110 |
+
"step": 3708
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 0.45,
|
114 |
+
"learning_rate": 1.096736866466096e-05,
|
115 |
+
"loss": 2.0066,
|
116 |
+
"step": 3914
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"epoch": 0.48,
|
120 |
+
"learning_rate": 1.0490627169636659e-05,
|
121 |
+
"loss": 2.0109,
|
122 |
+
"step": 4120
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"epoch": 0.5,
|
126 |
+
"learning_rate": 1.0013885674612359e-05,
|
127 |
+
"loss": 1.9669,
|
128 |
+
"step": 4326
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.52,
|
132 |
+
"learning_rate": 9.537144179588059e-06,
|
133 |
+
"loss": 1.9822,
|
134 |
+
"step": 4532
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"epoch": 0.55,
|
138 |
+
"learning_rate": 9.062716963665819e-06,
|
139 |
+
"loss": 1.9876,
|
140 |
+
"step": 4738
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"epoch": 0.57,
|
144 |
+
"learning_rate": 8.585975468641519e-06,
|
145 |
+
"loss": 1.9557,
|
146 |
+
"step": 4944
|
147 |
}
|
148 |
],
|
149 |
"max_steps": 8642,
|
150 |
"num_train_epochs": 1,
|
151 |
+
"total_flos": 9216488128905216.0,
|
152 |
"trial_name": null,
|
153 |
"trial_params": null
|
154 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:805af7f0ae2c315b9a5aa85e8153519d586ecfae4cf097a3553c416e6588254e
|
3 |
+
size 3963
|
pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ad1047af4d787fcc1458849f4291e7a32cc4ddffeac14bd3fe057f3da7b7c5dd
|
3 |
-
size 2236957537
|
|
|
|
|
|
|
|