EllieS committed on
Commit
6bf1e2f
1 Parent(s): 72d03ed

Model save

Browse files
README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ library_name: peft
4
+ tags:
5
+ - trl
6
+ - sft
7
+ - generated_from_trainer
8
+ base_model: alignment-handbook/zephyr-7b-sft-full
9
+ model-index:
10
+ - name: zephyr-7b-sft-lora-timedial
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # zephyr-7b-sft-lora-timedial
18
+
19
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unspecified dataset (the Trainer was not given a dataset name).
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 1.1794
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 0.0002
41
+ - train_batch_size: 4
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - gradient_accumulation_steps: 2
46
+ - total_train_batch_size: 8
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: cosine
49
+ - lr_scheduler_warmup_ratio: 0.1
50
+ - num_epochs: 1
51
+
52
+ ### Training results
53
+
54
+ | Training Loss | Epoch | Step | Validation Loss |
55
+ |:-------------:|:-----:|:----:|:---------------:|
56
+ | 1.2691 | 1.0 | 145 | 1.1794 |
57
+
58
+
59
+ ### Framework versions
60
+
61
+ - PEFT 0.7.1
62
+ - Transformers 4.36.2
63
+ - PyTorch 2.1.2+cu121
64
+ - Datasets 2.14.6
65
+ - Tokenizers 0.15.2
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93858770e0b5ff4d445de949f7d30fa7a2e0eea0ed1594ff051e65cf81745acd
3
  size 42002584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ed026777ac769a0773552276a5cf0dd862f0cd22b625d2dd591fa64630209c
3
  size 42002584
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.1793650388717651,
4
+ "eval_runtime": 24.3874,
5
+ "eval_samples": 289,
6
+ "eval_samples_per_second": 11.85,
7
+ "eval_steps_per_second": 1.517,
8
+ "train_loss": 1.310744782151847,
9
+ "train_runtime": 268.5316,
10
+ "train_samples": 1157,
11
+ "train_samples_per_second": 4.309,
12
+ "train_steps_per_second": 0.54
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.1793650388717651,
4
+ "eval_runtime": 24.3874,
5
+ "eval_samples": 289,
6
+ "eval_samples_per_second": 11.85,
7
+ "eval_steps_per_second": 1.517
8
+ }
runs/Mar29_03-10-31_586cb8b6da8c/events.out.tfevents.1711681858.586cb8b6da8c.32659.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abe60db19687aca1d2eb2a6d74dd115490ce2cc148581028a106aac071a5c419
3
- size 7710
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87dfc1bb3e2745164da7a098c651b54a4ddc732ca829bb87d9085e5cabb638a6
3
+ size 9733
runs/Mar29_03-10-31_586cb8b6da8c/events.out.tfevents.1711682151.586cb8b6da8c.32659.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03895e0d8d99587ae55c07254050d4836a8509ced826ed5bb0cc95d87083c73f
3
+ size 359
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.310744782151847,
4
+ "train_runtime": 268.5316,
5
+ "train_samples": 1157,
6
+ "train_samples_per_second": 4.309,
7
+ "train_steps_per_second": 0.54
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 145,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "learning_rate": 1.3333333333333333e-05,
14
+ "loss": 1.9595,
15
+ "step": 1
16
+ },
17
+ {
18
+ "epoch": 0.03,
19
+ "learning_rate": 6.666666666666667e-05,
20
+ "loss": 2.0078,
21
+ "step": 5
22
+ },
23
+ {
24
+ "epoch": 0.07,
25
+ "learning_rate": 0.00013333333333333334,
26
+ "loss": 1.7643,
27
+ "step": 10
28
+ },
29
+ {
30
+ "epoch": 0.1,
31
+ "learning_rate": 0.0002,
32
+ "loss": 1.4478,
33
+ "step": 15
34
+ },
35
+ {
36
+ "epoch": 0.14,
37
+ "learning_rate": 0.0001992708874098054,
38
+ "loss": 1.3356,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.17,
43
+ "learning_rate": 0.0001970941817426052,
44
+ "loss": 1.4178,
45
+ "step": 25
46
+ },
47
+ {
48
+ "epoch": 0.21,
49
+ "learning_rate": 0.0001935016242685415,
50
+ "loss": 1.2849,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.24,
55
+ "learning_rate": 0.000188545602565321,
56
+ "loss": 1.271,
57
+ "step": 35
58
+ },
59
+ {
60
+ "epoch": 0.28,
61
+ "learning_rate": 0.00018229838658936564,
62
+ "loss": 1.3692,
63
+ "step": 40
64
+ },
65
+ {
66
+ "epoch": 0.31,
67
+ "learning_rate": 0.00017485107481711012,
68
+ "loss": 1.2819,
69
+ "step": 45
70
+ },
71
+ {
72
+ "epoch": 0.34,
73
+ "learning_rate": 0.00016631226582407952,
74
+ "loss": 1.3848,
75
+ "step": 50
76
+ },
77
+ {
78
+ "epoch": 0.38,
79
+ "learning_rate": 0.00015680647467311557,
80
+ "loss": 1.332,
81
+ "step": 55
82
+ },
83
+ {
84
+ "epoch": 0.41,
85
+ "learning_rate": 0.00014647231720437686,
86
+ "loss": 1.2716,
87
+ "step": 60
88
+ },
89
+ {
90
+ "epoch": 0.45,
91
+ "learning_rate": 0.00013546048870425356,
92
+ "loss": 1.2611,
93
+ "step": 65
94
+ },
95
+ {
96
+ "epoch": 0.48,
97
+ "learning_rate": 0.0001239315664287558,
98
+ "loss": 1.2658,
99
+ "step": 70
100
+ },
101
+ {
102
+ "epoch": 0.52,
103
+ "learning_rate": 0.0001120536680255323,
104
+ "loss": 1.3735,
105
+ "step": 75
106
+ },
107
+ {
108
+ "epoch": 0.55,
109
+ "learning_rate": 0.0001,
110
+ "loss": 1.3004,
111
+ "step": 80
112
+ },
113
+ {
114
+ "epoch": 0.59,
115
+ "learning_rate": 8.79463319744677e-05,
116
+ "loss": 1.2043,
117
+ "step": 85
118
+ },
119
+ {
120
+ "epoch": 0.62,
121
+ "learning_rate": 7.606843357124426e-05,
122
+ "loss": 1.1837,
123
+ "step": 90
124
+ },
125
+ {
126
+ "epoch": 0.66,
127
+ "learning_rate": 6.453951129574644e-05,
128
+ "loss": 1.1508,
129
+ "step": 95
130
+ },
131
+ {
132
+ "epoch": 0.69,
133
+ "learning_rate": 5.3527682795623146e-05,
134
+ "loss": 1.2025,
135
+ "step": 100
136
+ },
137
+ {
138
+ "epoch": 0.72,
139
+ "learning_rate": 4.3193525326884435e-05,
140
+ "loss": 1.2575,
141
+ "step": 105
142
+ },
143
+ {
144
+ "epoch": 0.76,
145
+ "learning_rate": 3.36877341759205e-05,
146
+ "loss": 1.2624,
147
+ "step": 110
148
+ },
149
+ {
150
+ "epoch": 0.79,
151
+ "learning_rate": 2.514892518288988e-05,
152
+ "loss": 1.2505,
153
+ "step": 115
154
+ },
155
+ {
156
+ "epoch": 0.83,
157
+ "learning_rate": 1.7701613410634365e-05,
158
+ "loss": 1.2264,
159
+ "step": 120
160
+ },
161
+ {
162
+ "epoch": 0.86,
163
+ "learning_rate": 1.1454397434679021e-05,
164
+ "loss": 1.1638,
165
+ "step": 125
166
+ },
167
+ {
168
+ "epoch": 0.9,
169
+ "learning_rate": 6.498375731458528e-06,
170
+ "loss": 1.1018,
171
+ "step": 130
172
+ },
173
+ {
174
+ "epoch": 0.93,
175
+ "learning_rate": 2.905818257394799e-06,
176
+ "loss": 1.2153,
177
+ "step": 135
178
+ },
179
+ {
180
+ "epoch": 0.97,
181
+ "learning_rate": 7.291125901946027e-07,
182
+ "loss": 1.1638,
183
+ "step": 140
184
+ },
185
+ {
186
+ "epoch": 1.0,
187
+ "learning_rate": 0.0,
188
+ "loss": 1.2691,
189
+ "step": 145
190
+ },
191
+ {
192
+ "epoch": 1.0,
193
+ "eval_loss": 1.1793650388717651,
194
+ "eval_runtime": 24.4695,
195
+ "eval_samples_per_second": 11.811,
196
+ "eval_steps_per_second": 1.512,
197
+ "step": 145
198
+ },
199
+ {
200
+ "epoch": 1.0,
201
+ "step": 145,
202
+ "total_flos": 2.153195061955789e+16,
203
+ "train_loss": 1.310744782151847,
204
+ "train_runtime": 268.5316,
205
+ "train_samples_per_second": 4.309,
206
+ "train_steps_per_second": 0.54
207
+ }
208
+ ],
209
+ "logging_steps": 5,
210
+ "max_steps": 145,
211
+ "num_input_tokens_seen": 0,
212
+ "num_train_epochs": 1,
213
+ "save_steps": 100,
214
+ "total_flos": 2.153195061955789e+16,
215
+ "train_batch_size": 4,
216
+ "trial_name": null,
217
+ "trial_params": null
218
+ }