hannahbillo committed on
Commit
26b9459
·
verified ·
1 Parent(s): 7394d85

End of training

Browse files
README.md CHANGED
@@ -19,6 +19,8 @@ should probably proofread and complete it, then remove this comment. -->
19
  # zephyr-7b-sft-lora
20
 
21
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
 
 
22
 
23
  ## Model description
24
 
@@ -45,13 +47,14 @@ The following hyperparameters were used during training:
45
  - total_train_batch_size: 128
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: cosine
48
- - num_epochs: 1
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
  | No log | 1.0 | 1 | 1.1585 |
 
55
 
56
 
57
  ### Framework versions
 
19
  # zephyr-7b-sft-lora
20
 
21
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 1.1563
24
 
25
  ## Model description
26
 
 
47
  - total_train_batch_size: 128
48
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
  - lr_scheduler_type: cosine
50
+ - num_epochs: 3
51
 
52
  ### Training results
53
 
54
  | Training Loss | Epoch | Step | Validation Loss |
55
  |:-------------:|:-----:|:----:|:---------------:|
56
  | No log | 1.0 | 1 | 1.1585 |
57
+ | No log | 2.0 | 3 | 1.1563 |
58
 
59
 
60
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a9153adf247e26307caae2a48bc8fe9d5c40f3e0315878c4c2b8f7fa8d93041
3
  size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd498aa390277d4d5480c6044b88e597b352fe3a0278fb1c963bd55ebf39b619
3
  size 109086672
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5899069012574208.0,
4
- "train_loss": 0.5863452553749084,
5
- "train_runtime": 93.4265,
6
  "train_samples": 100,
7
- "train_samples_per_second": 0.717,
8
  "train_steps_per_second": 0.011
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5899069012574208.0,
4
+ "train_loss": 0.5863915681838989,
5
+ "train_runtime": 88.6368,
6
  "train_samples": 100,
7
+ "train_samples_per_second": 0.756,
8
  "train_steps_per_second": 0.011
9
  }
runs/Apr08_14-24-03_39f6269d6750/events.out.tfevents.1712586255.39f6269d6750.561.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e850f6036dc045ca31bdd4815d0ee7fef227cdaf0aab8ef642f511eab455aa7
3
+ size 5939
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "</s>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5899069012574208.0,
4
- "train_loss": 0.5863452553749084,
5
- "train_runtime": 93.4265,
6
  "train_samples": 100,
7
- "train_samples_per_second": 0.717,
8
  "train_steps_per_second": 0.011
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5899069012574208.0,
4
+ "train_loss": 0.5863915681838989,
5
+ "train_runtime": 88.6368,
6
  "train_samples": 100,
7
+ "train_samples_per_second": 0.756,
8
  "train_steps_per_second": 0.011
9
  }
trainer_state.json CHANGED
@@ -10,19 +10,19 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 1.1567405462265015,
14
- "eval_runtime": 24.4278,
15
- "eval_samples_per_second": 2.62,
16
- "eval_steps_per_second": 2.62,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 1.0,
21
  "step": 1,
22
  "total_flos": 5899069012574208.0,
23
- "train_loss": 0.5863452553749084,
24
- "train_runtime": 93.4265,
25
- "train_samples_per_second": 0.717,
26
  "train_steps_per_second": 0.011
27
  }
28
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 1.1584750413894653,
14
+ "eval_runtime": 24.1178,
15
+ "eval_samples_per_second": 2.654,
16
+ "eval_steps_per_second": 2.654,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 1.0,
21
  "step": 1,
22
  "total_flos": 5899069012574208.0,
23
+ "train_loss": 0.5863915681838989,
24
+ "train_runtime": 88.6368,
25
+ "train_samples_per_second": 0.756,
26
  "train_steps_per_second": 0.011
27
  }
28
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4986090b642bc14ee74878c04e50e4221377fe1e24527819cbd66ee69af07d71
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40a1bb5dcc207e7129f2a045941c3c828bd157617df0503126a3864e20ad1063
3
  size 4984