ChenWu98 committed on
Commit
c23fad8
1 Parent(s): 264f50c

Model save

Browse files
Files changed (5) hide show
  1. README.md +4 -12
  2. all_results.json +9 -9
  3. eval_results.json +5 -5
  4. train_results.json +5 -5
  5. trainer_state.json +23 -55
README.md CHANGED
@@ -2,16 +2,9 @@
2
  license: mit
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - sft
8
  - generated_from_trainer
9
- - trl
10
- - sft
11
- - generated_from_trainer
12
- datasets:
13
- - ChenWu98/skills_metaphor_chat
14
- - ChenWu98/skills_red_herring_chat
15
  base_model: HuggingFaceH4/zephyr-7b-beta
16
  model-index:
17
  - name: skills_metaphor_chat-skills_red_herring_chat-lora
@@ -23,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
23
 
24
  # skills_metaphor_chat-skills_red_herring_chat-lora
25
 
26
- This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) on the ChenWu98/skills_metaphor_chat and the ChenWu98/skills_red_herring_chat datasets.
27
  It achieves the following results on the evaluation set:
28
- - Loss: 0.2245
29
 
30
  ## Model description
31
 
@@ -54,14 +47,13 @@ The following hyperparameters were used during training:
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: cosine
56
  - lr_scheduler_warmup_ratio: 0.1
57
- - num_epochs: 2.0
58
 
59
  ### Training results
60
 
61
  | Training Loss | Epoch | Step | Validation Loss |
62
  |:-------------:|:-----:|:----:|:---------------:|
63
- | 0.2788 | 0.96 | 18 | 0.2390 |
64
- | 0.1993 | 1.92 | 36 | 0.2245 |
65
 
66
 
67
  ### Framework versions
 
2
  license: mit
3
  library_name: peft
4
  tags:
 
5
  - trl
6
  - sft
7
  - generated_from_trainer
 
 
 
 
 
 
8
  base_model: HuggingFaceH4/zephyr-7b-beta
9
  model-index:
10
  - name: skills_metaphor_chat-skills_red_herring_chat-lora
 
16
 
17
  # skills_metaphor_chat-skills_red_herring_chat-lora
18
 
19
+ This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.2636
22
 
23
  ## Model description
24
 
 
47
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
  - lr_scheduler_type: cosine
49
  - lr_scheduler_warmup_ratio: 0.1
50
+ - num_epochs: 1.0
51
 
52
  ### Training results
53
 
54
  | Training Loss | Epoch | Step | Validation Loss |
55
  |:-------------:|:-----:|:----:|:---------------:|
56
+ | 0.2847 | 0.96 | 18 | 0.2636 |
 
57
 
58
 
59
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 1.92,
3
- "eval_loss": 0.2245262861251831,
4
- "eval_runtime": 10.1729,
5
  "eval_samples": 200,
6
- "eval_samples_per_second": 19.66,
7
- "eval_steps_per_second": 2.458,
8
- "train_loss": 0.5313632095025645,
9
- "train_runtime": 554.6126,
10
  "train_samples": 600,
11
- "train_samples_per_second": 2.164,
12
- "train_steps_per_second": 0.065
13
  }
 
1
  {
2
+ "epoch": 0.96,
3
+ "eval_loss": 0.26359862089157104,
4
+ "eval_runtime": 7.9312,
5
  "eval_samples": 200,
6
+ "eval_samples_per_second": 25.217,
7
+ "eval_steps_per_second": 3.152,
8
+ "train_loss": 0.7799174222681258,
9
+ "train_runtime": 192.3889,
10
  "train_samples": 600,
11
+ "train_samples_per_second": 3.119,
12
+ "train_steps_per_second": 0.094
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.92,
3
- "eval_loss": 0.2245262861251831,
4
- "eval_runtime": 10.1729,
5
  "eval_samples": 200,
6
- "eval_samples_per_second": 19.66,
7
- "eval_steps_per_second": 2.458
8
  }
 
1
  {
2
+ "epoch": 0.96,
3
+ "eval_loss": 0.26359862089157104,
4
+ "eval_runtime": 7.9312,
5
  "eval_samples": 200,
6
+ "eval_samples_per_second": 25.217,
7
+ "eval_steps_per_second": 3.152
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.92,
3
- "train_loss": 0.5313632095025645,
4
- "train_runtime": 554.6126,
5
  "train_samples": 600,
6
- "train_samples_per_second": 2.164,
7
- "train_steps_per_second": 0.065
8
  }
 
1
  {
2
+ "epoch": 0.96,
3
+ "train_loss": 0.7799174222681258,
4
+ "train_runtime": 192.3889,
5
  "train_samples": 600,
6
+ "train_samples_per_second": 3.119,
7
+ "train_steps_per_second": 0.094
8
  }
trainer_state.json CHANGED
@@ -1,93 +1,61 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.92,
5
  "eval_steps": 500,
6
- "global_step": 36,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.05,
13
- "learning_rate": 5e-05,
14
  "loss": 2.272,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.27,
19
- "learning_rate": 0.0001995184726672197,
20
- "loss": 1.954,
21
  "step": 5
22
  },
23
  {
24
  "epoch": 0.53,
25
- "learning_rate": 0.00018314696123025454,
26
- "loss": 0.6371,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.8,
31
- "learning_rate": 0.0001471396736825998,
32
- "loss": 0.2788,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.96,
37
- "eval_loss": 0.23904697597026825,
38
- "eval_runtime": 11.1391,
39
- "eval_samples_per_second": 17.955,
40
- "eval_steps_per_second": 2.244,
41
  "step": 18
42
  },
43
  {
44
- "epoch": 1.07,
45
- "learning_rate": 0.0001,
46
- "loss": 0.2252,
47
- "step": 20
48
- },
49
- {
50
- "epoch": 1.33,
51
- "learning_rate": 5.286032631740023e-05,
52
- "loss": 0.2128,
53
- "step": 25
54
- },
55
- {
56
- "epoch": 1.6,
57
- "learning_rate": 1.6853038769745467e-05,
58
- "loss": 0.2122,
59
- "step": 30
60
- },
61
- {
62
- "epoch": 1.87,
63
- "learning_rate": 4.815273327803182e-07,
64
- "loss": 0.1993,
65
- "step": 35
66
- },
67
- {
68
- "epoch": 1.92,
69
- "eval_loss": 0.2245262861251831,
70
- "eval_runtime": 9.6856,
71
- "eval_samples_per_second": 20.649,
72
- "eval_steps_per_second": 2.581,
73
- "step": 36
74
- },
75
- {
76
- "epoch": 1.92,
77
- "step": 36,
78
- "total_flos": 40159503319040.0,
79
- "train_loss": 0.5313632095025645,
80
- "train_runtime": 554.6126,
81
- "train_samples_per_second": 2.164,
82
- "train_steps_per_second": 0.065
83
  }
84
  ],
85
  "logging_steps": 5,
86
- "max_steps": 36,
87
  "num_input_tokens_seen": 0,
88
- "num_train_epochs": 2,
89
  "save_steps": 500,
90
- "total_flos": 40159503319040.0,
91
  "train_batch_size": 4,
92
  "trial_name": null,
93
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.96,
5
  "eval_steps": 500,
6
+ "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.05,
13
+ "learning_rate": 0.0001,
14
  "loss": 2.272,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.27,
19
+ "learning_rate": 0.00018314696123025454,
20
+ "loss": 1.7677,
21
  "step": 5
22
  },
23
  {
24
  "epoch": 0.53,
25
+ "learning_rate": 0.0001,
26
+ "loss": 0.5037,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.8,
31
+ "learning_rate": 1.6853038769745467e-05,
32
+ "loss": 0.2847,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.96,
37
+ "eval_loss": 0.26359862089157104,
38
+ "eval_runtime": 8.8309,
39
+ "eval_samples_per_second": 22.648,
40
+ "eval_steps_per_second": 2.831,
41
  "step": 18
42
  },
43
  {
44
+ "epoch": 0.96,
45
+ "step": 18,
46
+ "total_flos": 20001168556032.0,
47
+ "train_loss": 0.7799174222681258,
48
+ "train_runtime": 192.3889,
49
+ "train_samples_per_second": 3.119,
50
+ "train_steps_per_second": 0.094
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
52
  ],
53
  "logging_steps": 5,
54
+ "max_steps": 18,
55
  "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 1,
57
  "save_steps": 500,
58
+ "total_flos": 20001168556032.0,
59
  "train_batch_size": 4,
60
  "trial_name": null,
61
  "trial_params": null