NicholasCorrado committed on
Commit
2eb1b8d
1 Parent(s): 69c9d49

End of training

Browse files
Files changed (4) hide show
  1. README.md +22 -16
  2. all_results.json +20 -0
  3. config.json +1 -1
  4. eval_results.json +23 -0
README.md CHANGED
@@ -3,9 +3,15 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: alignment-handbook/zephyr-7b-sft-full
5
  tags:
 
6
  - trl
7
  - dpo
8
  - generated_from_trainer
 
 
 
 
 
9
  model-index:
10
  - name: zephyr-7b-uf-rlced-conifer-group-dpo-2e
11
  results: []
@@ -16,24 +22,24 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # zephyr-7b-uf-rlced-conifer-group-dpo-2e
18
 
19
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.2410
22
- - Rewards/chosen: -3.4515
23
- - Rewards/rejected: -8.7505
24
- - Rewards/accuracies: 0.8769
25
- - Rewards/margins: 5.2990
26
- - Logps/rejected: -1278.7848
27
- - Logps/chosen: -737.6204
28
- - Logits/rejected: 3.0507
29
- - Logits/chosen: 0.9407
30
- - Alpha0: 0.6369
31
- - Alpha1: 0.3631
32
- - Task Loss1: 0.1726
33
- - Task Excess Loss1: 0.0379
34
- - Excess Loss: 0.0341
35
- - Task Loss0: 0.5306
36
- - Task Excess Loss0: 0.0889
37
 
38
  ## Model description
39
 
 
3
  license: apache-2.0
4
  base_model: alignment-handbook/zephyr-7b-sft-full
5
  tags:
6
+ - alignment-handbook
7
  - trl
8
  - dpo
9
  - generated_from_trainer
10
+ - trl
11
+ - dpo
12
+ - generated_from_trainer
13
+ datasets:
14
+ - data/zephyr_uf_rlced_conifer_ref
15
  model-index:
16
  - name: zephyr-7b-uf-rlced-conifer-group-dpo-2e
17
  results: []
 
22
 
23
  # zephyr-7b-uf-rlced-conifer-group-dpo-2e
24
 
25
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the data/zephyr_uf_rlced_conifer_ref dataset.
26
  It achieves the following results on the evaluation set:
27
  - Loss: 0.2410
28
+ - Rewards/chosen: -3.4514
29
+ - Rewards/rejected: -8.7503
30
+ - Rewards/accuracies: 0.8778
31
+ - Rewards/margins: 5.2989
32
+ - Logps/rejected: -1278.7679
33
+ - Logps/chosen: -737.6100
34
+ - Logits/rejected: 3.0512
35
+ - Logits/chosen: 0.9415
36
+ - Alpha0: 0.1957
37
+ - Alpha1: 0.8043
38
+ - Task Loss1: 0.1724
39
+ - Task Excess Loss1: 0.0378
40
+ - Excess Loss: 0.0340
41
+ - Task Loss0: 0.5295
42
+ - Task Excess Loss0: 0.0879
43
 
44
  ## Model description
45
 
all_results.json CHANGED
@@ -1,5 +1,25 @@
1
  {
2
  "epoch": 1.9986120749479528,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
  "train_loss": 0.17575526105033026,
5
  "train_runtime": 46867.94,
 
1
  {
2
  "epoch": 1.9986120749479528,
3
+ "eval_alpha0": 0.1956627070903778,
4
+ "eval_alpha1": 0.8043374419212341,
5
+ "eval_excess_loss": 0.03396472496617781,
6
+ "eval_logits/chosen": 0.941495954990387,
7
+ "eval_logits/rejected": 3.0511972904205322,
8
+ "eval_logps/chosen": -737.6100463867188,
9
+ "eval_logps/rejected": -1278.7679443359375,
10
+ "eval_loss": 0.2410118132829666,
11
+ "eval_rewards/accuracies": 0.8778195381164551,
12
+ "eval_rewards/chosen": -3.451406478881836,
13
+ "eval_rewards/margins": 5.298919200897217,
14
+ "eval_rewards/rejected": -8.750325202941895,
15
+ "eval_runtime": 386.05,
16
+ "eval_samples": 8491,
17
+ "eval_samples_per_second": 21.995,
18
+ "eval_steps_per_second": 0.345,
19
+ "eval_task_excess_loss0": 0.08787519361543576,
20
+ "eval_task_excess_loss1": 0.03778563067191184,
21
+ "eval_task_loss0": 0.5294545888900757,
22
+ "eval_task_loss1": 0.1724005937576294,
23
  "total_flos": 0.0,
24
  "train_loss": 0.17575526105033026,
25
  "train_runtime": 46867.94,
config.json CHANGED
@@ -22,6 +22,6 @@
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.44.1",
25
- "use_cache": false,
26
  "vocab_size": 32000
27
  }
 
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.44.1",
25
+ "use_cache": true,
26
  "vocab_size": 32000
27
  }
eval_results.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.9986120749479528,
3
+ "eval_alpha0": 0.1956627070903778,
4
+ "eval_alpha1": 0.8043374419212341,
5
+ "eval_excess_loss": 0.03396472496617781,
6
+ "eval_logits/chosen": 0.941495954990387,
7
+ "eval_logits/rejected": 3.0511972904205322,
8
+ "eval_logps/chosen": -737.6100463867188,
9
+ "eval_logps/rejected": -1278.7679443359375,
10
+ "eval_loss": 0.2410118132829666,
11
+ "eval_rewards/accuracies": 0.8778195381164551,
12
+ "eval_rewards/chosen": -3.451406478881836,
13
+ "eval_rewards/margins": 5.298919200897217,
14
+ "eval_rewards/rejected": -8.750325202941895,
15
+ "eval_runtime": 386.05,
16
+ "eval_samples": 8491,
17
+ "eval_samples_per_second": 21.995,
18
+ "eval_steps_per_second": 0.345,
19
+ "eval_task_excess_loss0": 0.08787519361543576,
20
+ "eval_task_excess_loss1": 0.03778563067191184,
21
+ "eval_task_loss0": 0.5294545888900757,
22
+ "eval_task_loss1": 0.1724005937576294
23
+ }