chansung committed on
Commit
984f772
1 Parent(s): 48a3c78

Model save

Browse files
Files changed (4) hide show
  1. README.md +15 -16
  2. all_results.json +6 -11
  3. train_results.json +6 -6
  4. trainer_state.json +0 -0
README.md CHANGED
@@ -1,11 +1,10 @@
1
  ---
2
  base_model: meta-llama/Meta-Llama-3.1-8B
3
  datasets:
4
- - llama-duo/synth_classification_dataset_dedup
5
  library_name: peft
6
  license: llama3.1
7
  tags:
8
- - alignment-handbook
9
  - trl
10
  - sft
11
  - generated_from_trainer
@@ -19,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # llama3.1-8b-classification-gpt4o-100k
21
 
22
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the llama-duo/synth_classification_dataset_dedup dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 1.8520
25
 
26
  ## Model description
27
 
@@ -56,18 +55,18 @@ The following hyperparameters were used during training:
56
 
57
  ### Training results
58
 
59
- | Training Loss | Epoch | Step | Validation Loss |
60
- |:-------------:|:------:|:----:|:---------------:|
61
- | 1.4961 | 0.9978 | 225 | 1.7708 |
62
- | 1.3952 | 2.0 | 451 | 1.7770 |
63
- | 1.3491 | 2.9978 | 676 | 1.7484 |
64
- | 1.3025 | 4.0 | 902 | 1.7902 |
65
- | 1.2904 | 4.9978 | 1127 | 1.7997 |
66
- | 1.2729 | 6.0 | 1353 | 1.8170 |
67
- | 1.2451 | 6.9978 | 1578 | 1.8180 |
68
- | 1.229 | 8.0 | 1804 | 1.8372 |
69
- | 1.2239 | 8.9978 | 2029 | 1.8482 |
70
- | 1.2051 | 9.9778 | 2250 | 1.8520 |
71
 
72
 
73
  ### Framework versions
 
1
  ---
2
  base_model: meta-llama/Meta-Llama-3.1-8B
3
  datasets:
4
+ - generator
5
  library_name: peft
6
  license: llama3.1
7
  tags:
 
8
  - trl
9
  - sft
10
  - generated_from_trainer
 
18
 
19
  # llama3.1-8b-classification-gpt4o-100k
20
 
21
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 3.0330
24
 
25
  ## Model description
26
 
 
55
 
56
  ### Training results
57
 
58
+ | Training Loss | Epoch | Step | Validation Loss |
59
+ |:-------------:|:-----:|:----:|:---------------:|
60
+ | 1.2062 | 1.0 | 296 | 1.6781 |
61
+ | 1.1339 | 2.0 | 592 | 1.6897 |
62
+ | 1.0779 | 3.0 | 888 | 1.7536 |
63
+ | 1.0043 | 4.0 | 1184 | 1.8225 |
64
+ | 0.9288 | 5.0 | 1480 | 2.0044 |
65
+ | 0.8437 | 6.0 | 1776 | 2.1710 |
66
+ | 0.7654 | 7.0 | 2072 | 2.4080 |
67
+ | 0.7117 | 8.0 | 2368 | 2.6554 |
68
+ | 0.6916 | 9.0 | 2664 | 2.9172 |
69
+ | 0.6652 | 10.0 | 2960 | 3.0330 |
70
 
71
 
72
  ### Framework versions
all_results.json CHANGED
@@ -1,14 +1,9 @@
1
  {
2
- "epoch": 9.977827050997783,
3
- "eval_loss": 1.8520119190216064,
4
- "eval_runtime": 0.3553,
5
- "eval_samples": 16,
6
- "eval_samples_per_second": 2.814,
7
- "eval_steps_per_second": 2.814,
8
- "total_flos": 3.3259687719144e+18,
9
- "train_loss": 1.3362829395929972,
10
- "train_runtime": 6815.0283,
11
  "train_samples": 92634,
12
- "train_samples_per_second": 10.572,
13
- "train_steps_per_second": 0.33
14
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 4.416382035459834e+18,
4
+ "train_loss": 0.922980490487975,
5
+ "train_runtime": 12382.7598,
 
 
 
 
 
6
  "train_samples": 92634,
7
+ "train_samples_per_second": 7.645,
8
+ "train_steps_per_second": 0.239
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 9.977827050997783,
3
- "total_flos": 3.3259687719144e+18,
4
- "train_loss": 1.3362829395929972,
5
- "train_runtime": 6815.0283,
6
  "train_samples": 92634,
7
- "train_samples_per_second": 10.572,
8
- "train_steps_per_second": 0.33
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 4.416382035459834e+18,
4
+ "train_loss": 0.922980490487975,
5
+ "train_runtime": 12382.7598,
6
  "train_samples": 92634,
7
+ "train_samples_per_second": 7.645,
8
+ "train_steps_per_second": 0.239
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff