Adi-ds committed on
Commit
7e82808
·
1 Parent(s): ab64665

End of training

Browse files
README.md CHANGED
@@ -4,6 +4,7 @@ tags:
4
  model-index:
5
  - name: Kaggle-Science-LLM
6
  results: []
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -29,6 +30,17 @@ More information needed
29
 
30
  ## Training procedure
31
 
 
 
 
 
 
 
 
 
 
 
 
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
@@ -62,6 +74,7 @@ The following hyperparameters were used during training:
62
 
63
  ### Framework versions
64
 
 
65
  - Transformers 4.30.2
66
  - Pytorch 2.0.0
67
  - Datasets 2.1.0
 
4
  model-index:
5
  - name: Kaggle-Science-LLM
6
  results: []
7
+ library_name: peft
8
  ---
9
 
10
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  ## Training procedure
32
 
33
+
34
+ The following `bitsandbytes` quantization config was used during training:
35
+ - load_in_8bit: False
36
+ - load_in_4bit: True
37
+ - llm_int8_threshold: 6.0
38
+ - llm_int8_skip_modules: None
39
+ - llm_int8_enable_fp32_cpu_offload: False
40
+ - llm_int8_has_fp16_weight: False
41
+ - bnb_4bit_quant_type: nf4
42
+ - bnb_4bit_use_double_quant: True
43
+ - bnb_4bit_compute_dtype: bfloat16
44
  ### Training hyperparameters
45
 
46
  The following hyperparameters were used during training:
 
74
 
75
  ### Framework versions
76
 
77
+ - PEFT 0.4.0
78
  - Transformers 4.30.2
79
  - Pytorch 2.0.0
80
  - Datasets 2.1.0
adapter_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "auto_mapping": null,
3
- "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
 
1
  {
2
  "auto_mapping": null,
3
+ "base_model_name_or_path": null,
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:554d9152e3fb5cfbb0656095bbeb1e9757e5e6dd5bea8e4ffa086b485a3d19c8
3
- size 134263757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13c037e9d5e77ed7381fbf545355fcdaa357489fe4fda768ad3b262980a490c
3
+ size 134265933
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.08,
3
  "total_flos": 1782763267522560.0,
4
- "train_loss": 5.71207763671875,
5
- "train_runtime": 12536.5893,
6
  "train_samples_per_second": 0.032,
7
  "train_steps_per_second": 0.004
8
  }
 
1
  {
2
  "epoch": 0.08,
3
  "total_flos": 1782763267522560.0,
4
+ "train_loss": 5.688729438781738,
5
+ "train_runtime": 12357.0887,
6
  "train_samples_per_second": 0.032,
7
  "train_steps_per_second": 0.004
8
  }
runs/Oct28_18-27-05_948ebb293dc9/events.out.tfevents.1698518147.948ebb293dc9.233.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c32358e8ceae5d1f8bef28288d8af98b7995ddb55f9070714d6b8673638d8a
3
+ size 8967
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.08,
3
  "total_flos": 1782763267522560.0,
4
- "train_loss": 5.71207763671875,
5
- "train_runtime": 12536.5893,
6
  "train_samples_per_second": 0.032,
7
  "train_steps_per_second": 0.004
8
  }
 
1
  {
2
  "epoch": 0.08,
3
  "total_flos": 1782763267522560.0,
4
+ "train_loss": 5.688729438781738,
5
+ "train_runtime": 12357.0887,
6
  "train_samples_per_second": 0.032,
7
  "train_steps_per_second": 0.004
8
  }
trainer_state.json CHANGED
@@ -10,149 +10,149 @@
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 1.4960191082802548e-05,
13
- "loss": 6.6677,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.01,
18
- "eval_loss": 6.512014389038086,
19
- "eval_runtime": 1183.8648,
20
- "eval_samples_per_second": 1.414,
21
- "eval_steps_per_second": 0.354,
22
  "step": 5
23
  },
24
  {
25
  "epoch": 0.02,
26
  "learning_rate": 1.4920382165605097e-05,
27
- "loss": 6.4854,
28
  "step": 10
29
  },
30
  {
31
  "epoch": 0.02,
32
- "eval_loss": 6.347909927368164,
33
- "eval_runtime": 1183.7191,
34
- "eval_samples_per_second": 1.414,
35
- "eval_steps_per_second": 0.354,
36
  "step": 10
37
  },
38
  {
39
  "epoch": 0.02,
40
  "learning_rate": 1.4880573248407642e-05,
41
- "loss": 6.2537,
42
  "step": 15
43
  },
44
  {
45
  "epoch": 0.02,
46
- "eval_loss": 6.1641035079956055,
47
- "eval_runtime": 1184.2747,
48
- "eval_samples_per_second": 1.414,
49
- "eval_steps_per_second": 0.354,
50
  "step": 15
51
  },
52
  {
53
  "epoch": 0.03,
54
  "learning_rate": 1.4840764331210191e-05,
55
- "loss": 6.0912,
56
  "step": 20
57
  },
58
  {
59
  "epoch": 0.03,
60
- "eval_loss": 5.954966068267822,
61
- "eval_runtime": 1187.8538,
62
- "eval_samples_per_second": 1.409,
63
- "eval_steps_per_second": 0.353,
64
  "step": 20
65
  },
66
  {
67
  "epoch": 0.04,
68
  "learning_rate": 1.4800955414012738e-05,
69
- "loss": 5.8341,
70
  "step": 25
71
  },
72
  {
73
  "epoch": 0.04,
74
- "eval_loss": 5.724631309509277,
75
- "eval_runtime": 1188.1346,
76
- "eval_samples_per_second": 1.409,
77
- "eval_steps_per_second": 0.353,
78
  "step": 25
79
  },
80
  {
81
  "epoch": 0.05,
82
  "learning_rate": 1.4761146496815287e-05,
83
- "loss": 5.6128,
84
  "step": 30
85
  },
86
  {
87
  "epoch": 0.05,
88
- "eval_loss": 5.47757625579834,
89
- "eval_runtime": 1188.0744,
90
- "eval_samples_per_second": 1.409,
91
- "eval_steps_per_second": 0.353,
92
  "step": 30
93
  },
94
  {
95
  "epoch": 0.06,
96
- "learning_rate": 1.4729299363057326e-05,
97
- "loss": 5.3665,
98
  "step": 35
99
  },
100
  {
101
  "epoch": 0.06,
102
- "eval_loss": 5.272798538208008,
103
- "eval_runtime": 1185.4582,
104
- "eval_samples_per_second": 1.412,
105
- "eval_steps_per_second": 0.353,
106
  "step": 35
107
  },
108
  {
109
  "epoch": 0.06,
110
- "learning_rate": 1.4689490445859873e-05,
111
- "loss": 5.1581,
112
  "step": 40
113
  },
114
  {
115
  "epoch": 0.06,
116
- "eval_loss": 5.01292610168457,
117
- "eval_runtime": 1184.7537,
118
- "eval_samples_per_second": 1.413,
119
- "eval_steps_per_second": 0.354,
120
  "step": 40
121
  },
122
  {
123
  "epoch": 0.07,
124
- "learning_rate": 1.464968152866242e-05,
125
- "loss": 4.9526,
126
  "step": 45
127
  },
128
  {
129
  "epoch": 0.07,
130
- "eval_loss": 4.750097274780273,
131
- "eval_runtime": 1184.6596,
132
- "eval_samples_per_second": 1.413,
133
- "eval_steps_per_second": 0.354,
134
  "step": 45
135
  },
136
  {
137
  "epoch": 0.08,
138
- "learning_rate": 1.4609872611464967e-05,
139
- "loss": 4.6988,
140
  "step": 50
141
  },
142
  {
143
  "epoch": 0.08,
144
- "eval_loss": 4.482077598571777,
145
- "eval_runtime": 1185.1466,
146
- "eval_samples_per_second": 1.412,
147
- "eval_steps_per_second": 0.354,
148
  "step": 50
149
  },
150
  {
151
  "epoch": 0.08,
152
  "step": 50,
153
  "total_flos": 1782763267522560.0,
154
- "train_loss": 5.71207763671875,
155
- "train_runtime": 12536.5893,
156
  "train_samples_per_second": 0.032,
157
  "train_steps_per_second": 0.004
158
  }
 
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 1.4960191082802548e-05,
13
+ "loss": 6.6679,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.01,
18
+ "eval_loss": 6.511322021484375,
19
+ "eval_runtime": 1175.4522,
20
+ "eval_samples_per_second": 1.424,
21
+ "eval_steps_per_second": 0.356,
22
  "step": 5
23
  },
24
  {
25
  "epoch": 0.02,
26
  "learning_rate": 1.4920382165605097e-05,
27
+ "loss": 6.4844,
28
  "step": 10
29
  },
30
  {
31
  "epoch": 0.02,
32
+ "eval_loss": 6.34607458114624,
33
+ "eval_runtime": 1171.2756,
34
+ "eval_samples_per_second": 1.429,
35
+ "eval_steps_per_second": 0.358,
36
  "step": 10
37
  },
38
  {
39
  "epoch": 0.02,
40
  "learning_rate": 1.4880573248407642e-05,
41
+ "loss": 6.2521,
42
  "step": 15
43
  },
44
  {
45
  "epoch": 0.02,
46
+ "eval_loss": 6.161616802215576,
47
+ "eval_runtime": 1175.2918,
48
+ "eval_samples_per_second": 1.424,
49
+ "eval_steps_per_second": 0.357,
50
  "step": 15
51
  },
52
  {
53
  "epoch": 0.03,
54
  "learning_rate": 1.4840764331210191e-05,
55
+ "loss": 6.0889,
56
  "step": 20
57
  },
58
  {
59
  "epoch": 0.03,
60
+ "eval_loss": 5.951450347900391,
61
+ "eval_runtime": 1163.7316,
62
+ "eval_samples_per_second": 1.438,
63
+ "eval_steps_per_second": 0.36,
64
  "step": 20
65
  },
66
  {
67
  "epoch": 0.04,
68
  "learning_rate": 1.4800955414012738e-05,
69
+ "loss": 5.8295,
70
  "step": 25
71
  },
72
  {
73
  "epoch": 0.04,
74
+ "eval_loss": 5.720163345336914,
75
+ "eval_runtime": 1170.5429,
76
+ "eval_samples_per_second": 1.43,
77
+ "eval_steps_per_second": 0.358,
78
  "step": 25
79
  },
80
  {
81
  "epoch": 0.05,
82
  "learning_rate": 1.4761146496815287e-05,
83
+ "loss": 5.6072,
84
  "step": 30
85
  },
86
  {
87
  "epoch": 0.05,
88
+ "eval_loss": 5.472379207611084,
89
+ "eval_runtime": 1167.985,
90
+ "eval_samples_per_second": 1.433,
91
+ "eval_steps_per_second": 0.359,
92
  "step": 30
93
  },
94
  {
95
  "epoch": 0.06,
96
+ "learning_rate": 1.4721337579617834e-05,
97
+ "loss": 5.339,
98
  "step": 35
99
  },
100
  {
101
  "epoch": 0.06,
102
+ "eval_loss": 5.213606357574463,
103
+ "eval_runtime": 1168.8328,
104
+ "eval_samples_per_second": 1.432,
105
+ "eval_steps_per_second": 0.358,
106
  "step": 35
107
  },
108
  {
109
  "epoch": 0.06,
110
+ "learning_rate": 1.4681528662420383e-05,
111
+ "loss": 5.0985,
112
  "step": 40
113
  },
114
  {
115
  "epoch": 0.06,
116
+ "eval_loss": 4.951411724090576,
117
+ "eval_runtime": 1160.2184,
118
+ "eval_samples_per_second": 1.443,
119
+ "eval_steps_per_second": 0.361,
120
  "step": 40
121
  },
122
  {
123
  "epoch": 0.07,
124
+ "learning_rate": 1.4641719745222929e-05,
125
+ "loss": 4.8879,
126
  "step": 45
127
  },
128
  {
129
  "epoch": 0.07,
130
+ "eval_loss": 4.6860737800598145,
131
+ "eval_runtime": 1166.5033,
132
+ "eval_samples_per_second": 1.435,
133
+ "eval_steps_per_second": 0.359,
134
  "step": 45
135
  },
136
  {
137
  "epoch": 0.08,
138
+ "learning_rate": 1.4601910828025478e-05,
139
+ "loss": 4.6319,
140
  "step": 50
141
  },
142
  {
143
  "epoch": 0.08,
144
+ "eval_loss": 4.414491176605225,
145
+ "eval_runtime": 1165.2141,
146
+ "eval_samples_per_second": 1.437,
147
+ "eval_steps_per_second": 0.36,
148
  "step": 50
149
  },
150
  {
151
  "epoch": 0.08,
152
  "step": 50,
153
  "total_flos": 1782763267522560.0,
154
+ "train_loss": 5.688729438781738,
155
+ "train_runtime": 12357.0887,
156
  "train_samples_per_second": 0.032,
157
  "train_steps_per_second": 0.004
158
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b63c609acbfe9fb282b8b5cd86b3af90ef32052a72c6a691f1bdff0059fe5a04
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e7788309affb869aa11990ec5cffd9f89ef97293c0db6c8ea7c29509cb00fb3
3
  size 3963