WangXFng committed on
Commit 3ea0682 · verified · 1 Parent(s): 5f5cedf

Model save

adapter_config.json CHANGED
@@ -19,17 +19,17 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 64,
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
-    "o_proj",
+    "q_proj",
     "gate_proj",
-    "v_proj",
     "up_proj",
     "down_proj",
-    "q_proj"
+    "v_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
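This change halves the LoRA rank (r: 64 → 32) and reorders target_modules; the module set itself is unchanged, and its order has no effect. For reference, a minimal sketch of a peft LoraConfig matching the new file is shown below — lora_alpha and lora_dropout are not visible in this diff, so the values used here are placeholders, not the author's settings.

```python
# Minimal sketch of a peft LoraConfig matching the new adapter_config.json.
# lora_alpha and lora_dropout are NOT in this diff; the values below are placeholders.
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,  # lowered from 64 in this commit
    target_modules=[
        "k_proj", "q_proj", "gate_proj", "up_proj",
        "down_proj", "v_proj", "o_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
    lora_alpha=16,      # placeholder: not shown in the diff
    lora_dropout=0.05,  # placeholder: not shown in the diff
)
```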
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07f3924e34badcecbf387a22b15f0befc53d9e7aba5a94ed23b9289307716fd2
-size 1239447000
+oid sha256:3c1085cc31aaf24ecb9fa39dfa434995d07da84f55dfdaedba2a0f0b282223a5
+size 1149269232
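The adapter checkpoint shrinks from roughly 1.24 GB to 1.15 GB. A quick way to confirm the new rank from the checkpoint itself is to inspect the lora_A tensor shapes — a hedged sketch, assuming the file has been downloaded to the working directory:

```python
# Sketch: confirm the LoRA rank stored in the checkpoint without loading full tensors.
# Assumes adapter_model.safetensors is available locally.
from safetensors import safe_open

with safe_open("adapter_model.safetensors", framework="pt") as f:
    for name in f.keys():
        if "lora_A" in name:
            shape = f.get_slice(name).get_shape()
            print(name, shape)  # first dimension should now be 32
            break
```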
trainer_state.json CHANGED
@@ -10,110 +10,110 @@
   "log_history": [
     {
       "epoch": 0.28435345134001566,
-      "grad_norm": 0.49568846821784973,
+      "grad_norm": 0.656255841255188,
       "learning_rate": 9.294251565167901e-05,
-      "loss": 1.2539,
+      "loss": 1.2378,
       "step": 250
     },
     {
       "epoch": 0.5687069026800313,
-      "grad_norm": 0.45915305614471436,
+      "grad_norm": 0.543454647064209,
       "learning_rate": 8.582811610700058e-05,
-      "loss": 0.6466,
+      "loss": 0.6375,
       "step": 500
     },
     {
       "epoch": 0.8530603540200469,
-      "grad_norm": 0.3644406795501709,
+      "grad_norm": 0.43407657742500305,
       "learning_rate": 7.871371656232215e-05,
-      "loss": 0.5166,
+      "loss": 0.5158,
       "step": 750
     },
     {
       "epoch": 1.1374138053600626,
-      "grad_norm": 0.3468233644962311,
+      "grad_norm": 0.4036562740802765,
       "learning_rate": 7.159931701764372e-05,
       "loss": 0.4885,
       "step": 1000
     },
     {
       "epoch": 1.421767256700078,
-      "grad_norm": 0.3474404513835907,
+      "grad_norm": 0.40085434913635254,
       "learning_rate": 6.448491747296529e-05,
-      "loss": 0.4758,
+      "loss": 0.4759,
       "step": 1250
     },
     {
       "epoch": 1.706120708040094,
-      "grad_norm": 0.3625764548778534,
+      "grad_norm": 0.4041031002998352,
       "learning_rate": 5.737051792828686e-05,
-      "loss": 0.4624,
+      "loss": 0.4629,
       "step": 1500
     },
     {
       "epoch": 1.9904741593801094,
-      "grad_norm": 0.3761649429798126,
+      "grad_norm": 0.4215051233768463,
       "learning_rate": 5.025611838360843e-05,
-      "loss": 0.4549,
+      "loss": 0.4555,
       "step": 1750
     },
     {
       "epoch": 2.2748276107201253,
-      "grad_norm": 0.3799266815185547,
+      "grad_norm": 0.4411364495754242,
       "learning_rate": 4.3141718838929996e-05,
-      "loss": 0.4438,
+      "loss": 0.4451,
       "step": 2000
     },
     {
       "epoch": 2.5591810620601407,
-      "grad_norm": 0.4071201682090759,
+      "grad_norm": 0.4689200818538666,
       "learning_rate": 3.602731929425157e-05,
-      "loss": 0.431,
+      "loss": 0.433,
       "step": 2250
     },
     {
       "epoch": 2.843534513400156,
-      "grad_norm": 0.3974260985851288,
+      "grad_norm": 0.4492100775241852,
       "learning_rate": 2.8912919749573137e-05,
-      "loss": 0.4193,
+      "loss": 0.4217,
       "step": 2500
     },
     {
       "epoch": 3.127887964740172,
-      "grad_norm": 0.4286128580570221,
+      "grad_norm": 0.4875541627407074,
       "learning_rate": 2.1798520204894708e-05,
-      "loss": 0.4081,
+      "loss": 0.4116,
       "step": 2750
     },
     {
       "epoch": 3.4122414160801875,
-      "grad_norm": 0.4309234917163849,
+      "grad_norm": 0.48542124032974243,
       "learning_rate": 1.4684120660216277e-05,
-      "loss": 0.3971,
+      "loss": 0.4012,
       "step": 3000
     },
     {
       "epoch": 3.6965948674202034,
-      "grad_norm": 0.4457033574581146,
+      "grad_norm": 0.5120503306388855,
       "learning_rate": 7.569721115537849e-06,
-      "loss": 0.3893,
+      "loss": 0.3939,
       "step": 3250
     },
     {
       "epoch": 3.980948318760219,
-      "grad_norm": 0.45286279916763306,
+      "grad_norm": 0.510686993598938,
       "learning_rate": 4.553215708594195e-07,
-      "loss": 0.3847,
+      "loss": 0.3892,
       "step": 3500
     },
     {
       "epoch": 3.9991469396459802,
       "step": 3516,
-      "total_flos": 9.688385912636621e+17,
-      "train_loss": 0.51168809517523,
-      "train_runtime": 22604.8542,
-      "train_samples_per_second": 39.825,
-      "train_steps_per_second": 0.156
+      "total_flos": 9.473878732962447e+17,
+      "train_loss": 0.5115500152856959,
+      "train_runtime": 22624.5633,
+      "train_samples_per_second": 39.791,
+      "train_steps_per_second": 0.155
     }
   ],
   "logging_steps": 250,
@@ -133,7 +133,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.688385912636621e+17,
+  "total_flos": 9.473878732962447e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
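The new run's log history tracks the previous one closely: training loss falls from about 1.24 at step 250 to about 0.39 at step 3500, and the final train_loss is 0.5116 versus 0.5117 before. To compare runs, the loss curve can be pulled straight out of trainer_state.json — a minimal sketch, assuming the file written by transformers.Trainer is available locally:

```python
# Sketch: extract the per-step loss from trainer_state.json to compare two runs.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # the final summary entry carries train_loss instead
        print(f"step {entry['step']:>5}  loss {entry['loss']:.4f}")
```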
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d440338186a609f27bd06ffc3c0ece6d78660db22740828600577e50e4528def
+oid sha256:d25eb557ad70264fc715b2aa13435242b988886d53c26e60542469af6e08bf28
 size 5176
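training_args.bin stores the pickled TrainingArguments; only its hash changes here (the size stays 5176 bytes), so the hyperparameters behind the r change have to be read from the file itself. A hedged sketch for inspecting it, assuming you trust the checkpoint and have the matching transformers version installed:

```python
# Sketch: inspect the saved TrainingArguments. training_args.bin is a pickled object,
# so only load it from a source you trust; weights_only=False is required on recent torch.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```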