bharati2324 committed on
Commit
55a4d75
·
verified ·
1 Parent(s): 3aba268

Training in progress, step 200, checkpoint

Browse files
checkpoint-200/README.md CHANGED
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
- - PEFT 0.13.2
 
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
+ - PEFT 0.14.0
checkpoint-200/adapter_config.json CHANGED
@@ -3,6 +3,8 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct",
5
  "bias": "none",
 
 
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
@@ -11,6 +13,7 @@
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
  "lora_alpha": 16,
 
14
  "lora_dropout": 0,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
@@ -20,9 +23,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
 
 
 
24
  "up_proj",
25
- "down_proj"
 
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_dora": false,
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct",
5
  "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
  "fan_in_fan_out": false,
9
  "inference_mode": true,
10
  "init_lora_weights": true,
 
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
  "lora_alpha": 16,
16
+ "lora_bias": false,
17
  "lora_dropout": 0,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "q_proj",
27
+ "o_proj",
28
+ "v_proj",
29
+ "down_proj",
30
  "up_proj",
31
+ "gate_proj",
32
+ "k_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-200/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bfa5ef9bc3fa42ee576cc10f6466b7c840fe42934f5d1b804e768481ba010d7
3
- size 31469800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a35053aa45fdcee5deb77dd71015e46df182a8545535c67b8bf3129f415fe7
3
+ size 45118424
checkpoint-200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55316d38116cca602a8876d692b1f6528d558d60d5ad242cd9982a15cddb77ad
3
- size 16088954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca69af920d027b19b15395aac650378132e9f6d9186ff2009b5c2c81595338bc
3
+ size 23159290
checkpoint-200/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97459de4fa732beab73d0b796f2344fdefe2aa83f040df40b862b751977373e1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:689af50812d14a1adfc0f4d13c6b372e27d2e362edaff3f2f3668be58d77081a
3
  size 1064
checkpoint-200/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.24390243902439024,
5
  "eval_steps": 500,
6
  "global_step": 200,
7
  "is_hyper_param_search": false,
@@ -9,78 +9,78 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.024390243902439025,
13
- "grad_norm": 0.33140748739242554,
14
- "learning_rate": 0.00019631901840490797,
15
- "loss": 2.6356,
16
  "step": 20
17
  },
18
  {
19
- "epoch": 0.04878048780487805,
20
- "grad_norm": 0.26923373341560364,
21
- "learning_rate": 0.0001914110429447853,
22
- "loss": 2.3438,
23
  "step": 40
24
  },
25
  {
26
- "epoch": 0.07317073170731707,
27
- "grad_norm": 0.259231835603714,
28
- "learning_rate": 0.00018650306748466258,
29
- "loss": 2.3363,
30
  "step": 60
31
  },
32
  {
33
- "epoch": 0.0975609756097561,
34
- "grad_norm": 0.2962253987789154,
35
- "learning_rate": 0.00018159509202453987,
36
- "loss": 2.278,
37
  "step": 80
38
  },
39
  {
40
- "epoch": 0.12195121951219512,
41
- "grad_norm": 0.2886357605457306,
42
- "learning_rate": 0.0001766871165644172,
43
- "loss": 2.2543,
44
  "step": 100
45
  },
46
  {
47
- "epoch": 0.14634146341463414,
48
- "grad_norm": 0.40607205033302307,
49
- "learning_rate": 0.0001717791411042945,
50
- "loss": 2.2058,
51
  "step": 120
52
  },
53
  {
54
- "epoch": 0.17073170731707318,
55
- "grad_norm": 0.4145870804786682,
56
- "learning_rate": 0.00016687116564417177,
57
- "loss": 2.3036,
58
  "step": 140
59
  },
60
  {
61
- "epoch": 0.1951219512195122,
62
- "grad_norm": 0.2872335612773895,
63
- "learning_rate": 0.00016196319018404909,
64
- "loss": 2.1944,
65
  "step": 160
66
  },
67
  {
68
- "epoch": 0.21951219512195122,
69
- "grad_norm": 0.4880731403827667,
70
- "learning_rate": 0.0001570552147239264,
71
- "loss": 2.1981,
72
  "step": 180
73
  },
74
  {
75
- "epoch": 0.24390243902439024,
76
- "grad_norm": 0.3285306394100189,
77
- "learning_rate": 0.0001521472392638037,
78
- "loss": 2.2533,
79
  "step": 200
80
  }
81
  ],
82
  "logging_steps": 20,
83
- "max_steps": 820,
84
  "num_input_tokens_seen": 0,
85
  "num_train_epochs": 1,
86
  "save_steps": 200,
@@ -96,7 +96,7 @@
96
  "attributes": {}
97
  }
98
  },
99
- "total_flos": 4.79925999501312e+16,
100
  "train_batch_size": 2,
101
  "trial_name": null,
102
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.21333333333333335,
5
  "eval_steps": 500,
6
  "global_step": 200,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.021333333333333333,
13
+ "grad_norm": 0.46613356471061707,
14
+ "learning_rate": 0.00019678111587982831,
15
+ "loss": 1.2799,
16
  "step": 20
17
  },
18
  {
19
+ "epoch": 0.042666666666666665,
20
+ "grad_norm": 0.3468495309352875,
21
+ "learning_rate": 0.0001924892703862661,
22
+ "loss": 0.8919,
23
  "step": 40
24
  },
25
  {
26
+ "epoch": 0.064,
27
+ "grad_norm": 0.4602198004722595,
28
+ "learning_rate": 0.00018819742489270387,
29
+ "loss": 0.8586,
30
  "step": 60
31
  },
32
  {
33
+ "epoch": 0.08533333333333333,
34
+ "grad_norm": 0.480325311422348,
35
+ "learning_rate": 0.00018390557939914164,
36
+ "loss": 0.7571,
37
  "step": 80
38
  },
39
  {
40
+ "epoch": 0.10666666666666667,
41
+ "grad_norm": 0.30179363489151,
42
+ "learning_rate": 0.00017961373390557942,
43
+ "loss": 0.7793,
44
  "step": 100
45
  },
46
  {
47
+ "epoch": 0.128,
48
+ "grad_norm": 0.3483397364616394,
49
+ "learning_rate": 0.00017532188841201717,
50
+ "loss": 0.7647,
51
  "step": 120
52
  },
53
  {
54
+ "epoch": 0.14933333333333335,
55
+ "grad_norm": 0.29965728521347046,
56
+ "learning_rate": 0.00017103004291845494,
57
+ "loss": 0.6741,
58
  "step": 140
59
  },
60
  {
61
+ "epoch": 0.17066666666666666,
62
+ "grad_norm": 0.26644188165664673,
63
+ "learning_rate": 0.00016673819742489272,
64
+ "loss": 0.7586,
65
  "step": 160
66
  },
67
  {
68
+ "epoch": 0.192,
69
+ "grad_norm": 0.2962466776371002,
70
+ "learning_rate": 0.0001624463519313305,
71
+ "loss": 0.7364,
72
  "step": 180
73
  },
74
  {
75
+ "epoch": 0.21333333333333335,
76
+ "grad_norm": 0.36480244994163513,
77
+ "learning_rate": 0.00015815450643776824,
78
+ "loss": 0.7944,
79
  "step": 200
80
  }
81
  ],
82
  "logging_steps": 20,
83
+ "max_steps": 937,
84
  "num_input_tokens_seen": 0,
85
  "num_train_epochs": 1,
86
  "save_steps": 200,
 
96
  "attributes": {}
97
  }
98
  },
99
+ "total_flos": 4.81593185009664e+16,
100
  "train_batch_size": 2,
101
  "trial_name": null,
102
  "trial_params": null
checkpoint-200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7501bc42d09d7753e639dfc1e66ccdf53ccdce2f916fa9d21b9425fc1435629
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f519abf615386e0857c941fcb28a9140901798289aceaff057539afc5159bd3d
3
  size 5560