robertou2 committed on
Commit
bf507f7
·
verified ·
1 Parent(s): 1051b75

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -20,10 +20,10 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "down_proj",
24
- "qkv_proj",
25
  "o_proj",
26
- "gate_up_proj"
 
 
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "o_proj",
24
+ "gate_up_proj",
25
+ "qkv_proj",
26
+ "down_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f038e0934a3168a134372d76747c35963d810535ac44238c96f526ba5d73444d
3
  size 50365768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09dd6cf05506f7922ad70f74fe6934c8336e697e67448b49f05a9b4a7a6566de
3
  size 50365768
all_results.json CHANGED
@@ -1,13 +1,12 @@
1
  {
2
- "epoch": 22.235294117647058,
3
- "eval_loss": 0.5779597759246826,
4
- "eval_runtime": 3.3667,
5
- "eval_samples": 15,
6
- "eval_samples_per_second": 4.455,
7
- "eval_steps_per_second": 0.594,
8
- "total_flos": 1.040122623832658e+17,
9
- "train_loss": 0.13381972634699196,
10
- "train_runtime": 1987.8506,
11
- "train_samples_per_second": 1.61,
12
- "train_steps_per_second": 0.101
13
  }
 
1
  {
2
+ "epoch": 1.8235294117647058,
3
+ "eval_loss": 0.7067741751670837,
4
+ "eval_runtime": 28.5765,
5
+ "eval_samples_per_second": 0.525,
6
+ "eval_steps_per_second": 0.28,
7
+ "total_flos": 1.1336724214972416e+16,
8
+ "train_loss": 0.8289451897144318,
9
+ "train_runtime": 1583.6827,
10
+ "train_samples_per_second": 0.17,
11
+ "train_steps_per_second": 0.01
 
12
  }
eval_results.json CHANGED
@@ -1,8 +1,7 @@
1
  {
2
- "epoch": 22.235294117647058,
3
- "eval_loss": 0.5779597759246826,
4
- "eval_runtime": 3.3667,
5
- "eval_samples": 15,
6
- "eval_samples_per_second": 4.455,
7
- "eval_steps_per_second": 0.594
8
  }
 
1
  {
2
+ "epoch": 1.8235294117647058,
3
+ "eval_loss": 0.7067741751670837,
4
+ "eval_runtime": 28.5765,
5
+ "eval_samples_per_second": 0.525,
6
+ "eval_steps_per_second": 0.28
 
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 22.235294117647058,
3
- "total_flos": 1.040122623832658e+17,
4
- "train_loss": 0.13381972634699196,
5
- "train_runtime": 1987.8506,
6
- "train_samples_per_second": 1.61,
7
- "train_steps_per_second": 0.101
8
  }
 
1
  {
2
+ "epoch": 1.8235294117647058,
3
+ "total_flos": 1.1336724214972416e+16,
4
+ "train_loss": 0.8289451897144318,
5
+ "train_runtime": 1583.6827,
6
+ "train_samples_per_second": 0.17,
7
+ "train_steps_per_second": 0.01
8
  }
trainer_state.json CHANGED
@@ -1,15 +1,48 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.9481481481481482,
5
  "eval_steps": 500,
6
- "global_step": 66,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
- "log_history": [],
11
- "logging_steps": 100,
12
- "max_steps": 66,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "num_input_tokens_seen": 0,
14
  "num_train_epochs": 2,
15
  "save_steps": 500,
@@ -25,8 +58,8 @@
25
  "attributes": {}
26
  }
27
  },
28
- "total_flos": 1.2071086917156864e+16,
29
- "train_batch_size": 1,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
 
1
  {
2
+ "best_metric": 0.7067741751670837,
3
+ "best_model_checkpoint": "./phi3_finetuned/checkpoint-16",
4
+ "epoch": 1.8235294117647058,
5
  "eval_steps": 500,
6
+ "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.7619180083274841,
14
+ "eval_runtime": 28.359,
15
+ "eval_samples_per_second": 0.529,
16
+ "eval_steps_per_second": 0.282,
17
+ "step": 9
18
+ },
19
+ {
20
+ "epoch": 1.1176470588235294,
21
+ "grad_norm": 0.53853839635849,
22
+ "learning_rate": 0.000225,
23
+ "loss": 0.8601,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 1.8235294117647058,
28
+ "eval_loss": 0.7067741751670837,
29
+ "eval_runtime": 28.3045,
30
+ "eval_samples_per_second": 0.53,
31
+ "eval_steps_per_second": 0.283,
32
+ "step": 16
33
+ },
34
+ {
35
+ "epoch": 1.8235294117647058,
36
+ "step": 16,
37
+ "total_flos": 1.1336724214972416e+16,
38
+ "train_loss": 0.8289451897144318,
39
+ "train_runtime": 1583.6827,
40
+ "train_samples_per_second": 0.17,
41
+ "train_steps_per_second": 0.01
42
+ }
43
+ ],
44
+ "logging_steps": 10,
45
+ "max_steps": 16,
46
  "num_input_tokens_seen": 0,
47
  "num_train_epochs": 2,
48
  "save_steps": 500,
 
58
  "attributes": {}
59
  }
60
  },
61
+ "total_flos": 1.1336724214972416e+16,
62
+ "train_batch_size": 2,
63
  "trial_name": null,
64
  "trial_params": null
65
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cd9f76f1c27b8546e1ede6a4353769298e7cfd4ca0d040b456617a994550ac6
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08d02580fa61a9f76991a83deddc72fb2290b7a3aa24cadbb04f537cefeeef71
3
  size 5304