Muhammad Khalifa commited on
Commit
ea17c2f
1 Parent(s): e5d11d8

update coin flip model

Browse files
low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:206bdaf3cf034deec6de2394ba6a7b29d0b637ab2d1925332e8f1abb76025dd6
3
  size 104973389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52d3183a841c92cbf2569e43d73e61d3182b7acfd37ae4c1925fbcdfb5ae5037
3
  size 104973389
low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d53c7e4694c2cfcf4dd0eb9bfe3b05a7cd7809c6a8e6ff871c4c99d6ddfefaf4
3
  size 209984517
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b393456323a196b768aebc7f56020f0818e4b80d7ac3a55132cfb4b72dbd0e0
3
  size 209984517
low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3055502c9e3004eb987550db217f6677d695763c959badae25d773f1d985ab91
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8edb68dd6c81386559d1e69774ba987c7891d658260174f825deb48eee022d8
3
  size 14575
low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72784f3df358e4b01284895b23305a7f44b47671c221043eb9da1558ee751bbc
3
  size 627
low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json CHANGED
@@ -1,135 +1,107 @@
1
  {
2
- "best_metric": 0.17182409763336182,
3
- "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/coin_flip/checkpoint-60",
4
- "epoch": 9.795918367346939,
5
  "eval_steps": 500,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.98,
13
- "eval_loss": 3.2136309146881104,
14
- "eval_runtime": 1.7971,
15
- "eval_samples_per_second": 27.266,
16
- "eval_steps_per_second": 3.895,
17
  "step": 6
18
  },
19
  {
20
  "epoch": 1.63,
21
  "learning_rate": 6.666666666666667e-05,
22
- "loss": 3.5659,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 1.96,
27
- "eval_loss": 1.1381325721740723,
28
- "eval_runtime": 1.8028,
29
- "eval_samples_per_second": 27.18,
30
- "eval_steps_per_second": 3.883,
31
  "step": 12
32
  },
33
  {
34
  "epoch": 2.94,
35
- "eval_loss": 0.39599937200546265,
36
- "eval_runtime": 1.7938,
37
- "eval_samples_per_second": 27.316,
38
- "eval_steps_per_second": 3.902,
39
  "step": 18
40
  },
41
  {
42
  "epoch": 3.27,
43
  "learning_rate": 5.333333333333333e-05,
44
- "loss": 0.8239,
45
  "step": 20
46
  },
47
  {
48
  "epoch": 3.92,
49
- "eval_loss": 0.23788291215896606,
50
- "eval_runtime": 1.8071,
51
- "eval_samples_per_second": 27.115,
52
- "eval_steps_per_second": 3.874,
53
  "step": 24
54
  },
55
  {
56
  "epoch": 4.9,
57
  "learning_rate": 4e-05,
58
- "loss": 0.2375,
59
  "step": 30
60
  },
61
  {
62
  "epoch": 4.9,
63
- "eval_loss": 0.1869448572397232,
64
- "eval_runtime": 1.8046,
65
- "eval_samples_per_second": 27.154,
66
- "eval_steps_per_second": 3.879,
67
  "step": 30
68
  },
69
  {
70
  "epoch": 5.88,
71
- "eval_loss": 0.1762770116329193,
72
- "eval_runtime": 1.7955,
73
- "eval_samples_per_second": 27.291,
74
- "eval_steps_per_second": 3.899,
75
  "step": 36
76
  },
77
  {
78
  "epoch": 6.53,
79
  "learning_rate": 2.6666666666666667e-05,
80
- "loss": 0.1756,
81
  "step": 40
82
  },
83
  {
84
  "epoch": 6.86,
85
- "eval_loss": 0.17334015667438507,
86
- "eval_runtime": 1.7994,
87
- "eval_samples_per_second": 27.231,
88
- "eval_steps_per_second": 3.89,
89
  "step": 42
90
  },
91
  {
92
  "epoch": 8.0,
93
- "eval_loss": 0.17443998157978058,
94
- "eval_runtime": 1.7969,
95
- "eval_samples_per_second": 27.269,
96
- "eval_steps_per_second": 3.896,
97
  "step": 49
98
- },
99
- {
100
- "epoch": 8.16,
101
- "learning_rate": 1.3333333333333333e-05,
102
- "loss": 0.1626,
103
- "step": 50
104
- },
105
- {
106
- "epoch": 8.98,
107
- "eval_loss": 0.17577075958251953,
108
- "eval_runtime": 1.7999,
109
- "eval_samples_per_second": 27.223,
110
- "eval_steps_per_second": 3.889,
111
- "step": 55
112
- },
113
- {
114
- "epoch": 9.8,
115
- "learning_rate": 0.0,
116
- "loss": 0.1535,
117
- "step": 60
118
- },
119
- {
120
- "epoch": 9.8,
121
- "eval_loss": 0.17182409763336182,
122
- "eval_runtime": 1.7992,
123
- "eval_samples_per_second": 27.235,
124
- "eval_steps_per_second": 3.891,
125
- "step": 60
126
  }
127
  ],
128
  "logging_steps": 10,
129
  "max_steps": 60,
130
  "num_train_epochs": 10,
131
  "save_steps": 500,
132
- "total_flos": 7168130697461760.0,
133
  "trial_name": null,
134
  "trial_params": null
135
  }
 
1
  {
2
+ "best_metric": 0.16547438502311707,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/coin_flip/checkpoint-49",
4
+ "epoch": 8.0,
5
  "eval_steps": 500,
6
+ "global_step": 49,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.98,
13
+ "eval_loss": 3.1955792903900146,
14
+ "eval_runtime": 1.7606,
15
+ "eval_samples_per_second": 27.832,
16
+ "eval_steps_per_second": 3.976,
17
  "step": 6
18
  },
19
  {
20
  "epoch": 1.63,
21
  "learning_rate": 6.666666666666667e-05,
22
+ "loss": 3.5618,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 1.96,
27
+ "eval_loss": 1.2319456338882446,
28
+ "eval_runtime": 1.775,
29
+ "eval_samples_per_second": 27.605,
30
+ "eval_steps_per_second": 3.944,
31
  "step": 12
32
  },
33
  {
34
  "epoch": 2.94,
35
+ "eval_loss": 0.3878885507583618,
36
+ "eval_runtime": 1.7784,
37
+ "eval_samples_per_second": 27.553,
38
+ "eval_steps_per_second": 3.936,
39
  "step": 18
40
  },
41
  {
42
  "epoch": 3.27,
43
  "learning_rate": 5.333333333333333e-05,
44
+ "loss": 0.8597,
45
  "step": 20
46
  },
47
  {
48
  "epoch": 3.92,
49
+ "eval_loss": 0.21831320226192474,
50
+ "eval_runtime": 1.7786,
51
+ "eval_samples_per_second": 27.549,
52
+ "eval_steps_per_second": 3.936,
53
  "step": 24
54
  },
55
  {
56
  "epoch": 4.9,
57
  "learning_rate": 4e-05,
58
+ "loss": 0.2398,
59
  "step": 30
60
  },
61
  {
62
  "epoch": 4.9,
63
+ "eval_loss": 0.18551723659038544,
64
+ "eval_runtime": 1.7792,
65
+ "eval_samples_per_second": 27.54,
66
+ "eval_steps_per_second": 3.934,
67
  "step": 30
68
  },
69
  {
70
  "epoch": 5.88,
71
+ "eval_loss": 0.1781032681465149,
72
+ "eval_runtime": 1.7761,
73
+ "eval_samples_per_second": 27.588,
74
+ "eval_steps_per_second": 3.941,
75
  "step": 36
76
  },
77
  {
78
  "epoch": 6.53,
79
  "learning_rate": 2.6666666666666667e-05,
80
+ "loss": 0.1729,
81
  "step": 40
82
  },
83
  {
84
  "epoch": 6.86,
85
+ "eval_loss": 0.17477163672447205,
86
+ "eval_runtime": 1.7765,
87
+ "eval_samples_per_second": 27.583,
88
+ "eval_steps_per_second": 3.94,
89
  "step": 42
90
  },
91
  {
92
  "epoch": 8.0,
93
+ "eval_loss": 0.16547438502311707,
94
+ "eval_runtime": 1.7731,
95
+ "eval_samples_per_second": 27.635,
96
+ "eval_steps_per_second": 3.948,
97
  "step": 49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  }
99
  ],
100
  "logging_steps": 10,
101
  "max_steps": 60,
102
  "num_train_epochs": 10,
103
  "save_steps": 500,
104
+ "total_flos": 5852695881646080.0,
105
  "trial_name": null,
106
  "trial_params": null
107
  }
low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5173d21d2a38d1cd1cd4daa45bed5a9f6f0d64b0897c6366683a240cd58f864
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6b95880f589434d710796f9f9e970ac87dcf098561bddac80dccc8c12e9aed5
3
  size 4091
low-shot-task-specific-500-ex/coin_flip/checkpoint-60/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific-500-ex/coin_flip/checkpoint-60/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dedda326c7c619c6ee1e0863e8333c86f5010e4b0bbba306c5b8dfad1ac7a35d
3
+ size 104973389
low-shot-task-specific-500-ex/coin_flip/checkpoint-60/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a24ada10edf0d000f276cf7a2e17965fadda7af5feae736cd25bd682cbeb4fef
3
+ size 209984517
low-shot-task-specific-500-ex/coin_flip/checkpoint-60/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3055502c9e3004eb987550db217f6677d695763c959badae25d773f1d985ab91
3
+ size 14575
low-shot-task-specific-500-ex/coin_flip/checkpoint-60/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666
3
+ size 627
low-shot-task-specific-500-ex/coin_flip/checkpoint-60/trainer_state.json ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.16547438502311707,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/coin_flip/checkpoint-49",
4
+ "epoch": 9.795918367346939,
5
+ "eval_steps": 500,
6
+ "global_step": 60,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.98,
13
+ "eval_loss": 3.1955792903900146,
14
+ "eval_runtime": 1.7606,
15
+ "eval_samples_per_second": 27.832,
16
+ "eval_steps_per_second": 3.976,
17
+ "step": 6
18
+ },
19
+ {
20
+ "epoch": 1.63,
21
+ "learning_rate": 6.666666666666667e-05,
22
+ "loss": 3.5618,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 1.96,
27
+ "eval_loss": 1.2319456338882446,
28
+ "eval_runtime": 1.775,
29
+ "eval_samples_per_second": 27.605,
30
+ "eval_steps_per_second": 3.944,
31
+ "step": 12
32
+ },
33
+ {
34
+ "epoch": 2.94,
35
+ "eval_loss": 0.3878885507583618,
36
+ "eval_runtime": 1.7784,
37
+ "eval_samples_per_second": 27.553,
38
+ "eval_steps_per_second": 3.936,
39
+ "step": 18
40
+ },
41
+ {
42
+ "epoch": 3.27,
43
+ "learning_rate": 5.333333333333333e-05,
44
+ "loss": 0.8597,
45
+ "step": 20
46
+ },
47
+ {
48
+ "epoch": 3.92,
49
+ "eval_loss": 0.21831320226192474,
50
+ "eval_runtime": 1.7786,
51
+ "eval_samples_per_second": 27.549,
52
+ "eval_steps_per_second": 3.936,
53
+ "step": 24
54
+ },
55
+ {
56
+ "epoch": 4.9,
57
+ "learning_rate": 4e-05,
58
+ "loss": 0.2398,
59
+ "step": 30
60
+ },
61
+ {
62
+ "epoch": 4.9,
63
+ "eval_loss": 0.18551723659038544,
64
+ "eval_runtime": 1.7792,
65
+ "eval_samples_per_second": 27.54,
66
+ "eval_steps_per_second": 3.934,
67
+ "step": 30
68
+ },
69
+ {
70
+ "epoch": 5.88,
71
+ "eval_loss": 0.1781032681465149,
72
+ "eval_runtime": 1.7761,
73
+ "eval_samples_per_second": 27.588,
74
+ "eval_steps_per_second": 3.941,
75
+ "step": 36
76
+ },
77
+ {
78
+ "epoch": 6.53,
79
+ "learning_rate": 2.6666666666666667e-05,
80
+ "loss": 0.1729,
81
+ "step": 40
82
+ },
83
+ {
84
+ "epoch": 6.86,
85
+ "eval_loss": 0.17477163672447205,
86
+ "eval_runtime": 1.7765,
87
+ "eval_samples_per_second": 27.583,
88
+ "eval_steps_per_second": 3.94,
89
+ "step": 42
90
+ },
91
+ {
92
+ "epoch": 8.0,
93
+ "eval_loss": 0.16547438502311707,
94
+ "eval_runtime": 1.7731,
95
+ "eval_samples_per_second": 27.635,
96
+ "eval_steps_per_second": 3.948,
97
+ "step": 49
98
+ },
99
+ {
100
+ "epoch": 8.16,
101
+ "learning_rate": 1.3333333333333333e-05,
102
+ "loss": 0.1579,
103
+ "step": 50
104
+ },
105
+ {
106
+ "epoch": 8.98,
107
+ "eval_loss": 0.18015137314796448,
108
+ "eval_runtime": 1.7751,
109
+ "eval_samples_per_second": 27.604,
110
+ "eval_steps_per_second": 3.943,
111
+ "step": 55
112
+ },
113
+ {
114
+ "epoch": 9.8,
115
+ "learning_rate": 0.0,
116
+ "loss": 0.1442,
117
+ "step": 60
118
+ },
119
+ {
120
+ "epoch": 9.8,
121
+ "eval_loss": 0.1699230819940567,
122
+ "eval_runtime": 1.7773,
123
+ "eval_samples_per_second": 27.57,
124
+ "eval_steps_per_second": 3.939,
125
+ "step": 60
126
+ }
127
+ ],
128
+ "logging_steps": 10,
129
+ "max_steps": 60,
130
+ "num_train_epochs": 10,
131
+ "save_steps": 500,
132
+ "total_flos": 7168130697461760.0,
133
+ "trial_name": null,
134
+ "trial_params": null
135
+ }
low-shot-task-specific-500-ex/coin_flip/checkpoint-60/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6b95880f589434d710796f9f9e970ac87dcf098561bddac80dccc8c12e9aed5
3
+ size 4091