flyyufelix committed
Commit 41d1278 · verified · 1 Parent(s): fa86c24

Model save

README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-base_model: Qwen/Qwen2.5-0.5B
+base_model: Qwen/Qwen2.5-Math-7B
 library_name: transformers
 model_name: Qwen-2.5-7B-Simple-RL
 tags:
@@ -11,7 +11,7 @@ licence: license
 
 # Model Card for Qwen-2.5-7B-Simple-RL
 
-This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B](https://huggingface.co/Qwen/Qwen2.5-0.5B).
+This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
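
The body of the Quick start section sits outside this hunk's context. A minimal sketch of running the saved model, assuming the Hub repo id flyyufelix/Qwen-2.5-7B-Simple-RL (inferred from the model card title, not shown in this diff) and that peft is installed so the adapter repo can be loaded directly:

```python
from transformers import pipeline

# Hypothetical repo id; with peft installed, transformers can load a
# LoRA-adapter repo like this one straight into a text-generation pipeline.
generator = pipeline("text-generation", model="flyyufelix/Qwen-2.5-7B-Simple-RL")
out = generator(
    [{"role": "user", "content": "What is 13 * 17?"}],
    max_new_tokens=256,
    return_full_text=False,
)
print(out[0]["generated_text"])
```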
adapter_config.json ADDED
@@ -0,0 +1,37 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-Math-7B",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "o_proj",
+    "v_proj",
+    "gate_proj",
+    "k_proj",
+    "q_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
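
This is a standard PEFT LoRA setup: rank 16, alpha 16, dropout 0.05, applied to all seven attention and MLP projections. A sketch of the equivalent configuration in code, with values copied from the JSON above:

```python
from peft import LoraConfig

# Mirrors adapter_config.json: rank-16 LoRA on every attention and MLP
# projection of the base causal LM.
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)
```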
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d1299b48026bfa4882d567b9562ebfc2772388a281c3593858b688dd0446f7c
+size 161533192
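
The 161,533,192-byte file is consistent with this adapter stored in fp32. A back-of-the-envelope check, assuming the publicly documented Qwen2.5-Math-7B dimensions (28 layers, hidden size 3584, MLP width 18944, KV projection width 512) -- these numbers come from the base model's config, not from this commit:

```python
# A rank-r LoRA adds r * (fan_in + fan_out) parameters per wrapped projection.
r, layers = 16, 28
h, ffn, kv = 3584, 18944, 512  # assumed Qwen2.5-Math-7B dimensions

per_layer = (
    r * (h + h)          # q_proj
    + r * (h + kv)       # k_proj
    + r * (h + kv)       # v_proj
    + r * (h + h)        # o_proj
    + 3 * r * (h + ffn)  # gate_proj, up_proj, down_proj
)
params = layers * per_layer
print(params)      # 40,370,176 adapter parameters
print(params * 4)  # ~161.5 MB at 4 bytes/param, matching the LFS size above
```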
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": -0.002354813553392887,
-    "train_runtime": 733.4112,
+    "train_loss": 0.0008721697144210338,
+    "train_runtime": 2002.6709,
     "train_samples": 7500,
-    "train_samples_per_second": 1.309,
-    "train_steps_per_second": 0.041
+    "train_samples_per_second": 0.24,
+    "train_steps_per_second": 0.015
 }
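
The updated throughput figures are internally consistent with a 30-step run; a quick check using the values above:

```python
# Values copied from the updated all_results.json.
train_runtime = 2002.6709  # seconds
steps_per_second = 0.015
samples_per_second = 0.24

print(round(train_runtime * steps_per_second))    # ~30 optimizer steps
print(round(train_runtime * samples_per_second))  # ~481 samples consumed
```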
config.json CHANGED
@@ -23,7 +23,7 @@
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0",
-  "use_cache": false,
+  "use_cache": true,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 151936
tokenizer_config.json CHANGED
@@ -195,7 +195,7 @@
     "<|video_pad|>"
   ],
   "bos_token": null,
-  "chat_template": "{%- if tools %}\n  {{- '<|im_start|>system\\n' }}\n  {%- if messages[0]['role'] == 'system' %}\n    {{- messages[0]['content'] }}\n  {%- else %}\n    {{- 'You are a helpful assistant.' }}\n  {%- endif %}\n  {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n  {%- for tool in tools %}\n    {{- \"\\n\" }}\n    {{- tool | tojson }}\n  {%- endfor %}\n  {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n  {%- if messages[0]['role'] == 'system' %}\n    {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n  {%- else %}\n    {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n  {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n  {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n    {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n  {%- elif message.role == \"assistant\" %}\n    {{- '<|im_start|>' + message.role }}\n    {%- if message.content %}\n      {{- '\\n' + message.content }}\n    {%- endif %}\n    {%- for tool_call in message.tool_calls %}\n      {%- if tool_call.function is defined %}\n        {%- set tool_call = tool_call.function %}\n      {%- endif %}\n      {{- '\\n<tool_call>\\n{\"name\": \"' }}\n      {{- tool_call.name }}\n      {{- '\", \"arguments\": ' }}\n      {{- tool_call.arguments | tojson }}\n      {{- '}\\n</tool_call>' }}\n    {%- endfor %}\n    {{- '<|im_end|>\\n' }}\n  {%- elif message.role == \"tool\" %}\n    {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n      {{- '<|im_start|>user' }}\n    {%- endif %}\n    {{- '\\n<tool_response>\\n' }}\n    {{- message.content }}\n    {{- '\\n</tool_response>' }}\n    {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n      {{- '<|im_end|>\\n' }}\n    {%- endif %}\n  {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n  {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+  "chat_template": "{%- if tools %}\n  {{- '<|im_start|>system\\n' }}\n  {%- if messages[0]['role'] == 'system' %}\n    {{- messages[0]['content'] }}\n  {%- else %}\n    {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' }}\n  {%- endif %}\n  {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n  {%- for tool in tools %}\n    {{- \"\\n\" }}\n    {{- tool | tojson }}\n  {%- endfor %}\n  {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n  {%- if messages[0]['role'] == 'system' %}\n    {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n  {%- else %}\n    {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n  {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n  {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n    {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n  {%- elif message.role == \"assistant\" %}\n    {{- '<|im_start|>' + message.role }}\n    {%- if message.content %}\n      {{- '\\n' + message.content }}\n    {%- endif %}\n    {%- for tool_call in message.tool_calls %}\n      {%- if tool_call.function is defined %}\n        {%- set tool_call = tool_call.function %}\n      {%- endif %}\n      {{- '\\n<tool_call>\\n{\"name\": \"' }}\n      {{- tool_call.name }}\n      {{- '\", \"arguments\": ' }}\n      {{- tool_call.arguments | tojson }}\n      {{- '}\\n</tool_call>' }}\n    {%- endfor %}\n    {{- '<|im_end|>\\n' }}\n  {%- elif message.role == \"tool\" %}\n    {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n      {{- '<|im_start|>user' }}\n    {%- endif %}\n    {{- '\\n<tool_response>\\n' }}\n    {{- message.content }}\n    {{- '\\n</tool_response>' }}\n    {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n      {{- '<|im_end|>\\n' }}\n    {%- endif %}\n  {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n  {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "errors": "replace",
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": -0.002354813553392887,
-    "train_runtime": 733.4112,
+    "train_loss": 0.0008721697144210338,
+    "train_runtime": 2002.6709,
     "train_samples": 7500,
-    "train_samples_per_second": 1.309,
-    "train_steps_per_second": 0.041
+    "train_samples_per_second": 0.24,
+    "train_steps_per_second": 0.015
 }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.064,
+  "epoch": 0.032,
   "eval_steps": 100,
   "global_step": 30,
   "is_hyper_param_search": false,
@@ -10,110 +10,110 @@
   "log_history": [
     {
       "clip_ratio": 0.0,
-      "completion_length": 576.28125,
-      "epoch": 0.0021333333333333334,
-      "grad_norm": 0.20850299298763275,
+      "completion_length": 246.8125,
+      "epoch": 0.0010666666666666667,
+      "grad_norm": 0.0,
       "kl": 0.0,
       "learning_rate": 1e-06,
-      "loss": -0.0284,
-      "reward": 0.03125,
-      "reward_std": 0.04419417306780815,
-      "rewards/accuracy_reward": 0.03125,
+      "loss": 0.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "rewards/accuracy_reward": 0.0,
       "rewards/format_reward": 0.0,
       "step": 1
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 571.2578125,
-      "epoch": 0.010666666666666666,
-      "grad_norm": 0.19187743961811066,
-      "kl": 0.0002953857183456421,
+      "completion_length": 228.984375,
+      "epoch": 0.005333333333333333,
+      "grad_norm": 0.0002897010708693415,
+      "kl": 0.0002434550688121817,
       "learning_rate": 2.959567305869736e-06,
-      "loss": -0.0078,
-      "reward": 0.0703125,
-      "reward_std": 0.05524271633476019,
-      "rewards/accuracy_reward": 0.0703125,
+      "loss": -0.0114,
+      "reward": 0.0625,
+      "reward_std": 0.0883883461356163,
+      "rewards/accuracy_reward": 0.0625,
       "rewards/format_reward": 0.0,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 595.875,
-      "epoch": 0.021333333333333333,
-      "grad_norm": 0.1668393611907959,
-      "kl": 0.0013158321380615234,
+      "completion_length": 234.275,
+      "epoch": 0.010666666666666666,
+      "grad_norm": 0.012982388027012348,
+      "kl": 0.00034311858398723417,
       "learning_rate": 2.529362456803101e-06,
-      "loss": -0.0104,
-      "reward": 0.1,
-      "reward_std": 0.07071067690849304,
-      "rewards/accuracy_reward": 0.1,
+      "loss": -0.0046,
+      "reward": 0.0625,
+      "reward_std": 0.01767766922712326,
+      "rewards/accuracy_reward": 0.0625,
       "rewards/format_reward": 0.0,
       "step": 10
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 580.5,
-      "epoch": 0.032,
-      "grad_norm": 0.5971764326095581,
-      "kl": 0.005566215515136719,
+      "completion_length": 239.0,
+      "epoch": 0.016,
+      "grad_norm": 0.00560641847550869,
+      "kl": 0.000386229114519665,
       "learning_rate": 1.7604722665003958e-06,
-      "loss": 0.0111,
-      "reward": 0.1,
-      "reward_std": 0.10606601536273956,
-      "rewards/accuracy_reward": 0.1,
+      "loss": 0.0031,
+      "reward": 0.1375,
+      "reward_std": 0.12374368458986282,
+      "rewards/accuracy_reward": 0.1375,
       "rewards/format_reward": 0.0,
       "step": 15
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 589.3125,
-      "epoch": 0.042666666666666665,
-      "grad_norm": 0.2944951355457306,
-      "kl": 0.00770721435546875,
+      "completion_length": 244.1125,
+      "epoch": 0.021333333333333333,
+      "grad_norm": 0.0002242429181933403,
+      "kl": 0.00041347902733832596,
       "learning_rate": 9.058803509412648e-07,
-      "loss": 0.0027,
-      "reward": 0.0875,
-      "reward_std": 0.07071067690849304,
-      "rewards/accuracy_reward": 0.0875,
+      "loss": 0.0022,
+      "reward": 0.05,
+      "reward_std": 0.03535533845424652,
+      "rewards/accuracy_reward": 0.05,
       "rewards/format_reward": 0.0,
       "step": 20
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 637.46875,
-      "epoch": 0.05333333333333334,
-      "grad_norm": 0.3306269943714142,
-      "kl": 0.005007171630859375,
+      "completion_length": 242.75,
+      "epoch": 0.02666666666666667,
+      "grad_norm": 0.016108760610222816,
+      "kl": 0.0003348791698954301,
       "learning_rate": 2.467682828805956e-07,
-      "loss": -0.0159,
-      "reward": 0.125,
-      "reward_std": 0.10606601536273956,
-      "rewards/accuracy_reward": 0.125,
+      "loss": 0.0156,
+      "reward": 0.1375,
+      "reward_std": 0.0883883461356163,
+      "rewards/accuracy_reward": 0.1375,
       "rewards/format_reward": 0.0,
       "step": 25
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 585.0625,
-      "epoch": 0.064,
-      "grad_norm": 0.4771800935268402,
-      "kl": 0.005538177490234375,
+      "completion_length": 234.625,
+      "epoch": 0.032,
+      "grad_norm": 0.0001786644133972004,
+      "kl": 0.0004210583254462108,
       "learning_rate": 0.0,
-      "loss": 0.0103,
-      "reward": 0.1,
+      "loss": -0.0019,
+      "reward": 0.0625,
       "reward_std": 0.05303300768136978,
-      "rewards/accuracy_reward": 0.1,
+      "rewards/accuracy_reward": 0.0625,
       "rewards/format_reward": 0.0,
       "step": 30
     },
     {
-      "epoch": 0.064,
+      "epoch": 0.032,
       "step": 30,
       "total_flos": 0.0,
-      "train_loss": -0.002354813553392887,
-      "train_runtime": 733.4112,
-      "train_samples_per_second": 1.309,
-      "train_steps_per_second": 0.041
+      "train_loss": 0.0008721697144210338,
+      "train_runtime": 2002.6709,
+      "train_samples_per_second": 0.24,
+      "train_steps_per_second": 0.015
     }
   ],
   "logging_steps": 5,
@@ -134,7 +134,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 4,
+  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }
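
Read together with train_samples = 7500 from train_results.json, the epoch bookkeeping also hints at the effective batch size; a hedged reading of the numbers above (world size and gradient accumulation are not recorded in this commit, and GRPO's per-prompt generations may complicate the sample count):

```python
# Values copied from trainer_state.json / train_results.json above.
epoch, steps, train_samples = 0.032, 30, 7500

samples_per_step = epoch * train_samples / steps
print(samples_per_step)  # 8.0 samples per optimizer step

# With "train_batch_size": 2 per device, 8 samples/step would imply a
# world_size * grad_accumulation product of 4 -- an inference, not a
# value stored anywhere in this commit.
```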
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b54993a40d108f108aee06b404faf7282e71ad6feada81ab7f888a425e9051c6
-size 7928
+oid sha256:7ce212c5204dd486d663714791f972520601089fe064d65c6f7d1b24d7d5cb22
+size 6520