FatCat87 committed on
Commit
21e53d9
·
verified ·
1 Parent(s): d1c7778

End of training

Browse files
README.md ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - axolotl
6
+ - generated_from_trainer
7
+ base_model: Qwen/Qwen1.5-0.5B-Chat
8
+ model-index:
9
+ - name: f7f5e398-2eac-48f3-9b11-f398342892a9
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
17
+ <details><summary>See axolotl config</summary>
18
+
19
+ axolotl version: `0.4.1`
20
+ ```yaml
21
+ adapter: lora
22
+ base_model: Qwen/Qwen1.5-0.5B-Chat
23
+ bf16: auto
24
+ datasets:
25
+ - data_files:
26
+ - 54930b455054cf1c_train_data.json
27
+ ds_type: json
28
+ format: custom
29
+ path: 54930b455054cf1c_train_data.json
30
+ type:
31
+ field: null
32
+ field_input: null
33
+ field_instruction: prompt
34
+ field_output: chosen
35
+ field_system: null
36
+ format: null
37
+ no_input_format: null
38
+ system_format: '{system}'
39
+ system_prompt: ''
40
+ debug: null
41
+ deepspeed: null
42
+ early_stopping_patience: null
43
+ eval_max_new_tokens: 128
44
+ eval_sample_packing: false
45
+ eval_table_size: null
46
+ evals_per_epoch: 4
47
+ flash_attention: true
48
+ fp16: null
49
+ fsdp: null
50
+ fsdp_config: null
51
+ gradient_accumulation_steps: 4
52
+ gradient_checkpointing: true
53
+ group_by_length: false
54
+ hub_model_id: FatCat87/f7f5e398-2eac-48f3-9b11-f398342892a9
55
+ learning_rate: 0.0002
56
+ load_in_4bit: false
57
+ load_in_8bit: true
58
+ local_rank: null
59
+ logging_steps: 1
60
+ lora_alpha: 16
61
+ lora_dropout: 0.05
62
+ lora_r: 32
63
+ lora_target_linear: true
64
+ lr_scheduler: cosine
65
+ micro_batch_size: 2
66
+ model_type: AutoModelForCausalLM
67
+ num_epochs: 1
68
+ optimizer: adamw_bnb_8bit
69
+ output_dir: ./outputs/out
70
+ pad_to_sequence_len: true
71
+ resume_from_checkpoint: null
72
+ sample_packing: true
73
+ saves_per_epoch: 1
74
+ seed: 70832
75
+ sequence_len: 4096
76
+ special_tokens: null
77
+ strict: false
78
+ tf32: false
79
+ tokenizer_type: AutoTokenizer
80
+ train_on_inputs: false
81
+ val_set_size: 0.1
82
+ wandb_entity: fatcat87-taopanda
83
+ wandb_log_model: null
84
+ wandb_mode: online
85
+ wandb_name: f7f5e398-2eac-48f3-9b11-f398342892a9
86
+ wandb_project: subnet56
87
+ wandb_runid: f7f5e398-2eac-48f3-9b11-f398342892a9
88
+ wandb_watch: null
89
+ warmup_ratio: 0.05
90
+ weight_decay: 0.0
91
+ xformers_attention: null
92
+
93
+ ```
94
+
95
+ </details><br>
96
+
97
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/fatcat87-taopanda/subnet56/runs/677gd6kv)
98
+ # f7f5e398-2eac-48f3-9b11-f398342892a9
99
+
100
+ This model is a fine-tuned version of [Qwen/Qwen1.5-0.5B-Chat](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat) on a custom JSON dataset (`54930b455054cf1c_train_data.json`).
101
+ It achieves the following results on the evaluation set:
102
+ - Loss: 2.3591
103
+
104
+ ## Model description
105
+
106
+ More information needed
107
+
108
+ ## Intended uses & limitations
109
+
110
+ More information needed
111
+
112
+ ## Training and evaluation data
113
+
114
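+ The model was trained on the local JSON file `54930b455054cf1c_train_data.json`, using the `prompt` field as the instruction and the `chosen` field as the target output. 10% of the data was held out as the evaluation set (`val_set_size: 0.1`). More information needed on the dataset's origin and contents.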
115
+
116
+ ## Training procedure
117
+
118
+ ### Training hyperparameters
119
+
120
+ The following hyperparameters were used during training:
121
+ - learning_rate: 0.0002
122
+ - train_batch_size: 2
123
+ - eval_batch_size: 2
124
+ - seed: 70832
125
+ - distributed_type: multi-GPU
126
+ - num_devices: 2
127
+ - gradient_accumulation_steps: 4
128
+ - total_train_batch_size: 16
129
+ - total_eval_batch_size: 4
130
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
131
+ - lr_scheduler_type: cosine
132
+ - lr_scheduler_warmup_steps: 3
133
+ - num_epochs: 1
134
+
135
+ ### Training results
136
+
137
+ | Training Loss | Epoch | Step | Validation Loss |
138
+ |:-------------:|:------:|:----:|:---------------:|
139
+ | 2.8901 | 0.0123 | 1 | 3.0258 |
140
+ | 2.4547 | 0.2585 | 21 | 2.5135 |
141
+ | 2.394 | 0.5169 | 42 | 2.3987 |
142
+ | 2.3121 | 0.7754 | 63 | 2.3591 |
143
+
144
+
145
+ ### Framework versions
146
+
147
+ - PEFT 0.11.1
148
+ - Transformers 4.42.3
149
+ - Pytorch 2.3.0+cu121
150
+ - Datasets 2.19.1
151
+ - Tokenizers 0.19.1
adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen1.5-0.5B-Chat",
5
+ "bias": "none",
6
+ "fan_in_fan_out": null,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 32,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "gate_proj",
24
+ "v_proj",
25
+ "o_proj",
26
+ "k_proj",
27
+ "up_proj",
28
+ "down_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c1cca8954159b90f35bc47a8b2386a319a8070a1b124c3f187c1d56fbcd012
3
+ size 60676170
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c459153fac42fe1a6c26375a2c7ae042a1869e04477d19413f9ae3fff144b4af
3
+ size 60599872
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen1.5-0.5B-Chat",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 1024,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 2816,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 21,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 16,
16
+ "num_hidden_layers": 24,
17
+ "num_key_value_heads": 16,
18
+ "quantization_config": {
19
+ "_load_in_4bit": false,
20
+ "_load_in_8bit": true,
21
+ "bnb_4bit_compute_dtype": "float32",
22
+ "bnb_4bit_quant_storage": "uint8",
23
+ "bnb_4bit_quant_type": "fp4",
24
+ "bnb_4bit_use_double_quant": false,
25
+ "llm_int8_enable_fp32_cpu_offload": false,
26
+ "llm_int8_has_fp16_weight": false,
27
+ "llm_int8_skip_modules": null,
28
+ "llm_int8_threshold": 6.0,
29
+ "load_in_4bit": false,
30
+ "load_in_8bit": true,
31
+ "quant_method": "bitsandbytes"
32
+ },
33
+ "rms_norm_eps": 1e-06,
34
+ "rope_theta": 1000000.0,
35
+ "sliding_window": 32768,
36
+ "tie_word_embeddings": true,
37
+ "torch_dtype": "bfloat16",
38
+ "transformers_version": "4.42.3",
39
+ "use_cache": false,
40
+ "use_sliding_window": false,
41
+ "vocab_size": 151936
42
+ }
merged/added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
merged/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen1.5-0.5B-Chat",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 1024,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 2816,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 21,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 16,
16
+ "num_hidden_layers": 24,
17
+ "num_key_value_heads": 16,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": 32768,
21
+ "tie_word_embeddings": true,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.42.3",
24
+ "use_cache": false,
25
+ "use_sliding_window": false,
26
+ "vocab_size": 151936
27
+ }
merged/generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "top_p": 0.8,
11
+ "transformers_version": "4.42.3"
12
+ }
merged/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
merged/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c47b186b84389a878ec11fb86b838162bb0297a73ef9a2a1be1ed9b643f14afc
3
+ size 928069010
merged/special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
merged/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
merged/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|im_end|>",
37
+ "errors": "replace",
38
+ "model_max_length": 32768,
39
+ "pad_token": "<|endoftext|>",
40
+ "split_special_tokens": false,
41
+ "tokenizer_class": "Qwen2Tokenizer",
42
+ "unk_token": null
43
+ }
merged/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
3
+ size 500058
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|im_end|>",
37
+ "errors": "replace",
38
+ "model_max_length": 32768,
39
+ "pad_token": "<|endoftext|>",
40
+ "split_special_tokens": false,
41
+ "tokenizer_class": "Qwen2Tokenizer",
42
+ "unk_token": null
43
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0345a9c30e1e82937b476c0fc41b9e71455c2bc19f6543d7173932d6884b272
3
+ size 6072
vocab.json ADDED
The diff for this file is too large to render. See raw diff