rombodawg commited on
Commit
a32debd
0 Parent(s):

Duplicate from Replete-AI/Replete-Coder-Qwen2-1.5b

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: Qwen/Qwen2-1.5B
4
+ tags:
5
+ - text-generation-inference
6
+ - transformers
7
+ - unsloth
8
+ - qwen2
9
+ datasets:
10
+ - Replete-AI/code_bagel_hermes-2.5
11
+ - Replete-AI/code_bagel
12
+ - Replete-AI/OpenHermes-2.5-Uncensored
13
+ - teknium/OpenHermes-2.5
14
+ - layoric/tiny-codes-alpaca
15
+ - glaiveai/glaive-code-assistant-v3
16
+ - ajibawa-2023/Code-290k-ShareGPT
17
+ - TIGER-Lab/MathInstruct
18
+ - chargoddard/commitpack-ft-instruct-rated
19
+ - iamturun/code_instructions_120k_alpaca
20
+ - ise-uiuc/Magicoder-Evol-Instruct-110K
21
+ - cognitivecomputations/dolphin-coder
22
+ - nickrosh/Evol-Instruct-Code-80k-v1
23
+ - coseal/CodeUltraFeedback_binarized
24
+ - glaiveai/glaive-function-calling-v2
25
+ - CyberNative/Code_Vulnerability_Security_DPO
26
+ - jondurbin/airoboros-2.2
27
+ - camel-ai
28
+ - lmsys/lmsys-chat-1m
29
+ - CollectiveCognition/chats-data-2023-09-22
30
+ - CoT-Alpaca-GPT4
31
+ - WizardLM/WizardLM_evol_instruct_70k
32
+ - WizardLM/WizardLM_evol_instruct_V2_196k
33
+ - teknium/GPT4-LLM-Cleaned
34
+ - GPTeacher
35
+ - OpenGPT
36
+ - meta-math/MetaMathQA
37
+ - Open-Orca/SlimOrca
38
+ - garage-bAInd/Open-Platypus
39
+ - anon8231489123/ShareGPT_Vicuna_unfiltered
40
+ - Unnatural-Instructions-GPT4
41
+ model-index:
42
+ - name: Replete-Coder-Qwen2-1.5b
43
+ results:
44
+ - task:
45
+ name: HumanEval
46
+ type: text-generation
47
+ dataset:
48
+ type: openai_humaneval
49
+ name: HumanEval
50
+ metrics:
51
+ - name: pass@1
52
+ type: pass@1
53
+ value: 0.35365853658536583
54
+ verified: True
55
+ - task:
56
+ name: AI2 Reasoning Challenge
57
+ type: text-generation
58
+ dataset:
59
+ name: AI2 Reasoning Challenge (25-Shot)
60
+ type: ai2_arc
61
+ config: ARC-Challenge
62
+ split: test
63
+ args:
64
+ num_few_shot: 25
65
+ metrics:
66
+ - type: accuracy
67
+ value:
68
+ name: normalized accuracy
69
+ source:
70
+ url: https://www.placeholderurl.com
71
+ name: Open LLM Leaderboard
72
+ - task:
73
+ name: Text Generation
74
+ type: text-generation
75
+ dataset:
76
+ name: HellaSwag (10-Shot)
77
+ type: hellaswag
78
+ split: validation
79
+ args:
80
+ num_few_shot: 10
81
+ metrics:
82
+ - type: accuracy
83
+ value:
84
+ name: normalized accuracy
85
+ source:
86
+ url: https://www.placeholderurl.com
87
+ name: Open LLM Leaderboard
88
+ - task:
89
+ name: Text Generation
90
+ type: text-generation
91
+ dataset:
92
+ name: MMLU (5-Shot)
93
+ type: cais/mmlu
94
+ config: all
95
+ split: test
96
+ args:
97
+ num_few_shot: 5
98
+ metrics:
99
+ - type: accuracy
100
+ value:
101
+ name: accuracy
102
+ source:
103
+ url: https://www.placeholderurl.com
104
+ name: Open LLM Leaderboard
105
+ - task:
106
+ name: Text Generation
107
+ type: text-generation
108
+ dataset:
109
+ name: TruthfulQA (0-shot)
110
+ type: truthful_qa
111
+ config: multiple_choice
112
+ split: validation
113
+ args:
114
+ num_few_shot: 0
115
+ metrics:
116
+ - type: multiple_choice_accuracy
117
+ value:
118
+ source:
119
+ url: https://www.placeholderurl.com
120
+ name: Open LLM Leaderboard
121
+ - task:
122
+ name: Text Generation
123
+ type: text-generation
124
+ dataset:
125
+ name: Winogrande (5-shot)
126
+ type: winogrande
127
+ config: winogrande_xl
128
+ split: validation
129
+ args:
130
+ num_few_shot: 5
131
+ metrics:
132
+ - type: accuracy
133
+ value:
134
+ name: accuracy
135
+ source:
136
+ url: https://www.placeholderurl.com
137
+ name: Open LLM Leaderboard
138
+ - task:
139
+ name: Text Generation
140
+ type: text-generation
141
+ dataset:
142
+ name: GSM8k (5-shot)
143
+ type: gsm8k
144
+ config: main
145
+ split: test
146
+ args:
147
+ num_few_shot: 5
148
+ metrics:
149
+ - type: accuracy
150
+ value:
151
+ name: accuracy
152
+ source:
153
+ url: https://www.placeholderurl.com
154
+ name: Open LLM Leaderboard
155
+
156
+ ---
157
+ # Replete-Coder-Qwen2-1.5b
158
+ Finetuned by: Rombodawg
159
+ ### More than just a coding model!
160
+ Although Replete-Coder has amazing coding capabilities, it's trained on a vast amount of non-coding data, fully cleaned and uncensored. Don't just use it for coding, use it for all your needs! We are truly trying to make the GPT killer!
161
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/-0dERC793D9XeFsJ9uHbx.png)
162
+
163
+ Thank you to TensorDock for sponsoring Replete-Coder-llama3-8b and Replete-Coder-Qwen2-1.5b.
164
+ You can check out their website for cloud compute rental below.
165
+ - https://tensordock.com
166
+ __________________________________________________________________________________________________
167
+ Replete-Coder-Qwen2-1.5b is a general-purpose model that is specially trained in coding in over 100 coding languages. The data used to train the model contains 25% non-code instruction data and 75% coding instruction data, totaling 3.9 million lines, roughly 1 billion tokens, or 7.27 GB of instruct data. The data used to train this model was 100% uncensored and was fully deduplicated before training.
168
+
169
+ The Replete-Coder models (including Replete-Coder-llama3-8b and Replete-Coder-Qwen2-1.5b) feature the following:
170
+
171
+ - Advanced coding capabilities in over 100 coding languages
172
+ - Advanced code translation (between languages)
173
+ - Security and vulnerability prevention related coding capabilities
174
+ - General purpose use
175
+ - Uncensored use
176
+ - Function calling
177
+ - Advanced math use
178
+ - Use on low end (8b) and mobile (1.5b) platforms
179
+
180
+ Notice: Replete-Coder series of models are fine-tuned on a context window of 8192 tokens. Performance past this context window is not guaranteed.
181
+
182
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/JNGVmzaZC30xrvLI1D7pp.png)
183
+ _________________________________________________________________________________________________
184
+
185
+ You can find the 25% non-coding instruction data below:
186
+
187
+ - https://huggingface.co/datasets/Replete-AI/OpenHermes-2.5-Uncensored
188
+
189
+ And the 75% coding specific instruction data below:
190
+
191
+ - https://huggingface.co/datasets/Replete-AI/code_bagel
192
+
193
+ These two datasets were combined to create the final dataset for training, which is linked below:
194
+
195
+ - https://huggingface.co/datasets/Replete-AI/code_bagel_hermes-2.5
196
+ __________________________________________________________________________________________________
197
+ ## Prompt Template: ChatML
198
+ ```
199
+ <|im_start|>system
200
+ {}<|im_end|>
201
+
202
+ <|im_start|>user
203
+ {}<|im_end|>
204
+
205
+ <|im_start|>assistant
206
+ {}
207
+ ```
208
+ Note: The system prompt varies in training data, but the most commonly used one is:
209
+ ```
210
+ Below is an instruction that describes a task, Write a response that appropriately completes the request.
211
+ ```
212
+ End token:
213
+ ```
214
+ <|endoftext|>
215
+ ```
216
+ __________________________________________________________________________________________________
217
+ Thank you to the community for your contributions to the Replete-AI/code_bagel_hermes-2.5 dataset. Without the participation of so many members making their datasets free and open source for anyone to use, this amazing AI model wouldn't be possible.
218
+
219
+ Extra special thanks to Teknium for the OpenHermes-2.5 dataset and jondurbin for the bagel dataset and the naming idea for the code_bagel series of datasets. You can find both of their Hugging Face accounts linked below:
220
+
221
+ - https://huggingface.co/teknium
222
+ - https://huggingface.co/jondurbin
223
+
224
+ Another special thanks to Unsloth for being the main method of training for Replete-Coder. Below you can find their GitHub, as well as the special Replete-Ai secret sauce (Unsloth + QLoRA + GaLore) Colab code document that was used to train this model.
225
+
226
+ - https://github.com/unslothai/unsloth
227
+ - https://colab.research.google.com/drive/1eXGqy5M--0yW4u0uRnmNgBka-tDk2Li0?usp=sharing
228
+ __________________________________________________________________________________________________
229
+
230
+ ## Join the Replete-Ai Discord! We are a great and loving community!
231
+
232
+ - https://discord.gg/ZZbnsmVnjD
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "<|PAD_TOKEN|>": 151646,
3
+ "<|endoftext|>": 151643,
4
+ "<|im_end|>": 151645,
5
+ "<|im_start|>": 151644
6
+ }
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "rombodawg/Qwen2-1.5b-Reuploaded",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "max_position_embeddings": 131072,
14
+ "max_window_layers": 28,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 2,
19
+ "pad_token_id": 151646,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": 131072,
23
+ "tie_word_embeddings": true,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.41.2",
26
+ "unsloth_version": "2024.6",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.41.2"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9af48980efaa5bf239fdc15bfa05ba3ec5bf005419eaf75e1335887f514c45
3
+ size 3087467144
special_tokens_map.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|endoftext|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": "<|PAD_TOKEN|>"
14
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|PAD_TOKEN|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ }
36
+ },
37
+ "additional_special_tokens": [
38
+ "<|im_start|>",
39
+ "<|im_end|>"
40
+ ],
41
+ "bos_token": null,
42
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
43
+ "clean_up_tokenization_spaces": false,
44
+ "eos_token": "<|endoftext|>",
45
+ "errors": "replace",
46
+ "model_max_length": 131072,
47
+ "pad_token": "<|PAD_TOKEN|>",
48
+ "padding_side": "left",
49
+ "split_special_tokens": false,
50
+ "tokenizer_class": "Qwen2Tokenizer",
51
+ "unk_token": null
52
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff