TheBloke committed on
Commit
c208d89
·
1 Parent(s): 029baf4

GPTQ model commit

Browse files
TokenBender_gradio_evolvedSeeker_inference.ipynb ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "e0f555c6-4f5d-4f2d-93ab-8106d2c470dc",
7
+ "metadata": {
8
+ "jupyter": {
9
+ "source_hidden": true
10
+ },
11
+ "id": "e0f555c6-4f5d-4f2d-93ab-8106d2c470dc"
12
+ },
13
+ "outputs": [],
14
+ "source": [
15
+ "!pip install -q accelerate sentencepiece torch transformers"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "code",
20
+ "execution_count": null,
21
+ "id": "1mncI66sFR9a",
22
+ "metadata": {
23
+ "id": "1mncI66sFR9a",
24
+ "jupyter": {
25
+ "source_hidden": true
26
+ }
27
+ },
28
+ "outputs": [],
29
+ "source": [
30
+ "!pip install -q --upgrade gradio"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "markdown",
35
+ "source": [
36
+ "### Inference with Gradio (no streaming)"
37
+ ],
38
+ "metadata": {
39
+ "id": "0q800RsXd6Nj"
40
+ },
41
+ "id": "0q800RsXd6Nj"
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "id": "26153855-215a-4289-b4ed-a1cb935ebe69",
47
+ "metadata": {
48
+ "jupyter": {
49
+ "source_hidden": true
50
+ },
51
+ "scrolled": true,
52
+ "id": "26153855-215a-4289-b4ed-a1cb935ebe69"
53
+ },
54
+ "outputs": [],
55
+ "source": [
56
+ "import gradio as gr\n",
57
+ "import torch\n",
58
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
59
+ "\n",
60
+ "base_model = \"TokenBender/evolvedSeeker_1_3\"\n",
61
+ "tokenizer = AutoTokenizer.from_pretrained(base_model)\n",
62
+ "model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float16)\n",
63
+ "model.config.use_cache = True\n",
64
+ "model = model.to('cuda:0')\n",
65
+ "\n",
66
+ "def predict(message, history):\n",
67
+ " history_transformed = [{'role': 'system', 'content': \"You are a helpful coding assistant, provide code based on the given query in context.\\n\"}]\n",
68
+ " for msg in history:\n",
69
+ " history_transformed.append({'role': 'user', 'content': msg[0]})\n",
70
+ " history_transformed.append({'role': 'assistant', 'content': msg[1]})\n",
71
+ "\n",
72
+ " history_transformed.append({'role': 'user', 'content': message})\n",
73
+ "\n",
74
+ " inputs = tokenizer.apply_chat_template(history_transformed, return_tensors=\"pt\").to(model.device)\n",
75
+ " outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, top_k=10, top_p=0.95, num_return_sequences=1, eos_token_id=32021)\n",
76
+ " response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)\n",
77
+ " yield response\n",
78
+ "\n",
79
+ "gr.ChatInterface(predict).queue().launch(share=True)\n"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "markdown",
84
+ "source": [
85
+ "### Inference without Gradio"
86
+ ],
87
+ "metadata": {
88
+ "id": "0gpUWgWtdhOi"
89
+ },
90
+ "id": "0gpUWgWtdhOi"
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": null,
95
+ "id": "7f5f98f1-430e-45a0-b4b3-6a3340b5efcf",
96
+ "metadata": {
97
+ "id": "7f5f98f1-430e-45a0-b4b3-6a3340b5efcf"
98
+ },
99
+ "outputs": [],
100
+ "source": [
101
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
102
+ "tokenizer = AutoTokenizer.from_pretrained(\"TokenBender/evolvedSeeker_1_3\", trust_remote_code=True)\n",
103
+ "model = AutoModelForCausalLM.from_pretrained(\"TokenBender/evolvedSeeker_1_3\", trust_remote_code=True).cuda()\n",
104
+ "messages=[\n",
105
+ " {'role': 'system', 'content': \"You are EvolvedSeeker, a model fine-tuned by TokenBender for coding assistant role. Help the user in a friendly, curious manner.\"},\n",
106
+ " { 'role': 'user', 'content': \"Hi, who are you?.\"}\n",
107
+ "]\n",
108
+ "inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(model.device)\n",
109
+ "# 32021 is the id of <|EOT|> token\n",
110
+ "outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, top_k=10, top_p=0.95, num_return_sequences=1, eos_token_id=32021)\n",
111
+ "print(tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True))"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "markdown",
116
+ "source": [
117
+ "### Chat further"
118
+ ],
119
+ "metadata": {
120
+ "id": "TsFjwbtadqsJ"
121
+ },
122
+ "id": "TsFjwbtadqsJ"
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": null,
127
+ "id": "a15a4f07-846f-4b89-bdcc-21b7c182e614",
128
+ "metadata": {
129
+ "id": "a15a4f07-846f-4b89-bdcc-21b7c182e614"
130
+ },
131
+ "outputs": [],
132
+ "source": [
133
+ "messages=[\n",
134
+ " {'role': 'system', 'content': \"You are EvolvedSeeker, a model fine-tuned by TokenBender for coding assistant role. Help the user in a friendly, curious manner.\"},\n",
135
+ " { 'role': 'user', 'content': \"Write a python program to create a snake game.\"}\n",
136
+ "]\n",
137
+ "inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(model.device)\n",
138
+ "# 32021 is the id of <|EOT|> token\n",
139
+ "outputs = model.generate(inputs, max_new_tokens=2048, do_sample=False, top_k=10, top_p=0.95, num_return_sequences=1, eos_token_id=32021)\n",
140
+ "print(tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True))"
141
+ ]
142
+ }
143
+ ],
144
+ "metadata": {
145
+ "accelerator": "GPU",
146
+ "colab": {
147
+ "gpuType": "T4",
148
+ "machine_shape": "hm",
149
+ "provenance": []
150
+ },
151
+ "kernelspec": {
152
+ "display_name": "Python 3 (ipykernel)",
153
+ "language": "python",
154
+ "name": "python3"
155
+ },
156
+ "language_info": {
157
+ "codemirror_mode": {
158
+ "name": "ipython",
159
+ "version": 3
160
+ },
161
+ "file_extension": ".py",
162
+ "mimetype": "text/x-python",
163
+ "name": "python",
164
+ "nbconvert_exporter": "python",
165
+ "pygments_lexer": "ipython3",
166
+ "version": "3.10.13"
167
+ }
168
+ },
169
+ "nbformat": 4,
170
+ "nbformat_minor": 5
171
+ }
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/workspace/process/tokenbender_evolvedseeker_1_3/source",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "bos_token_id": 32013,
8
+ "eos_token_id": 32021,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 2048,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 5504,
13
+ "max_position_embeddings": 16384,
14
+ "model_type": "llama",
15
+ "num_attention_heads": 16,
16
+ "num_hidden_layers": 24,
17
+ "num_key_value_heads": 16,
18
+ "pad_token_id": 0,
19
+ "pretraining_tp": 1,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": {
22
+ "factor": 4.0,
23
+ "type": "linear"
24
+ },
25
+ "rope_theta": 100000,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "float16",
28
+ "transformers_version": "4.35.2",
29
+ "use_cache": true,
30
+ "vocab_size": 32256,
31
+ "quantization_config": {
32
+ "bits": 4,
33
+ "group_size": 128,
34
+ "damp_percent": 0.1,
35
+ "desc_act": true,
36
+ "sym": true,
37
+ "true_sequential": true,
38
+ "model_name_or_path": null,
39
+ "model_file_base_name": "model",
40
+ "quant_method": "gptq"
41
+ }
42
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 32013,
4
+ "eos_token_id": 32021,
5
+ "transformers_version": "4.35.2"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab922f6e13bd333ffedff9eeed9acb4f7ce9bbe8158b14aff29d3f24f6a3419a
3
+ size 898108736
quantize_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "damp_percent": 0.1,
5
+ "desc_act": true,
6
+ "sym": true,
7
+ "true_sequential": true,
8
+ "model_name_or_path": null,
9
+ "model_file_base_name": "model"
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin▁of▁sentence|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|EOT|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|end▁of▁sentence|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "32000": {
4
+ "content": "õ",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "32001": {
12
+ "content": "÷",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "32002": {
20
+ "content": "Á",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "32003": {
28
+ "content": "ý",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "32004": {
36
+ "content": "À",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "32005": {
44
+ "content": "ÿ",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "32006": {
52
+ "content": "ø",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "32007": {
60
+ "content": "ú",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "32008": {
68
+ "content": "þ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "32009": {
76
+ "content": "ü",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "32010": {
84
+ "content": "ù",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "32011": {
92
+ "content": "ö",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "32012": {
100
+ "content": "û",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "32013": {
108
+ "content": "<|begin▁of▁sentence|>",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32014": {
116
+ "content": "<|end▁of▁sentence|>",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32015": {
124
+ "content": "<|fim▁hole|>",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "32016": {
132
+ "content": "<|fim▁begin|>",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "32017": {
140
+ "content": "<|fim▁end|>",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "32018": {
148
+ "content": "<pad>",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "32019": {
156
+ "content": "<|User|>",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "32020": {
164
+ "content": "<|Assistant|>",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "32021": {
172
+ "content": "<|EOT|>",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ }
179
+ },
180
+ "bos_token": "<|begin▁of▁sentence|>",
181
+ "chat_template": "{%- set found_item = false -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set found_item = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not found_item -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{{'### Response:\\n'}}\n",
182
+ "clean_up_tokenization_spaces": false,
183
+ "eos_token": "<|EOT|>",
184
+ "legacy": true,
185
+ "model_max_length": 16384,
186
+ "pad_token": "<|end▁of▁sentence|>",
187
+ "sp_model_kwargs": {},
188
+ "tokenizer_class": "LlamaTokenizer",
189
+ "unk_token": null,
190
+ "use_default_system_prompt": false
191
+ }