Pra-tham committed on
Commit
de55b15
·
1 Parent(s): 9997a25

added model path

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. backup.py +97 -0
  3. utils.py +313 -0
app.py CHANGED
@@ -35,7 +35,7 @@ from transformers import (
35
  n_repetitions = 1
36
  TOTAL_TOKENS = 2048
37
 
38
- MODEL_PATH = "/kaggle/input/deepseek-math"
39
  #"/kaggle/input/gemma/transformers/7b-it/1"
40
 
41
  # DEEP = True
 
35
  n_repetitions = 1
36
  TOTAL_TOKENS = 2048
37
 
38
+ MODEL_PATH = "Pra-tham/quant_deepseekmath"
39
  #"/kaggle/input/gemma/transformers/7b-it/1"
40
 
41
  # DEEP = True
backup.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ # from huggingface_hub import InferenceClient
3
+
4
+ """
5
+ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
+ """
7
+ # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, set_seed
9
+ # from accelerate import infer_auto_device_map as iadm
10
+
11
+ import torch
12
+ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
13
+
14
# Hub identifier of the checkpoint that backs this demo.
model_name = "deepseek-ai/deepseek-math-7b-instruct"

# Tokenizer and weights are fetched once, when the module is imported.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Use the generation settings published with the checkpoint, and pad with the
# end-of-sequence token id.
model.generation_config = GenerationConfig.from_pretrained(model_name)
model.generation_config.pad_token_id = model.generation_config.eos_token_id
19
+
20
+
21
+
22
+
23
def evaluate_response(problem):
    """Generate the model's answer to a single question.

    The question is extended with a fixed instruction asking for step-by-step
    reasoning and a ``\\boxed{}`` final answer, wrapped as one user turn of the
    chat template, and passed to the module-level ``model``.

    Args:
        problem: The question text entered by the user.

    Returns:
        The decoded completion only (prompt tokens are sliced off), with
        special tokens removed. Generation is capped at 100 new tokens.
    """
    instruction = '\nPlease reason step by step, and put your final answer within \\boxed{}.'
    chat = [
        {"role": "user", "content": problem + instruction}
    ]
    prompt_ids = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    generated = model.generate(prompt_ids.to(model.device), max_new_tokens=100)

    # Everything up to the prompt length is the echoed input; keep the rest.
    prompt_length = prompt_ids.shape[1]
    completion_ids = generated[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)
35
+
36
+ # def respond(
37
+ # evaluate_response,
38
+ # history: list[tuple[str, str]],
39
+ # system_message,
40
+ # max_tokens,
41
+ # temperature,
42
+ # top_p,
43
+ # ):
44
+ # messages = [{"role": "system", "content": system_message}]
45
+
46
+ # for val in history:
47
+ # if val[0]:
48
+ # messages.append({"role": "user", "content": val[0]})
49
+ # if val[1]:
50
+ # messages.append({"role": "assistant", "content": val[1]})
51
+
52
+ # messages.append({"role": "user", "content": message})
53
+
54
+ # response = ""
55
+
56
+ # for message in client.chat_completion(
57
+ # messages,
58
+ # max_tokens=max_tokens,
59
+ # stream=True,
60
+ # temperature=temperature,
61
+ # top_p=top_p,
62
+ # ):
63
+ # token = message.choices[0].delta.content
64
+
65
+ # response += token
66
+ # yield response
67
+
68
+ """
69
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
70
+ """
71
+ # demo = gr.ChatInterface(
72
+ # evaluate_response,
73
+ # additional_inputs=[
74
+ # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
75
+ # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
76
+ # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
77
+ # gr.Slider(
78
+ # minimum=0.1,
79
+ # maximum=1.0,
80
+ # value=0.95,
81
+ # step=0.05,
82
+ # label="Top-p (nucleus sampling)",
83
+ # ),
84
+ # ],
85
+ # )
86
+
87
# One question textbox in, one answer textbox out, served by evaluate_response.
demo = gr.Interface(
    fn=evaluate_response,
    inputs=[gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
    title="Question and Answer Interface",
    description="Enter a question.",
)


# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
utils.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import math
3
+ import random
4
+
5
+ from collections import defaultdict
6
+
7
+
8
+
9
+
10
def naive_parse(answer):
    """Return the last run of consecutive ASCII digits found in ``answer``.

    Used as a fallback when no explicitly marked answer is present: whatever
    number appears last in the text is taken as the answer.

    Args:
        answer: Text to scan.

    Returns:
        The digits of the last number in ``answer`` as a string, in their
        original order, or ``""`` when the text contains no digit.
    """
    # One linear scan collects every digit run; only the final one matters.
    # ``[0-9]`` (not ``\d``) keeps the match restricted to ASCII digits.
    digit_runs = re.findall(r'[0-9]+', answer)
    return digit_runs[-1] if digit_runs else ''
24
+
25
+
26
+ import re
27
+ import sys
28
+ import subprocess
29
+
30
def return_last_print(output, n):
    """Return line ``n`` of ``output`` after stripping surrounding whitespace.

    Args:
        output: Captured text, typically the stdout of an executed script.
        n: Index of the wanted line; negative values count from the end
            (``-1`` is the last printed line).

    Returns:
        The selected line, or ``""`` when ``output`` has no line at index
        ``n``.
    """
    lines = output.strip().split('\n')
    # ``str.split`` always yields at least one element, so testing the list
    # for emptiness can never trigger the fallback; the index itself is what
    # has to be guarded.
    try:
        return lines[n]
    except IndexError:
        return ""
36
+
37
def process_code(code, return_shell_output=False):
    """Run generated Python code in a subprocess and report its last output.

    Every ``symbols(...)`` call in ``code`` that does not already mention
    ``real`` gets ``real=True`` appended. The code is then written to
    ``code.py`` in the current working directory and executed with the
    running interpreter under a 7 second limit.

    Args:
        code: Python source to execute.
        return_shell_output: When true, the code is indented and wrapped in a
            ``try``/``except`` that imports sympy and prints the exception
            followed by ``FAIL`` on error, and the raw last printed line is
            returned together with a status flag.

    Returns:
        With ``return_shell_output`` false: the last printed line evaluated,
        rounded and reduced modulo 1000, or ``-1`` on any failure.
        With ``return_shell_output`` true: a ``(value, status)`` tuple, where
        ``value`` is the last printed line (or the line before ``FAIL``) and
        ``status`` is false when the script printed ``FAIL``; ``(-1, False)``
        when the subprocess itself failed or timed out.
    """

    def repl(match):
        # Leave calls that already constrain the domain untouched.
        if "real" in match.group():
            return match.group()
        return "{}{}".format(match.group()[:-1], ', real=True)')

    code = re.sub(r"symbols\([^)]+\)", repl, code)

    if return_shell_output:
        # Indent the snippet so it becomes the body of the try block below.
        code = code.replace('\n', '\n ')
        # Add a try...except block
        code = "\ntry:\n from sympy import *\n{}\nexcept Exception as e:\n print(e)\n print('FAIL')\n".format(code)
    else:
        print(code)

    # Python reads source files as UTF-8 by default, so write it that way
    # regardless of the platform's locale encoding.
    with open('code.py', 'w', encoding='utf-8') as fout:
        fout.write(code)

    try:
        # An argument list avoids shell quoting problems (e.g. spaces in the
        # interpreter path) and ``timeout=`` removes the dependency on the
        # external ``timeout`` binary. A timeout or non-zero exit raises and
        # is handled below, exactly like any other failure.
        shell_output = subprocess.check_output(
            [sys.executable, 'code.py'], timeout=7
        ).decode('utf8')
        return_value = return_last_print(shell_output, -1)
        print(shell_output)
        if return_shell_output:
            if return_value == 'FAIL':
                CODE_STATUS = False
                # The line before FAIL is the printed exception message.
                return_value = return_last_print(shell_output, -2)
                if "not defined" in return_value:
                    return_value += '\nTry checking the formatting and imports'
            else:
                CODE_STATUS = True
            return return_value, CODE_STATUS
        # NOTE(review): eval() runs text printed by generated code; it is kept
        # because the printed value may be an expression rather than a
        # literal, but it should only ever see output from a sandboxed run.
        code_output = round(float(eval(return_value))) % 1000
    except Exception as e:
        print(e, 'shell_output')
        code_output = -1

    if return_shell_output:
        CODE_STATUS = code_output != -1
        return code_output, CODE_STATUS

    return code_output
84
+
85
+
86
def process_text_output(output):
    """Extract the final integer answer from generated text, modulo 1000.

    The last ``\\boxed{<digits>}`` occurrence wins. When there is none, the
    last number appearing anywhere in the text (via ``naive_parse``) is used.

    Args:
        output: Text produced by the model.

    Returns:
        The answer reduced modulo 1000, or ``-1`` when no number is found or
        parsing fails.
    """
    try:
        boxed = re.findall(r'\\boxed\{(\d+)\}', output)
        print('BOXED', boxed)

        digits = boxed[-1] if boxed else naive_parse(output)
        print('BOXED FINAL', digits)

        if not digits:
            return -1
        # ``digits`` contains digits only, so int() is sufficient. Unlike
        # eval() it accepts leading zeros ("007"), and skipping the float
        # round-trip keeps the last three digits of large integers exact.
        return int(digits) % 1000
    except Exception as e:
        print(e)
        print('ERROR PARSING TEXT')
        return -1
110
+
111
+ from collections import defaultdict
112
+ from collections import Counter
113
def predict(problem):
    """Sample solutions for ``problem`` and return the most frequent answer.

    For each of ``n_repetitions`` rounds a prompt template is drawn from
    ``promplt_options`` (weighted by how often each kind of answer has
    succeeded so far), the model generates until a stop word, and any Python
    block it wrote is executed with ``process_code`` and its output fed back
    into the prompt before generation resumes. Each round can contribute two
    votes: the value parsed from the text (``process_text_output``) and the
    value of the last code output. The answer with the most votes wins.

    NOTE(review): besides the names in the ``global`` statement this function
    reads ``tqdm``, ``np``, ``torch``, ``gc``, ``time``, ``choice``,
    ``stop_words`` and ``stopping_criteria`` from module scope; none of them
    is imported or defined in the visible part of this file - confirm they
    are provided before calling.

    Args:
        problem: Problem statement inserted into the drawn prompt template.

    Returns:
        The most common answer seen (an integer modulo 1000), or ``-1`` when
        no round produced a usable answer.
    """
    temperature = 0.9
    # NOTE(review): nucleus-sampling thresholds normally lie in (0, 1];
    # confirm that 3.0 is intended.
    top_p = 3.0

    temperature_coding = 0.9
    top_p_coding = 3.0

    total_results = {}
    total_answers = {}
    best_stats = {}
    total_outputs = {}
    question_type_counts = {}
    starting_counts = (2,3)
    # Only one problem is handled per call; ``i`` is its bookkeeping key.
    i = 0

    global n_repetitions,TOTAL_TOKENS,model,tokenizer,USE_PAST_KEY,NOTEBOOK_START_TIME,promplt_options,code,cot

    def _ends_with_stop_word(text):
        # True when the text stops exactly on one of the stop words.
        return any(text[-len(stop_word):] == stop_word for stop_word in stop_words)

    def _first_sequence(generated):
        # generate() returns a structured result or a plain tensor depending
        # on return_dict_in_generate (driven by USE_PAST_KEY).
        if USE_PAST_KEY:
            return generated.sequences[0]
        return generated[0]

    for jj in tqdm(range(n_repetitions)):
        best, best_count = best_stats.get(i,(-1,-1))
        if best_count>np.sqrt(jj):
            print("SKIPPING CAUSE ALREADY FOUND BEST")
            continue

        outputs = total_outputs.get(i,[])
        text_answers, code_answers = question_type_counts.get(i,starting_counts)
        results = total_results.get(i,[])
        answers = total_answers.get(i,[])

        for _ in range(5):
            torch.cuda.empty_cache()
            gc.collect()
            time.sleep(0.2)

        try:
            ALREADY_GEN = 0
            code_error = None
            code_error_count = 0
            code_output = -1
            counts = np.array([text_answers,code_answers])

            draw = choice(promplt_options, 1,
                          p=counts/counts.sum())

            initial_message = draw[0].format(problem,"{}")
            prompt = f"User: {initial_message}"

            current_printed = len(prompt)
            print(f"{jj}_{prompt}\n")

            model_inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
            input_len = len(model_inputs['input_ids'][0])

            generation_output = model.generate(**model_inputs,
                                               max_new_tokens=TOTAL_TOKENS-ALREADY_GEN,
                                               return_dict_in_generate=USE_PAST_KEY,
                                               do_sample = True,
                                               temperature = temperature,
                                               top_p = top_p,
                                               num_return_sequences=1, stopping_criteria = stopping_criteria)

            output_ids = _first_sequence(generation_output)
            decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True)
            print(f"{decoded_output[current_printed:]}\n")
            current_printed += len(decoded_output[current_printed:])
            cumulative_code = ""

            stop_word_cond = _ends_with_stop_word(decoded_output)

            # Keep resuming generation for as long as the model pauses on a
            # stop word and the token budget is not exhausted.
            while (stop_word_cond) and (ALREADY_GEN<(TOTAL_TOKENS)):

                if (decoded_output[-len("```python"):]=="```python"):
                    # The model is about to write code: continue as-is.
                    temperature_inner=temperature_coding
                    top_p_inner = top_p_coding
                    prompt = decoded_output
                else:
                    # The model just finished a code block: run it and feed
                    # the result back.
                    temperature_inner=temperature
                    top_p_inner = top_p
                    try:
                        if (decoded_output[-len("``````output"):]=="``````output"):
                            code_text = decoded_output.split('```python')[-1].split("``````")[0]
                        else:
                            code_text = decoded_output.split('```python')[-1].split("```")[0]

                        cumulative_code+=code_text
                        code_output, CODE_STATUS = process_code(cumulative_code, return_shell_output=True)
                        print('CODE RESULTS', code_output)

                        if code_error==code_output:
                            code_error_count+=1
                        else:
                            code_error=code_output
                            code_error_count = 0

                        if not CODE_STATUS:
                            # Drop the failing snippet again. Slicing by an
                            # explicit end index keeps earlier code intact
                            # even when code_text is empty ([:-0] would wipe
                            # everything).
                            cumulative_code = cumulative_code[:len(cumulative_code)-len(code_text)]

                            if code_error_count>=1:
                                print("REPEATED ERRORS")
                                break

                    except Exception as e:
                        print(e)
                        print('ERROR PARSING CODE')
                        code_output = -1

                    if code_output!=-1:
                        if (decoded_output[-len(")\n```"):]==")\n```"):
                            prompt = decoded_output+'```output\n'+str(code_output)+'\n```\n'
                        else:
                            prompt = decoded_output+'\n'+str(code_output)+'\n```\n'
                    else:
                        prompt = decoded_output
                        cumulative_code=""

                model_inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
                ALREADY_GEN = len(model_inputs['input_ids'][0])-input_len

                if USE_PAST_KEY:
                    old_values = generation_output.past_key_values
                else:
                    old_values = None

                generation_output = model.generate(**model_inputs,
                                                   max_new_tokens=TOTAL_TOKENS-ALREADY_GEN,
                                                   return_dict_in_generate=USE_PAST_KEY,
                                                   past_key_values=old_values,
                                                   do_sample = True,
                                                   temperature = temperature_inner,
                                                   top_p = top_p_inner,
                                                   num_return_sequences=1, stopping_criteria = stopping_criteria)
                output_ids = _first_sequence(generation_output)
                decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True)
                print(f"\nINTERMEDIATE OUT :\n{decoded_output[current_printed:]}\n")
                current_printed+=len(decoded_output[current_printed:])

                stop_word_cond = _ends_with_stop_word(decoded_output)

            output_ids = _first_sequence(generation_output)

            raw_output = tokenizer.decode(output_ids[input_len:], skip_special_tokens=True)
            result_output = process_text_output(raw_output)

            try:
                # NOTE(review): eval() of text printed by generated code; a
                # non-string value (e.g. the -1 sentinel) raises and is
                # mapped to -1 below.
                code_output = round(float(eval(code_output))) % 1000
            except Exception as e:
                print(e,'final_eval')
                code_output = -1
        except Exception as e:
            print(e,"5")
            result_output, code_output = -1, -1

        if code_output!=-1:
            outputs.append(code_output)
            code_answers+=1

        if result_output!=-1:
            outputs.append(result_output)
            text_answers+=1

        answer_found = False
        if len(outputs) > 0:
            occurrences = Counter(outputs).most_common()
            print(occurrences)
            if occurrences[0][1] > best_count:
                print("GOOD ANSWER UPDATED!")
                best = occurrences[0][0]
                best_count = occurrences[0][1]
            answer_found = occurrences[0][1] > 5

        results.append(result_output)
        answers.append(code_output)

        # Record this round before any early exit so the returned answer
        # reflects the votes that triggered it.
        best_stats[i] = (best, best_count)
        question_type_counts[i] = (text_answers, code_answers)
        total_outputs[i] = outputs

        total_results[i] = results
        total_answers[i] = answers

        if answer_found:
            print("ANSWER FOUND!")
            break

        print("code_answers",code_answers-starting_counts[1],"text_answers",text_answers-starting_counts[0])

    # Fall back to -1 when no round recorded anything (e.g. zero repetitions).
    return best_stats.get(i, (-1, -1))[0]