Pra-tham committed on
Commit
9997a25
·
1 Parent(s): d743525
Files changed (2) hide show
  1. app.py +99 -69
  2. requirements.txt +2 -1
app.py CHANGED
@@ -10,82 +10,112 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, set_se
10
 
11
  import torch
12
  from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 
 
 
 
 
 
 
 
 
13
 
14
- model_name = "deepseek-ai/deepseek-math-7b-instruct"
15
- tokenizer = AutoTokenizer.from_pretrained(model_name)
16
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
17
- model.generation_config = GenerationConfig.from_pretrained(model_name)
18
- model.generation_config.pad_token_id = model.generation_config.eos_token_id
19
-
20
-
21
 
 
 
 
 
 
 
 
22
 
23
- def evaluate_response(problem):
24
- # problem=b'what is angle x if angle y is 60 degree and angle z in 60 degree of a traingle'
25
- problem=problem+'\nPlease reason step by step, and put your final answer within \\boxed{}.'
26
- messages = [
27
- {"role": "user", "content": problem}
28
- ]
29
- input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
30
- outputs = model.generate(input_tensor.to(model.device), max_new_tokens=100)
31
 
32
- result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
33
- # result_output, code_output = process_output(raw_output)
34
- return result
35
 
36
- def respond(
37
- evaluate_response,
38
- history: list[tuple[str, str]],
39
- system_message,
40
- max_tokens,
41
- temperature,
42
- top_p,
43
- ):
44
- messages = [{"role": "system", "content": system_message}]
45
-
46
- for val in history:
47
- if val[0]:
48
- messages.append({"role": "user", "content": val[0]})
49
- if val[1]:
50
- messages.append({"role": "assistant", "content": val[1]})
51
-
52
- messages.append({"role": "user", "content": message})
53
-
54
- response = ""
55
-
56
- for message in client.chat_completion(
57
- messages,
58
- max_tokens=max_tokens,
59
- stream=True,
60
- temperature=temperature,
61
- top_p=top_p,
62
- ):
63
- token = message.choices[0].delta.content
64
-
65
- response += token
66
- yield response
67
-
68
- """
69
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
70
- """
71
- # demo = gr.ChatInterface(
72
- # evaluate_response,
73
- # additional_inputs=[
74
- # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
75
- # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
76
- # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
77
- # gr.Slider(
78
- # minimum=0.1,
79
- # maximum=1.0,
80
- # value=0.95,
81
- # step=0.05,
82
- # label="Top-p (nucleus sampling)",
83
- # ),
84
- # ],
85
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  demo = gr.Interface(
88
- fn=evaluate_response,
89
  inputs=[gr.Textbox(label="Question")],
90
  outputs=gr.Textbox(label="Answer"),
91
  title="Question and Answer Interface",
 
10
 
11
  import torch
12
  from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
13
+ from transformers import BitsAndBytesConfig
14
+ from tqdm import tqdm
15
+ import os
16
import gc

import torch
import transformers  # needed for the `transformers.pipeline` call below
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    StoppingCriteria,
    set_seed,
)

# 4-bit NF4 weights, bfloat16 compute, double quantization.
# Defined once; the script previously built this identical config twice.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Not referenced in the visible part of the file; presumably read by the
# prediction code further down -- TODO confirm.
USE_PAST_KEY = True

# Turn off PyTorch's memory-efficient scaled-dot-product-attention backend.
torch.backends.cuda.enable_mem_efficient_sdp(False)

n_repetitions = 1
TOTAL_TOKENS = 2048

# NOTE(review): this is a Kaggle input path; confirm it exists on the host
# that actually runs this app.
MODEL_PATH = "/kaggle/input/deepseek-math"
# "/kaggle/input/gemma/transformers/7b-it/1"

# DEEP = True

config = AutoConfig.from_pretrained(MODEL_PATH)
# NOTE(review): gradient checkpointing normally only matters for training;
# kept as in the original -- confirm it is wanted for inference.
config.gradient_checkpointing = True

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="sequential",
    torch_dtype="auto",
    trust_remote_code=True,
    quantization_config=quantization_config,
    config=config,
)

# The original call passed `device_map=device_map`, but no `device_map`
# variable is defined in the visible script, so it raised NameError.  The
# model object is already placed on devices by `from_pretrained` above, so the
# pipeline simply wraps it.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype="auto",
)
71
+ from transformers import StoppingCriteriaList
72
+
73
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation when the first sequence ends with one of the stop sequences.

    Args:
        stops: Iterable of token-id tensors, one per stop sequence.  ``None``
            (the default) means no stop sequences.
        encounters: Unused; kept so existing callers that pass it keep working.
    """

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        # Flatten every stop to 1-D so a single-token stop (a 0-d tensor after
        # `.squeeze()`) still has a usable length.  Device placement is
        # deferred to call time instead of hard-coding "cuda".
        self.stops = [
            torch.as_tensor(stop).reshape(-1)
            for stop in (stops if stops is not None else [])
        ]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True if row 0 of ``input_ids`` ends with any stop sequence.

        Only the first row of the batch is inspected.  ``scores`` and any
        extra keyword arguments are accepted for interface compatibility and
        ignored.
        """
        sequence = input_ids[0]

        # Move the stops next to the generated ids once, then reuse them.
        if self.stops and self.stops[0].device != sequence.device:
            self.stops = [stop.to(sequence.device) for stop in self.stops]

        for stop in self.stops:
            width = stop.numel()
            # An empty stop can never match, and a stop longer than the
            # sequence cannot be its suffix (comparing them would raise on
            # mismatched shapes).
            if width == 0 or width > sequence.numel():
                continue
            if bool(torch.all(torch.eq(stop, sequence[-width:]))):
                return True
        return False
84
+
85
+
86
# Text markers whose token ids are handed to StoppingCriteriaSub as stop sequences.
stop_words = ["```output", "```python", "```\nOutput", ")\n```", "``````output"]

# The tokenizer returns ids shaped (1, n).  Take row 0 rather than calling
# `.squeeze()`, which would collapse a single-token stop word to a 0-d tensor
# that has no length.
stop_words_ids = [
    tokenizer(stop_word, return_tensors="pt", add_special_tokens=False)["input_ids"][0]
    for stop_word in stop_words
]
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
89
+
90
+ code = """Below is a math problem you are to solve (positive numerical answer):
91
+ \"{}\"
92
+ To accomplish this, first determine a sympy-based approach for solving the problem by listing each step to take and what functions need to be called in each step. Be clear so even an idiot can follow your instructions, and remember, your final answer should be positive integer, not an algebraic expression!
93
+ Write the entire script covering all the steps (use comments and document it well) and print the result. After solving the problem, output the final numerical answer within \\boxed{}.
94
+
95
+ Approach:"""
96
+
97
+
98
+ cot = """Below is a math problem you are to solve (positive numerical answer!):
99
+ \"{}\"
100
+ Analyze this problem and think step by step to come to a solution with programs. After solving the problem, output the final numerical answer within \\boxed{}.\n\n"""
101
+
102
+ promplt_options = [code,cot]
103
+
104
+ import re
105
+ from collections import defaultdict
106
+ from collections import Counter
107
+
108
+ from numpy.random import choice
109
+ import numpy as np
110
+
111
+ tool_instruction = '\n\nPlease integrate natural language reasoning with programs to solve the above problem, and put your final numerical answer within \\boxed{}.\nNote that the intermediary calculations may be real numbers, but the final numercal answer would always be an integer.'
112
+
113
+
114
+ #tool_instruction = " The answer should be given as a non-negative modulo 1000."
115
+ #tool_instruction += '\nPlease integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.'
116
 
117
  demo = gr.Interface(
118
+ fn=predict,
119
  inputs=[gr.Textbox(label="Question")],
120
  outputs=gr.Textbox(label="Answer"),
121
  title="Question and Answer Interface",
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  huggingface_hub==0.22.2
2
  transformers==4.40.0
3
  torch==2.3.1
4
- accelerate==0.31.0
 
 
1
  huggingface_hub==0.22.2
2
  transformers==4.40.0
3
  torch==2.3.1
4
+ accelerate==0.31.0
5
+ bitsandbytes==0.43.1