darsoarafa committed (verified)
Commit 4fc5037 · Parent: 41326a3

Create app.py

Files changed (1): app.py (+348 −0)
app.py ADDED

import re
import threading

import gradio as gr
import spaces
import transformers
from transformers import pipeline

# Load the model and tokenizer
model_name = "meta-llama/Llama-3.1-8B-Instruct"
if gr.NO_RELOAD:
    pipe = pipeline(
        "text-generation",
        model=model_name,
        device_map="auto",
        torch_dtype="auto",
    )

# Marker for detecting the final answer
ANSWER_MARKER = "**Answer**"

# Sentences that start each step of the step-by-step reasoning
rethink_prepends = [
    "Now, I need to understand the following ",
    "In my opinion ",
    "Let me verify if the following is correct ",
    "Also, I should remember that ",
    "Another point to note is ",
    "And I also remember the following fact ",
    "Now I think I understand sufficiently ",
]
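# Note: these prefixes are appended to the conversation one at a time below, so
# each generation call continues the chat from a new leading phrase and the
# model fills in the body of that reasoning step itself.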

# Prompt suffix for generating the final answer
final_answer_prompt = """
Based on my reasoning process so far, I will answer the original question in the language it was asked:
{question}
Here is the conclusion I have reached:
{reasoning_conclusion}
Based on the above reasoning, my final answer:
{ANSWER_MARKER}
"""
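# The {question}, {reasoning_conclusion}, and {ANSWER_MARKER} placeholders are
# filled in with str.format() when the final answer is generated in
# bot_thinking() below.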

# Settings for displaying formulas
latex_delimiters = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "$", "right": "$", "display": False},
]


def reformat_math(text):
    """Convert MathJax delimiters to the KaTeX syntax Gradio uses.

    This is a temporary fix for displaying math formulas in Gradio; I haven't
    yet found a way to make it work as expected with other latex_delimiters.
    """
    text = re.sub(r"\\\[\s*(.*?)\s*\\\]", r"$$\1$$", text, flags=re.DOTALL)
    text = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", text, flags=re.DOTALL)
    return text
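

# For example, reformat_math(r"Euler: \( e^{i\pi} + 1 = 0 \)") returns
# "Euler: $e^{i\pi} + 1 = 0$", which Gradio's KaTeX renderer can display.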


def user_input(message, history_original, history_thinking):
    """Append the user's message to both histories and clear the input textbox."""
    return "", history_original + [
        gr.ChatMessage(role="user", content=message.replace(ANSWER_MARKER, ""))
    ], history_thinking + [
        gr.ChatMessage(role="user", content=message.replace(ANSWER_MARKER, ""))
    ]


def rebuild_messages(history: list):
    """Reconstruct the message list for the model, excluding intermediate thinking messages."""
    messages = []
    for h in history:
        if isinstance(h, dict) and not h.get("metadata", {}).get("title", False):
            messages.append(h)
        elif (
            isinstance(h, gr.ChatMessage)
            and h.metadata.get("title", None) is None
            and isinstance(h.content, str)
        ):
            messages.append({"role": h.role, "content": h.content})
    return messages
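

# Messages that carry a metadata "title" are the collapsible thinking bubbles
# rendered in the UI; filtering them out means the model only ever sees plain
# user/assistant turns when the conversation is replayed.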


@spaces.GPU
def bot_original(
    history: list,
    max_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Make the original model answer the question directly (no reasoning process)"""

    # Streamer for receiving tokens from the generation thread below
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )
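    # TextIteratorStreamer is an iterator: generation runs in a background
    # thread and pushes decoded text fragments into it, while this generator
    # function drains it and yields partial chat history to Gradio so the
    # answer streams live in the UI.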

    # Prepare an assistant message
    history.append(
        gr.ChatMessage(
            role="assistant",
            content="",
        )
    )

    # Messages to show in the current chat
    messages = rebuild_messages(history[:-1])  # exclude the last, empty message

    # The original model answers directly, without a reasoning loop
    t = threading.Thread(
        target=pipe,
        args=(messages,),
        kwargs=dict(
            max_new_tokens=max_num_tokens,
            streamer=streamer,
            do_sample=do_sample,
            temperature=temperature,
        ),
    )
    t.start()

    for token in streamer:
        history[-1].content += token
        history[-1].content = reformat_math(history[-1].content)
        yield history
    t.join()

    yield history


@spaces.GPU
def bot_thinking(
    history: list,
    max_num_tokens: int,
    final_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Make the model answer the question with an explicit reasoning process"""

    # Streamer for receiving tokens from the generation threads below
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )

    # Keep the question so it can be reinserted into the reasoning if needed
    question = history[-1]["content"]

    # Prepare an assistant message
    history.append(
        gr.ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "🧠 Thinking...", "status": "pending"},
        )
    )

    # Reasoning process shown in the current chat
    messages = rebuild_messages(history)

    # Stores the entire reasoning process
    full_reasoning = ""

    # Run the reasoning steps
    for i, prepend in enumerate(rethink_prepends):
        if i > 0:
            messages[-1]["content"] += "\n\n"
        messages[-1]["content"] += prepend.format(question=question)

        t = threading.Thread(
            target=pipe,
            args=(messages,),
            kwargs=dict(
                max_new_tokens=max_num_tokens,
                streamer=streamer,
                do_sample=do_sample,
                temperature=temperature,
            ),
        )
        t.start()

        # Rebuild the history with the new content
        history[-1].content += prepend.format(question=question)
        for token in streamer:
            history[-1].content += token
            history[-1].content = reformat_math(history[-1].content)
            yield history
        t.join()

        # Save the result of each reasoning step to full_reasoning
        full_reasoning = history[-1].content
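
    # Note: only the prepend phrases accumulate in `messages`; the text the
    # model generates at each step is collected in `history[-1].content`, so
    # each step continues from the question plus the prompt phrases rather
    # than from the model's earlier output.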

    # Reasoning complete; now generate the final answer
    history[-1].metadata = {"title": "💭 Thought Process", "status": "done"}

    # Extract the conclusion (roughly the last one or two paragraphs) from the reasoning
    reasoning_parts = full_reasoning.split("\n\n")
    reasoning_conclusion = "\n\n".join(reasoning_parts[-2:]) if len(reasoning_parts) > 2 else full_reasoning
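    # For example, if full_reasoning contains five blank-line-separated
    # paragraphs, only the last two are kept as the conclusion; shorter
    # reasoning is passed through unchanged.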

    # Add the final answer message
    history.append(gr.ChatMessage(role="assistant", content=""))

    # Construct the messages for the final answer
    final_messages = rebuild_messages(history[:-1])  # exclude the last, empty message
    final_prompt = final_answer_prompt.format(
        question=question,
        reasoning_conclusion=reasoning_conclusion,
        ANSWER_MARKER=ANSWER_MARKER,
    )
    final_messages[-1]["content"] += final_prompt

    # Generate the final answer
    t = threading.Thread(
        target=pipe,
        args=(final_messages,),
        kwargs=dict(
            max_new_tokens=final_num_tokens,
            streamer=streamer,
            do_sample=do_sample,
            temperature=temperature,
        ),
    )
    t.start()

    # Stream the final answer
    for token in streamer:
        history[-1].content += token
        history[-1].content = reformat_math(history[-1].content)
        yield history
    t.join()

    yield history


with gr.Blocks(fill_height=True, title="ThinkFlow") as demo:
    # Title and description
    gr.Markdown("# ThinkFlow")
    gr.Markdown("### A platform that adds step-by-step reasoning to existing LLMs without modifying the models themselves")

    # Features and benefits section
    with gr.Accordion("✨ Features & Benefits", open=True):
        gr.Markdown("""
        - **Enhanced Reasoning**: Transform any LLM into a step-by-step reasoning engine without model modifications
        - **Transparency**: Visualize the model's thought process alongside direct answers
        - **Improved Accuracy**: See how guided reasoning leads to more accurate solutions for complex problems
        - **Educational Tool**: Perfect for teaching critical thinking and problem-solving approaches
        - **Versatile Application**: Works with mathematical problems, logical puzzles, and complex questions
        - **Side-by-Side Comparison**: Compare standard model responses with reasoning-enhanced outputs
        """)

    with gr.Row(scale=1):
        with gr.Column(scale=2):
            gr.Markdown("## Before (Original)")
            chatbot_original = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
                label="Original Model (No Reasoning)",
            )

        with gr.Column(scale=2):
            gr.Markdown("## After (Thinking)")
            chatbot_thinking = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
                label="Model with Reasoning",
            )

    with gr.Row():
        # Define the msg textbox first so the examples below can reference it
        msg = gr.Textbox(
            submit_btn=True,
            label="",
            show_label=False,
            placeholder="Enter your question here.",
            autofocus=True,
        )

    # Examples section, placed after the msg variable is defined
    with gr.Accordion("EXAMPLES", open=False):
        examples = gr.Examples(
            examples=[
                "[Source: MATH-500] How many numbers among the first 100 positive integers are divisible by 3, 4, and 5?",
                "[Source: MATH-500] In the land of Ink, the money system is unique. 1 trinket equals 4 blinkets, and 3 blinkets equal 7 drinkits. What is the value of 56 drinkits in trinkets?",
                "[Source: MATH-500] The average age of Amy, Ben, and Chris is 6 years. Four years ago, Chris was the same age as Amy is now. Four years from now, Ben's age will be $\\frac{3}{5}$ of Amy's age at that time. How old is Chris now?",
                "[Source: MATH-500] A bag contains yellow and blue marbles. Currently, the ratio of blue marbles to yellow marbles is 4:3. After adding 5 blue marbles and removing 3 yellow marbles, the ratio becomes 7:3. How many blue marbles were in the bag before any were added?",
            ],
            inputs=msg,
        )

    with gr.Row():
        with gr.Column():
            gr.Markdown("## Parameter Adjustment")
            num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="Maximum tokens per reasoning step",
                interactive=True,
            )
            final_num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="Maximum tokens for the final answer",
                interactive=True,
            )
            do_sample = gr.Checkbox(True, label="Use sampling")
            temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="Temperature")

    # Community link at the bottom
    gr.Markdown("<p style='font-size: 12px;'>Community: <a href='https://discord.gg/openfreeai' target='_blank'>https://discord.gg/openfreeai</a></p>")

    # When the user submits a message, both bots respond in turn
    msg.submit(
        user_input,
        [msg, chatbot_original, chatbot_thinking],  # inputs
        [msg, chatbot_original, chatbot_thinking],  # outputs
    ).then(
        bot_original,
        [
            chatbot_original,
            num_tokens,
            do_sample,
            temperature,
        ],
        chatbot_original,  # save the new history in outputs
    ).then(
        bot_thinking,
        [
            chatbot_thinking,
            num_tokens,
            final_num_tokens,
            do_sample,
            temperature,
        ],
        chatbot_thinking,  # save the new history in outputs
    )
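
    # The .then() chain runs sequentially: first the textbox is cleared and the
    # user turn is appended to both histories, then the original model streams
    # its answer, and finally the reasoning-enhanced run streams its answer.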


if __name__ == "__main__":
    demo.queue().launch()