Tonic committed on
Commit b14955e · 1 Parent(s): 95e8864

adds LaTeX formatting, better response parsing

Files changed (1): app.py (+103 −24)
app.py CHANGED
@@ -4,8 +4,23 @@ from threading import Thread
 import gradio as gr
 import spaces
 import re
+import logging
 from peft import PeftModel
 
+# ----------------------------------------------------------------------
+# KaTeX delimiter config for Gradio
+# ----------------------------------------------------------------------
+
+LATEX_DELIMS = [
+    {"left": "$$", "right": "$$", "display": True},
+    {"left": "$", "right": "$", "display": False},
+    {"left": "\\[", "right": "\\]", "display": True},
+    {"left": "\\(", "right": "\\)", "display": False},
+]
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+
 # Load the base model
 try:
     base_model = AutoModelForCausalLM.from_pretrained(
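For context, the `LATEX_DELIMS` entries use the delimiter schema (`left`/`right`/`display`) that Gradio's `Chatbot` component accepts for KaTeX rendering. A minimal standalone sketch of how such a config is consumed; the sample message is illustrative, not taken from this Space:

```python
# Minimal sketch: a standalone Chatbot that typesets LaTeX using the same
# delimiter config. The sample message is illustrative only.
import gradio as gr

LATEX_DELIMS = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "$", "right": "$", "display": False},
    {"left": "\\[", "right": "\\]", "display": True},
    {"left": "\\(", "right": "\\)", "display": False},
]

with gr.Blocks() as demo:
    # latex_delimiters tells the component which spans KaTeX should render
    gr.Chatbot(
        value=[{"role": "assistant", "content": "Euler: $e^{i\\pi} + 1 = 0$"}],
        type="messages",
        latex_delimiters=LATEX_DELIMS,
    )

if __name__ == "__main__":
    demo.launch()
```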
@@ -39,8 +54,47 @@ def format_conversation_history(chat_history):
         messages.append({"role": role, "content": content})
     return messages
 
+def format_analysis_response(text):
+    """Enhanced response formatting with better structure and LaTeX support."""
+    # Look for analysis section followed by final response
+    m = re.search(r"analysis(.*?)assistantfinal", text, re.DOTALL | re.IGNORECASE)
+    if m:
+        reasoning = m.group(1).strip()
+        response = text.split("assistantfinal", 1)[-1].strip()
+
+        # Clean up the reasoning section
+        reasoning = re.sub(r'^analysis\s*', '', reasoning, flags=re.IGNORECASE).strip()
+
+        # Format with improved structure
+        formatted = (
+            f"**🤔 Analysis & Reasoning:**\n\n"
+            f"*{reasoning}*\n\n"
+            f"---\n\n"
+            f"**💬 Final Response:**\n\n{response}"
+        )
+
+        # Ensure LaTeX delimiters are balanced
+        if formatted.count("$") % 2:
+            formatted += "$"
+
+        return formatted
+
+    # Fallback: clean up the text and return as-is
+    cleaned = re.sub(r'^analysis\s*', '', text, flags=re.IGNORECASE).strip()
+    if cleaned.count("$") % 2:
+        cleaned += "$"
+    return cleaned
+
 @spaces.GPU(duration=60)
 def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
+    if not input_data.strip():
+        yield "Please enter a prompt."
+        return
+
+    # Log the request
+    logging.info(f"[User] {input_data}")
+    logging.info(f"[System] {system_prompt} | Temp={temperature} | Max tokens={max_new_tokens}")
+
     new_message = {"role": "user", "content": input_data}
     system_message = [{"role": "system", "content": system_prompt}] if system_prompt else []
     processed_history = format_conversation_history(chat_history)
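To see what `format_analysis_response` produces, here is a quick check on a hypothetical raw string that mimics the `analysis ... assistantfinal` channel markers gpt-oss emits; it assumes the function defined above is in scope:

```python
# Hypothetical raw model output with gpt-oss-style channel markers.
# Assumes format_analysis_response (defined above) is in scope.
raw = (
    "analysisThe user wants the roots of $x^2 - 1$. "
    "Factor as $(x - 1)(x + 1)$."
    "assistantfinalThe roots are $x = 1$ and $x = -1$."
)

print(format_analysis_response(raw))
# **🤔 Analysis & Reasoning:**
#
# *The user wants the roots of $x^2 - 1$. Factor as $(x - 1)(x + 1)$.*
#
# ---
#
# **💬 Final Response:**
#
# The roots are $x = 1$ and $x = -1$.
```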
@@ -74,27 +128,40 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
     thread = Thread(target=model.generate, kwargs={**inputs, **generation_kwargs})
     thread.start()
 
-    # Stream the response
-    thinking = ""
-    final = ""
-    started_final = False
-
-    for chunk in streamer:
-        if not started_final:
-            if "assistantfinal" in chunk.lower():
-                split_parts = re.split(r'assistantfinal', chunk, maxsplit=1)
-                thinking += split_parts[0]
-                final += split_parts[1]
-                started_final = True
-            else:
-                thinking += chunk
-        else:
-            final += chunk
-
-    clean_thinking = re.sub(r'^analysis\s*', '', thinking).strip()
-    clean_final = final.strip()
-    formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
-    yield formatted
+    # Stream the response with enhanced formatting
+    collected_text = ""
+    buffer = ""
+    yielded_once = False
+
+    try:
+        for chunk in streamer:
+            if not chunk:
+                continue
+
+            collected_text += chunk
+            buffer += chunk
+
+            # Initial yield to show immediate response
+            if not yielded_once:
+                yield chunk
+                buffer = ""
+                yielded_once = True
+                continue
+
+            # Yield accumulated text periodically for smooth streaming
+            if "\n" in buffer or len(buffer) > 150:
+                # Use enhanced formatting for partial text
+                partial_formatted = format_analysis_response(collected_text)
+                yield partial_formatted
+                buffer = ""
+
+        # Final formatting with complete text
+        final_formatted = format_analysis_response(collected_text)
+        yield final_formatted
+
+    except Exception as e:
+        logging.exception("Generation streaming failed")
+        yield f"❌ Error during generation: {e}"
 
 demo = gr.ChatInterface(
     fn=generate_response,
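The streaming loop above follows the standard transformers pattern: `model.generate` blocks until completion, so it runs on a worker thread while the main thread drains a `TextIteratorStreamer`. A self-contained sketch of that pattern, with `gpt2` as an illustrative stand-in for the Space's actual model:

```python
# Thread + TextIteratorStreamer pattern, as used above. gpt2 is only an
# illustrative stand-in; the Space loads its own base model and adapter.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quick brown fox", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until done, so it runs off the main thread
thread = Thread(
    target=model.generate,
    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 40},
)
thread.start()

buffer = ""
for chunk in streamer:
    buffer += chunk
    # Flush on newlines or every ~150 chars, mirroring the app's heuristic
    if "\n" in buffer or len(buffer) > 150:
        print(buffer, end="", flush=True)
        buffer = ""
print(buffer, flush=True)

thread.join()
```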
@@ -112,24 +179,36 @@ demo = gr.ChatInterface(
         gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
     ],
     examples=[
-        [{"text": "Explain Newton laws clearly and concisely"}],
+        [{"text": "Explain Newton's laws clearly and concisely with mathematical formulas"}],
         [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
-        [{"text": "What are the benefits of open weight AI models"}],
+        [{"text": "What are the benefits of open weight AI models? Include analysis."}],
+        [{"text": "Solve this equation: $x^2 + 5x + 6 = 0$"}],
     ],
     cache_examples=False,
     type="messages",
     description="""
 # 🙋🏻‍♂️Welcome to 🌟Tonic's gpt-oss-20b Multilingual Reasoner Demo !
-Wait couple of seconds initially. You can adjust reasoning level in the system prompt like "Reasoning: high.
+
+✨ **Enhanced Features:**
+- 🧠 **Advanced Reasoning**: Detailed analysis and step-by-step thinking
+- 📊 **LaTeX Support**: Mathematical formulas rendered beautifully (use `$` or `$$`)
+- 🎯 **Improved Formatting**: Clear separation of reasoning and final responses
+- 📝 **Smart Logging**: Better error handling and request tracking
+
+💡 **Usage Tips:**
+- Adjust reasoning level in the system prompt (e.g., "Reasoning: high")
+- Use LaTeX for math: `$E = mc^2$` or `$$\\int x^2 dx$$`
+- Wait a couple of seconds initially for model loading
     """,
     fill_height=True,
     textbox=gr.Textbox(
         label="Query Input",
-        placeholder="Type your prompt"
+        placeholder="Type your prompt (supports LaTeX: $x^2 + y^2 = z^2$)"
     ),
     stop_btn="Stop Generation",
     multimodal=False,
-    theme=gr.themes.Soft()
+    theme=gr.themes.Soft(),
+    chatbot=gr.Chatbot(type="messages", latex_delimiters=LATEX_DELIMS)  # LaTeX delimiters live on the underlying Chatbot
 )
 
 if __name__ == "__main__":
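Because partial yields from the streamer are rendered live with the KaTeX delimiters configured above, a chunk cut off mid-formula would leave an odd number of `$` and break rendering for the rest of the message. The `$`-balancing fallback in `format_analysis_response` covers that case; the sample input below is hypothetical:

```python
# Fallback path: no "assistantfinal" marker yet, and a dangling "$" from a
# stream cut off mid-formula. One "$" is appended to keep KaTeX balanced.
# Assumes format_analysis_response (defined above) is in scope.
partial = "analysis Solving $x^2 + 5x + 6 = 0$ gives $x = -2"

print(format_analysis_response(partial))
# Solving $x^2 + 5x + 6 = 0$ gives $x = -2$
```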
 