Tonic committed on
Commit b14955e · 1 Parent(s): 95e8864

adds LaTeX formatting, better response parsing

Files changed (1): app.py (+103 −24)
app.py CHANGED
@@ -4,8 +4,23 @@ from threading import Thread
 import gradio as gr
 import spaces
 import re
+import logging
 from peft import PeftModel
 
+# ----------------------------------------------------------------------
+# KaTeX delimiter config for Gradio
+# ----------------------------------------------------------------------
+
+LATEX_DELIMS = [
+    {"left": "$$", "right": "$$", "display": True},
+    {"left": "$", "right": "$", "display": False},
+    {"left": "\\[", "right": "\\]", "display": True},
+    {"left": "\\(", "right": "\\)", "display": False},
+]
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+
 # Load the base model
 try:
     base_model = AutoModelForCausalLM.from_pretrained(
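For context, the `LATEX_DELIMS` entries use the delimiter schema (`left`/`right`/`display`) that Gradio's `Chatbot` component accepts for KaTeX rendering. A minimal standalone sketch of how such a config is consumed; the sample message is illustrative, not taken from this Space:

```python
# Minimal sketch: a standalone Chatbot that typesets LaTeX using the same
# delimiter config. The sample message is illustrative only.
import gradio as gr

LATEX_DELIMS = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "$", "right": "$", "display": False},
    {"left": "\\[", "right": "\\]", "display": True},
    {"left": "\\(", "right": "\\)", "display": False},
]

with gr.Blocks() as demo:
    # latex_delimiters tells the component which spans KaTeX should render
    gr.Chatbot(
        value=[{"role": "assistant", "content": "Euler: $e^{i\\pi} + 1 = 0$"}],
        type="messages",
        latex_delimiters=LATEX_DELIMS,
    )

if __name__ == "__main__":
    demo.launch()
```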
@@ -39,8 +54,47 @@ def format_conversation_history(chat_history):
         messages.append({"role": role, "content": content})
     return messages
 
+def format_analysis_response(text):
+    """Enhanced response formatting with better structure and LaTeX support."""
+    # Look for analysis section followed by final response
+    m = re.search(r"analysis(.*?)assistantfinal", text, re.DOTALL | re.IGNORECASE)
+    if m:
+        reasoning = m.group(1).strip()
+        response = text.split("assistantfinal", 1)[-1].strip()
+
+        # Clean up the reasoning section
+        reasoning = re.sub(r'^analysis\s*', '', reasoning, flags=re.IGNORECASE).strip()
+
+        # Format with improved structure
+        formatted = (
+            f"**🤔 Analysis & Reasoning:**\n\n"
+            f"*{reasoning}*\n\n"
+            f"---\n\n"
+            f"**💬 Final Response:**\n\n{response}"
+        )
+
+        # Ensure LaTeX delimiters are balanced
+        if formatted.count("$") % 2:
+            formatted += "$"
+
+        return formatted
+
+    # Fallback: clean up the text and return as-is
+    cleaned = re.sub(r'^analysis\s*', '', text, flags=re.IGNORECASE).strip()
+    if cleaned.count("$") % 2:
+        cleaned += "$"
+    return cleaned
+
 @spaces.GPU(duration=60)
 def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
+    if not input_data.strip():
+        yield "Please enter a prompt."
+        return
+
+    # Log the request
+    logging.info(f"[User] {input_data}")
+    logging.info(f"[System] {system_prompt} | Temp={temperature} | Max tokens={max_new_tokens}")
+
     new_message = {"role": "user", "content": input_data}
     system_message = [{"role": "system", "content": system_prompt}] if system_prompt else []
     processed_history = format_conversation_history(chat_history)
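To see what `format_analysis_response` produces, here is a quick check on a hypothetical raw string that mimics the `analysis ... assistantfinal` channel markers gpt-oss emits; it assumes the function defined above is in scope:

```python
# Hypothetical raw model output with gpt-oss-style channel markers.
# Assumes format_analysis_response (defined above) is in scope.
raw = (
    "analysisThe user wants the roots of $x^2 - 1$. "
    "Factor as $(x - 1)(x + 1)$."
    "assistantfinalThe roots are $x = 1$ and $x = -1$."
)

print(format_analysis_response(raw))
# **🤔 Analysis & Reasoning:**
#
# *The user wants the roots of $x^2 - 1$. Factor as $(x - 1)(x + 1)$.*
#
# ---
#
# **💬 Final Response:**
#
# The roots are $x = 1$ and $x = -1$.
```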
@@ -74,27 +128,40 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
     thread = Thread(target=model.generate, kwargs={**inputs, **generation_kwargs})
     thread.start()
 
-    # Stream the response
-    thinking = ""
-    final = ""
-    started_final = False
-
-    for chunk in streamer:
-        if not started_final:
-            if "assistantfinal" in chunk.lower():
-                split_parts = re.split(r'assistantfinal', chunk, maxsplit=1)
-                thinking += split_parts[0]
-                final += split_parts[1]
-                started_final = True
-            else:
-                thinking += chunk
-        else:
-            final += chunk
-
-    clean_thinking = re.sub(r'^analysis\s*', '', thinking).strip()
-    clean_final = final.strip()
-    formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
-    yield formatted
+    # Stream the response with enhanced formatting
+    collected_text = ""
+    buffer = ""
+    yielded_once = False
+
+    try:
+        for chunk in streamer:
+            if not chunk:
+                continue
+
+            collected_text += chunk
+            buffer += chunk
+
+            # Initial yield to show immediate response
+            if not yielded_once:
+                yield chunk
+                buffer = ""
+                yielded_once = True
+                continue
+
+            # Yield accumulated text periodically for smooth streaming
+            if "\n" in buffer or len(buffer) > 150:
+                # Use enhanced formatting for partial text
+                partial_formatted = format_analysis_response(collected_text)
+                yield partial_formatted
+                buffer = ""
+
+        # Final formatting with complete text
+        final_formatted = format_analysis_response(collected_text)
+        yield final_formatted
+
+    except Exception as e:
+        logging.exception("Generation streaming failed")
+        yield f"❌ Error during generation: {e}"
 
 demo = gr.ChatInterface(
     fn=generate_response,
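The streaming loop above follows the standard transformers pattern: `model.generate` blocks until completion, so it runs on a worker thread while the main thread drains a `TextIteratorStreamer`. A self-contained sketch of that pattern, with `gpt2` as an illustrative stand-in for the Space's actual model:

```python
# Thread + TextIteratorStreamer pattern, as used above. gpt2 is only an
# illustrative stand-in; the Space loads its own base model and adapter.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quick brown fox", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until done, so it runs off the main thread
thread = Thread(
    target=model.generate,
    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 40},
)
thread.start()

buffer = ""
for chunk in streamer:
    buffer += chunk
    # Flush on newlines or every ~150 chars, mirroring the app's heuristic
    if "\n" in buffer or len(buffer) > 150:
        print(buffer, end="", flush=True)
        buffer = ""
print(buffer, flush=True)

thread.join()
```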
@@ -112,24 +179,36 @@ demo = gr.ChatInterface(
         gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
     ],
     examples=[
-        [{"text": "Explain Newton laws clearly and concisely"}],
+        [{"text": "Explain Newton's laws clearly and concisely with mathematical formulas"}],
         [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
-        [{"text": "What are the benefits of open weight AI models"}],
+        [{"text": "What are the benefits of open weight AI models? Include analysis."}],
+        [{"text": "Solve this equation: $x^2 + 5x + 6 = 0$"}],
     ],
     cache_examples=False,
     type="messages",
     description="""
 # 🙋🏻‍♂️Welcome to 🌟Tonic's gpt-oss-20b Multilingual Reasoner Demo !
-Wait couple of seconds initially. You can adjust reasoning level in the system prompt like "Reasoning: high.
+
+✨ **Enhanced Features:**
+- 🧠 **Advanced Reasoning**: Detailed analysis and step-by-step thinking
+- 📊 **LaTeX Support**: Mathematical formulas rendered beautifully (use `$` or `$$`)
+- 🎯 **Improved Formatting**: Clear separation of reasoning and final responses
+- 📝 **Smart Logging**: Better error handling and request tracking
+
+💡 **Usage Tips:**
+- Adjust reasoning level in the system prompt (e.g., "Reasoning: high")
+- Use LaTeX for math: `$E = mc^2$` or `$$\\int x^2 dx$$`
+- Wait a couple of seconds initially for model loading
     """,
     fill_height=True,
     textbox=gr.Textbox(
         label="Query Input",
-        placeholder="Type your prompt"
+        placeholder="Type your prompt (supports LaTeX: $x^2 + y^2 = z^2$)"
     ),
     stop_btn="Stop Generation",
     multimodal=False,
-    theme=gr.themes.Soft()
+    theme=gr.themes.Soft(),
+    chatbot=gr.Chatbot(type="messages", latex_delimiters=LATEX_DELIMS)  # LaTeX delimiters live on the underlying Chatbot
 )
 
 if __name__ == "__main__":
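Because partial yields from the streamer are rendered live with the KaTeX delimiters configured above, a chunk cut off mid-formula would leave an odd number of `$` and break rendering for the rest of the message. The `$`-balancing fallback in `format_analysis_response` covers that case; the sample input below is hypothetical:

```python
# Fallback path: no "assistantfinal" marker yet, and a dangling "$" from a
# stream cut off mid-formula. One "$" is appended to keep KaTeX balanced.
# Assumes format_analysis_response (defined above) is in scope.
partial = "analysis Solving $x^2 + 5x + 6 = 0$ gives $x = -2"

print(format_analysis_response(partial))
# Solving $x^2 + 5x + 6 = 0$ gives $x = -2$
```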
 