ianeksdi committed on
Commit f1f3641 · verified · 1 Parent(s): 3cb26f0

Update app.py

Files changed (1)
  1. app.py +57 -78
app.py CHANGED
@@ -1,88 +1,67 @@
1
- import yaml
2
- import re
3
- from smolagents import CodeAgent, HfApiModel
4
- from tools.final_answer import FinalAnswerTool
5
- from Gradio_UI import GradioUI
6
 
7
- # Updated system prompt: Only output the final, direct advice in plain text.
8
- system_prompt = (
9
- "You are a health and lifestyle advisor specializing in the early detection and prevention of hypertension. "
10
- "Provide only the final, direct, and concise lifestyle advice based solely on the user's details. "
11
- "Do NOT include any internal reasoning, chain-of-thought, intermediate steps, or code snippets. "
12
- "Output exactly one final answer as plain text with no extra commentary."
 
 
 
13
  )
14
 
15
- def remove_code_snippets(text):
16
- """
17
- Removes code blocks, inline code, chain-of-thought, and debugging/step logs from the output.
18
- """
19
- # Remove triple-backtick code blocks.
20
- text = re.sub(r"```[\s\S]+?```", "", text, flags=re.DOTALL)
21
- # Remove inline code enclosed in single backticks.
22
- text = re.sub(r"`[^`]+`", "", text)
23
- # Remove any text between <think> and </think> tags.
24
- text = re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.DOTALL)
25
- # Remove debug/step log banners (e.g., "━━━━━ Step X ━━━━━")
26
- text = re.sub(r"━+.*Step \d+.*━+", "", text)
27
- # Remove any lines that start with "[Step" (which include duration and token info).
28
- text = re.sub(r"\[Step \d+: Duration .*", "", text)
29
- # Remove lines that mention code snippet instructions.
30
- text = re.sub(r"Make sure to include code with the correct pattern.*", "", text)
31
- # Finally, remove any remaining lines that seem to be debug logs.
32
- lines = text.splitlines()
33
- cleaned_lines = [line for line in lines if not re.search(r"Step \d+|Duration", line)]
34
- return "\n".join(cleaned_lines).strip()
35
 
36
- # Use only the final_answer tool.
37
- final_answer = FinalAnswerTool()
38
 
39
- # Set up the model with a reduced token limit.
40
- model = HfApiModel(
41
- max_tokens=1024,
42
- temperature=0.5,
43
- model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
44
- custom_role_conversions=None,
45
- )
46
 
47
- # Load prompt templates from YAML.
48
- with open("prompts.yaml", 'r') as stream:
49
- prompt_templates = yaml.safe_load(stream)
50
 
51
- # Ensure the final_answer key exists in prompt_templates to prevent KeyError.
52
- if "final_answer" not in prompt_templates:
53
- prompt_templates["final_answer"] = {"pre_messages": "", "post_messages": ""}
54
 
55
- # Initialize CodeAgent with a low verbosity level to reduce extra debug output.
56
- agent = CodeAgent(
57
- model=model,
58
- tools=[final_answer],
59
- max_steps=4,
60
- verbosity_level=0,
61
- grammar=None,
62
- planning_interval=None,
63
- name="Hypertension Prevention Advisor",
64
- description=system_prompt,
65
- prompt_templates=prompt_templates
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- def run_agent(user_input):
69
- """
70
- Runs the agent, then removes any internal chain-of-thought, step logs, and code snippets
71
- before returning the final plain-text answer.
72
- """
73
- raw_response = agent.run(user_input)
74
- print("Raw Agent Response:", raw_response) # Debugging output (optional)
75
-
76
- if not raw_response.strip():
77
- return "I'm sorry, but I couldn't generate a response. Please try again."
78
- if "final_answer" not in raw_response.lower():
79
- return "Error: The response did not use the `final_answer` tool. Please try again."
80
-
81
- clean_response = remove_code_snippets(raw_response)
82
- words = clean_response.split()
83
- if len(set(words)) < 5:
84
- return "I'm unable to generate a meaningful response. Please refine your query."
85
- return clean_response
86
 
87
- # Launch the Gradio UI.
88
- GradioUI(agent).launch()
 
1
+ import llama_cpp
2
+ import llama_cpp.llama_tokenizer
 
 
 
3
 
4
+ import gradio as gr
5
+
6
+ llama = llama_cpp.Llama.from_pretrained(
7
+ repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
8
+ filename="*q8_0.gguf",
9
+ tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
10
+ "Qwen/Qwen1.5-0.5B"
11
+ ),
12
+ verbose=False,
13
  )
14
 
15
+ model = "gpt-3.5-turbo"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
 
 
17
 
18
+ def predict(message, history):
19
+ messages = []
 
 
 
 
 
20
 
21
+ for user_message, assistant_message in history:
22
+ messages.append({"role": "user", "content": user_message})
23
+ messages.append({"role": "assistant", "content": assistant_message})
24
 
25
+ messages.append({"role": "user", "content": message})
 
 
26
 
27
+ response = llama.create_chat_completion_openai_v1(
28
+ model=model, messages=messages, stream=True
29
+ )
30
+
31
+ text = ""
32
+ for chunk in response:
33
+ content = chunk.choices[0].delta.content
34
+ if content:
35
+ text += content
36
+ yield text
37
+
38
+
39
+ js = """function () {
40
+ gradioURL = window.location.href
41
+ if (!gradioURL.endsWith('?__theme=dark')) {
42
+ window.location.replace(gradioURL + '?__theme=dark');
43
+ }
44
+ }"""
45
+
46
+ css = """
47
+ footer {
48
+ visibility: hidden;
49
+ }
50
+ full-height {
51
+ height: 100%;
52
+ }
53
+ """
54
+
55
+ with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
56
+ gr.ChatInterface(
57
+ predict,
58
+ fill_height=True,
59
+ examples=[
60
+ "What is the capital of France?",
61
+ "Who was the first person on the moon?",
62
+ ],
63
+ )
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
+ if __name__ == "__main__":
67
+ demo.launch()