ianeksdi committed on
Commit f1f3641 · verified · 1 Parent(s): 3cb26f0

Update app.py

Files changed (1)
  1. app.py +57 -78
app.py CHANGED
@@ -1,88 +1,67 @@
1
- import yaml
2
- import re
3
- from smolagents import CodeAgent, HfApiModel
4
- from tools.final_answer import FinalAnswerTool
5
- from Gradio_UI import GradioUI
6
 
7
- # Updated system prompt: Only output the final, direct advice in plain text.
8
- system_prompt = (
9
- "You are a health and lifestyle advisor specializing in the early detection and prevention of hypertension. "
10
- "Provide only the final, direct, and concise lifestyle advice based solely on the user's details. "
11
- "Do NOT include any internal reasoning, chain-of-thought, intermediate steps, or code snippets. "
12
- "Output exactly one final answer as plain text with no extra commentary."
 
 
 
13
  )
14
 
15
- def remove_code_snippets(text):
16
- """
17
- Removes code blocks, inline code, chain-of-thought, and debugging/step logs from the output.
18
- """
19
- # Remove triple-backtick code blocks.
20
- text = re.sub(r"```[\s\S]+?```", "", text, flags=re.DOTALL)
21
- # Remove inline code enclosed in single backticks.
22
- text = re.sub(r"`[^`]+`", "", text)
23
- # Remove any text between <think> and </think> tags.
24
- text = re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.DOTALL)
25
- # Remove debug/step log banners (e.g., "━━━━━ Step X ━━━━━")
26
- text = re.sub(r"━+.*Step \d+.*━+", "", text)
27
- # Remove any lines that start with "[Step" (which include duration and token info).
28
- text = re.sub(r"\[Step \d+: Duration .*", "", text)
29
- # Remove lines that mention code snippet instructions.
30
- text = re.sub(r"Make sure to include code with the correct pattern.*", "", text)
31
- # Finally, remove any remaining lines that seem to be debug logs.
32
- lines = text.splitlines()
33
- cleaned_lines = [line for line in lines if not re.search(r"Step \d+|Duration", line)]
34
- return "\n".join(cleaned_lines).strip()
35
 
36
- # Use only the final_answer tool.
37
- final_answer = FinalAnswerTool()
38
 
39
- # Set up the model with a reduced token limit.
40
- model = HfApiModel(
41
- max_tokens=1024,
42
- temperature=0.5,
43
- model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
44
- custom_role_conversions=None,
45
- )
46
 
47
- # Load prompt templates from YAML.
48
- with open("prompts.yaml", 'r') as stream:
49
- prompt_templates = yaml.safe_load(stream)
50
 
51
- # Ensure the final_answer key exists in prompt_templates to prevent KeyError.
52
- if "final_answer" not in prompt_templates:
53
- prompt_templates["final_answer"] = {"pre_messages": "", "post_messages": ""}
54
 
55
- # Initialize CodeAgent with a low verbosity level to reduce extra debug output.
56
- agent = CodeAgent(
57
- model=model,
58
- tools=[final_answer],
59
- max_steps=4,
60
- verbosity_level=0,
61
- grammar=None,
62
- planning_interval=None,
63
- name="Hypertension Prevention Advisor",
64
- description=system_prompt,
65
- prompt_templates=prompt_templates
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- def run_agent(user_input):
69
- """
70
- Runs the agent, then removes any internal chain-of-thought, step logs, and code snippets
71
- before returning the final plain-text answer.
72
- """
73
- raw_response = agent.run(user_input)
74
- print("Raw Agent Response:", raw_response) # Debugging output (optional)
75
-
76
- if not raw_response.strip():
77
- return "I'm sorry, but I couldn't generate a response. Please try again."
78
- if "final_answer" not in raw_response.lower():
79
- return "Error: The response did not use the `final_answer` tool. Please try again."
80
-
81
- clean_response = remove_code_snippets(raw_response)
82
- words = clean_response.split()
83
- if len(set(words)) < 5:
84
- return "I'm unable to generate a meaningful response. Please refine your query."
85
- return clean_response
86
 
87
- # Launch the Gradio UI.
88
- GradioUI(agent).launch()
 
1
+ import llama_cpp
2
+ import llama_cpp.llama_tokenizer
 
 
 
3
 
4
+ import gradio as gr
5
+
6
+ llama = llama_cpp.Llama.from_pretrained(
7
+ repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
8
+ filename="*q8_0.gguf",
9
+ tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
10
+ "Qwen/Qwen1.5-0.5B"
11
+ ),
12
+ verbose=False,
13
  )
14
 
15
+ model = "gpt-3.5-turbo"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
 
 
17
 
18
+ def predict(message, history):
19
+ messages = []
 
 
 
 
 
20
 
21
+ for user_message, assistant_message in history:
22
+ messages.append({"role": "user", "content": user_message})
23
+ messages.append({"role": "assistant", "content": assistant_message})
24
 
25
+ messages.append({"role": "user", "content": message})
 
 
26
 
27
+ response = llama.create_chat_completion_openai_v1(
28
+ model=model, messages=messages, stream=True
29
+ )
30
+
31
+ text = ""
32
+ for chunk in response:
33
+ content = chunk.choices[0].delta.content
34
+ if content:
35
+ text += content
36
+ yield text
37
+
38
+
39
+ js = """function () {
40
+ gradioURL = window.location.href
41
+ if (!gradioURL.endsWith('?__theme=dark')) {
42
+ window.location.replace(gradioURL + '?__theme=dark');
43
+ }
44
+ }"""
45
+
46
+ css = """
47
+ footer {
48
+ visibility: hidden;
49
+ }
50
+ full-height {
51
+ height: 100%;
52
+ }
53
+ """
54
+
55
+ with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
56
+ gr.ChatInterface(
57
+ predict,
58
+ fill_height=True,
59
+ examples=[
60
+ "What is the capital of France?",
61
+ "Who was the first person on the moon?",
62
+ ],
63
+ )
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
+ if __name__ == "__main__":
67
+ demo.launch()