userlocallm committed on
Commit
f846bf2
verified
1 Parent(s): 14724e9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -40
app.py CHANGED
@@ -1,44 +1,69 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def respond(
11
  message,
12
  history: list[tuple[str, str]],
13
  system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
  ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
 
26
- messages.append({"role": "user", "content": message})
 
 
 
 
27
 
28
- response = ""
 
 
 
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
 
 
 
42
 
43
  """
44
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
@@ -46,19 +71,9 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
46
  demo = gr.ChatInterface(
47
  respond,
48
  additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
  ],
60
  )
61
 
62
-
63
  if __name__ == "__main__":
64
- demo.launch()
 
1
+ # app.py
2
  import gradio as gr
3
+ from src.agent import Agent
4
+ from src.create_database import load_and_process_dataset # Import from create_database.py
5
+ import os
6
+ import uuid
7
+ import requests
8
+ import logging
9
+ from llama_cpp import Llama
10
 
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
+
14
+ # Create the directory if it doesn't exist
15
+ local_dir = "models"
16
+ os.makedirs(local_dir, exist_ok=True)
17
+
18
+ # Specify the filename for the model
19
+ filename = "unsloth.Q4_K_M.gguf"
20
+ model_path = os.path.join(local_dir, filename)
21
+
22
+ # Function to download the model file
23
+ def download_model(repo_id, filename, save_path):
24
+ # Construct the URL for the model file
25
+ url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
26
 
27
+ # Download the model file
28
+ response = requests.get(url)
29
+ if response.status_code == 200:
30
+ with open(save_path, 'wb') as f:
31
+ f.write(response.content)
32
+ print(f"Model downloaded to {save_path}")
33
+ else:
34
+ print(f"Failed to download model: {response.status_code}")
35
+
36
+ # Download the model if it doesn't exist
37
+ if not os.path.exists(model_path):
38
+ download_model("PurpleAILAB/Llama3.2-3B-uncensored-SQLi-Q4_K_M-GGUF", filename, model_path)
39
 
40
  def respond(
41
  message,
42
  history: list[tuple[str, str]],
43
  system_message,
 
 
 
44
  ):
45
+ model_path = "models/unsloth.Q4_K_M.gguf" # Path to the downloaded model
46
+ db_path = "agent.db"
47
+ system_prompt = system_message
 
 
 
 
48
 
49
+ # Check if the database exists, if not, initialize it
50
+ if not os.path.exists(db_path):
51
+ data_update_path = "data-update.txt"
52
+ keyword_dir = "keyword" # Updated keyword directory
53
+ load_and_process_dataset(data_update_path, keyword_dir, db_path)
54
 
55
+ # Load the model with the maximum context length and control the maximum tokens in the response
56
+ llm = Llama(
57
+ model_path=model_path,
58
+ n_ctx=5072, # Set the maximum context length
59
+ max_tokens=512 # Control the maximum number of tokens generated in the response
60
+ )
61
 
62
+ agent = Agent(llm, db_path, system_prompt)
63
+ user_id = str(uuid.uuid4()) # Generate a unique user ID for each session
 
 
 
 
 
 
 
 
 
64
 
65
+ response = agent.process_query(user_id, message)
66
+ return response
67
 
68
  """
69
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
71
  demo = gr.ChatInterface(
72
  respond,
73
  additional_inputs=[
74
+ gr.Textbox(value="Vous êtes l'assistant intelligent de Les Chronique MTC. Votre rôle est d'aider les visiteurs en expliquant le contenu des Chroniques, Flash Infos et Chronique-FAQ de Michel Thomas. Utilisez le contexte fourni pour améliorer vos réponses et veillez à ce qu'elles soient précises et pertinentes.", label="System message"),
 
 
 
 
 
 
 
 
 
75
  ],
76
  )
77
 
 
78
  if __name__ == "__main__":
79
+ demo.launch(server_name="0.0.0.0", server_port=7860)