Threatthriver committed on
Commit c51870d
1 Parent(s): 66980c9

Update app.py

Files changed (1):
  app.py (+8, -40)
app.py CHANGED
@@ -4,11 +4,9 @@ from huggingface_hub import InferenceClient
 # Define available models and their Hugging Face IDs
 available_models = {
     "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
-    "Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat",
     # Add more models here as needed
 }
 
-
 def respond(
     message: str,
     history: list[tuple[str, str]],
@@ -20,37 +18,17 @@ def respond(
 ):
     """
     Generates a response from the AI model based on the user's message and chat history.
-
-    Args:
-        message (str): The user's input message.
-        history (list): A list of tuples representing the conversation history (user, assistant).
-        system_message (str): A system-level message guiding the AI's behavior.
-        max_tokens (int): The maximum number of tokens for the output.
-        temperature (float): Sampling temperature for controlling the randomness.
-        top_p (float): Top-p (nucleus sampling) for controlling diversity.
-        model_name (str): The name of the model to use.
-
-    Yields:
-        str: The AI's response as it is generated.
     """
-    # Initialize the InferenceClient with the selected model
     client = InferenceClient(model=available_models[model_name])
 
-    # Prepare the conversation history for the API call
     messages = [{"role": "system", "content": system_message}]
-
     for user_input, assistant_response in history:
         messages.append({"role": "user", "content": user_input})
         messages.append({"role": "assistant", "content": assistant_response})
-
-    # Add the latest user message to the conversation
     messages.append({"role": "user", "content": message})
 
-    # Initialize an empty response
     streamed_response = ""
-
     try:
-        # Generate a response from the model with streaming
         for response in client.chat_completion(
             messages=messages,
             max_tokens=max_tokens,
@@ -61,15 +39,18 @@ def respond(
             chunk = response.choices[0].delta.get("content", "")
             streamed_response += chunk
             yield streamed_response
-
     except Exception as e:
         yield f"**Error:** {str(e)}"
 
+# Latest updates
+latest_updates = """
+**Chatbot - Latest Updates:**
+
+* **Multiple Model Support:** You can now choose from different models like Zephyr 7B.
+* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
+"""
 
 def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
-    """
-    Shows the latest updates and then generates a response from the model based on the updates.
-    """
     history.append(("User: ", "Show me the latest updates"))
     yield from respond(
         message="Show me the latest updates",
@@ -92,19 +73,6 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, t
         model_name=model_name,
     )
 
-
-# Latest updates (you can replace this with actual update information)
-latest_updates = """
-**Chatbot - Latest Updates:**
-
-* **Multiple Model Support:** You can now choose from different models like Zephyr 7B and Llama 2.
-* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
-* **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
-* **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
-* **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
-"""
-
-
 # Define the Gradio interface with the Blocks context
 with gr.Blocks(css=".gradio-container {border: none;}") as demo:
     chat_history = gr.State([])  # Initialize an empty chat history state
@@ -149,4 +117,4 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
 
 # Launch the Gradio interface in full screen
 if __name__ == "__main__":
-    demo.launch(share=True, fullscreen=True)
+    demo.launch(share=True, fullscreen=True)
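For reference, the streaming pattern that respond() keeps after this commit: build an OpenAI-style messages list (system message, prior turns, new user message), call client.chat_completion(...), and accumulate each chunk's delta while yielding the growing text. The hunk above truncates the call, so the sketch below is a minimal standalone version under the assumption that stream=True is passed; the prompt and sampling settings are illustrative only. Note that current huggingface_hub releases expose the chunk text as an attribute (chunk.choices[0].delta.content) rather than via delta.get("content", "") as in the app code.

```python
# Minimal sketch of streaming chat completion with huggingface_hub's InferenceClient.
# Assumes stream=True (not visible in the truncated hunk) and a valid HF token/endpoint;
# the prompt and sampling parameters below are placeholders.
from huggingface_hub import InferenceClient

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

messages = [
    {"role": "system", "content": "You are a friendly chatbot."},
    {"role": "user", "content": "Say hello in one sentence."},
]

streamed_response = ""
for chunk in client.chat_completion(
    messages=messages,
    max_tokens=64,
    temperature=0.7,
    top_p=0.95,
    stream=True,  # yields incremental chunks instead of one final reply
):
    delta = chunk.choices[0].delta.content or ""  # newly generated text, may be None
    streamed_response += delta
    print(delta, end="", flush=True)
```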
 
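Because respond() and show_updates_and_respond() are generators, the Blocks UI (only hinted at in this diff) can stream partial replies: Gradio re-renders the output component on every yield from a generator event handler. Below is a minimal sketch of that wiring, not the app's actual layout; the echo_stream() helper and component names are hypothetical stand-ins for respond() and the real components.

```python
# Minimal sketch: streaming a generator's partial output into a gr.Chatbot.
# echo_stream() is a hypothetical stand-in for the app's respond() generator.
import time
import gradio as gr

def echo_stream(message, history):
    history = history or []
    reply = ""
    for word in f"You said: {message}".split():
        reply += word + " "
        time.sleep(0.05)  # simulate tokens arriving one at a time
        # Yielding the updated (user, assistant) pairs streams them to the Chatbot.
        yield history + [(message, reply)]

with gr.Blocks(css=".gradio-container {border: none;}") as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    # A generator handler makes Gradio update `chatbot` on every yield.
    msg.submit(echo_stream, inputs=[msg, chatbot], outputs=chatbot)

if __name__ == "__main__":
    demo.launch()
```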