DJStomp committed (verified)
Commit 5d7fdef · Parent(s): 8393716

Update app.py

Files changed (1): app.py (+20 -17)
app.py CHANGED
@@ -2,14 +2,14 @@ import os
 import gradio as gr
 from huggingface_hub import InferenceClient

-# Load HF Token from environment variables
 hf_token = os.getenv("HF_TOKEN")
 if not hf_token:
     raise ValueError("HF_TOKEN is not set in environment variables!")

-# Initialize InferenceClient
+
 client = InferenceClient(model="huihui-ai/Llama-3.3-70B-Instruct-abliterated", token=hf_token)

+
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -21,33 +21,36 @@ def respond(
     # Prepare messages for the API
     messages = [{"role": "system", "content": system_message}]

-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})

     messages.append({"role": "user", "content": message})

     response = ""

     try:
-        # Use 'messages' instead of 'inputs'
-        for message in client.chat_completion(
+        # Call the chat_completion method with the correct parameters
+        completion = client.chat_completion(
             model="huihui-ai/Llama-3.3-70B-Instruct-abliterated",
-            messages=messages,  # Correct argument
-            parameters={
-                "max_tokens": max_tokens,
-                "temperature": temperature,
-                "top_p": top_p,
-            },
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
             stream=True,
-        ):
+        )
+
+        # Handle streaming responses
+        for message in completion:
             token = message.choices[0].delta.content
             response += token
             yield response
+
     except Exception as e:
-        yield f"Error: {str(e)}"
+        yield f"Error: {str(e)}"
+

 demo = gr.ChatInterface(
     respond,
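
The substance of this commit is that InferenceClient.chat_completion does not take a "parameters" dict; sampling options such as max_tokens, temperature, and top_p are top-level keyword arguments, and with stream=True the call yields OpenAI-style chunks. A minimal, self-contained sketch of the corrected call pattern follows; the model name and token handling mirror the commit, while the messages content and the "if token" guard are illustrative additions not present in the committed code (delta.content can be None on the final streamed chunk):

import os
from huggingface_hub import InferenceClient

# Token and model as in the commit; HF_TOKEN must be set in the environment.
client = InferenceClient(
    model="huihui-ai/Llama-3.3-70B-Instruct-abliterated",
    token=os.getenv("HF_TOKEN"),
)

# Hypothetical conversation for illustration.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

response = ""
# Sampling options are passed directly; stream=True yields incremental chunks,
# each carrying a delta for every choice.
for chunk in client.chat_completion(
    messages=messages,
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
    stream=True,
):
    token = chunk.choices[0].delta.content
    if token:  # precaution: the final chunk's delta.content can be None
        response += token
        print(token, end="", flush=True)

Naming the loop variable chunk, as above, also avoids shadowing the respond function's message parameter, which the committed code still does.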