ericbanzuzi committed on
Commit
d984f4c
1 Parent(s): 2adcb82
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -1,11 +1,15 @@
1
  import gradio as gr
 
2
  from huggingface_hub import InferenceClient
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
 
 
 
9
 
10
  def respond(
11
  message,
@@ -26,8 +30,7 @@ def respond(
26
  messages.append({"role": "user", "content": message})
27
 
28
  response = ""
29
-
30
- for message in client.chat_completion(
31
  messages,
32
  max_tokens=max_tokens,
33
  stream=True,
@@ -39,7 +42,6 @@ def respond(
39
  response += token
40
  yield response
41
 
42
-
43
  """
44
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
  """
@@ -61,4 +63,4 @@ demo = gr.ChatInterface(
61
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
1
  import gradio as gr
2
+ from llama_cpp import Llama
3
  from huggingface_hub import InferenceClient
4
 
5
  """
6
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
+ llm = Llama.from_pretrained(
9
+ repo_id="rcarioniporras/model",
10
+ filename="*.gguf",
11
+ verbose=False
12
+ )
13
 
14
  def respond(
15
  message,
 
30
  messages.append({"role": "user", "content": message})
31
 
32
  response = ""
33
+ for message in llm.chat_completion(
 
34
  messages,
35
  max_tokens=max_tokens,
36
  stream=True,
 
42
  response += token
43
  yield response
44
 
 
45
  """
46
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
47
  """
 
63
 
64
 
65
  if __name__ == "__main__":
66
+ demo.launch()