Ahil1991 committed on
Commit
127d2d3
·
verified ·
1 Parent(s): b06dd28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from llama_cpp import Llama
 
3
 
4
  # Load your LLaMA model
5
  llm = Llama.from_pretrained(
@@ -7,7 +8,7 @@ llm = Llama.from_pretrained(
7
  filename="Bee-V.01.gguf",
8
  )
9
 
10
- # Function to handle user input and generate a chat completion
11
  def chat_with_model(user_input):
12
  messages = [
13
  {
@@ -17,20 +18,23 @@ def chat_with_model(user_input):
17
  ]
18
 
19
  # Get response from the model
20
- response = llm.create_chat_completion(
21
- messages=messages
22
- )
23
 
24
- # Extract the content from the response
25
- return response['choices'][0]['message']['content']
 
 
 
 
 
26
 
27
- # Create a Gradio interface
28
  iface = gr.Interface(
29
  fn=chat_with_model, # The function to handle input
30
  inputs="text", # Input: text from user
31
- outputs="text", # Output: response as text
32
- title="Chat with Bee 8B Model", # Title for the Gradio app
33
- description="Ask anything and get responses from Bee!"
34
  )
35
 
36
  # Launch the Gradio interface
 
1
  import gradio as gr
2
  from llama_cpp import Llama
3
+ import time
4
 
5
  # Load your LLaMA model
6
  llm = Llama.from_pretrained(
 
8
  filename="Bee-V.01.gguf",
9
  )
10
 
11
+ # Function to generate a "typing" effect by yielding each part of the response
12
  def chat_with_model(user_input):
13
  messages = [
14
  {
 
18
  ]
19
 
20
  # Get response from the model
21
+ response = llm.create_chat_completion(messages=messages)
 
 
22
 
23
+ # Extract the full content from the response
24
+ full_response = response['choices'][0]['message']['content']
25
+
26
+ # Stream the response like it's typing in real-time
27
+ for i in range(1, len(full_response) + 1):
28
+ yield full_response[:i] # Yield progressively larger chunks of the response
29
+ time.sleep(0.05) # Simulate typing speed (adjust as needed)
30
 
31
+ # Create a Gradio interface that streams the response
32
  iface = gr.Interface(
33
  fn=chat_with_model, # The function to handle input
34
  inputs="text", # Input: text from user
35
+ outputs="text", # Output: streamed response as text
36
+ title="Chat with Bee 8B Model", # Title for the Gradio app
37
+ description="Ask anything and get responses from Bee in real-time!"
38
  )
39
 
40
  # Launch the Gradio interface