Spaces:

Ahil1991
/

Bee_8B_HF_Space

Sleeping

Ahil1991 committed on Sep 15, 2024

Commit

127d2d3

verified ·

1 Parent(s): b06dd28

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 from llama_cpp import Llama
 # Load your LLaMA model
 llm = Llama.from_pretrained(
@@ -7,7 +8,7 @@ llm = Llama.from_pretrained(
     filename="Bee-V.01.gguf",
 )
-# Function to handle user input and generate a chat completion
 def chat_with_model(user_input):
     messages = [
         {
@@ -17,20 +18,23 @@ def chat_with_model(user_input):
     ]
     # Get response from the model
-    response = llm.create_chat_completion(
-        messages=messages
-    )
-    # Extract the content from the response
-    return response['choices'][0]['message']['content']
-# Create a Gradio interface
 iface = gr.Interface(
     fn=chat_with_model,              # The function to handle input
     inputs="text",                   # Input: text from user
-    outputs="text",                  # Output: response as text
-    title="Chat with Bee 8B Model",   # Title for the Gradio app
-    description="Ask anything and get responses from Bee!"
 )
 # Launch the Gradio interface

 import gradio as gr
 from llama_cpp import Llama
+import time
 # Load your LLaMA model
 llm = Llama.from_pretrained(
     filename="Bee-V.01.gguf",
 )
+# Function to generate a "typing" effect by yielding each part of the response
 def chat_with_model(user_input):
     messages = [
         {
     ]
     # Get response from the model
+    response = llm.create_chat_completion(messages=messages)
+    # Extract the full content from the response
+    full_response = response['choices'][0]['message']['content']
+    # Stream the response like it's typing in real-time
+    for i in range(1, len(full_response) + 1):
+        yield full_response[:i]  # Yield progressively larger chunks of the response
+        time.sleep(0.05)  # Simulate typing speed (adjust as needed)
+# Create a Gradio interface that streams the response
 iface = gr.Interface(
     fn=chat_with_model,              # The function to handle input
     inputs="text",                   # Input: text from user
+    outputs="text",                  # Output: streamed response as text
+    title="Chat with Bee 8B Model",  # Title for the Gradio app
+    description="Ask anything and get responses from Bee in real-time!"
 )
 # Launch the Gradio interface