Spaces:

Brifeb
/

UUD45

Sleeping

App Files Community

Brifeb committed on Jul 11, 2024

Commit

69e404b

1 Parent(s): 019d31e

fix req

Browse files

Files changed (2) hide show

app.py +74 -59
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,63 +1,78 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a proffesional Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
-if __name__ == "__main__":
-    demo.launch()

+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
+from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+from dotenv import load_dotenv
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core import Settings
+import os
 import gradio as gr
+Settings.llm = HuggingFaceInferenceAPI(
+    model_name="HuggingFaceH4/zephyr-7b-beta",
+    tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
+    context_window=3000,
+    max_new_tokens=512,
+    generate_kwargs={"temperature": 0.1},
+    stream=True
+)
+Settings.embed_model = HuggingFaceEmbedding(
+    model_name="BAAI/bge-small-en-v1.5"
+)
+# Define the data directory
+DATA_DIR = "data"
+# Ensure data directory exists
+os.makedirs(DATA_DIR, exist_ok=True)
+# Load documents
+documents = SimpleDirectoryReader(DATA_DIR).load_data()
+# Create Index
+index = VectorStoreIndex.from_documents(documents)
+chat_text_qa_msgs = [
+(
+    "user",
+    """You are a Q&A assistant named PEDEEP. For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
+    Context:
+    {context_str}
+    Question:
+    {query_str}
+    """
 )
+]
+text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+# Initialize Chat Memory Buffer for Conversation Memory
+memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
+# Create Chat Engine with LLM
+chat_engine = index.as_chat_engine(
+    text_qa_template=text_qa_template,
+    memory=memory,
+    chat_mode="condense_question"  # Choose the mode that suits your use case
+)
+### Gradio Interface ###
+def chat_with_ollama(message, history):
+    # debug print memory
+    # print(memory.get_all())
+    if history == []:
+        print("# cleared history, resetting chatbot state")
+        chat_engine.reset()
+    # HuggingFaceInferenceAPI does not implement streaming yet
+    return chat_engine.chat(message).response
+chatbot = gr.ChatInterface(
+    chat_with_ollama, title="(UUD45) Document-Based Chatbot with LLM")
+chatbot.launch()
+# chatbot.launch(server_name="xx.xx.xx.xx", server_port=7860)  # set IP and port for deployment

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 gradio
 llama-index
 llama-index-llms-ollama
-llama-index-embeddings-huggingface

 gradio
 llama-index
 llama-index-llms-ollama
+llama-index-embeddings-huggingface
+llama-index-llms-huggingface-api