Brifeb committed on
Commit
69e404b
·
1 Parent(s): 019d31e
Files changed (2) hide show
  1. app.py +74 -59
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,63 +1,78 @@
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a proffesional Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
 
 
61
 
62
- if __name__ == "__main__":
63
- demo.launch()
 
1
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
2
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
3
+ from dotenv import load_dotenv
4
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
+ from llama_index.core.memory import ChatMemoryBuffer
6
+ from llama_index.core import Settings
7
+ import os
8
  import gradio as gr
9
+
10
+
11
+ Settings.llm = HuggingFaceInferenceAPI(
12
+ model_name="HuggingFaceH4/zephyr-7b-beta",
13
+ tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
14
+ context_window=3000,
15
+ max_new_tokens=512,
16
+ generate_kwargs={"temperature": 0.1},
17
+ stream=True
18
+ )
19
+
20
+ Settings.embed_model = HuggingFaceEmbedding(
21
+ model_name="BAAI/bge-small-en-v1.5"
22
+ )
23
+
24
+ # Define the directory of data
25
+ DATA_DIR = "data"
26
+
27
+ # Ensure data directory exists
28
+ os.makedirs(DATA_DIR, exist_ok=True)
29
+
30
+ # Load documents
31
+ documents = SimpleDirectoryReader(DATA_DIR).load_data()
32
+
33
+ # Create Index
34
+ index = VectorStoreIndex.from_documents(documents)
35
+
36
+ chat_text_qa_msgs = [
37
+ (
38
+ "user",
39
+ """You are a Q&A assistant named PEDEEP. For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
40
+ Context:
41
+ {context_str}
42
+ Question:
43
+ {query_str}
44
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  )
46
+ ]
47
+ text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
48
+
49
+ # Initialize Chat Memory Buffer for Conversation Memory
50
+ memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
51
+
52
+ # Create Chat Engine with LLM
53
+ chat_engine = index.as_chat_engine(
54
+ text_qa_template=text_qa_template,
55
+ memory=memory,
56
+ chat_mode="condense_question" # Choose the chat mode that suits your use case
57
+ )
58
+
59
+ ### Gradio Interface ###
60
+
61
+ def chat_with_ollama(message, history):
62
+ # debug print memory
63
+ # print(memory.get_all())
64
+
65
+ if history == []:
66
+ print("# cleared history, resetting chatbot state")
67
+ chat_engine.reset()
68
+
69
+ # HuggingFaceInferenceAPI does not implement streaming yet, so return the full response
70
+
71
+ return chat_engine.chat(message).response
72
+
73
 
74
+ chatbot = gr.ChatInterface(
75
+ chat_with_ollama, title="(UUD45) Document-Based Chatbot with LLM")
76
 
77
+ chatbot.launch()
78
+ # chatbot.launch(server_name="xx.xx.xx.xx", server_port=7860) # set IP and port for deployment
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  gradio
2
  llama-index
3
  llama-index-llms-ollama
4
- llama-index-embeddings-huggingface
 
 
1
  gradio
2
  llama-index
3
  llama-index-llms-ollama
4
+ llama-index-embeddings-huggingface
5
+ llama-index-llms-huggingface-api