sam2ai committed on
Commit
73c09e2
·
verified ·
1 Parent(s): 7857025
Files changed (1) hide show
  1. app.py +227 -0
app.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.vectorstores import Qdrant
2
+ from langchain_together import Together
3
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
4
+ from qdrant_client import QdrantClient
5
+ from langchain_core.prompts import PromptTemplate
6
+ import os
7
+ from dotenv import load_dotenv
8
+
9
+ from langchain_community.vectorstores import Qdrant
10
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
11
+ from langchain.docstore.document import Document
12
+ import pandas as pd
13
+
14
# Turn every row of the cleaned product CSV into a LangChain Document.
# Missing cells are replaced by empty strings so later string handling
# never sees NaN.
all_prods_df = pd.read_csv("data/cleaned_CSVIndian10000.csv").fillna("")

# Row index -> {column: value}; each row dict is carried along as the
# document's metadata.
product_metadata = all_prods_df.to_dict(orient="index")
metadatas = list(product_metadata.values())

# The text that gets embedded is the product name followed by its description.
texts = [
    "\n".join((str(row["name"]), str(row["product_desc"])))
    for row in metadatas
]

docs = [
    Document(page_content=text, metadata={"source": row})
    for text, row in zip(texts, metadatas)
]

print("Data loaded.........")
27
+
28
+
29
# Embedding model used for both indexing and querying: BGE-large (English),
# run on CPU, with `normalize_embeddings` enabled at encode time.
model_name = "BAAI/bge-large-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

print("Embedding model loaded.........")
41
+
42
+
43
# Embed all product documents and index them in a Qdrant collection that is
# kept in process memory (no external Qdrant server is contacted).
# url="http://localhost:6333"
collection_name = "shopintel100v3"

qdrant_options = {
    "location": ":memory:",
    "collection_name": collection_name,
    "prefer_grpc": False,
}
vector_store = Qdrant.from_documents(docs, embeddings, **qdrant_options)

print("Vector store loaded.........")
56
+
57
+
58
# Pull variables from a local .env file (if present) into the environment,
# then read the Together API key from it.
load_dotenv()

TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
# Never print the key itself: stdout ends up in container / hosting logs.
# Report only whether a key was found so a missing secret is still easy to spot.
print("api key: ", "set" if TOGETHER_API_KEY else "MISSING")
62
+
63
+
64
+ # load the embedding model
65
+ # model_name = "BAAI/bge-large-en"
66
+ # model_kwargs = {"device": "cpu"}
67
+ # encode_kwargs = {"normalize_embeddings": True}
68
+
69
+ # embeddings = HuggingFaceBgeEmbeddings(
70
+ # model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
71
+ # )
72
+ # print("embeddings loaded.............")
73
+
74
+ # url = "http://localhost:6333"
75
+ # collection_name = "shopintel100v3"
76
+
77
+ # client = QdrantClient(url=url, prefer_grpc=False)
78
+
79
+ # vector_store = Qdrant(
80
+ # client=client,
81
+ # collection_name=collection_name,
82
+ # embeddings=embeddings
83
+ # )
84
+
85
# The collection above is built in memory, not read from a Docker-hosted
# Qdrant instance, so the status line says so.
print("qdrant in-memory collection was loaded.............")

# Hosted Mixtral instruct model accessed through the Together API.
# A low temperature keeps answers close to the supplied product context.
llm = Together(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.2,
    max_tokens=5000,
    top_k=50,
    together_api_key=TOGETHER_API_KEY,
)
94
+
95
+
96
+ # query = "ASUS VivoBook 15 (2021)"
97
+ # result = vector_store.similarity_search_with_score(query=query, k=5)
98
+
99
+ # for i in result:
100
+ # doc, score = i
101
+ # print({"score": score, "content": doc.page_content, "metadata": doc.metadata["source"]})
102
+ # print("---------------------------------")
103
+
104
+ # function to retrieve products from qdrant
105
+
106
def retrieve_product(user_input, vector_store, k=10):
    """Look up the products most similar to a free-text query.

    Args:
        user_input: The customer's query text.
        vector_store: Object exposing ``similarity_search_with_score``.
        k: Maximum number of matches to request.

    Returns:
        Whatever ``vector_store.similarity_search_with_score`` returns for
        the query (presumably ``(document, score)`` pairs).
    """
    return vector_store.similarity_search_with_score(query=user_input, k=k)
113
+
114
+
115
+ # function to create context from user query
116
+
117
# Text rendered for one retrieved product inside the LLM context.
_PRODUCT_ENTRY = (
    "\n"
    "* Product {rank} -\n"
    "- Product name : {name}\n"
    "- Product price: {discount_price}\n"
    "- Brief description of the product: {product_desc}\n"
    "- Detailed description of the product: {about_this_item}\n"
    "- Rating value (1.0 - 5.0): {ratings}\n"
    "- Overall review: {overall_review}\n"
    "\n"
    "\n"
)


def create_context(user_input, vector_store, k=10):
    """Build the product-context section of the prompt for a user query.

    Retrieves the products most similar to ``user_input`` and renders each
    one as a numbered bullet block (name, price, descriptions, rating and
    review), in retrieval order.

    Args:
        user_input: The customer's query text.
        vector_store: Vector store passed through to ``retrieve_product``.
        k: Number of products to retrieve; defaults to 10, the count that
            was previously fixed by ``retrieve_product``'s own default.

    Returns:
        The concatenated product blocks, or ``""`` when nothing was retrieved.

    Raises:
        KeyError: If a retrieved document's metadata lacks ``"source"`` or
            one of the product fields used above.
    """
    result = retrieve_product(user_input, vector_store, k)

    entries = []
    for rank, hit in enumerate(result, start=1):
        # hit[0] is the matched document; the full product row was stored
        # under metadata["source"] when the documents were created.
        product = hit[0].metadata["source"]
        entries.append(
            _PRODUCT_ENTRY.format(
                rank=rank,
                name=product["name"],
                discount_price=product["discount_price"],
                product_desc=product["product_desc"],
                about_this_item=product["about_this_item"],
                ratings=product["ratings"],
                overall_review=product["overall_review"],
            )
        )

    return "".join(entries)
141
+
142
+
143
+
144
# Prompt template for the Mixtral instruct model.
# `{context}` receives the product listing built by create_context() and
# `{question}` the customer's message. The number of products is deliberately
# not stated here: the retrieval count is configurable (default 10), so a
# hard-coded "5" would contradict the context actually supplied.

template = """You are a friendly, conversational AI ecommerce assistant. The context includes a list of ecommerce products.
Use only the following context, to find the answer to the questions from the customer.

Its very important that you follow the below instructions.
-Dont use general knowledge to answer the question
-If you dont find the answer from the context or the question is not related to the context, just say that you don't know the answer.
-By any chance the customer should not know you are referring to a context.


Context:

{context}


Question:
{question}


Helpful Answer:"""
165
+
166
+
167
+ import random
168
+ import gradio as gr
169
+
170
chat_history = []

# Canned apologies shown to the user when answering fails; one is picked at
# random per failure.
_ERROR_RESPONSES = (
    "Sorry, I encountered an error while processing your request.",
    "Oops, something went wrong. Please try again later.",
    "I'm having trouble understanding that. Can you please rephrase?",
    "It seems there was an issue. Let's try something else.",
)


def respond(message, chat_history):
    """Answer one chat message and append the exchange to the history.

    Builds the product context for ``message``, fills the prompt template and
    asks the LLM. Any exception raised along the way is printed and replaced
    by a randomly chosen apology, so the UI callback itself never raises.

    Args:
        message: The text the user submitted.
        chat_history: List of ``(user_message, bot_reply)`` tuples; it is
            extended in place. ``None`` is treated as an empty history.

    Returns:
        A ``("", chat_history)`` tuple: the empty string clears the input
        textbox, and the history is the updated transcript.
    """
    # Without this guard a None history would make the error handler's own
    # append fail and surface as an unhandled exception.
    if chat_history is None:
        chat_history = []

    try:
        context = create_context(message, vector_store)
        print("context:-------------------------\n", context)
        prompt = PromptTemplate(template=template, input_variables=["context", "question"])
        prompt_formatted_str = prompt.format(
            context=context,
            question=message,
        )
        output = llm.invoke(prompt_formatted_str)
    except Exception as e:
        # Top-level UI boundary: log the failure and degrade to an apology.
        print("Error:", e)
        output = random.choice(_ERROR_RESPONSES)

    chat_history.append((message, output))
    return "", chat_history
197
+
198
+ # Define the Gradio interface
199
+ # chatbot = gr.Chatbot(height=450)
200
+ # msg = gr.Textbox(label="What would you like to know?")
201
+ # gr.Interface(
202
+ # fn=respond,
203
+ # inputs=msg,
204
+ # outputs=gr.Textbox(label="Response"),
205
+ # title="Conversational AI Chatbot",
206
+ # ).launch(
207
+ # share=True,
208
+ # )
209
+
210
+ # # Define Gradio components
211
# Chat UI: transcript, question box, submit button and clear button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=450)
    msg = gr.Textbox(label="What would you like to know?")
    btn = gr.Button("Submit")
    # Resets both the question box and the transcript.
    clear = gr.ClearButton(value="Clear Console", components=[msg, chatbot])

    # Clicking "Submit" sends the message to respond(); the returned pair
    # clears the textbox and refreshes the transcript.
    btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

    # Pressing Enter in the textbox does the same as clicking "Submit".
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

# Close any Gradio apps still running in this process before starting this one.
gr.close_all()

demo.launch()