change to sync
main.py CHANGED
@@ -154,63 +154,49 @@ class ChatRequest(BaseModel):
     user_id: str = Field(..., description="Unique identifier for the user")
     enable_followup: bool = Field(default=False, description="Flag to enable follow-up questions")
 
-
+def get_api_key(x_api_key: str = Header(...)) -> str:
     if x_api_key != CHAT_AUTH_KEY:
         raise HTTPException(status_code=403, detail="Invalid API key")
     return x_api_key
 
-
+def stream_llm_request(api_key: str, llm_request: dict, endpoint_url: str):
     """
-    Make a streaming request to the LLM service.
+    Make a streaming request to the LLM service using requests.
     """
     try:
-
-
-
-
-
-
-
-
-
-
-
-        if
-            raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
-
-            async for chunk in response.aiter_text():
+        headers = {
+            "accept": "text/event-stream",
+            "X-API-Key": api_key,
+            "Content-Type": "application/json"
+        }
+
+        with requests.post(endpoint_url, headers=headers, json=llm_request, stream=True) as response:
+            if response.status_code != 200:
+                raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
+
+            for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
+                if chunk:
                     yield chunk
-    except
+    except requests.RequestException as e:
         logger.error(f"HTTP error occurred while making LLM request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"HTTP error occurred while making LLM request: {str(e)}")
     except Exception as e:
         logger.error(f"Unexpected error occurred while making LLM request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Unexpected error occurred while making LLM request: {str(e)}")
 
-
 @app.post("/chat/", response_class=StreamingResponse, tags=["Chat"])
 async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key: str = Depends(get_api_key)):
-    """
-    Chat endpoint that uses embeddings search and LLM for response generation.
-    """
     try:
-        # Load embeddings for the specified index
         document_list = load_embeddings(request.index_id)
-
-        # Perform embeddings search
-        search_results = embeddings.search(request.query, 5) # Get top 5 relevant results
+        search_results = embeddings.search(request.query, 5)
         context = "\n".join([document_list[idx[0]] for idx in search_results])
 
-        # Create RAG prompt
         rag_prompt = f"Based on the following context, please answer the user's question:\n\nContext:\n{context}\n\nUser's question: {request.query}\n\nAnswer:"
         system_prompt = "You are a helpful assistant tasked with providing answers using the context provided"
 
-        # Generate conversation_id if not provided
         conversation_id = request.conversation_id or str(uuid.uuid4())
 
         if request.enable_followup:
-            # Prepare the request for the LLM service
-            pass
             llm_request = {
                 "query": rag_prompt,
                 "model_id": 'openai/gpt-4o-mini',
@@ -218,7 +204,6 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
                 "user_id": request.user_id
             }
             endpoint_url = "https://pvanand-general-chat.hf.space/v2/followup-agent"
-
         else:
             llm_request = {
                 "prompt": rag_prompt,
@@ -230,17 +215,13 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
             endpoint_url = "https://pvanand-audio-chat.hf.space/llm-agent"
 
         logger.info(f"Starting chat response generation for user: {request.user_id} Full request: {llm_request}")
-
+
+        def response_generator():
             full_response = ""
-
+            for chunk in stream_llm_request(api_key, llm_request, endpoint_url):
                 full_response += chunk
                 yield chunk
-            logger.info(f"Finished chat response generation for user: {request.user_id} Full response{full_response}")
-
-        # Here you might want to add logic to save the conversation or perform other background tasks
-        # For example:
-        # background_tasks.add_task(save_conversation, request.user_id, conversation_id, request.query, full_response)
-
+            logger.info(f"Finished chat response generation for user: {request.user_id} Full response: {full_response}")
 
         return StreamingResponse(response_generator(), media_type="text/event-stream")
 
@@ -249,6 +230,7 @@ async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key:
         raise HTTPException(status_code=500, detail=f"Error in chat endpoint: {str(e)}")
 
 
+
 @app.on_event("startup")
 async def startup_event():
     check_and_index_csv_files()