pvanand committed
Commit bc15143
Parent: d9309f4

add streaming response

Files changed (1)
  1. main.py +32 -28
main.py CHANGED
@@ -1,7 +1,8 @@
-from fastapi import FastAPI, HTTPException, Query, Path, Header, Depends
+from fastapi import FastAPI, HTTPException, Header, Depends, BackgroundTasks, Query
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
-from typing import List, Optional, Dict
+from typing import List, Optional, Dict, AsyncGenerator
 import json
 import os
 import logging
@@ -10,7 +11,7 @@ import pandas as pd
 import glob
 import uuid
 import httpx
-
+import asyncio
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -157,26 +158,27 @@ async def get_api_key(x_api_key: str = Header(...)) -> str:
         raise HTTPException(status_code=403, detail="Invalid API key")
     return x_api_key
 
-async def make_llm_request(api_key: str, llm_request: Dict[str, str]) -> Dict:
+async def stream_llm_request(api_key: str, llm_request: Dict[str, str]) -> AsyncGenerator[str, None]:
     """
-    Make a request to the LLM service.
+    Make a streaming request to the LLM service.
     """
     try:
         async with httpx.AsyncClient() as client:
-            response = await client.post(
+            async with client.stream(
+                "POST",
                 "https://pvanand-audio-chat.hf.space/llm-agent",
                 headers={
-                    "accept": "application/json",
+                    "accept": "text/event-stream",
                     "X-API-Key": api_key,
                     "Content-Type": "application/json"
                 },
                 json=llm_request
-            )
-
-            if response.status_code != 200:
-                raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
-
-            return response.json()
+            ) as response:
+                if response.status_code != 200:
+                    raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
+
+                async for chunk in response.aiter_text():
+                    yield chunk
     except httpx.HTTPError as e:
         logger.error(f"HTTP error occurred while making LLM request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"HTTP error occurred while making LLM request: {str(e)}")
@@ -184,9 +186,8 @@ async def make_llm_request(api_key: str, llm_request: Dict[str, str]) -> Dict:
         logger.error(f"Unexpected error occurred while making LLM request: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Unexpected error occurred while making LLM request: {str(e)}")
 
-
-@app.post("/rag-chat/", response_model=dict, tags=["Chat"])
-async def chat(request: ChatRequest, api_key: str = Depends(get_api_key)):
+@app.post("/chat/", response_class=StreamingResponse, tags=["Chat"])
+async def chat(request: ChatRequest, background_tasks: BackgroundTasks, api_key: str = Depends(get_api_key)):
     """
     Chat endpoint that uses embeddings search and LLM for response generation.
     """
@@ -199,29 +200,32 @@ async def chat(request: ChatRequest, api_key: str = Depends(get_api_key)):
         context = "\n".join([document_list[idx[0]] for idx in search_results])
 
         # Create RAG prompt
-        rag_prompt = f"please answer the user's question:\n\nUser's question:{request.query} Based on the following context, \n\nContext:\n{context} \n\nAnswer:"
+        rag_prompt = f"Based on the following context, please answer the user's question:\n\nContext:\n{context}\n\nUser's question: {request.query}\n\nAnswer:"
 
-        rag_system_prompt = "You are a helpful assistant tasked with providing answers from the given context"
         # Generate conversation_id if not provided
        conversation_id = request.conversation_id or str(uuid.uuid4())
 
         # Prepare the request for the LLM service
         llm_request = {
-            "prompt": rag_prompt,
-            "system_message": rag_system_prompt,
+            "prompt": request.query,
+            "system_message": rag_prompt,
             "model_id": request.model_id,
             "conversation_id": conversation_id,
             "user_id": request.user_id
         }
 
-        # Make request to LLM service
-        llm_response = await make_llm_request(api_key, llm_request)
-
-        logger.info(f"Chat response generated successfully for user: {request.user_id}")
-        return {
-            "response": llm_response,
-            "conversation_id": conversation_id
-        }
+        async def response_generator():
+            full_response = ""
+            async for chunk in stream_llm_request(api_key, llm_request):
+                full_response += chunk
+                yield chunk
+
+            # Here you might want to add logic to save the conversation or perform other background tasks
+            # For example:
+            # background_tasks.add_task(save_conversation, request.user_id, conversation_id, request.query, full_response)
+
+        logger.info(f"Starting chat response generation for user: {request.user_id}")
+        return StreamingResponse(response_generator(), media_type="text/event-stream")
 
     except Exception as e:
         logger.error(f"Error in chat endpoint: {str(e)}")
 
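The commented-out background_tasks.add_task(save_conversation, ...) hook leaves persistence unimplemented. One possible shape for that helper, purely illustrative (the function name comes from the comment in the diff, but the signature, file layout, and storage format here are assumptions):

import json
import os
from datetime import datetime, timezone

def save_conversation(user_id: str, conversation_id: str, query: str, response: str) -> None:
    """Hypothetical sink for completed exchanges: appends one JSON line per turn."""
    record = {
        "user_id": user_id,
        "conversation_id": conversation_id,
        "query": query,
        "response": response,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    os.makedirs("conversations", exist_ok=True)  # assumed local storage directory
    path = os.path.join("conversations", f"{conversation_id}.jsonl")
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")

FastAPI's BackgroundTasks runs registered callables after the response finishes, so a synchronous helper like this would not block the stream.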