pvanand committed
Commit d9309f4
1 Parent(s): bb33281

add llm endpoint

Files changed (1):
  1. main.py +92 -2
main.py CHANGED
@@ -1,13 +1,15 @@
-from fastapi import FastAPI, HTTPException, Query, Path
+from fastapi import FastAPI, HTTPException, Query, Path, Header, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
-from typing import List
+from typing import List, Optional, Dict
 import json
 import os
 import logging
 from txtai.embeddings import Embeddings
 import pandas as pd
 import glob
+import uuid
+import httpx
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -19,6 +21,8 @@ app = FastAPI(
     version="1.0.0"
 )
 
+CHAT_AUTH_KEY = os.environ.get("CHAT_AUTH_KEY", "default_secret_key")
+
 # Enable CORS
 app.add_middleware(
     CORSMiddleware,
@@ -138,6 +142,92 @@ def check_and_index_csv_files():
         else:
             logger.info(f"Index already exists for: {csv_file}")
 
+
+# ... [Previous code for DocumentRequest, QueryRequest, save_embeddings, load_embeddings, create_index, query_index, process_csv_file, check_and_index_csv_files remains the same]
+
+class ChatRequest(BaseModel):
+    query: str = Field(..., description="The user's query")
+    index_id: str = Field(..., description="Unique identifier for the index to query")
+    conversation_id: Optional[str] = Field(None, description="Unique identifier for the conversation")
+    model_id: str = Field(..., description="Identifier for the LLM model to use")
+    user_id: str = Field(..., description="Unique identifier for the user")
+
+async def get_api_key(x_api_key: str = Header(...)) -> str:
+    if x_api_key != CHAT_AUTH_KEY:
+        raise HTTPException(status_code=403, detail="Invalid API key")
+    return x_api_key
+
+async def make_llm_request(api_key: str, llm_request: Dict[str, str]) -> Dict:
+    """
+    Make a request to the LLM service.
+    """
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                "https://pvanand-audio-chat.hf.space/llm-agent",
+                headers={
+                    "accept": "application/json",
+                    "X-API-Key": api_key,
+                    "Content-Type": "application/json"
+                },
+                json=llm_request
+            )
+
+            if response.status_code != 200:
+                raise HTTPException(status_code=response.status_code, detail="Error from LLM service")
+
+            return response.json()
+    except httpx.HTTPError as e:
+        logger.error(f"HTTP error occurred while making LLM request: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"HTTP error occurred while making LLM request: {str(e)}")
+    except Exception as e:
+        logger.error(f"Unexpected error occurred while making LLM request: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Unexpected error occurred while making LLM request: {str(e)}")
+
+
+@app.post("/rag-chat/", response_model=dict, tags=["Chat"])
+async def chat(request: ChatRequest, api_key: str = Depends(get_api_key)):
+    """
+    Chat endpoint that uses embeddings search and an LLM for response generation.
+    """
+    try:
+        # Load the document list for the specified index
+        document_list = load_embeddings(request.index_id)
+
+        # Perform embeddings search
+        search_results = embeddings.search(request.query, 5)  # Get top 5 relevant results
+        context = "\n".join([document_list[idx[0]] for idx in search_results])
+
+        # Create RAG prompt
+        rag_prompt = f"Please answer the user's question based on the following context.\n\nContext:\n{context}\n\nUser's question: {request.query}\n\nAnswer:"
+
+        rag_system_prompt = "You are a helpful assistant tasked with providing answers from the given context."
+        # Generate a conversation_id if not provided
+        conversation_id = request.conversation_id or str(uuid.uuid4())
+
+        # Prepare the request for the LLM service
+        llm_request = {
+            "prompt": rag_prompt,
+            "system_message": rag_system_prompt,
+            "model_id": request.model_id,
+            "conversation_id": conversation_id,
+            "user_id": request.user_id
+        }
+
+        # Make the request to the LLM service
+        llm_response = await make_llm_request(api_key, llm_request)
+
+        logger.info(f"Chat response generated successfully for user: {request.user_id}")
+        return {
+            "response": llm_response,
+            "conversation_id": conversation_id
+        }
+
+    except Exception as e:
+        logger.error(f"Error in chat endpoint: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error in chat endpoint: {str(e)}")
+
+
 @app.on_event("startup")
 async def startup_event():
     check_and_index_csv_files()
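
For reference, a minimal way to exercise the new /rag-chat/ endpoint once the app is running. This is a sketch, not part of the commit: the base URL and port (7860, the usual Spaces default) and the index_id/model_id values are placeholders, and the X-API-Key must match whatever CHAT_AUTH_KEY is set to in the environment.

import asyncio
import httpx

async def main():
    async with httpx.AsyncClient(timeout=60) as client:
        response = await client.post(
            "http://localhost:7860/rag-chat/",            # assumed host/port
            headers={"X-API-Key": "default_secret_key"},  # must equal CHAT_AUTH_KEY
            json={
                "query": "What does the dataset say about revenue?",
                "index_id": "example_index",  # placeholder index name
                "model_id": "example-model",  # placeholder model id
                "user_id": "user-123",
                # "conversation_id" omitted -> server generates a uuid4
            },
        )
    print(response.status_code)
    print(response.json())  # {"response": <LLM service reply>, "conversation_id": "..."}

asyncio.run(main())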
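
The retrieval step relies on a contract the diff only implies: the txtai index was built over an enumerated list of strings, so each search hit is an (id, score) tuple whose integer id indexes back into document_list. A standalone sketch of that contract (the model name is illustrative; any sentence-transformers model works):

from txtai.embeddings import Embeddings

docs = ["first row as text", "second row as text", "third row as text"]

embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2"})
embeddings.index([(i, text, None) for i, text in enumerate(docs)])

# search() returns [(id, score), ...]; idx[0] is the integer id,
# which is why the endpoint can do document_list[idx[0]]
results = embeddings.search("second", 2)
context = "\n".join(docs[idx[0]] for idx in results)
print(results)
print(context)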
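
And a small check of the get_api_key guard using FastAPI's TestClient. This assumes main.py is importable and CHAT_AUTH_KEY is left at its insecure default; note that omitting the X-API-Key header entirely yields a 422 validation error from FastAPI rather than the 403 raised here.

from fastapi.testclient import TestClient
from main import app

client = TestClient(app)

payload = {
    "query": "hello",
    "index_id": "example_index",  # placeholder
    "model_id": "example-model",  # placeholder
    "user_id": "user-123",
}

# Wrong key -> rejected by the dependency before the handler body runs
r = client.post("/rag-chat/", json=payload, headers={"X-API-Key": "wrong-key"})
assert r.status_code == 403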