Spaces:

Maximofn
/

IriusRiskTestChallenge

Sleeping

App Files Community

Maximofn committed on Mar 19

Commit

0065184

1 Parent(s): 4b4c28d

feat(SECURITY): :lock: Implement rate limiting for API endpoints and update request handling for text generation and summarization.

Browse files

Files changed (2) hide show

app.py +30 -13
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -4,9 +4,12 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from functools import partial
 from fastapi.responses import JSONResponse
-from fastapi import Security, Depends
 from fastapi.security.api_key import APIKeyHeader, APIKey
 from fastapi.middleware.cors import CORSMiddleware
 from langchain_core.messages import HumanMessage, AIMessage
 from langgraph.checkpoint.memory import MemorySaver
@@ -16,7 +19,10 @@ import os
 from dotenv import load_dotenv
 load_dotenv()
-# Configuración de API Key
 API_KEY_NAME = "X-API-Key"
 API_KEY = os.getenv("API_KEY")
 api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
@@ -148,6 +154,10 @@ app = FastAPI(
     ]
 )
 # Configure the security scheme in the OpenAPI documentation
 app.openapi_tags = [
     {"name": "Authentication", "description": "Protected endpoints that require API Key"}
@@ -186,21 +196,25 @@ async def general_exception_handler(request, exc):
 # Welcome endpoint
 @app.get("/")
-async def api_home():
     """Welcome endpoint"""
     return {"detail": "Welcome to Máximo Fernández Núñez IriusRisk test challenge"}
 # Generate endpoint
 @app.post("/generate")
 async def generate(
-    request: QueryRequest,
     api_key: APIKey = Depends(get_api_key)
 ):
     """
     Endpoint to generate text using the language model
     Args:
-        request: QueryRequest
             query: str
             thread_id: str = "default"
             system_prompt: str = DEFAULT_SYSTEM_PROMPT
@@ -211,16 +225,16 @@ async def generate(
     """
     try:
         # Configure the thread ID
-        config = {"configurable": {"thread_id": request.thread_id}}
         # Create the input message
-        input_messages = [HumanMessage(content=request.query)]
         # Invoke the graph with custom system prompt
         # Combine config parameters into a single dictionary
         combined_config = {
             **config,
-            "model": {"system_prompt": request.system_prompt}
         }
         # Invoke the graph with proper argument count
@@ -245,15 +259,18 @@ async def generate(
         )
 @app.post("/summarize")
 async def summarize(
-    request: SummaryRequest,
     api_key: APIKey = Depends(get_api_key)
 ):
     """
     Endpoint to generate a summary using the language model
     Args:
-        request: SummaryRequest
             text: str - The text to summarize
             thread_id: str = "default"
             max_length: int = 200 - Maximum summary length
@@ -264,13 +281,13 @@ async def summarize(
     """
     try:
         # Configure the thread ID
-        config = {"configurable": {"thread_id": request.thread_id}}
         # Create a specific system prompt for summarization
-        summary_system_prompt = f"Make a summary of the following text in no more than {request.max_length} words. Keep the most important information and eliminate unnecessary details."
         # Create the input message
-        input_messages = [HumanMessage(content=request.text)]
         # Combine config parameters into a single dictionary
         combined_config = {

 import torch
 from functools import partial
 from fastapi.responses import JSONResponse
+from fastapi import Security, Depends, Request
 from fastapi.security.api_key import APIKeyHeader, APIKey
 from fastapi.middleware.cors import CORSMiddleware
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.util import get_remote_address
+from slowapi.errors import RateLimitExceeded
 from langchain_core.messages import HumanMessage, AIMessage
 from langgraph.checkpoint.memory import MemorySaver
 from dotenv import load_dotenv
 load_dotenv()
+# Rate Limiter configuration
+limiter = Limiter(key_func=get_remote_address)
+# API Key configuration
 API_KEY_NAME = "X-API-Key"
 API_KEY = os.getenv("API_KEY")
 api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
     ]
 )
+# Configure the rate limiter in the application
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
 # Configure the security scheme in the OpenAPI documentation
 app.openapi_tags = [
     {"name": "Authentication", "description": "Protected endpoints that require API Key"}
 # Welcome endpoint
 @app.get("/")
+@limiter.limit("10/minute")
+async def api_home(request: Request):
     """Welcome endpoint"""
     return {"detail": "Welcome to Máximo Fernández Núñez IriusRisk test challenge"}
 # Generate endpoint
 @app.post("/generate")
+@limiter.limit("5/minute")
 async def generate(
+    request: Request,
+    query_request: QueryRequest,
     api_key: APIKey = Depends(get_api_key)
 ):
     """
     Endpoint to generate text using the language model
     Args:
+        request: Request - FastAPI request object for rate limiting
+        query_request: QueryRequest
             query: str
             thread_id: str = "default"
             system_prompt: str = DEFAULT_SYSTEM_PROMPT
     """
     try:
         # Configure the thread ID
+        config = {"configurable": {"thread_id": query_request.thread_id}}
         # Create the input message
+        input_messages = [HumanMessage(content=query_request.query)]
         # Invoke the graph with custom system prompt
         # Combine config parameters into a single dictionary
         combined_config = {
             **config,
+            "model": {"system_prompt": query_request.system_prompt}
         }
         # Invoke the graph with proper argument count
         )
 @app.post("/summarize")
+@limiter.limit("5/minute")
 async def summarize(
+    request: Request,
+    summary_request: SummaryRequest,
     api_key: APIKey = Depends(get_api_key)
 ):
     """
     Endpoint to generate a summary using the language model
     Args:
+        request: Request - FastAPI request object for rate limiting
+        summary_request: SummaryRequest
             text: str - The text to summarize
             thread_id: str = "default"
             max_length: int = 200 - Maximum summary length
     """
     try:
         # Configure the thread ID
+        config = {"configurable": {"thread_id": summary_request.thread_id}}
         # Create a specific system prompt for summarization
+        summary_system_prompt = f"Make a summary of the following text in no more than {summary_request.max_length} words. Keep the most important information and eliminate unnecessary details."
         # Create the input message
+        input_messages = [HumanMessage(content=summary_request.text)]
         # Combine config parameters into a single dictionary
         combined_config = {

requirements.txt CHANGED Viewed

@@ -8,4 +8,5 @@ langgraph>=0.2.27
 python-dotenv>=1.0.0
 transformers>=4.36.0
 torch>=2.0.0
-accelerate>=0.26.0

 python-dotenv>=1.0.0
 transformers>=4.36.0
 torch>=2.0.0
+accelerate>=0.26.0
+slowapi>=0.1.10