import datetime
import os
import time

import google.api_core.exceptions
import google.generativeai as genai

from chroma_db_utils import get_relevant_passage

# Constants
MAX_RETRIES = 3
RETRY_DELAY = 1  # Initial delay in seconds (doubled on each retry — exponential backoff)
MODEL_NAME = "gemini-1.5-flash"
REQUESTS_PER_MINUTE = 3  # Free tier limit
REQUEST_INTERVAL = 60 / REQUESTS_PER_MINUTE  # Ensures we stay within the rate limit


def make_rag_prompt(query: str, relevant_passage: str) -> str:
    """
    Create a prompt for the RAG model.

    Quotes are stripped and newlines flattened from the passage so the
    reference text cannot break out of the prompt template.

    Args:
        query: The user's question.
        relevant_passage: Retrieved context text to ground the answer in.

    Returns:
        The fully formatted prompt string.
    """
    escaped = relevant_passage.replace("'", "").replace('"', "").replace("\n", " ")
    prompt = f'''
You are a helpful and informative bot that answers questions using the REFERENCE TEXT below.
If the REFERENCE TEXT is irrelevant to the question, say "I cannot answer this question based on the provided information."

QUESTION: {query}

REFERENCE TEXT: {escaped}

ANSWER:
'''
    return prompt


def generate_answer(prompt: str) -> str:
    """
    Call the Gemini API with retries and exponential backoff on rate limits.

    Args:
        prompt: The fully formatted prompt to send to the model.

    Returns:
        The model's text response.

    Raises:
        ValueError: If the GEMINI_API_KEY environment variable is not set.
        Exception: If all MAX_RETRIES attempts are exhausted.
        google.api_core.exceptions.ResourceExhausted: Re-raised for
            non-429 resource-exhaustion errors.
    """
    gemini_api_key = os.getenv("GEMINI_API_KEY")
    if not gemini_api_key:
        raise ValueError("Gemini API Key not provided.")

    genai.configure(api_key=gemini_api_key)
    model = genai.GenerativeModel(MODEL_NAME)

    for attempt in range(MAX_RETRIES):
        start_time = datetime.datetime.now()
        print(f"{start_time}: Making Gemini API request (attempt {attempt + 1}/{MAX_RETRIES})...")
        try:
            response = model.generate_content(prompt)
            end_time = datetime.datetime.now()
            print(f"{end_time}: Gemini API request successful. Time taken: {end_time - start_time}")
            # Enforce rate limiting
            # NOTE(review): the pre-emptive rate-limit sleep is disabled;
            # REQUEST_INTERVAL is currently unused. Re-enable if 429s persist.
            # time.sleep(REQUEST_INTERVAL)
            return response.text
        except google.api_core.exceptions.ResourceExhausted as e:
            # ResourceExhausted maps to HTTP 429; the explicit check is kept
            # defensively in case a different code ever surfaces here.
            if e.code == 429:  # Too Many Requests
                delay = RETRY_DELAY * (2 ** attempt)  # Exponential backoff
                print(f"Rate limit hit. Retrying in {delay} seconds (attempt {attempt + 1}/{MAX_RETRIES})...")
                time.sleep(delay)
            else:
                raise  # Re-raise other exceptions

    raise Exception("Max retries exceeded for Gemini API request.")


def handle_query(query: str, db, n_results: int = 5) -> str:
    """
    Handle a user query by retrieving relevant passages and generating an answer.

    Args:
        query: The user's question.
        db: The Chroma collection to search (passed through to
            get_relevant_passage).
        n_results: Number of passages to retrieve for context.

    Returns:
        The generated answer text.
    """
    relevant_passages = get_relevant_passage(query, db, n_results)
    relevant_passage_str = " ".join(relevant_passages)
    prompt = make_rag_prompt(query, relevant_passage=relevant_passage_str)
    return generate_answer(prompt)