Seif-aber
document q&a assistant with Gemini & RAG
355fe19
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
import logging
import os
# API key for Gemini, read once from the environment at import time
# (None when the GEMINI_API_KEY variable is not set).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Logging setup: root configuration at INFO level plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_data(data_path: str) -> "list | bool":
    """
    Load documents from a directory.

    Errors are never propagated: any exception raised while reading is
    logged (with traceback) and reported through the ``False`` return value.

    Args:
        data_path (str): Path to the directory containing documents

    Returns:
        list | bool: The documents returned by
            ``SimpleDirectoryReader(data_path).load_data()``, or ``False``
            if loading fails.
    """
    # NOTE: the return annotation is quoted so the ``|`` union is not
    # evaluated at definition time on interpreters older than 3.10.
    try:
        logger.info("Loading documents from %s", data_path)
        documents = SimpleDirectoryReader(data_path).load_data()
        logger.info("Successfully loaded %d documents", len(documents))
        return documents
    except Exception as e:
        # False is the documented failure value, so log and return it
        # instead of re-raising; logger.exception keeps the traceback.
        logger.exception("Failed to load data: %s", e)
        return False
def get_gemini_embedding(documents: list):
    """
    Create a query engine using Gemini embeddings.

    Builds a ``VectorStoreIndex`` over ``documents`` using the
    ``models/embedding-001`` embedding model and returns a query engine
    backed by the ``models/gemini-1.5-flash`` LLM.

    Side effects:
        Overwrites the global ``Settings.llm``, ``Settings.embed_model`` and
        ``Settings.node_parser`` (a ``SentenceSplitter`` with
        ``chunk_size=1000`` and ``chunk_overlap=20``).

    Args:
        documents (list): Documents to index, e.g. the result of a directory
            load. ``None`` or ``False`` is rejected.

    Returns:
        QueryEngine: Configured query engine or False if setup fails
    """
    # A failed load is reported as False (or None); bail out early with a
    # clear message instead of failing inside index construction.
    if documents is None or documents is False:
        logger.error("Failed to setup Gemini embedding: no documents were provided")
        return False

    try:
        logger.info("Initializing Gemini embedding model and LLM")
        # Pass the same key to both clients so the embedding model does not
        # depend on a different credential source than the LLM.
        gemini_embedding_model = GeminiEmbedding(
            model_name="models/embedding-001",
            api_key=GEMINI_API_KEY,
        )
        llm = Gemini(model="models/gemini-1.5-flash", api_key=GEMINI_API_KEY)

        # Configure global settings
        Settings.llm = llm
        Settings.embed_model = gemini_embedding_model
        Settings.node_parser = SentenceSplitter(chunk_size=1000, chunk_overlap=20)

        logger.info("Creating vector store index")
        index = VectorStoreIndex.from_documents(
            documents=documents,
            embed_model=gemini_embedding_model,
        )

        logger.info("Creating query engine")
        return index.as_query_engine()
    except Exception as e:
        # False is the documented failure value; keep the traceback in the log.
        logger.exception("Failed to setup Gemini embedding: %s", e)
        return False