|
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings |
|
from llama_index.core.node_parser import SentenceSplitter |
|
from llama_index.embeddings.gemini import GeminiEmbedding |
|
from llama_index.llms.gemini import Gemini |
|
import logging |
|
import os |
|
|
|
# Gemini API key is read from the environment; may be None if unset,
# in which case the Gemini client will fail at call time — TODO confirm
# desired behavior (fail fast vs. lazy failure).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")


# Module-level logger, per stdlib convention.
logger = logging.getLogger(__name__)

# NOTE(review): basicConfig at import time configures the root logger as a
# side effect; fine for a script, surprising if imported as a library.
logging.basicConfig(level=logging.INFO)
|
|
|
def load_data(data_path: str) -> list | bool:
    """
    Load documents from a directory.

    Args:
        data_path (str): Path to the directory containing documents.

    Returns:
        list: Loaded llama_index Document objects on success (the original
            annotation ``list[str]`` was inaccurate — ``load_data`` yields
            Document objects, not strings).
        bool: ``False`` if loading fails. Sentinel kept for backward
            compatibility; callers should check ``if docs is False``.
    """
    try:
        # Lazy %-style args avoid building the message when INFO is disabled.
        logger.info("Loading documents from %s", data_path)
        documents = SimpleDirectoryReader(data_path).load_data()
        logger.info("Successfully loaded %d documents", len(documents))
        return documents
    except Exception:
        # logger.exception records the full traceback; returning False
        # preserves the original error-sentinel contract instead of raising.
        logger.exception("Failed to load data from %s", data_path)
        return False
|
|
|
def get_gemini_embedding(documents: list):
    """
    Build a query engine over *documents* using Gemini embeddings.

    Args:
        documents (list): llama_index Document objects, as returned by
            ``load_data`` (the original annotation ``str`` was inaccurate).

    Returns:
        QueryEngine: Configured query engine on success.
        bool: ``False`` if setup fails. Sentinel kept for backward
            compatibility with existing callers.
    """
    try:
        logger.info("Initializing Gemini embedding model and LLM")
        gemini_embedding_model = GeminiEmbedding(model_name="models/embedding-001")
        llm = Gemini(model="models/gemini-1.5-flash", api_key=GEMINI_API_KEY)

        # Register globally so downstream llama_index components (index,
        # query engine) pick these up without explicit wiring.
        Settings.llm = llm
        Settings.embed_model = gemini_embedding_model
        Settings.node_parser = SentenceSplitter(chunk_size=1000, chunk_overlap=20)

        logger.info("Creating vector store index")
        # embed_model is also passed explicitly; Settings.embed_model alone
        # would suffice, but the explicit argument makes the dependency clear.
        index = VectorStoreIndex.from_documents(
            documents=documents,
            embed_model=gemini_embedding_model,
        )

        logger.info("Creating query engine")
        return index.as_query_engine()
    except Exception:
        # logger.exception captures the traceback the original f-string
        # error message discarded; False preserves the sentinel contract.
        logger.exception("Failed to setup Gemini embedding")
        return False