|
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings |
|
from llama_index.core.node_parser import SentenceSplitter |
|
from llama_index.embeddings.gemini import GeminiEmbedding |
|
from llama_index.llms.gemini import Gemini |
|
import logging |
|
import os |
|
|
|
# Gemini API key is read from the environment; may be None if unset,
# in which case the Gemini client will fail at call time — TODO confirm
# desired behavior (fail fast vs. lazy failure).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")


# Module-level logger, per stdlib convention.
logger = logging.getLogger(__name__)

# NOTE(review): basicConfig at import time configures the root logger as a
# side effect; fine for a script, surprising if imported as a library.
logging.basicConfig(level=logging.INFO)
|
|
|
def load_data(data_path: str) -> list | bool:
    """
    Load documents from a directory.

    Args:
        data_path (str): Path to the directory containing documents.

    Returns:
        list: Loaded llama_index Document objects on success (the original
            annotation ``list[str]`` was inaccurate — ``load_data`` yields
            Document objects, not strings).
        bool: ``False`` if loading fails. Sentinel kept for backward
            compatibility; callers should check ``if docs is False``.
    """
    try:
        # Lazy %-style args avoid building the message when INFO is disabled.
        logger.info("Loading documents from %s", data_path)
        documents = SimpleDirectoryReader(data_path).load_data()
        logger.info("Successfully loaded %d documents", len(documents))
        return documents
    except Exception:
        # logger.exception records the full traceback; returning False
        # preserves the original error-sentinel contract instead of raising.
        logger.exception("Failed to load data from %s", data_path)
        return False
|
|
|
def get_gemini_embedding(documents: list):
    """
    Build a query engine over *documents* using Gemini embeddings.

    Args:
        documents (list): llama_index Document objects, as returned by
            ``load_data`` (the original annotation ``str`` was inaccurate).

    Returns:
        QueryEngine: Configured query engine on success.
        bool: ``False`` if setup fails. Sentinel kept for backward
            compatibility with existing callers.
    """
    try:
        logger.info("Initializing Gemini embedding model and LLM")
        gemini_embedding_model = GeminiEmbedding(model_name="models/embedding-001")
        llm = Gemini(model="models/gemini-1.5-flash", api_key=GEMINI_API_KEY)

        # Register globally so downstream llama_index components (index,
        # query engine) pick these up without explicit wiring.
        Settings.llm = llm
        Settings.embed_model = gemini_embedding_model
        Settings.node_parser = SentenceSplitter(chunk_size=1000, chunk_overlap=20)

        logger.info("Creating vector store index")
        # embed_model is also passed explicitly; Settings.embed_model alone
        # would suffice, but the explicit argument makes the dependency clear.
        index = VectorStoreIndex.from_documents(
            documents=documents,
            embed_model=gemini_embedding_model,
        )

        logger.info("Creating query engine")
        return index.as_query_engine()
    except Exception:
        # logger.exception captures the traceback the original f-string
        # error message discarded; False preserves the sentinel contract.
        logger.exception("Failed to setup Gemini embedding")
        return False