import os
from dotenv import load_dotenv, find_dotenv
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.readers.file import PyMuPDFReader
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core import KnowledgeGraphIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
def initialize_openai_creds():
    """Load environment variables and return credentials for each model."""
    dotenv_path = find_dotenv()
    if dotenv_path == "":
        print("No .env file found. Make sure the .env file is in the correct directory.")
    else:
        print(f".env file found at: {dotenv_path}")
        load_dotenv(dotenv_path)

    # GPT-3.5 credentials
    gpt35_creds = {
        "api_key": os.getenv("AZURE_OPENAI_API_KEY_GPT35"),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "endpoint": os.getenv("AZURE_OPENAI_ENDPOINT_GPT35"),
        "temperature": 0,  # Default temperature for models
        "deployment_name": os.getenv("AZURE_DEPLOYMENT_NAME_GPT35")
    }

    # GPT-4o-mini credentials (same API version as GPT-3.5, but its own key, endpoint, and deployment name)
    gpt4o_mini_creds = {
        "api_key": os.getenv("AZURE_OPENAI_API_KEY_GPT4O_MINI"),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "endpoint": os.getenv("AZURE_OPENAI_ENDPOINT_GPT4O_MINI"),
        "temperature": 0,  # Default temperature for models
        "deployment_name": os.getenv("GPT4O_MINI_DEPLOYMENT_NAME")
    }

    # GPT-4o credentials
    gpt4o_creds = {
        "api_key": os.getenv("GPT4O_API_KEY"),
        "api_version": os.getenv("GPT4O_API_VERSION"),
        "endpoint": os.getenv("GPT4O_AZURE_ENDPOINT"),
        "deployment_name": os.getenv("GPT4O_DEPLOYMENT_NAME"),
        "temperature": float(os.getenv("GPT4O_TEMPERATURE", 0))  # os.getenv returns a string, so cast
    }

    return gpt35_creds, gpt4o_mini_creds, gpt4o_creds
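
# Usage sketch (assumes the environment variables referenced above are defined
# in .env; kept as comments so importing this module has no side effects):
#     gpt35_creds, gpt4o_mini_creds, gpt4o_creds = initialize_openai_creds()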
def create_llm(model: str, gpt35_creds: dict, gpt4o_mini_creds: dict, gpt4o_creds: dict):
    """
    Initialize and return the Azure OpenAI LLM for the selected model.

    :param model: The model to initialize ("gpt35", "gpt4o", or "gpt-4o-mini").
    :param gpt35_creds: Credentials for gpt35.
    :param gpt4o_mini_creds: Credentials for gpt-4o-mini.
    :param gpt4o_creds: Credentials for gpt4o.
    """
    creds_by_model = {
        "gpt35": gpt35_creds,
        "gpt-4o-mini": gpt4o_mini_creds,
        "gpt4o": gpt4o_creds
    }
    if model not in creds_by_model:
        raise ValueError(f"Invalid model: {model}. Choose from 'gpt35', 'gpt4o', or 'gpt-4o-mini'.")
    creds = creds_by_model[model]
    return AzureOpenAI(
        deployment_name=creds["deployment_name"],
        temperature=creds["temperature"],
        api_key=creds["api_key"],
        azure_endpoint=creds["endpoint"],
        api_version=creds["api_version"]
    )
def create_chat_engine(retriever, memory, llm):
    """Create and return a ContextChatEngine using the provided retriever, memory, and LLM."""
    return ContextChatEngine.from_defaults(
        retriever=retriever,
        memory=memory,
        llm=llm
    )
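
# Usage sketch: the retriever typically comes from an index built below and the
# memory from ChatMemoryBuffer; the token limit and question are illustrative
# choices, not requirements of this module.
#     from llama_index.core.memory import ChatMemoryBuffer
#     memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
#     retriever = graph_index.as_retriever(similarity_top_k=3)
#     chat_engine = create_chat_engine(retriever, memory, llm)
#     print(chat_engine.chat("What does the document say about X?"))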
def load_documents(filepaths):
    """
    Load and return documents from the specified file paths.

    :param filepaths: A string (single file path) or a list of strings (multiple file paths).
    :return: A list of loaded documents.
    """
    loader = PyMuPDFReader()
    # If a single string is passed, wrap it in a list for consistent handling
    if isinstance(filepaths, str):
        filepaths = [filepaths]
    # Load and accumulate documents
    all_documents = []
    for filepath in filepaths:
        all_documents += loader.load(file_path=filepath)
    return all_documents
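
# Usage sketch (file names are placeholders):
#     documents = load_documents(["report_a.pdf", "report_b.pdf"])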
def create_kg_index(
    documents,
    storage_context,
    llm,
    max_triplets_per_chunk=10,
    embed_model=None,
    include_embeddings=True,
    chunk_size=512
):
    """Build a KnowledgeGraphIndex over the documents using the given graph storage context."""
    # Avoid instantiating the (large) embedding model at import time: only build
    # the default when the caller did not supply one.
    if embed_model is None:
        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    splitter = SentenceSplitter(chunk_size=chunk_size)
    graph_index = KnowledgeGraphIndex.from_documents(
        documents,
        storage_context=storage_context,
        max_triplets_per_chunk=max_triplets_per_chunk,
        llm=llm,
        embed_model=embed_model,
        include_embeddings=include_embeddings,
        transformations=[splitter]
    )
    return graph_index
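
# Usage sketch: the storage context wraps whatever graph store you use; the
# Neo4j connection details below are placeholders for illustration.
#     from llama_index.core import StorageContext
#     from llama_index.graph_stores.neo4j import Neo4jGraphStore
#     graph_store = Neo4jGraphStore(
#         username="neo4j", password="<password>",
#         url="bolt://localhost:7687", database="neo4j"
#     )
#     storage_context = StorageContext.from_defaults(graph_store=graph_store)
#     kg_index = create_kg_index(documents, storage_context, llm)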
from llama_index.core import PropertyGraphIndex
# SimpleLLMPathExtractor is a free-form alternative to DynamicLLMPathExtractor
from llama_index.core.indices.property_graph import DynamicLLMPathExtractor, SimpleLLMPathExtractor
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
def create_pg_index(
    llm,
    documents,
    graph_store,
    max_triplets_per_chunk=10,
    num_workers=4,
    embed_kg_nodes=True,
    embed_model=None
):
    """Build a PropertyGraphIndex over the documents, persisting to the given property graph store."""
    # As in create_kg_index, defer building the default embedding model to call time.
    if embed_model is None:
        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    splitter = SentenceSplitter(chunk_size=512)

    # DynamicLLMPathExtractor lets the LLM infer entity and relation labels on the fly
    kg_extractor = DynamicLLMPathExtractor(
        llm=llm,
        max_triplets_per_chunk=max_triplets_per_chunk,
        num_workers=num_workers
    )

    # Create the Property Graph Index
    graph_index = PropertyGraphIndex.from_documents(
        documents,
        property_graph_store=graph_store,
        embed_model=embed_model,
        embed_kg_nodes=embed_kg_nodes,
        kg_extractors=[kg_extractor],
        transformations=[splitter]
    )
    return graph_index
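
# Usage sketch: Neo4jPropertyGraphStore is imported above; the connection
# details are placeholders, and the query is an illustrative example.
#     pg_store = Neo4jPropertyGraphStore(
#         username="neo4j", password="<password>", url="bolt://localhost:7687"
#     )
#     pg_index = create_pg_index(llm, documents, pg_store)
#     query_engine = pg_index.as_query_engine(llm=llm)
#     print(query_engine.query("How are the main entities related?"))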