Legal_Position_Generator / initialize.py
DocUA's picture
refactoring 2
bcdb6bd
import sys
import boto3
from pathlib import Path
from llama_index.core import Settings
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever
from config import aws_access_key_id, aws_secret_access_key
class AppState:
    """Process-wide singleton holding shared application state.

    Every call to ``AppState()`` returns the same object, so attributes
    assigned on one reference are visible through all others.
    """

    # The single shared instance; created lazily on first construction.
    _instance = None
    # Retriever slot; ``None`` until something assigns it.
    retriever_bm25 = None

    def __new__(cls):
        """Return the existing instance, creating it on the first call."""
        existing = cls._instance
        if existing is not None:
            return existing
        cls._instance = super().__new__(cls)
        return cls._instance
# S3 parameters.
# NOTE(review): presumably the bucket and key prefix handed to
# download_s3_folder() by callers outside this module — confirm.
BUCKET_NAME = "legal-position"
PREFIX_RETRIEVER = "Save_Index/"
# Local directory for the index files; same relative path that
# initialize_components() reads from.
LOCAL_DIR = Path("Save_Index_Local")
# Create the global state instance (AppState is a singleton, so any later
# AppState() call returns this same object).
app_state = AppState()
def initialize_s3_client():
    """Build and return a boto3 S3 client for the ``eu-north-1`` region.

    Credentials are the ``aws_access_key_id`` / ``aws_secret_access_key``
    values imported from ``config``.
    """
    credentials = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
    }
    return boto3.client("s3", region_name="eu-north-1", **credentials)
def download_s3_file(s3_client, bucket_name, s3_key, local_path):
    """Download a single S3 object to ``local_path`` and report it on stdout.

    Args:
        s3_client: Client object exposing ``download_file(bucket, key, filename)``.
        bucket_name: Name of the bucket holding the object.
        s3_key: Key of the object to fetch.
        local_path: Destination path; converted with ``str()`` before the call.
    """
    destination = str(local_path)
    s3_client.download_file(bucket_name, s3_key, destination)
    print(f"Завантажено: {s3_key} -> {local_path}")
def download_s3_folder(s3_client, bucket_name, prefix, local_dir):
    """Download every object under ``prefix`` into ``local_dir``.

    The key layout below ``prefix`` is mirrored under ``local_dir``; parent
    directories are created as needed. Keys ending in ``/`` (folder
    placeholder objects) are skipped. Each download is reported on stdout.

    A single ``list_objects_v2`` call returns at most 1000 keys, so the
    listing is consumed through the client's paginator to make sure prefixes
    with more objects are downloaded completely.

    Args:
        s3_client: boto3 S3 client (needs ``get_paginator`` and
            ``download_file``).
        bucket_name: Name of the bucket to read from.
        prefix: Key prefix to list; also stripped from each key to build
            the local relative path.
        local_dir: Local root directory for the downloaded files.

    Raises:
        ValueError: If a listed key is not located under ``prefix`` when
            interpreted as a path (raised by ``Path.relative_to``).
    """
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # A page carries no 'Contents' entry when nothing matches.
        for obj in page.get("Contents", []):
            s3_key = obj["Key"]
            if s3_key.endswith("/"):
                continue
            local_file_path = local_dir / Path(s3_key).relative_to(prefix)
            local_file_path.parent.mkdir(parents=True, exist_ok=True)
            s3_client.download_file(bucket_name, s3_key, str(local_file_path))
            print(f"Завантажено: {s3_key} -> {local_file_path}")
def initialize_components():
    """Load the persisted BM25 retriever and publish it on ``app_state``.

    Expects a ``Save_Index_Local`` directory (relative to the current
    working directory) containing ``docstore_es_filter.json`` and
    ``bm25_retriever_es``. On success the retriever, wrapped in a
    ``QueryFusionRetriever``, is stored in ``app_state.retriever_bm25``.

    Returns:
        True when everything loaded; False when any exception occurred, in
        which case the error message is printed to stderr.
    """
    try:
        index_dir = Path("Save_Index_Local")
        if not index_dir.exists():
            raise FileNotFoundError(f"Directory not found: {index_dir}")

        # Collect every missing artifact so the error names all of them.
        absent = []
        for name in ('docstore_es_filter.json', 'bm25_retriever_es'):
            if not (index_dir / name).exists():
                absent.append(name)
        if absent:
            raise FileNotFoundError(f"Missing required files: {', '.join(absent)}")

        docstore_file = index_dir / "docstore_es_filter.json"
        bm25_dir = index_dir / "bm25_retriever_es"

        # NOTE(review): the loaded docstore is not used afterwards; the call
        # is kept so a failure to load it still makes this function return False.
        docstore = SimpleDocumentStore.from_persist_path(str(docstore_file))
        bm25_retriever = BM25Retriever.from_persist_dir(str(bm25_dir))

        # Store the retriever in the global state.
        fusion_retriever = QueryFusionRetriever(
            [bm25_retriever],
            similarity_top_k=Settings.similarity_top_k,
            num_queries=1,
            use_async=True,
        )
        app_state.retriever_bm25 = fusion_retriever
    except Exception as exc:
        print(f"Error initializing components: {exc}", file=sys.stderr)
        return False
    return True