"""Shared application state plus helpers for fetching and loading the BM25 index.

The module exposes a process-wide ``app_state`` singleton, small helpers for
downloading objects from S3, and ``initialize_components`` which loads the
persisted docstore / BM25 retriever from ``LOCAL_DIR`` into ``app_state``.
"""

import sys
from pathlib import Path

import boto3
from llama_index.core import Settings
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.retrievers.bm25 import BM25Retriever

from config import aws_access_key_id, aws_secret_access_key


class AppState:
    """Singleton container for objects shared across the application.

    Every call to ``AppState()`` returns the same instance, so attributes
    assigned on it are visible to every importer of this module.
    """

    _instance = None
    # Set by ``initialize_components``; stays ``None`` until it succeeds.
    retriever_bm25 = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance


# S3 parameters
BUCKET_NAME = "legal-position"
PREFIX_RETRIEVER = "Save_Index/"
LOCAL_DIR = Path("Save_Index_Local")

# Create the global state instance
app_state = AppState()


def initialize_s3_client():
    """Return a boto3 S3 client built from the credentials in ``config``."""
    return boto3.client(
        "s3",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        region_name="eu-north-1",
    )


def download_s3_file(s3_client, bucket_name, s3_key, local_path):
    """Download one S3 object to ``local_path`` and log the transfer.

    Args:
        s3_client: Client object exposing ``download_file``.
        bucket_name: Name of the source bucket.
        s3_key: Key of the object to download.
        local_path: Destination path (``str`` or ``Path``); it is passed to
            the client as a string.
    """
    s3_client.download_file(bucket_name, s3_key, str(local_path))
    print(f"Завантажено: {s3_key} -> {local_path}")


def download_s3_folder(s3_client, bucket_name, prefix, local_dir):
    """Download every object under ``prefix`` into ``local_dir``.

    The key layout below ``prefix`` is mirrored below ``local_dir``; parent
    directories are created as needed. Keys ending in ``/`` are skipped.

    Args:
        s3_client: Client object exposing ``get_paginator`` and
            ``download_file``.
        bucket_name: Name of the source bucket.
        prefix: Key prefix to download.
        local_dir: Local destination directory.
    """
    # A single list_objects_v2 response is capped (1000 keys by default), so
    # walk every page instead of reading only the first response.
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # Pages for an empty prefix carry no 'Contents' entry at all.
        for obj in page.get("Contents", []):
            s3_key = obj["Key"]
            if s3_key.endswith("/"):
                continue

            local_file_path = local_dir / Path(s3_key).relative_to(prefix)
            local_file_path.parent.mkdir(parents=True, exist_ok=True)
            download_s3_file(s3_client, bucket_name, s3_key, local_file_path)


def initialize_components():
    """Load the persisted docstore and BM25 retriever into ``app_state``.

    Checks that ``LOCAL_DIR`` and the required entries inside it exist, loads
    them, and stores a ``QueryFusionRetriever`` wrapping the BM25 retriever in
    ``app_state.retriever_bm25``.

    Returns:
        ``True`` on success. On any exception the error is printed to stderr
        and ``False`` is returned instead of raising.
    """
    try:
        persist_path = LOCAL_DIR

        if not persist_path.exists():
            raise FileNotFoundError(f"Directory not found: {persist_path}")

        # Checked with exists(), so an entry may be a file or a directory.
        required_files = ["docstore_es_filter.json", "bm25_retriever_es"]
        missing_files = [
            name for name in required_files if not (persist_path / name).exists()
        ]
        if missing_files:
            raise FileNotFoundError(
                f"Missing required files: {', '.join(missing_files)}"
            )

        # NOTE(review): the docstore is loaded but not referenced afterwards;
        # kept so load failures still surface here — confirm whether needed.
        docstore = SimpleDocumentStore.from_persist_path(
            str(persist_path / "docstore_es_filter.json")
        )
        bm25_retriever = BM25Retriever.from_persist_dir(
            str(persist_path / "bm25_retriever_es")
        )

        # Store retriever_bm25 in the global state
        app_state.retriever_bm25 = QueryFusionRetriever(
            [bm25_retriever],
            similarity_top_k=Settings.similarity_top_k,
            num_queries=1,
            use_async=True,
        )
        return True

    except Exception as e:
        # Startup boundary: report the failure and signal it via the return
        # value rather than propagating.
        print(f"Error initializing components: {str(e)}", file=sys.stderr)
        return False