Spaces:
Running
Running
import sys | |
import boto3 | |
from pathlib import Path | |
from llama_index.core import Settings | |
from llama_index.core.storage.docstore import SimpleDocumentStore | |
from llama_index.retrievers.bm25 import BM25Retriever | |
from llama_index.core.retrievers import QueryFusionRetriever | |
from config import aws_access_key_id, aws_secret_access_key | |
class AppState:
    """Shared application state; every ``AppState()`` call yields the same object."""

    # Cached instance, created on the first construction.
    _instance = None
    # Retriever slot; stays None until something assigns it.
    retriever_bm25 = None

    def __new__(cls):
        """Return the cached instance, creating it on first use."""
        existing = cls._instance
        if existing is not None:
            return existing
        created = super().__new__(cls)
        cls._instance = created
        return created
# S3 parameters
BUCKET_NAME: str = "legal-position"
PREFIX_RETRIEVER: str = "Save_Index/"
LOCAL_DIR: Path = Path("Save_Index_Local")

# Create the global state instance
app_state = AppState()
def initialize_s3_client():
    """Build a boto3 S3 client for the ``eu-north-1`` region.

    Credentials are the ``aws_access_key_id`` and ``aws_secret_access_key``
    values imported from ``config``.

    Returns:
        The object returned by ``boto3.client("s3", ...)``.
    """
    client_options = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
        "region_name": "eu-north-1",
    }
    return boto3.client("s3", **client_options)
def download_s3_file(s3_client, bucket_name, s3_key, local_path):
    """Download a single S3 object to ``local_path``.

    Args:
        s3_client: Object exposing ``download_file(bucket, key, filename)``
            (e.g. a boto3 S3 client).
        bucket_name: Name of the bucket that holds the object.
        s3_key: Key of the object to fetch.
        local_path: Destination file path (``str`` or ``Path``).

    Any exception raised by ``s3_client.download_file`` propagates unchanged.
    """
    # Make sure the destination directory exists before the transfer, the
    # same way download_s3_folder prepares each target path.
    Path(local_path).parent.mkdir(parents=True, exist_ok=True)
    s3_client.download_file(bucket_name, s3_key, str(local_path))
    print(f"Завантажено: {s3_key} -> {local_path}")
def download_s3_folder(s3_client, bucket_name, prefix, local_dir):
    """Download every object under ``prefix`` into ``local_dir``.

    The part of each key after ``prefix`` becomes the path below
    ``local_dir``; intermediate directories are created as needed.

    Args:
        s3_client: Object exposing ``list_objects_v2(**kwargs)`` and
            ``download_file(bucket, key, filename)`` (e.g. a boto3 S3 client).
        bucket_name: Name of the bucket to read from.
        prefix: Key prefix selecting the objects to download.
        local_dir: Destination directory (``str`` or ``Path``).

    Raises:
        ValueError: If a listed key cannot be expressed relative to
            ``prefix`` by ``Path.relative_to``.
    """
    destination_root = Path(local_dir)
    list_kwargs = {"Bucket": bucket_name, "Prefix": prefix}

    # list_objects_v2 answers in pages (up to 1,000 keys per response), so
    # keep requesting with the continuation token until the listing ends.
    while True:
        response = s3_client.list_objects_v2(**list_kwargs)

        # "Contents" is absent when the page holds no objects.
        for obj in response.get("Contents", []):
            s3_key = obj["Key"]
            # Keys ending in "/" are skipped (presumably folder placeholders).
            if s3_key.endswith("/"):
                continue
            local_file_path = destination_root / Path(s3_key).relative_to(prefix)
            local_file_path.parent.mkdir(parents=True, exist_ok=True)
            s3_client.download_file(bucket_name, s3_key, str(local_file_path))
            print(f"Завантажено: {s3_key} -> {local_file_path}")

        if not response.get("IsTruncated"):
            break
        list_kwargs["ContinuationToken"] = response["NextContinuationToken"]
def initialize_components():
    """Load the persisted retriever data and publish it on ``app_state``.

    Checks that ``Save_Index_Local`` exists and contains
    ``docstore_es_filter.json`` and ``bm25_retriever_es``, loads both, then
    stores a ``QueryFusionRetriever`` wrapping the BM25 retriever in
    ``app_state.retriever_bm25``.

    Returns:
        True on success. False if any step raised; the error message is
        printed to stderr and no exception reaches the caller.
    """
    index_dir = Path("Save_Index_Local")
    docstore_name = "docstore_es_filter.json"
    bm25_name = "bm25_retriever_es"

    try:
        if not index_dir.exists():
            raise FileNotFoundError(f"Directory not found: {index_dir}")

        absent = [
            name
            for name in (docstore_name, bm25_name)
            if not (index_dir / name).exists()
        ]
        if absent:
            raise FileNotFoundError(f"Missing required files: {', '.join(absent)}")

        # The loaded docstore is not used afterwards; the call is kept because
        # a failure to load it still makes this function return False.
        SimpleDocumentStore.from_persist_path(str(index_dir / docstore_name))
        keyword_retriever = BM25Retriever.from_persist_dir(str(index_dir / bm25_name))

        # Store the fused retriever in the global state.
        # NOTE(review): Settings.similarity_top_k is presumably assigned
        # elsewhere in the project before this runs - confirm.
        app_state.retriever_bm25 = QueryFusionRetriever(
            [keyword_retriever],
            similarity_top_k=Settings.similarity_top_k,
            num_queries=1,
            use_async=True,
        )
    except Exception as exc:
        print(f"Error initializing components: {exc!s}", file=sys.stderr)
        return False
    return True