browser-backend / data.py
atwang's picture
update code to download dataset files from separate repo
07356cd
raw
history blame contribute delete
997 Bytes
import os
import pickle
from typing import Any
import faiss
from huggingface_hub import hf_hub_download
def load_indexes_local(index_files: dict[str, str], *, data_folder: str, **kw) -> dict[str, Any]:
    """Read FAISS indexes from files on the local filesystem.

    Args:
        index_files: Mapping from an index type to the name of its index
            file. Each name is combined with ``data_folder`` via
            ``os.path.join``.
        data_folder: Folder the index file names are joined onto
            (keyword-only).
        **kw: Additional keyword arguments; accepted and ignored.
            Presumably present so this loader can be called with the same
            keyword set as ``load_indexes_hf`` -- confirm against callers.

    Returns:
        A dict with the same keys as ``index_files``; each value is whatever
        ``faiss.read_index`` returns for the corresponding path.
    """
    return {
        kind: faiss.read_index(os.path.join(data_folder, filename))
        for kind, filename in index_files.items()
    }
def load_indexes_hf(index_files: dict[str, str], *, repo_name: str, **kw) -> dict[str, Any]:
    """Download FAISS index files from a Hugging Face dataset repo and read them.

    Args:
        index_files: Mapping from an index type to the file name of that
            index inside the repo.
        repo_name: Repo id passed to ``hf_hub_download`` (keyword-only). The
            download is always requested with ``repo_type="dataset"``.
        **kw: Additional keyword arguments; accepted and ignored.
            Presumably present so this loader can be called with the same
            keyword set as ``load_indexes_local`` -- confirm against callers.

    Returns:
        A dict with the same keys as ``index_files``; each value is whatever
        ``faiss.read_index`` returns for the downloaded file.
    """
    loaded: dict[str, Any] = {}
    for kind, remote_name in index_files.items():
        # hf_hub_download's return value is handed straight to faiss as the
        # path of the file to read.
        local_path = hf_hub_download(
            repo_id=repo_name,
            filename=remote_name,
            repo_type="dataset",
        )
        loaded[kind] = faiss.read_index(local_path)
    return loaded
def load_index_pickle(index_file: str, repo_name: str) -> Any:
    """Download a pickle file from a Hugging Face dataset repo and unpickle it.

    Args:
        index_file: File name of the pickle inside the repo.
        repo_name: Repo id passed to ``hf_hub_download``; the download is
            requested with ``repo_type="dataset"``.

    Returns:
        The object produced by ``pickle.load`` on the downloaded file.
        Presumably an index-to-id mapping -- confirm against the caller.
    """
    local_path = hf_hub_download(repo_id=repo_name, filename=index_file, repo_type="dataset")
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file, so this is only safe when the contents of `repo_name` are trusted.
    with open(local_path, "rb") as handle:
        payload = pickle.load(handle)
    return payload