Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
d56bf2d
1
Parent(s):
2db96ca
Drop the plan to use shards
Browse files
app.py
CHANGED
@@ -128,22 +128,15 @@ def get_model(
|
|
128 |
)
|
129 |
|
130 |
|
131 |
-
def
|
132 |
-
|
133 |
-
|
134 |
-
merged_ivfdata_path = Path("temp.ivfdata")
|
135 |
-
|
136 |
-
index = faiss.read_index(str(empty_path))
|
137 |
-
merged_ivfdata_path.unlink(missing_ok=True) # overwrite previous if it exists (TODO: do I need this?)
|
138 |
-
merge_ondisk(index, shard_paths, str(merged_ivfdata_path))
|
139 |
-
|
140 |
-
return index
|
141 |
|
142 |
|
143 |
def get_index(dir: Path, search_time_s: float) -> Dataset:
|
144 |
-
# NOTE: a private attr
|
145 |
index: Dataset = Dataset.from_parquet(str(dir / "ids.parquet")) # type: ignore
|
146 |
-
faiss_index =
|
147 |
index._indexes["embedding"] = FaissIndex(None, None, None, faiss_index)
|
148 |
|
149 |
with open(dir / "params.json", "r") as f:
|
|
|
128 |
)
|
129 |
|
130 |
|
131 |
+
def open_ondisk(dir: Path) -> faiss.Index:
    """Read the FAISS index stored as ``index.faiss`` inside *dir*.

    The file is read with ``faiss.IO_FLAG_ONDISK_SAME_DIR``. Per the
    original author's note, without that flag ``read_index`` gets on-disk
    indices in the working dir.
    """
    index_file = dir / "index.faiss"
    return faiss.read_index(str(index_file), faiss.IO_FLAG_ONDISK_SAME_DIR)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
|
136 |
def get_index(dir: Path, search_time_s: float) -> Dataset:
|
137 |
+
# NOTE: use a private attr to load the index with IO_FLAG_ONDISK_SAME_DIR!
|
138 |
index: Dataset = Dataset.from_parquet(str(dir / "ids.parquet")) # type: ignore
|
139 |
+
faiss_index = open_ondisk(dir)
|
140 |
index._indexes["embedding"] = FaissIndex(None, None, None, faiss_index)
|
141 |
|
142 |
with open(dir / "params.json", "r") as f:
|