File size: 2,337 Bytes
4a51346
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import shutil
import tempfile
from typing import Generator

import pytest
from chromadb.db.index.hnswlib import Hnswlib
from chromadb.config import Settings
import uuid
import numpy as np


@pytest.fixture(scope="module")
def settings() -> Generator[Settings, None, None]:
    """Provide module-scoped ``Settings`` backed by a throwaway persist directory.

    The directory is created with ``tempfile.mkdtemp`` so that it is unique to
    this fixture instance. A fixed location under the system temp directory
    would be shared by every test run on the machine, letting concurrent runs
    or leftovers from an interrupted run interfere with each other.

    Yields:
        A ``Settings`` object whose ``persist_directory`` is the new directory.
    """
    base_dir = tempfile.mkdtemp(prefix="chroma-hnswlib-tests-")
    # The previous hard-coded value ended with a path separator; keep that
    # shape in case a consumer appends directly to persist_directory.
    save_path = os.path.join(base_dir, "")
    try:
        yield Settings(persist_directory=save_path)
    finally:
        # Clean up on normal teardown and also if the generator is closed early.
        if os.path.exists(save_path):
            shutil.rmtree(save_path)


def test_count_tracking(settings: Settings) -> None:
    """Check the element counters kept in ``_index_metadata`` across adds and deletes.

    ``curr_elements`` is expected to follow the number of entries currently in
    the index, while ``total_elements_added`` is expected to only ever grow.
    """
    index = Hnswlib("test", settings, {}, 2)
    index._init_index(2)

    # Both counters start at zero right after initialisation.
    assert index._index_metadata["curr_elements"] == 0
    assert index._index_metadata["total_elements_added"] == 0

    first_id, second_id = uuid.uuid4(), uuid.uuid4()

    # Every insertion of a single random 2-d vector bumps both counters together.
    for expected, vector_id in enumerate((first_id, second_id), start=1):
        vector = np.random.rand(1, 2)
        index.add([vector_id], vector.tolist())
        metadata = index._index_metadata
        assert metadata["curr_elements"] == expected
        assert metadata["total_elements_added"] == expected

    # Every deletion lowers the live count only; the running total stays at 2.
    for remaining, vector_id in ((1, first_id), (0, second_id)):
        index.delete_from_index(ids=[vector_id])
        metadata = index._index_metadata
        assert metadata["curr_elements"] == remaining
        assert metadata["total_elements_added"] == 2


def test_add_delete_large_amount(settings: Settings) -> None:
    """Check the element counters after a bulk add and a bulk delete.

    Adds 2000 random 512-dimensional float32 vectors in one call, then deletes
    100 of them chosen at random without replacement, asserting the values of
    ``curr_elements`` and ``total_elements_added`` after each step.
    """
    record_count, dimension, delete_count = 2000, 512, 100

    # Bulk insert: one add() call carrying every record.
    vectors = np.random.rand(record_count, dimension).astype(np.float32)
    record_ids = [uuid.uuid4() for _ in range(record_count)]
    index = Hnswlib("test", settings, {}, record_count)
    index._init_index(dimension)
    index.add(record_ids, vectors.tolist())

    metadata = index._index_metadata
    assert metadata["curr_elements"] == record_count
    assert metadata["total_elements_added"] == record_count

    # Bulk delete: draw a random subset of the inserted ids, no repeats.
    id_pool = np.array(record_ids)
    chosen = np.random.choice(id_pool, size=delete_count, replace=False)
    index.delete_from_index(chosen.tolist())

    # Only the live count drops; the running total of additions is unchanged.
    metadata = index._index_metadata
    assert metadata["curr_elements"] == record_count - delete_count
    assert metadata["total_elements_added"] == record_count