File size: 3,892 Bytes
ed4d993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
from typing import List, Literal, Optional, overload

import nomic  # type: ignore[import]
from langchain_core.embeddings import Embeddings
from nomic import embed


class NomicEmbeddings(Embeddings):
    """NomicEmbeddings embedding model.

    Example:
        .. code-block:: python

            from langchain_nomic import NomicEmbeddings

            model = NomicEmbeddings()
    """

    @overload
    def __init__(
        self,
        *,
        model: str,
        nomic_api_key: Optional[str] = ...,
        dimensionality: Optional[int] = ...,
        inference_mode: Literal["remote"] = ...,
    ):
        ...

    @overload
    def __init__(
        self,
        *,
        model: str,
        nomic_api_key: Optional[str] = ...,
        dimensionality: Optional[int] = ...,
        inference_mode: Literal["local", "dynamic"],
        device: Optional[str] = ...,
    ):
        ...

    @overload
    def __init__(
        self,
        *,
        model: str,
        nomic_api_key: Optional[str] = ...,
        dimensionality: Optional[int] = ...,
        inference_mode: str,
        device: Optional[str] = ...,
    ):
        ...

    def __init__(
        self,
        *,
        model: str,
        nomic_api_key: Optional[str] = None,
        dimensionality: Optional[int] = None,
        inference_mode: str = "remote",
        device: Optional[str] = None,
        vision_model: Optional[str] = None,
    ):
        """Initialize NomicEmbeddings model.

        Args:
            model: model name
            nomic_api_key: optionally, set the Nomic API key. Uses the NOMIC_API_KEY
                environment variable by default.
            dimensionality: The embedding dimension, for use with Matryoshka-capable
                models. Defaults to full-size.
            inference_mode: How to generate embeddings. One of `remote`, `local`
                (Embed4All), or `dynamic` (automatic). Defaults to `remote`.
            device: The device to use for local embeddings. Choices include
                `cpu`, `gpu`, `nvidia`, `amd`, or a specific device name. See
                the docstring for `GPT4All.__init__` for more info. Typically
                defaults to CPU. Do not use on macOS.
            vision_model: the model to use when embedding images via
                `embed_image`. Defaults to None, in which case the nomic
                client's default vision model is used.
        """
        _api_key = nomic_api_key or os.environ.get("NOMIC_API_KEY")
        if _api_key:
            # Authenticate once up front so the subsequent embed.* calls in
            # this instance can reuse the logged-in session. Skipped when no
            # key is available (e.g. purely local inference).
            nomic.login(_api_key)
        self.model = model
        self.dimensionality = dimensionality
        self.inference_mode = inference_mode
        self.device = device
        self.vision_model = vision_model

    def embed(self, texts: List[str], *, task_type: str) -> List[List[float]]:
        """Embed texts.

        Args:
            texts: list of texts to embed
            task_type: the task type to use when embedding. One of `search_query`,
                `search_document`, `classification`, `clustering`
        """

        output = embed.text(
            texts=texts,
            model=self.model,
            task_type=task_type,
            dimensionality=self.dimensionality,
            inference_mode=self.inference_mode,
            device=self.device,
        )
        return output["embeddings"]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs.

        Args:
            texts: list of texts to embed as documents
        """
        return self.embed(
            texts=texts,
            task_type="search_document",
        )

    def embed_query(self, text: str) -> List[float]:
        """Embed query text.

        Args:
            text: query text
        """
        return self.embed(
            texts=[text],
            task_type="search_query",
        )[0]

    def embed_image(self, uris: List[str]) -> List[List[float]]:
        """Embed images.

        Args:
            uris: list of image references to embed — presumably file paths
                or URIs accepted by `nomic.embed.image`; confirm against the
                nomic client docs.
        """
        return embed.image(
            images=uris,
            model=self.vision_model,
        )["embeddings"]