# Page header captured together with this file (not part of the module):
#   Spaces status: Runtime error
#   File size: 3,892 bytes
#   Revision: ed4d993
# (The viewer's line-number gutter, 1-135, was dropped.)
import os
from typing import List, Literal, Optional, overload
import nomic # type: ignore[import]
from langchain_core.embeddings import Embeddings
from nomic import embed
class NomicEmbeddings(Embeddings):
    """NomicEmbeddings embedding model.

    Wraps the ``nomic.embed`` text and image embedding calls behind the
    LangChain ``Embeddings`` interface.

    Example:
        .. code-block:: python

            from langchain_nomic import NomicEmbeddings

            # ``model`` is a required keyword-only argument.
            model = NomicEmbeddings(model="nomic-embed-text-v1.5")
    """

    # The overloads below exist only for static type checkers: ``device`` is
    # accepted only when ``inference_mode`` is not the literal "remote".
    # ``vision_model`` is listed on every overload so that type-checked
    # callers can pass it, matching what the runtime signature accepts.

    @overload
    def __init__(
        self,
        *,
        model: str,
        nomic_api_key: Optional[str] = ...,
        dimensionality: Optional[int] = ...,
        inference_mode: Literal["remote"] = ...,
        vision_model: Optional[str] = ...,
    ) -> None:
        ...

    @overload
    def __init__(
        self,
        *,
        model: str,
        nomic_api_key: Optional[str] = ...,
        dimensionality: Optional[int] = ...,
        inference_mode: Literal["local", "dynamic"],
        device: Optional[str] = ...,
        vision_model: Optional[str] = ...,
    ) -> None:
        ...

    @overload
    def __init__(
        self,
        *,
        model: str,
        nomic_api_key: Optional[str] = ...,
        dimensionality: Optional[int] = ...,
        inference_mode: str,
        device: Optional[str] = ...,
        vision_model: Optional[str] = ...,
    ) -> None:
        ...

    def __init__(
        self,
        *,
        model: str,
        nomic_api_key: Optional[str] = None,
        dimensionality: Optional[int] = None,
        inference_mode: str = "remote",
        device: Optional[str] = None,
        vision_model: Optional[str] = None,
    ) -> None:
        """Initialize NomicEmbeddings model.

        If an API key is available (argument or environment), this calls
        ``nomic.login`` with it as a side effect of construction.

        Args:
            model: model name
            nomic_api_key: optionally, set the Nomic API key. Uses the
                NOMIC_API_KEY environment variable by default.
            dimensionality: The embedding dimension, for use with
                Matryoshka-capable models. Defaults to full-size.
            inference_mode: How to generate embeddings. One of `remote`,
                `local` (Embed4All), or `dynamic` (automatic). Defaults to
                `remote`.
            device: The device to use for local embeddings. Choices include
                `cpu`, `gpu`, `nvidia`, `amd`, or a specific device name. See
                the docstring for `GPT4All.__init__` for more info. Typically
                defaults to CPU. Do not use on macOS.
            vision_model: model name forwarded to ``embed.image`` by
                `embed_image`. Defaults to None.
        """
        # An explicitly passed key wins; otherwise fall back to the
        # environment. Login is skipped when neither yields a non-empty key.
        _api_key = nomic_api_key or os.environ.get("NOMIC_API_KEY")
        if _api_key:
            nomic.login(_api_key)

        self.model = model
        self.dimensionality = dimensionality
        self.inference_mode = inference_mode
        self.device = device
        self.vision_model = vision_model

    def embed(self, texts: List[str], *, task_type: str) -> List[List[float]]:
        """Embed texts.

        Args:
            texts: list of texts to embed
            task_type: the task type to use when embedding. One of
                `search_query`, `search_document`, `classification`,
                `clustering`

        Returns:
            The ``"embeddings"`` entry of the ``embed.text`` response.
        """
        output = embed.text(
            texts=texts,
            model=self.model,
            task_type=task_type,
            dimensionality=self.dimensionality,
            inference_mode=self.inference_mode,
            device=self.device,
        )
        return output["embeddings"]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs.

        Args:
            texts: list of texts to embed as documents

        Returns:
            The result of `embed` called with the `search_document` task type.
        """
        return self.embed(
            texts=texts,
            task_type="search_document",
        )

    def embed_query(self, text: str) -> List[float]:
        """Embed query text.

        Args:
            text: query text

        Returns:
            The first (and only requested) embedding from `embed`, called
            with the `search_query` task type.
        """
        # ``embed`` works on batches, so wrap the query and unwrap the result.
        return self.embed(
            texts=[text],
            task_type="search_query",
        )[0]

    def embed_image(self, uris: List[str]) -> List[List[float]]:
        """Embed images.

        Args:
            uris: list of image references, passed unchanged as ``images`` to
                ``embed.image`` (presumably paths or URLs the nomic client
                accepts -- confirm against the nomic documentation).

        Returns:
            The ``"embeddings"`` entry of the ``embed.image`` response.
        """
        # NOTE(review): ``self.vision_model`` is forwarded as-is, so it is
        # None unless ``vision_model`` was given at construction time --
        # confirm how ``embed.image`` treats ``model=None``.
        return embed.image(
            images=uris,
            model=self.vision_model,
        )["embeddings"]
# (end of captured file)