Spaces:
Runtime error
Runtime error
import logging | |
from typing import List, Optional | |
import requests | |
from langchain_core.embeddings import Embeddings | |
from langchain_core.pydantic_v1 import BaseModel | |
logger = logging.getLogger(__name__) | |
class LlamafileEmbeddings(BaseModel, Embeddings):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will need to first:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode with embeddings enabled:

        `./path/to/model.llamafile --server --nobrowser --embedding`

    Example:
        .. code-block:: python

            from langchain_community.embeddings import LlamafileEmbeddings
            embedder = LlamafileEmbeddings()
            doc_embeddings = embedder.embed_documents(
                [
                    "Alpha is the first letter of the Greek alphabet",
                    "Beta is the second letter of the Greek alphabet",
                ]
            )
            query_embedding = embedder.embed_query(
                "What is the second letter of the Greek alphabet"
            )
    """

    base_url: str = "http://localhost:8080"
    """Base url where the llamafile server is listening."""

    request_timeout: Optional[int] = None
    """Timeout for server requests"""

    def _embed(self, text: str) -> List[float]:
        """Request the embedding of a single text from the llamafile server.

        Posts ``{"content": text}`` as JSON to ``<base_url>/embedding`` and
        returns the value stored under the ``"embedding"`` key of the JSON
        response.

        Args:
            text: The text to embed.

        Returns:
            The embedding vector reported by the server.

        Raises:
            requests.exceptions.ConnectionError: If the server cannot be
                reached at ``self.base_url``.
            requests.exceptions.HTTPError: If the server answers with a
                4xx/5xx status code.
            KeyError: If the response JSON has no ``"embedding"`` key.
            ValueError: If the returned embedding is empty or all zeros.
        """
        # Tolerate a trailing slash in the configured base URL so that we
        # never request "//embedding".
        endpoint = self.base_url.rstrip("/") + "/embedding"

        try:
            response = requests.post(
                url=endpoint,
                headers={"Content-Type": "application/json"},
                json={"content": text},
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError as err:
            # Re-raise with an actionable message, keeping the original
            # error attached as the explicit cause.
            raise requests.exceptions.ConnectionError(
                "Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            ) from err

        # Raise an exception if we got an error (4xx/5xx) response status.
        response.raise_for_status()

        contents = response.json()
        if "embedding" not in contents:
            raise KeyError(
                "Unexpected output from /embedding endpoint, output dict "
                "missing 'embedding' key."
            )
        embedding = contents["embedding"]

        # Sanity check the embedding vector:
        # Prior to llamafile v0.6.2, if the server was not started with the
        # `--embedding` option, the embedding endpoint would always return a
        # 0-vector. See issue:
        # https://github.com/Mozilla-Ocho/llamafile/issues/243
        # Test for "every component is zero" rather than "components sum to
        # zero": a valid non-zero vector may have components that cancel out
        # and must not be rejected. An empty vector is rejected as well.
        if not any(embedding):
            raise ValueError(
                "Embedding sums to 0, did you start the llamafile server with "
                "the `--embedding` option enabled?"
            )

        return embedding

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents using a llamafile server running at `self.base_url`.

        llamafile server should be started in a separate process before invoking
        this method. Each text is sent to the server in its own request, in
        order.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        return [self._embed(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using a llamafile server running at `self.base_url`.

        llamafile server should be started in a separate process before invoking
        this method.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self._embed(text)