File size: 4,009 Bytes
ed4d993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import logging
from typing import List, Optional

import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel

logger = logging.getLogger(__name__)


class LlamafileEmbeddings(BaseModel, Embeddings):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will need to first:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode with embeddings enabled:

        `./path/to/model.llamafile --server --nobrowser --embedding`

    Example:
        .. code-block:: python

            from langchain_community.embeddings import LlamafileEmbeddings
            embedder = LlamafileEmbeddings()
            doc_embeddings = embedder.embed_documents(
                [
                    "Alpha is the first letter of the Greek alphabet",
                    "Beta is the second letter of the Greek alphabet",
                ]
            )
            query_embedding = embedder.embed_query(
                "What is the second letter of the Greek alphabet"
            )

    """

    base_url: str = "http://localhost:8080"
    """Base url where the llamafile server is listening."""

    request_timeout: Optional[int] = None
    """Timeout for server requests"""

    def _embed(self, text: str) -> List[float]:
        """Request the embedding of a single text from the llamafile server.

        Sends a JSON POST to the `/embedding` endpoint under `self.base_url`
        and returns the vector found under the `embedding` key of the
        response.

        Args:
            text: The text to embed.

        Returns:
            The embedding vector returned by the server.

        Raises:
            requests.exceptions.ConnectionError: If the server cannot be
                reached at `self.base_url`.
            requests.exceptions.HTTPError: If the server answers with a
                4xx/5xx status code.
            KeyError: If the response has no `embedding` key.
            ValueError: If the returned embedding is empty or all zeros.
        """
        # Tolerate a trailing slash in the configured base url so that we do
        # not end up posting to ".../​/embedding".
        endpoint = self.base_url.rstrip("/") + "/embedding"

        try:
            response = requests.post(
                url=endpoint,
                headers={
                    "Content-Type": "application/json",
                },
                json={
                    "content": text,
                },
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError as err:
            # Re-raise with a more actionable message, keeping the original
            # error attached as the explicit cause.
            raise requests.exceptions.ConnectionError(
                "Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            ) from err

        # Raise an HTTPError if we got a bad (4xx/5xx) response status code
        response.raise_for_status()

        contents = response.json()
        if "embedding" not in contents:
            raise KeyError(
                "Unexpected output from /embedding endpoint, output dict "
                "missing 'embedding' key."
            )

        embedding = contents["embedding"]

        # Sanity check the embedding vector:
        # Prior to llamafile v0.6.2, if the server was not started with the
        # `--embedding` option, the embedding endpoint would always return a
        # 0-vector. See issue:
        # https://github.com/Mozilla-Ocho/llamafile/issues/243
        # So here we raise an exception if every component is 0 (or the
        # vector is empty). Checking the components rather than their sum
        # avoids rejecting a valid vector whose values happen to cancel out.
        if not any(embedding):
            raise ValueError(
                "Embedding sums to 0, did you start the llamafile server with "
                "the `--embedding` option enabled?"
            )

        return embedding

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents using a llamafile server running at `self.base_url`.
        llamafile server should be started in a separate process before invoking
        this method.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        # One request per text, in input order.
        return [self._embed(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using a llamafile server running at `self.base_url`.
        llamafile server should be started in a separate process before invoking
        this method.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self._embed(text)