Spaces:
Runtime error
Runtime error
| """Pinecone reader.""" | |
| from typing import Any, Dict, List, Optional | |
| from gpt_index.readers.base import BaseReader | |
| from gpt_index.readers.schema.base import Document | |
| class PineconeReader(BaseReader): | |
| """Pinecone reader. | |
| Args: | |
| api_key (str): Pinecone API key. | |
| environment (str): Pinecone environment. | |
| """ | |
| def __init__(self, api_key: str, environment: str): | |
| """Initialize with parameters.""" | |
| try: | |
| import pinecone # noqa: F401 | |
| except ImportError: | |
| raise ImportError( | |
| "`pinecone` package not found, please run `pip install pinecone-client`" | |
| ) | |
| self._api_key = api_key | |
| self._environment = environment | |
| pinecone.init(api_key=api_key, environment=environment) | |
| def load_data( | |
| self, | |
| index_name: str, | |
| id_to_text_map: Dict[str, str], | |
| vector: Optional[List[float]], | |
| top_k: int, | |
| separate_documents: bool = True, | |
| include_values: bool = True, | |
| **query_kwargs: Any | |
| ) -> List[Document]: | |
| """Load data from Pinecone. | |
| Args: | |
| index_name (str): Name of the index. | |
| id_to_text_map (Dict[str, str]): A map from ID's to text. | |
| separate_documents (Optional[bool]): Whether to return separate | |
| documents per retrieved entry. Defaults to True. | |
| vector (List[float]): Query vector. | |
| top_k (int): Number of results to return. | |
| include_values (bool): Whether to include the embedding in the response. | |
| Defaults to True. | |
| **query_kwargs: Keyword arguments to pass to the query. | |
| Arguments are the exact same as those found in | |
| Pinecone's reference documentation for the | |
| query method. | |
| Returns: | |
| List[Document]: A list of documents. | |
| """ | |
| import pinecone | |
| index = pinecone.Index(index_name) | |
| if "include_values" not in query_kwargs: | |
| query_kwargs["include_values"] = True | |
| response = index.query(top_k=top_k, vector=vector, **query_kwargs) | |
| documents = [] | |
| for match in response.matches: | |
| if match.id not in id_to_text_map: | |
| raise ValueError("ID not found in id_to_text_map.") | |
| text = id_to_text_map[match.id] | |
| embedding = match.values | |
| if len(embedding) == 0: | |
| embedding = None | |
| documents.append(Document(text=text, embedding=embedding)) | |
| if not separate_documents: | |
| text_list = [doc.get_text() for doc in documents] | |
| text = "\n\n".join(text_list) | |
| documents = [Document(text=text)] | |
| return documents | |