|
"""Base schema for data structures.""" |
|
from abc import abstractmethod |
|
from dataclasses import dataclass |
|
from typing import Any, Dict, List, Optional |
|
|
|
from dataclasses_json import DataClassJsonMixin |
|
|
|
|
|
@dataclass
class BaseDocument(DataClassJsonMixin):
    """Base document.

    Generic abstract interface that captures both index structs
    as well as documents.

    Every field defaults to ``None``. The ``get_*`` accessors raise
    ``ValueError`` instead of handing back ``None`` when the matching
    field has not been set.

    """

    # Text content; ``get_text`` raises while this is None.
    text: Optional[str] = None
    # Identifier of the document; ``get_doc_id`` raises while this is None.
    doc_id: Optional[str] = None
    # Embedding vector; ``get_embedding`` raises while this is None.
    embedding: Optional[List[float]] = None

    # Optional key/value pairs, rendered as text by ``extra_info_str``.
    extra_info: Optional[Dict[str, Any]] = None

    @classmethod
    @abstractmethod
    def get_type(cls) -> str:
        """Return the document type; subclasses supply the implementation."""

    def get_text(self) -> str:
        """Return the text.

        Raises:
            ValueError: If ``text`` is None.

        """
        text = self.text
        if text is not None:
            return text
        raise ValueError("text field not set.")

    def get_doc_id(self) -> str:
        """Return the doc_id.

        Raises:
            ValueError: If ``doc_id`` is None.

        """
        doc_id = self.doc_id
        if doc_id is not None:
            return doc_id
        raise ValueError("doc_id not set.")

    @property
    def is_doc_id_none(self) -> bool:
        """Whether ``doc_id`` is still None."""
        return self.doc_id is None

    def get_embedding(self) -> List[float]:
        """Return the embedding.

        Raises:
            ValueError: If ``embedding`` is None.

        """
        embedding = self.embedding
        if embedding is not None:
            return embedding
        raise ValueError("embedding not set.")

    @property
    def extra_info_str(self) -> Optional[str]:
        """Render ``extra_info`` as newline-separated ``key: value`` lines.

        Returns None when ``extra_info`` is None. Values are converted
        with ``str()``.

        """
        info = self.extra_info
        if info is None:
            return None

        # ``!s`` forces str() on the value, independent of its __format__.
        rendered = (f"{key}: {value!s}" for key, value in info.items())
        return "\n".join(rendered)
|
|