File size: 1,421 Bytes
6a1bcc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
from .text_utils import split_words
from .stream import StreamInstanceOperator, InstanceOperatorWithGlobalAccess, Artifact
from datasets import Value, Features, Dataset, Sequence
from dataclasses import field
from typing import Dict, Any
from abc import ABC, abstractmethod
class Validator(ABC):
    """Marker base class shared by all validators; declares no members."""
class ValidateSchema(Validator, StreamInstanceOperator):
    """Instance operator that holds a ``Features`` schema and checks field presence.

    ``process`` returns every instance unchanged; the checks live in
    ``verify`` (the schema itself) and ``verify_first_instance`` (required
    fields of one instance).
    """

    # Schema to validate against; ``verify`` rejects the default ``None``.
    schema: Features = None

    def verify(self):
        """Assert that a schema was supplied and that it is a ``Features``.

        Raises:
            AssertionError: if ``schema`` is ``None`` or is not a
                ``Features`` instance.
        """
        # The None check must come first: isinstance(None, Features) is
        # False, so the opposite order reports the wrong message for an
        # unset schema and makes the "must be specified" check unreachable.
        assert self.schema is not None, "Schema must be specified"
        assert isinstance(self.schema, Features), "Schema must be an instance of Features"

    def verify_first_instance(self, instance):
        """Assert that every required field name is present in ``instance``.

        Args:
            instance: container supporting ``in`` lookups by field name.

        Raises:
            AssertionError: naming the first required field that is missing.
        """
        # ``standart_fields`` is not defined by this class. It is honoured
        # when something else provides it; otherwise the field names of the
        # schema are used (iterating a ``Features`` mapping yields its keys).
        required_fields = getattr(self, "standart_fields", None)
        if required_fields is None:
            required_fields = self.schema if self.schema is not None else ()
        for field_name in required_fields:
            assert field_name in instance, f'Field "{field_name}" is missing in the first instance'

    def process(self, instance: Dict[str, Any], stream_name: str = None) -> Dict[str, Any]:
        """Return ``instance`` unchanged; ``stream_name`` is not used."""
        return instance
class StandardSchema(Features):
    """``Features`` pre-populated with a fixed set of columns.

    Columns: ``source``, ``target`` and ``parser`` are strings;
    ``references`` and ``metrics`` are sequences of strings.
    """

    def __init__(self):
        """Build the fixed column mapping and hand it to ``Features``."""
        columns = {
            "source": Value("string"),
            "target": Value("string"),
            "references": Sequence(Value("string")),
            "metrics": Sequence(Value("string")),
            "parser": Value("string"),
        }
        # Currently disabled string columns: 'group', 'guidance'.
        super().__init__(columns)
class ValidateStandartSchema:
    """Declares a ``schema`` attribute whose default factory is ``StandardSchema``."""

    # NOTE(review): no base class or @dataclass decorator is visible here, so
    # unless this class is processed as a dataclass elsewhere, ``schema``
    # stays a raw ``dataclasses.Field`` object rather than a ``StandardSchema``
    # instance. Confirm whether this was meant to derive from a schema
    # validator class.
    schema: Features = field(
        default_factory=StandardSchema,
    )
|