File size: 1,656 Bytes
e8425dc 2746bef e8425dc 2746bef f2336e3 2636a15 f2336e3 2636a15 c60c34e 2746bef e8425dc 2746bef e8425dc 2746bef e8425dc 2636a15 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import json
import re
from typing import Any
from .operator import BaseFieldOperator
class ToString(BaseFieldOperator):
    """Field operator whose ``process`` converts a value with ``str()``."""

    def process(self, instance):
        """Return the ``str()`` representation of *instance*."""
        as_text = str(instance)
        return as_text
class ToListByComma(BaseFieldOperator):
    """Field operator that splits a comma-separated string into trimmed parts."""

    def process(self, instance):
        """Split *instance* on ``","`` and strip whitespace from every part.

        Empty parts are kept, so ``"a,,b"`` yields ``["a", "", "b"]``.
        """
        pieces = []
        for chunk in instance.split(","):
            pieces.append(chunk.strip())
        return pieces
class RegexParser(BaseFieldOperator):
    """
    Extract every match of ``regex`` from a string.

    When ``termination_regex`` is set and fully matches the input, parsing is
    skipped and an empty list is returned.
    """

    # Pattern handed to ``re.findall`` to collect the matches.
    regex: str
    # Optional pattern; a full match of the whole text short-circuits to [].
    termination_regex: str = None

    def process(self, text):
        """Return ``re.findall(self.regex, text)``, or ``[]`` on termination."""
        stop_pattern = self.termination_regex
        if stop_pattern is not None:
            if re.fullmatch(stop_pattern, text):
                return []
        return re.findall(self.regex, text)
class LoadJson(BaseFieldOperator):
    """Field operator that decodes a JSON document from text."""

    def process(self, text):
        """Return ``json.loads(text)``; fall back to ``[]`` on malformed JSON.

        Only ``json.JSONDecodeError`` is handled; any other exception raised
        by ``json.loads`` propagates to the caller.
        """
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            parsed = []
        return parsed
class ListToEmptyEntitiesTuples(BaseFieldOperator):
    """Field operator that pairs each item of an iterable with an empty string."""

    def process(self, lst):
        """Return ``[(str(item), ""), ...]`` for every item in *lst*.

        Returns ``[]`` when *lst* cannot be iterated (for example ``None`` or
        a number).
        """
        try:
            return [(str(item), "") for item in lst]
        except TypeError:
            # Iterating a non-iterable raises TypeError. The previous handler
            # caught json.JSONDecodeError, which nothing in this method can
            # raise, so the empty-list fallback was unreachable.
            return []
class DictOfListsToPairs(BaseFieldOperator):
    """Flatten a mapping of ``key -> iterable of strings`` into 2-tuples."""

    # True  -> pairs are emitted as (key, value)
    # False -> pairs are emitted as (value, key)
    position_key_before_value: bool = True

    def process(self, obj):
        """Return one pair per (key, value) combination found in *obj*.

        The conversion is best-effort: if *obj* does not have the expected
        shape, or any value is not a ``str``, an empty list is returned
        instead of raising.
        """
        pairs = []
        try:
            for key, values in obj.items():
                for value in values:
                    # Explicit check rather than ``assert`` so the validation
                    # is not stripped when Python runs with -O.
                    if not isinstance(value, str):
                        return []
                    if self.position_key_before_value:
                        pairs.append((key, value))
                    else:
                        pairs.append((value, key))
        except Exception:
            # Deliberate best-effort fallback for malformed input. Unlike a
            # bare ``except:``, this lets KeyboardInterrupt and SystemExit
            # propagate.
            return []
        return pairs
# add_to_catalog(ToString('prediction'), 'processors', 'to_string')
|