import subprocess
import sys
from typing import Tuple, Union, Any

import pandas as pd
import pyinflect  # noqa: F401 - imported for its side effect (token `._.inflect` extension)
import spacy


class Parser:
    """Look up the infinitive, simple past and past participle of an English verb.

    Inflections are read from the ``._.inflect`` extension of a spaCy token.
    Entries found in the overrides CSV take precedence over the computed
    inflections for the ``VBD`` and ``VBN`` tags.
    """

    def __init__(
        self
    ) -> None:
        """Load the spaCy pipeline and the overrides table."""
        self.parser = self.__init_parser("en_core_web_sm")
        self.__overrides = self.__load_overrides("data/overrides.csv")

    def __load_overrides(
        self,
        file_path: str
    ) -> Any:
        """Build a ``lookup(verb, tag)`` function from the overrides CSV.

        The CSV is indexed by its ``VB`` column; every other column is
        treated as a tag name mapping a verb to its override string.

        Args:
            file_path: Location of the CSV file (anything ``pandas.read_csv``
                accepts).

        Returns:
            A callable ``lookup(verb, tag)`` that returns the override string,
            or ``False`` when there is no usable entry.
        """
        table = pd.read_csv(file_path).set_index('VB').to_dict()

        def lookup(verb: str, tag: str) -> Union[str, bool]:
            # An unknown tag column must not raise, and an empty CSV cell is
            # read by pandas as NaN (a float), which is not a real override.
            value = table.get(tag, {}).get(verb, False)
            return value if isinstance(value, str) else False

        return lookup

    def __init_parser(
        self,
        model: str
    ) -> "spacy.language.Language":
        """Load a spaCy model, downloading it first when it is not installed.

        Args:
            model: Name of the spaCy model package.

        Returns:
            The loaded spaCy pipeline.

        Raises:
            OSError: If the model still cannot be loaded after the download
                attempt.
        """
        try:
            return spacy.load(model)
        except OSError:
            # spaCy reports a missing model package as OSError; anything else
            # is a genuine failure that a download would not fix.
            print(f"* Downloading {model} model...")
            # Use the running interpreter and an argument list (no shell) so
            # the model is installed into the environment that will load it.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
                check=False,
            )
            return spacy.load(model)

    def __format_error(
        self,
        error: str
    ) -> str:
        """Wrap an error message in a leading and a trailing newline."""
        # Built with explicit newlines so the method's indentation can never
        # leak into the emitted text.
        template = "\n{}\n"
        return template.format(error)

    def __format_output(
        self,
        infinitive: str,
        past: str,
        participle: str,
    ) -> str:
        """Render the three verb forms as a one-row Markdown table."""
        template = (
            "\n"
            "|Infinitive| Simple Past | Past Participle |\n"
            "| :----: | :----: | :----: |\n"
            "|{} | {}| {}|\n"
        )
        return template.format(infinitive, past, participle)

    def is_in_overrides(
        self,
        verb: str,
        tense: str
    ) -> Tuple[bool, str]:
        """Check whether an override exists for ``verb`` under ``tense``.

        Args:
            verb: Verb text to look up.
            tense: Tag name of the requested form (e.g. ``'VBD'``).

        Returns:
            ``(True, override)`` when an override string exists, otherwise
            ``(False, "")``.
        """
        res = self.__overrides(verb, tense)
        if isinstance(res, str):
            return True, res
        return False, ""

    def __get_inflections(
        self,
        infinitive: "spacy.tokens.token.Token",
        tense: str
    ) -> Union[str, None]:
        """Return the form(s) of a verb for ``tense``, joined with ``/``.

        Args:
            infinitive: Token whose ``._.inflect`` extension is queried.
            tense: Tag name of the requested form.

        Returns:
            The override string if one exists; otherwise the distinct forms
            returned for ``form_num`` 0 and 1 joined with ``/``; ``None`` when
            no form is available.
        """
        # The overrides file wins over the computed inflections.
        found, override = self.is_in_overrides(infinitive.text, tense)
        if found:
            return override

        candidates = (
            infinitive._.inflect(tense, form_num=num) for num in (0, 1)
        )
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # "a/b" output is stable between runs; missing forms are skipped so a
        # single None cannot break the join.
        forms = list(
            dict.fromkeys(form for form in candidates if form is not None)
        )
        if not forms:
            return None
        return '/'.join(forms)

    def __getAllTenses(
        self,
        tk_verb: "spacy.tokens.token.Token"
    ) -> Tuple[Union[str, None], Union[str, None], Union[str, None]]:
        """Return ``(infinitive, past, participle)`` for a verb token.

        Any element may be ``None`` when the corresponding form is not
        available.
        """
        infinitive = tk_verb._.inflect('VB')
        past = self.__get_inflections(tk_verb, 'VBD')
        participle = self.__get_inflections(tk_verb, 'VBN')
        return infinitive, past, participle

    def __tokenizer(
        self,
        verb: str
    ) -> "spacy.tokens.token.Token":
        """Run the pipeline on ``verb`` and return its first token."""
        return self.parser(verb)[0]

    def get(
        self,
        verb: str
    ) -> Tuple[str, str]:
        """Look up a verb and format the result.

        Args:
            verb: User-supplied verb; surrounding whitespace is stripped and
                the text is lower-cased before the lookup.

        Returns:
            ``(error, output)``. Exactly one of the two is non-empty: ``error``
            holds a formatted message when the input is empty or a form is
            missing, otherwise ``output`` holds the Markdown table.
        """
        verb = verb.strip().lower()
        error, output = "", ""

        if verb == "":
            error = self.__format_error(
                "Error: The Verb field can not be empty!"
            )
            return error, output

        tk_verb = self.__tokenizer(verb)
        infinitive, past, participle = self.__getAllTenses(tk_verb)

        if infinitive is None or past is None or participle is None:
            error = self.__format_error(
                f"Error: The verb '{verb}' has not been found or not spelled correctly!"
            )
            return error, output

        print(f"{verb} -> {infinitive},{past},{participle}")
        output = self.__format_output(infinitive, past, participle)
        return error, output