Spaces:
Sleeping
Sleeping
import html
import subprocess
import sys
from typing import Any, Callable, Optional, Tuple, Union

import pandas as pd
import pyinflect  # noqa: F401  (name unused; presumably imported to register the `._.inflect` token extension)
import spacy
class Parser:
    """Return the infinitive, simple past and past participle of a verb.

    Inflections come from the ``._.inflect`` token extension of the loaded
    spaCy pipeline; a CSV table of manual overrides takes precedence over
    the generated forms. Results are rendered as an HTML error snippet or a
    Markdown table.
    """

    def __init__(
        self,
        model: str = "en_core_web_sm",
        overrides_path: str = "data/overrides.csv",
    ) -> None:
        """Load the spaCy pipeline and the overrides table.

        Args:
            model: Name of the spaCy model to load (downloaded if missing).
            overrides_path: CSV file indexed by a ``VB`` column; the other
                columns are looked up by tense tag.
        """
        self.parser = self.__init_parser(model)
        self.__overrides = self.__load_overrides(overrides_path)

    def __load_overrides(
        self,
        file_path: str
    ) -> Callable[[str, str], Any]:
        """Read the overrides CSV and return a ``lookup(verb, tag)`` function.

        The returned function yields the stored cell for ``verb`` under the
        column ``tag``, or ``False`` when either the verb or the column is
        absent.
        """
        table = pd.read_csv(file_path).set_index('VB').to_dict()

        def lookup(verb: str, tag: str) -> Any:
            # A tag without a column means "no override", not a KeyError.
            return table.get(tag, {}).get(verb, False)

        return lookup

    def __init_parser(
        self,
        model: str
    ) -> "spacy.language.Language":
        """Load ``model``, downloading it first if it is not installed.

        Raises:
            OSError: If the model still cannot be loaded after the download
                attempt.
        """
        try:
            return spacy.load(model)
        except OSError:
            # spaCy signals a missing model package with OSError; anything
            # else (e.g. KeyboardInterrupt) must propagate untouched.
            print(f"* Downloading {model} model...")
            # Use the running interpreter and an argument list (no shell) so
            # the model lands in this environment and the name is not
            # shell-interpreted. Output is captured and discarded as before.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
                check=False,
            )
            return spacy.load(model)

    def __format_error(
        self,
        error: str
    ) -> str:
        """Wrap an (already HTML-safe) message in a warning alert box."""
        template = (
            '\n'
            '<center>\n'
            '<div class="alert alert-warning" role="alert">\n'
            '<h6><b>{}</b></h6>\n'
            '</div>\n'
            '</center>\n'
        )
        return template.format(error)

    def __format_output(
        self,
        infinitive: str,
        past: str,
        participle: str,
    ) -> str:
        """Render the three verb forms as a one-row Markdown table."""
        template = (
            '\n'
            '|Infinitive| Simple Past | Past Participle |\n'
            '| :----: | :----: | :----: |\n'
            '|{} | {}| {}|\n'
        )
        return template.format(infinitive, past, participle)

    def is_in_overrides(
        self,
        verb: str,
        tense: str
    ) -> Tuple[bool, str]:
        """Check whether ``verb`` has a manual override for ``tense``.

        Returns:
            ``(True, forms)`` when a non-empty override string exists,
            otherwise ``(False, "")``.
        """
        res = self.__overrides(verb, tense)
        # Only a non-empty string is a usable override; this also rejects
        # the ``False`` sentinel and NaN produced by empty CSV cells.
        if isinstance(res, str) and res:
            return True, res
        return False, ""

    def __get_inflections(
        self,
        infinitive: "spacy.tokens.Token",
        tense: str
    ) -> Optional[str]:
        """Return the form(s) of the verb for ``tense`` joined by ``/``.

        Overrides win over generated inflections. Returns ``None`` when no
        form is available.
        """
        # Check if verb is in overrides file
        found, overridden = self.is_in_overrides(infinitive.text, tense)
        if found:
            return overridden

        candidates = (
            infinitive._.inflect(tense, form_num=0),
            infinitive._.inflect(tense, form_num=1),
        )
        # dict.fromkeys de-duplicates while keeping form order, so the
        # output is stable across runs; missing forms are dropped before
        # joining.
        forms = list(dict.fromkeys(f for f in candidates if f is not None))
        if not forms:
            return None
        return '/'.join(forms)

    def __getAllTenses(
        self,
        tk_verb: "spacy.tokens.Token"
    ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Return ``(infinitive, past, participle)``; any item may be None."""
        infinitive = tk_verb._.inflect('VB')
        past = self.__get_inflections(tk_verb, 'VBD')
        participle = self.__get_inflections(tk_verb, 'VBN')
        return infinitive, past, participle

    def __tokenizer(
        self,
        verb: str
    ) -> "spacy.tokens.Token":
        """Run the pipeline on ``verb`` and return its first token."""
        return self.parser(verb)[0]

    def get(
        self,
        verb: str
    ) -> Tuple[str, str]:
        """Look up ``verb`` and return ``(error_html, output_markdown)``.

        Exactly one of the two strings is non-empty: the error snippet when
        the input is blank or no inflection is found, the table otherwise.
        """
        verb = verb.strip().lower()
        if not verb:
            return self.__format_error(
                "Error: The Verb field can not be empty!"
            ), ""

        tk_verb = self.__tokenizer(verb)
        infinitive, past, participle = self.__getAllTenses(tk_verb)

        if infinitive is None or past is None or participle is None:
            # The verb is user input echoed into HTML: escape it.
            safe_verb = html.escape(verb)
            return self.__format_error(
                f"Error: The verb '<b>{safe_verb}</b>' has not been found or not spelled correctly!"
            ), ""

        print(f"{verb} -> {infinitive},{past},{participle}")
        return "", self.__format_output(infinitive, past, participle)