nanom's picture
Added tense correction file for errors coming from pyinflect library database
80c77a7
raw
history blame
3.78 kB
import html
import subprocess
import sys
from typing import Any, Callable, Optional, Tuple, Union

import pandas as pd
import pyinflect  # noqa: F401  -- never referenced by name; presumably imported to register the `._.inflect` token extension
import spacy
class Parser:
    """Produce the infinitive, simple past and past participle of a verb.

    The verb is tokenized with a spaCy pipeline and inflected through the
    token's ``._.inflect`` extension.  A CSV table of manual overrides is
    consulted first so that known-bad inflections can be corrected.
    """

    # Runtime templates; kept free of leading whitespace on every line.
    _ERROR_TEMPLATE = (
        "\n"
        "<center>\n"
        '<div class="alert alert-warning" role="alert">\n'
        "<h6><b>{}</b></h6>\n"
        "</div>\n"
        "</center>\n"
    )
    _OUTPUT_TEMPLATE = (
        "\n"
        "|Infinitive| Simple Past | Past Participle |\n"
        "| :----: | :----: | :----: |\n"
        "|{} | {}| {}|\n"
    )

    def __init__(
        self,
        model: str = "en_core_web_sm",
        overrides_path: str = "data/overrides.csv",
    ) -> None:
        """Load the spaCy pipeline and the overrides table.

        Args:
            model: Name of the spaCy model to load (downloaded if missing).
            overrides_path: CSV file with a ``VB`` column plus one column
                per overridden tense tag.
        """
        self.parser = self.__init_parser(model)
        self.__overrides = self.__load_overrides(overrides_path)

    def __load_overrides(
        self,
        file_path: str
    ) -> Callable[[str, str], Union[str, bool]]:
        """Read the overrides CSV and return a ``(verb, tag)`` lookup.

        The returned callable yields the override string, or ``False`` when
        there is no usable override for that verb/tag pair.
        """
        # Shape after to_dict(): {tag_column: {infinitive: override}}
        table = pd.read_csv(file_path).set_index("VB").to_dict()

        def lookup(verb: str, tag: str) -> Union[str, bool]:
            # .get on both levels: an unknown tag must not raise KeyError.
            value = table.get(tag, {}).get(verb, False)
            # Empty CSV cells are read as NaN (a float); only a non-empty
            # string counts as a real override.
            if isinstance(value, str) and value:
                return value
            return False

        return lookup

    def __init_parser(
        self,
        model: str
    ) -> "spacy.language.Language":
        """Load the spaCy ``model``, downloading it once if it is missing.

        Raises:
            OSError: if the model still cannot be loaded after the download
                attempt.
        """
        try:
            return spacy.load(model)
        except OSError:
            # spaCy raises OSError when the model package is not installed.
            print(f"* Downloading {model} model...")
            # Argument list + sys.executable: no shell interpolation, and
            # the model is installed into the interpreter that is running.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.DEVNULL,
                check=False,
            )
            # If the download failed this re-raises spaCy's own OSError.
            return spacy.load(model)

    def __format_error(
        self,
        error: str
    ) -> str:
        """Wrap ``error`` (already HTML-safe) in the warning-alert markup."""
        return self._ERROR_TEMPLATE.format(error)

    def __format_output(
        self,
        infinitive: str,
        past: str,
        participle: str,
    ) -> str:
        """Render the three verb forms as a one-row Markdown table."""
        return self._OUTPUT_TEMPLATE.format(infinitive, past, participle)

    def is_in_overrides(
        self,
        verb: str,
        tense: str
    ) -> Tuple[bool, str]:
        """Check the overrides table for ``verb`` in ``tense``.

        Returns:
            ``(True, override)`` when an override exists, otherwise
            ``(False, "")``.
        """
        res = self.__overrides(verb, tense)
        if isinstance(res, str) and res:
            return True, res
        return False, ""

    def __get_inflections(
        self,
        infinitive: "spacy.tokens.token.Token",
        tense: str
    ) -> Optional[str]:
        """Return the form(s) of the token for ``tense``, joined by ``/``.

        Overrides take precedence.  Otherwise the first two inflection
        forms are requested; duplicates and ``None`` results are dropped.
        Returns ``None`` when no form is available.
        """
        found, override = self.is_in_overrides(infinitive.text, tense)
        if found:
            return override

        forms = [infinitive._.inflect(tense, form_num=n) for n in (0, 1)]
        # dict.fromkeys de-duplicates while keeping form 0 before form 1,
        # so the output order is deterministic (a set is not).
        unique = list(dict.fromkeys(f for f in forms if f is not None))
        if not unique:
            return None
        return "/".join(unique)

    def __getAllTenses(
        self,
        tk_verb: "spacy.tokens.token.Token"
    ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Return ``(infinitive, past, participle)``; any item may be None."""
        infinitive = tk_verb._.inflect('VB')
        past = self.__get_inflections(tk_verb, 'VBD')
        participle = self.__get_inflections(tk_verb, 'VBN')
        return infinitive, past, participle

    def __tokenizer(
        self,
        verb: str
    ) -> "spacy.tokens.token.Token":
        """Run the pipeline on ``verb`` and return its first token."""
        return self.parser(verb)[0]

    def get(
        self,
        verb: str
    ) -> Tuple[str, str]:
        """Conjugate ``verb`` and return ``(error_html, output_markdown)``.

        Exactly one of the two strings is non-empty: ``error_html`` when the
        input is blank or a form could not be produced, ``output_markdown``
        otherwise.  Input is stripped and lower-cased first; only the first
        token of the input is used.
        """
        verb = verb.strip().lower()
        if not verb:
            return self.__format_error(
                "Error: The Verb field can not be empty!"
            ), ""

        tk_verb = self.__tokenizer(verb)
        infinitive, past, participle = self.__getAllTenses(tk_verb)
        if infinitive is None or past is None or participle is None:
            # The verb comes from the user and is embedded in HTML: escape it.
            safe_verb = html.escape(verb)
            return self.__format_error(
                f"Error: The verb '<b>{safe_verb}</b>' has not been found or not spelled correctly!"
            ), ""

        return "", self.__format_output(infinitive, past, participle)