import enum
import subprocess
import sys
from difflib import ndiff
from typing import Dict, List, Tuple, Union

import pyinflect  # noqa: F401  -- imported for the `Token._.inflect` extension used below
import spacy

# Part-of-speech tags for verbs referenced by the tense table below:
#   BES  auxiliary “be”                                                                Let it **be**.
#   HVS  forms of “have”                                                               I**’ve** seen the Queen
#   MD   verb, modal auxiliary               VerbType=mod                              This **could** work.
#   VB   verb, base form                     VerbForm=inf                              I want to **go**.
#   VBD  verb, past tense                    VerbForm=fin Tense=past                   This **was** a sentence.
#   VBG  verb, gerund or present participle  VerbForm=part Tense=pres Aspect=prog      I am **going**.
#   VBN  verb, past participle               VerbForm=part Tense=past Aspect=perf      The treasure was **lost**.
#   VBP  verb, non-3rd person singular present  VerbForm=fin Tense=pres                I **want** to go.
#   VBZ  verb, 3rd person singular present   VerbForm=fin Tense=pres Number=sing Person=3  He **wants** to go.


class APVoice:
    """Rule-based converter of English active-voice sentences into passive parts.

    The sentence is parsed with a spaCy pipeline; subject, verb group and
    direct object are located through their dependency labels, and the
    passive "to be" form is chosen from the `Tense` table.
    """

    class Tense(enum.Enum):
        """Supported tenses.

        Each value holds:
          * 'aux'  -- accepted tags of the first auxiliary (None = no auxiliary),
          * 'main' -- accepted tags of the root verb,
          * 'tobe' -- passive "to be" template per direct-object tag
                      ('NN' singular, 'NNS' plural); `{}` receives the
                      negation (" not" or "").
        """

        simple_present = {
            # 'VBP' covers do-support with a non-3rd-person subject
            # ("I do not like apples"); 'VBZ' covers "does".
            'aux': [None, 'VBZ', 'VBP'],
            'main': ['VBZ', 'VBP', 'VB'],
            'tobe': {'NN': 'is{}', 'NNS': 'are{}'},
        }
        simple_past = {
            'aux': [None, 'VBD'],
            'main': ['VBD', 'VB'],
            'tobe': {'NN': 'was{}', 'NNS': 'were{}'},
        }
        future_simple = {
            'aux': ['MD'],
            'main': ['VB'],
            'tobe': {'NN': 'will{} be', 'NNS': 'will{} be'},
        }
        present_cont = {
            'aux': ['VBP', 'VBZ'],
            'main': ['VBG'],
            'tobe': {'NN': 'is{} being', 'NNS': 'are{} being'},
        }
        past_cont = {
            'aux': ['VBD'],
            'main': ['VBG'],
            'tobe': {'NN': 'was{} being', 'NNS': 'were{} being'},
        }
        present_perfect = {
            'aux': ['VBP', 'VBZ'],
            'main': ['VBN'],
            'tobe': {'NN': 'has{} been', 'NNS': 'have{} been'},
        }

    def __init__(
        self
    ) -> None:
        """Create the converter and load the `en_core_web_sm` pipeline."""
        self.parser = None
        self.__init_parser(model="en_core_web_sm")

    def __init_parser(
        self,
        model: str
    ) -> None:
        """Load the spaCy pipeline `model`, downloading it first if it is missing.

        Raises:
            OSError: if the model still cannot be loaded after the download attempt.
        """
        self.parser = None
        try:
            self.parser = spacy.load(model)
        except OSError:
            # spacy.load signals a missing model with OSError; anything else
            # (including KeyboardInterrupt) must propagate untouched.
            print(f"* Downloading {model} model...")
            # Argument list + sys.executable: no shell interpolation of `model`,
            # and the model is installed for the interpreter that is running.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
                check=False,
            )
            self.parser = spacy.load(model)

    def verb2participle(
        self,
        verb: str
    ) -> str:
        """Return the past participle (VBN) of `verb`.

        Only the first token of `verb` is inflected. When the text is empty
        or no inflection is available, `verb` is returned unchanged.
        """
        doc = self.parser(verb)
        if len(doc) == 0:
            return verb
        return doc[0]._.inflect('VBN') or verb

    def subjp2objp(
        self,
        pronoun: str
    ) -> Union[str, None]:
        """
        Convert Subject pronouns to Object pronouns.

        The lookup is case-insensitive; None is returned for any word that is
        not one of the known subject pronouns.
        """
        mapping = {
            "i": "me",
            "you": "you",
            "we": "us",
            "they": "them",
            "he": "him",
            "she": "her",
            "it": "it",
        }
        return mapping.get(pronoun.lower(), None)

    def get_gramatical_number(
        self,
        dobj_data: List[Tuple[str, str, str]]
    ) -> Union[str, None]:
        """Return the tag of the direct-object head with proper nouns folded in.

        `dobj_data` is the list of (token, dep, tag) triples from `get_dobj`.
        'NNP' becomes 'NN' and 'NNPS' becomes 'NNS'; any other tag is returned
        as is. None is returned when no entry has the 'dobj' dependency.
        """
        for _, dep, tag in dobj_data:
            if dep == 'dobj':
                return tag.replace('NNP', 'NN')
        return None

    def get_verbal_tense(
        self,
        verb_data: List[List[Tuple[str, str, str, int]]]
    ) -> Union[str, None]:
        """Return the name of the first `Tense` matching the verb group.

        `verb_data` is the `[aux, neg, root]` structure from `get_verb`. Only
        the first auxiliary and the root tag take part in the match. None is
        returned when no tense matches.
        """
        aux, _neg, root = verb_data
        root_tag = root[0][2] if root else None
        aux_tag = aux[0][2] if aux else None

        for tense in self.Tense:
            if aux_tag in tense.value['aux'] and root_tag in tense.value['main']:
                return tense.name
        return None

    def get_subj(
        self,
        sentence: str,
    ) -> Tuple[List[Tuple[str, str, str]], str]:
        """Locate the subject phrase of `sentence`.

        Returns the (token, dep, tag) triples of the subtree of the first
        token whose dependency label contains "subj", plus the phrase text
        joined by single spaces. Both are empty when no subject is found.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "subj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_verb(
        self,
        sentence: str,
    ) -> Tuple[List[List[Tuple[str, str, str, int]]], str]:
        """Locate the root verb of `sentence` with its auxiliaries and negation.

        Returns `[aux, neg, root]`, each a list of (token, dep, tag, index)
        tuples, plus the verb-group text in sentence order. Both are empty
        when no ROOT token is found.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "ROOT" in tk.dep_:
                main_data = [(tk, tk.dep_, tk.tag_, tk.i)]
                aux_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "aux"]
                neg_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "neg"]
                out_data = [aux_data, neg_data, main_data]
                break

        # Flatten the three groups and restore the original word order.
        ordered = sorted(
            (tup for group in out_data for tup in group),
            key=lambda x: x[3],
        )
        out_str = ' '.join(t.text for t, _, _, _ in ordered)
        return out_data, out_str

    def get_dobj(
        self,
        sentence: str,
    ) -> Tuple[List[Tuple[str, str, str]], str]:
        """Locate the direct-object phrase of `sentence`.

        Returns the (token, dep, tag) triples of the subtree of the first
        token whose dependency label contains "dobj", plus the phrase text
        joined by single spaces. Both are empty when no object is found.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "dobj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_complement(
        self,
        subj: str,
        verb: str,
        dobj: str,
        full_sentence: str,
    ) -> str:
        """Return the words of `full_sentence` not covered by subj + verb + dobj.

        Both sides are split on whitespace and compared with `difflib.ndiff`;
        the words ndiff marks as added ('+') are joined with single spaces.
        """
        concat_sentence = subj + ' ' + verb + ' ' + dobj
        delta = ndiff(concat_sentence.split(), full_sentence.split())
        # Each ndiff line is "<mark> <word>"; keep only the additions.
        return ' '.join(line[2:] for line in delta if line[0] == '+')

    def active2passive(
        self,
        active_sentence: str,
        debug: bool = False
    ) -> Dict[str, str]:
        """Split an active-voice sentence into the parts of its passive form.

        Args:
            active_sentence: sentence with a subject, a verb and a direct object.
            debug: when True, print the intermediate parse data.

        Returns:
            A dict with the keys 'subject', 'tobe', 'participle', 'agent'
            and 'complement'.

        Raises:
            RuntimeError: if the sentence is empty, if the subject, verb or
                direct object cannot be found, if the tense is not supported,
                or if the direct object's tag has no "to be" form.
        """
        active_sentence = active_sentence.strip()
        if active_sentence == "":
            raise RuntimeError(
                "Error: The sentence must not be empty!"
            )

        subj_data, subj_str = self.get_subj(active_sentence)
        if debug:
            print(subj_data)
        if subj_str == "":
            raise RuntimeError(
                "Error: The sentence's subject has not been found or the sentence does not be the correct format!"
            )

        verb_data, verb_str = self.get_verb(active_sentence)
        if debug:
            print(verb_data)
        if verb_str == "":
            raise RuntimeError(
                "Error: The sentence's verb has not been found or the sentence does not be the correct format!"
            )

        dobj_data, dobj_str = self.get_dobj(active_sentence)
        if debug:
            print(dobj_data)
        if dobj_str == "":
            raise RuntimeError(
                "Error: The sentence's direct object has not been found or the sentence does not be the correct format!"
            )

        complement = self.get_complement(subj_str, verb_str, dobj_str, active_sentence)

        # Get passive subject
        p_subj = dobj_str

        # Get tense + participle verb
        verbal_tense = self.get_verbal_tense(verb_data)
        if debug:
            print(verbal_tense)
        if verbal_tense is None:
            raise RuntimeError(
                "Error: The sentence does not be the correct format or the verbal tense has not been implemented yet!"
            )

        _, neg_data, main_data = verb_data
        neg = " not" if neg_data else ""

        gramatical_number = self.get_gramatical_number(dobj_data)
        if debug:
            print(gramatical_number)

        tobe_forms = self.Tense[verbal_tense].value['tobe']
        if gramatical_number not in tobe_forms:
            # E.g. a pronoun or determiner as direct object: there is no
            # 'NN'/'NNS' template to pick, so fail with the method's own
            # error type instead of a bare KeyError.
            raise RuntimeError(
                "Error: The grammatical number of the direct object could not be determined or is not supported yet!"
            )
        p_tobe = tobe_forms[gramatical_number].format(neg)
        p_verb = self.verb2participle(main_data[0][0].text)

        # Convert active subject to passive agent
        agent_words = []
        for position, (tk, _, tag) in enumerate(subj_data):
            word = tk.text
            if tag == 'PRP':
                # Unknown pronouns are kept as they are instead of becoming None.
                word = self.subjp2objp(word) or word
            elif position == 0 and tag not in ('NNP', 'NNPS'):
                # The subject opened the sentence; it no longer does in the
                # passive form, so drop its capital unless it is a proper noun.
                word = word[:1].lower() + word[1:]
            agent_words.append(word)
        p_agent = "by " + " ".join(agent_words)

        return {
            # Upper-case only the first character: str.capitalize() would
            # lower-case the rest and mangle proper nouns.
            'subject': p_subj[:1].upper() + p_subj[1:],
            'tobe': p_tobe,
            'participle': p_verb,
            'agent': p_agent.strip(),
            'complement': complement
        }