Spaces:

nanom
/

to_passive_voice

Running

File size: 8,110 Bytes

import enum
import subprocess
import spacy
import pyinflect
from difflib import ndiff
from typing import List, Union, Tuple, Dict

# BES	auxiliary “be”		Let it **be**.
# HVS	forms of “have”		I**’ve** seen the Queen
# MD	verb, modal auxiliary	VerbType=mod	This **could** work.
# VB	verb, base form	VerbForm=inf	I want to **go**.
# VBD	verb, past tense	VerbForm=fin Tense=past	This **was** a sentence.
# VBG	verb, gerund or present participle	VerbForm=part Tense=pres Aspect=prog	I am **going**.
# VBN	verb, past participle	VerbForm=part Tense=past Aspect=perf	The treasure was **lost**.
# VBP	verb, non-3rd person singular present	VerbForm=fin Tense=pres	I **want** to go.
# VBZ	verb, 3rd person singular present	VerbForm=fin Tense=pres Number=sing Person=3	He **wants** to go.

class APVoice:
    """
    Rule-based converter from an English active-voice sentence to the pieces
    of its passive-voice counterpart.

    The sentence is analysed with a spaCy pipeline: the subject, the ROOT verb
    (plus its ``aux``/``neg`` children) and the direct object are located via
    their dependency labels, the verbal tense is guessed from the POS tags of
    the auxiliary and the ROOT, and the passive sentence is returned as a
    dictionary of parts (see ``active2passive``).
    """

    class Tense(enum.Enum):
        # Each member describes one supported tense:
        #   'aux'  : POS tags accepted for the first auxiliary (None = no auxiliary)
        #   'main' : POS tags accepted for the ROOT verb
        #   'tobe' : passive "to be" template per direct-object tag
        #            ('NN' singular / 'NNS' plural); '{}' receives the negation
        #            (" not" or "").
        # Members are tested in declaration order; the first match wins.
        simple_present  = {
            # 'VBP' covers the non-3rd-person auxiliary "do" ("They do not eat ...").
            'aux':[None, 'VBZ', 'VBP'],
            'main':['VBZ','VBP', 'VB'],
            'tobe':{'NN':'is{}','NNS':'are{}'}
        }
        simple_past     = {
            'aux':[None, 'VBD'],
            'main':['VBD', 'VB'],
            'tobe':{'NN':'was{}','NNS':'were{}'}
        }
        # NOTE: any modal auxiliary (tag 'MD') matches this entry, not only "will".
        future_simple   = {
            'aux':['MD'],
            'main':['VB'],
            'tobe':{'NN':'will{} be','NNS':'will{} be'}
        }
        present_cont    = {
            'aux':['VBP','VBZ'],
            'main':['VBG'],
            'tobe':{'NN':'is{} being','NNS':'are{} being'}
        }
        past_cont       = {
            'aux':['VBD'],
            'main':['VBG'],
            'tobe':{'NN':'was{} being','NNS':'were{} being'}
        }
        present_perfect = {
            'aux':['VBP','VBZ'],
            'main':['VBN'],
            'tobe':{'NN':'has{} been','NNS':'have{} been'}
        }

    def __init__(
        self
    ) -> None:
        """Create the converter and load the ``en_core_web_sm`` spaCy model."""

        self.parser = None
        self.__init_parser(model="en_core_web_sm")

    def __init_parser(
        self,
        model: str
    ) -> None:
        """
        Load the spaCy pipeline ``model`` into ``self.parser``.

        If the first load fails with ``OSError``, the model is downloaded with
        ``<current interpreter> -m spacy download <model>`` and the load is
        retried once; a failure of the second load propagates to the caller.
        """
        import sys

        self.parser = None
        try:
            self.parser = spacy.load(model)
        except OSError:
            print(f"* Downloading {model} model...")
            # Argument list + no shell: the model name is never interpreted by
            # a shell, and sys.executable targets the running environment.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.DEVNULL,
                check=False)

            self.parser = spacy.load(model)

    def verb2participle(
        self,
        verb: str
    ) -> str:
        """
        Return the past participle ('VBN') of ``verb``.

        The verb is parsed on its own and the first token is inflected through
        the ``inflect`` token extension. If no inflection is returned, the
        input verb is returned unchanged.
        """

        tk = self.parser(verb)[0]
        return tk._.inflect('VBN') or verb

    def subjp2objp(
        self,
        pronoun: str
    ) -> Union[str, None]:
        """
        Convert Subject pronouns to Object pronouns.

        The lookup is case-insensitive; ``None`` is returned for any word that
        is not one of the known subject pronouns.
        """
        mapping = {"i":"me","you":"you","we":"us","they":"them","he":"him","she":"her", "it":"it"}
        return mapping.get(pronoun.lower(), None)

    def get_gramatical_number(
        self,
        dobj_data: List[Tuple["spacy.tokens.Token",str,str]]
    ) -> Union[str, None]:
        """
        Return the POS tag of the first ``(token, dep, tag)`` entry whose
        dependency is ``'dobj'``, with ``'NNP'`` replaced by ``'NN'`` (so
        ``'NNP'`` -> ``'NN'`` and ``'NNPS'`` -> ``'NNS'``), or ``None`` when
        there is no such entry.
        """

        for _, dep, tag in dobj_data:
            if dep == 'dobj':
                return tag.replace('NNP', 'NN')

        return None

    def get_verbal_tense(
        self,
        verb_data: List[List[Tuple["spacy.tokens.Token",str,str,int]]]
    ) -> Union[str, None]:
        """
        Return the name of the first ``Tense`` member matching ``verb_data``.

        ``verb_data`` is ``[aux_data, neg_data, main_data]`` as produced by
        ``get_verb``. Only the tag of the first auxiliary (``None`` when there
        is no auxiliary) and the tag of the main verb are compared against each
        tense definition. ``None`` is returned when nothing matches.
        """

        aux, _neg, root = verb_data

        root_tag = root[0][2] if root else None
        aux_tag = aux[0][2] if aux else None

        for tense in self.Tense:
            spec = tense.value
            if aux_tag in spec['aux'] and root_tag in spec['main']:
                return tense.name

        return None

    def get_subj(
        self,
        sentence: str,
    ) -> Tuple[ List[Tuple["spacy.tokens.Token",str,str]], str]:
        """
        Locate the subject of ``sentence``.

        Returns ``(data, text)`` where ``data`` holds ``(token, dep, tag)`` for
        every token in the subtree of the first token whose dependency label
        contains ``"subj"``, and ``text`` is those tokens joined by spaces.
        Both are empty when no such token exists.
        """

        out_data = []
        for tk in self.parser(sentence):
            if "subj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_verb(
        self,
        sentence: str,
    ) -> Tuple[ List[List[Tuple["spacy.tokens.Token",str,str,int]]], str]:
        """
        Locate the main verb group of ``sentence``.

        Returns ``(data, text)`` where ``data`` is
        ``[aux_data, neg_data, main_data]`` - lists of
        ``(token, dep, tag, index)`` for the ``aux`` children, the ``neg``
        children and the first ROOT token itself - and ``text`` is all of those
        tokens joined by spaces in sentence order. When no ROOT token is found,
        ``data`` is ``[]`` and ``text`` is ``""``.
        """

        out_data = []

        for tk in self.parser(sentence):
            if "ROOT" in tk.dep_:
                main_data = [(tk, tk.dep_, tk.tag_, tk.i)]
                aux_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "aux"]
                neg_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "neg"]
                out_data = [aux_data, neg_data, main_data]
                break

        # Flatten the three groups and restore the original word order.
        ordered = sorted(
            (entry for group in out_data for entry in group),
            key=lambda entry: entry[3]
        )
        out_str = ' '.join(t.text for t, _, _, _ in ordered)
        return out_data, out_str

    def get_dobj(
        self,
        sentence: str,
    ) -> Tuple[ List[Tuple["spacy.tokens.Token",str,str]], str]:
        """
        Locate the direct object of ``sentence``.

        Returns ``(data, text)`` where ``data`` holds ``(token, dep, tag)`` for
        every token in the subtree of the first token whose dependency label
        contains ``"dobj"``, and ``text`` is those tokens joined by spaces.
        Both are empty when no such token exists.
        """

        out_data = []
        for tk in self.parser(sentence):
            if "dobj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_complement(
        self,
        subj: str,
        verb: str,
        dobj: str,
        full_sentence: str,
    ) -> str:
        """
        Return the words of ``full_sentence`` that are not accounted for by
        ``subj + verb + dobj``.

        Both strings are split on whitespace and compared with
        ``difflib.ndiff``; every word reported as added ('+') on the
        ``full_sentence`` side is kept, in order, joined by single spaces.
        """

        concat_sentence = subj + ' ' + verb + ' ' + dobj
        added_words = [
            line[2:]
            for line in ndiff(concat_sentence.split(), full_sentence.split())
            if line[0] == '+'
        ]

        return ' '.join(added_words).strip()

    def _build_agent(
        self,
        subj_data: List[Tuple["spacy.tokens.Token",str,str]]
    ) -> str:
        """Build the passive agent phrase ("by ...") from the active subject tokens."""

        words = ['by']
        for position, (tk, _, tag) in enumerate(subj_data):
            word = tk.text
            if tag == 'PRP':
                # Unknown pronouns are kept as they are instead of failing on None.
                word = self.subjp2objp(word) or word
            elif position == 0 and tag not in ('NNP', 'NNPS') and word.istitle():
                # The subject no longer opens the sentence: drop its
                # sentence-initial capital unless it is a proper noun.
                word = word.lower()
            words.append(word)

        return ' '.join(words)

    def active2passive(
        self,
        active_sentence: str,
        debug: bool=False
    ) -> Dict[str, str]:
        """
        Convert ``active_sentence`` into the parts of a passive sentence.

        Args:
            active_sentence: sentence in active voice containing a subject, a
                verb and a direct object.
            debug: when True, print the intermediate analysis data.

        Returns:
            A dict with the keys ``'subject'`` (the former direct object),
            ``'tobe'`` (conjugated "to be", including the negation),
            ``'participle'``, ``'agent'`` ("by" + the former subject) and
            ``'complement'`` (remaining words of the sentence).

        Raises:
            RuntimeError: if the sentence is empty, if the subject, verb or
                direct object cannot be found, if the tense is not one of
                ``Tense``, or if the tag of the direct object has no "to be"
                form for that tense.
        """

        active_sentence = active_sentence.strip()
        if active_sentence == "":
            raise RuntimeError(
                    "Error: The sentence does not be empty!"
                )

        subj_data, subj_str = self.get_subj(active_sentence)
        if debug:
            print(subj_data)
        if subj_str == "":
            raise RuntimeError(
                "Error: The sentence's subject has not been found or the sentence does not be the correct format!"
            )

        verb_data, verb_str = self.get_verb(active_sentence)
        if debug:
            print(verb_data)
        if verb_str == "":
            raise RuntimeError(
                "Error: The sentence's verb has not been found or the sentence does not be the correct format!"
            )

        dobj_data, dobj_str = self.get_dobj(active_sentence)
        if debug:
            print(dobj_data)
        if dobj_str == "":
            raise RuntimeError(
                "Error: The sentence's direct object has not been found or the sentence does not be the correct format!"
            )

        complement = self.get_complement(subj_str, verb_str, dobj_str, active_sentence)

        # Get passive subject
        p_subj = dobj_str

        # Get tense + participle verb
        verbal_tense = self.get_verbal_tense(verb_data)
        if debug:
            print(verbal_tense)
        if verbal_tense is None:
            raise RuntimeError(
                "Error: The sentence does not be the correct format or the verbal tense has not been implemented yet!"
            )

        _, neg_data, main_data = verb_data
        neg = " not" if len(neg_data) > 0 else ""
        gramatical_number = self.get_gramatical_number(dobj_data)
        if debug:
            print(gramatical_number)

        tobe_forms = self.Tense[verbal_tense].value['tobe']
        if gramatical_number not in tobe_forms:
            # Fail like every other validation step instead of leaking a KeyError.
            raise RuntimeError(
                f"Error: The direct object's tag ({gramatical_number}) is not supported yet!"
            )
        p_tobe = tobe_forms[gramatical_number].format(neg)
        p_verb = self.verb2participle(main_data[0][0].text)

        # Convert active subject to passive agent
        p_agent = self._build_agent(subj_data)

        return {
            # Upper-case only the first character: str.capitalize() would also
            # lower-case the rest and break proper nouns inside the subject.
            'subject': p_subj[:1].upper() + p_subj[1:],
            'tobe':p_tobe,
            'participle': p_verb,
            'agent': p_agent,
            'complement':complement
        }