File size: 4,393 Bytes
79e12fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba8d0da
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import logging, regex
import gradio
from email_parser import utils, nlp
from email_parser.doc_email import Email

def print_highlighted_text(text, df_result, offset=0):
    """Split ``text`` on whitespace and pair every word with its entity label.

    Despite the name, nothing is printed: the ``(word, entity)`` pairs are
    returned to the caller.

    Args:
        text: String to split. A word is the span between two successive
            whitespace characters (or the end of the string), so consecutive
            whitespace characters produce empty-string words.
        df_result: pandas DataFrame with ``start``, ``end`` and ``entity``
            columns. ``None`` or an empty frame means that no word is tagged.
        offset: Value added to the word positions before they are compared
            with ``start``/``end``; lets ``text`` be a slice of the string the
            spans in ``df_result`` refer to.

    Returns:
        A list of ``(word, entity)`` tuples in text order. ``entity`` is the
        ``entity`` value of the first row whose ``[start, end]`` range contains
        the whole word, or ``None`` when no row does.
    """
    has_entities = df_result is not None and not df_result.empty
    if has_entities:
        # Hoisted out of the loop: the columns do not change between words.
        span_starts = df_result["start"]
        span_ends = df_result["end"]

    list_values = []
    start_pos = 0
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    for match in regex.finditer(r"\s|$", text):
        word_start = start_pos + offset
        word_end = match.start() + offset

        entity = None
        if has_entities:
            # Same condition as the query "word_start >= start & word_end <= end",
            # without re-parsing an expression string for every word.
            df_entity = df_result[(span_starts <= word_start) & (span_ends >= word_end)]
            if len(df_entity) > 0:
                entity = df_entity["entity"].values[0]

        list_values.append((text[start_pos:match.start()], entity))
        # The next word starts right after the separator that ended this one.
        start_pos = match.end()
    return list_values


def display_email(text, part=1):
    """Extract one email from a thread and split it into highlighted sections.

    Args:
        text: Raw text of the email thread, passed to ``Email``.
        part: 1-based number of the email to display. It is converted with
            ``int()`` before being used as an index, so numeric values such as
            ``2.0`` are accepted.

    Returns:
        A 5-tuple ``(error, lang, header_words, body_words, signature_words)``.
        On success ``error`` is ``None``, ``lang`` is the value returned by
        ``nlp.f_detect_language`` for the body, and the three remaining items
        are the ``(word, entity)`` lists built by ``print_highlighted_text``
        (empty lists when there is no header or no detected signature).
        On failure ``error`` is a message and the four other items are ``None``.
    """
    # Reject a missing or non-positive number up front: int(part - 1) would
    # otherwise be negative and silently index from the end of the thread.
    if part is None or part < 1:
        return "Email number must be 1 or greater", None, None, None, None

    doc = Email(text)
    list_emails = doc.list_emails
    if part > len(list_emails):
        return f"Email number {int(part)} was requested but only {len(list_emails)} emails was found in this thread", \
               None, None, None, None

    email = list_emails[int(part - 1)]
    text = email["body"]
    header = email["header"]
    lang = nlp.f_detect_language(text)

    if header:
        df_results_header = nlp.f_ner(header, lang=lang)
        df_results_header = Email.f_find_person_in_header(header, df_result=df_results_header)
        list_words_headers = print_highlighted_text(header, df_results_header)
    else:
        list_words_headers = []

    df_result = nlp.f_ner(text, lang=lang)
    df_signature = nlp.f_detect_email_signature(text, df_ner=df_result)
    if df_signature is not None and len(df_signature) > 0:
        # Everything from the first signature row's start position onward is
        # shown as the signature; the text before it is the body.
        start_signature_position = df_signature["start"].values[0]
        text_body = text[:start_signature_position]
        text_signature = text[start_signature_position:]
        # The entity positions refer to the full text, hence the offset.
        list_words_signature = print_highlighted_text(text_signature, df_result, offset=start_signature_position)
    else:
        text_body = text
        list_words_signature = []
    list_words_body = print_highlighted_text(text_body, df_result)

    return None, lang, list_words_headers, list_words_body, list_words_signature


# Set up the project's logging before the interface is built.
utils.f_setup_logger(level_sysout=logging.ERROR, level_file=logging.INFO, folder_path="logs")


# Text shown under the title of the demo page.
_DESCRIPTION = (
    "Small application that can extract a specific email in a thread of email,"
    " highlights the entities found in the text (person, organization, date,...)"
    " and extract email signature if any."
)

# Example made of a single email.
_EXAMPLE_SINGLE_EMAIL = """Bonjour Vincent,
Merci de m’avoir rappelé hier.
Seriez vous disponible pour un rendez vous la semaine prochaine?
Merci,
Jean-Baptiste"""

# Example thread holding a reply followed by the quoted original message;
# it is offered twice below, once per email number.
_EXAMPLE_THREAD = """Hello Jack,

I hope you had nice holiday as well.
Please find attached the requested documents,

Best Regards,
George
Vice president of Something
email: [email protected]
tel: 512-222-5555

On Mon, Jan 7, 2022 at 12:39 PM, Jack <[email protected]> wrote:

Hello George,

I wish you a happy new year. I hope you had nice holidays.
Did you see Garry during your vacation?
Do you have the documents I requested earlier?

Thanks,
Jack


"""

# Inputs map to the (text, part) parameters of display_email.
_input_components = [
    "textbox",
    gradio.inputs.Number(default=1, label="Email number in thread"),
]

# Outputs map, in order, to the 5-tuple returned by display_email.
_output_components = [
    gradio.outputs.Textbox(type="str", label="Error"),
    gradio.outputs.Textbox(type="str", label="Language"),
    gradio.outputs.HighlightedText(label="Header"),
    gradio.outputs.HighlightedText(label="Body"),
    gradio.outputs.HighlightedText(label="Signature"),
]

# Each example is [thread text, email number in thread].
_examples = [
    [_EXAMPLE_SINGLE_EMAIL, 1],
    [_EXAMPLE_THREAD, 1],
    [_EXAMPLE_THREAD, 2],
]

iface = gradio.Interface(
    title="Parser of email",
    description=_DESCRIPTION,
    fn=display_email,
    inputs=_input_components,
    outputs=_output_components,
    examples=_examples,
)


iface.launch()