Spaces:
Running
Running
File size: 5,167 Bytes
e51f125 5bd749f e51f125 79e12fd 3c51821 c564c57 3c51821 79e12fd 0acdafb 79e12fd 0acdafb 79e12fd 0acdafb 79e12fd c564c57 79e12fd 3c51821 79e12fd ba8d0da |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import os
# Start-up dependency pinning. These commands run at import time, before the
# `import gradio` statement further down, so the module imported there is the
# version installed here.
# NOTE(review): presumably pinned because the interface below relies on the
# `gradio.inputs` / `gradio.outputs` API -- confirm before changing the version.
# The exit status returned by os.system() is not checked, so a failed
# uninstall/install goes unnoticed at this point.
os.system("pip uninstall -y gradio")
os.system("pip install gradio==2.7.5.2")
# Upgrade typing-extensions after the gradio install.
os.system("pip install typing-extensions --upgrade")
import logging, regex
import gradio
from email_parser import utils, nlp
from email_parser.doc_email import Email
def print_highlighted_text(text, df_result, offset=0):
    """Split ``text`` on whitespace and attach an entity label to each piece.

    The text is cut at every single whitespace character and at the end of
    the string. Consecutive whitespace characters (or a trailing one)
    therefore produce empty-string pieces, which are kept in the output.

    For each piece, only the position of its first character (shifted by
    ``offset``) is tested against the spans in ``df_result``: a row matches
    when ``start <= position <= end`` (both bounds inclusive). When several
    rows match, the first one is used.

    Args:
        text: String to split.
        df_result: Table exposing ``query(...)`` with ``start`` and ``end``
            columns and an ``entity`` column (presumably a pandas DataFrame
            of NER spans -- confirm against the producer).
        offset: Value added to each piece's start index before it is compared
            with ``start``/``end``; used when ``text`` is a slice of the
            string the spans were computed on.

    Returns:
        A list of ``(piece, entity)`` tuples in text order; ``entity`` is
        ``None`` when no row covers the piece's first character.
    """
    list_values = []
    start_pos = 0
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    for match in regex.finditer(r"\s|$", text):
        word = text[start_pos:match.start()]
        logging.debug(f"word '{word}' was found between {start_pos} and {match.start()}")
        position = start_pos + offset
        df_entity = df_result.query(f"{position}>=start & {position}<=end").head(1)
        logging.debug(f"Found entites are: {df_entity}")
        entity = df_entity["entity"].values[0] if len(df_entity) == 1 else None
        list_values.append((word, entity))
        # The next piece starts right after the separator just consumed.
        start_pos = match.end()
    return list_values
def display_email(text, part=1):
    """Pick one email out of a thread and tag its header, body and signature.

    Args:
        text: Raw text of the email thread; it is handed to ``Email``.
        part: 1-based number of the email to display within the thread. It
            may be a float (it is truncated with ``int``).

    Returns:
        A 5-tuple ``(error, lang, header_words, body_words, signature_words)``.
        On success ``error`` is ``None``, ``lang`` is the value returned by
        ``nlp.f_detect_language`` and the three ``*_words`` entries are the
        lists produced by ``print_highlighted_text`` (``header_words`` is
        ``None`` for an empty header, ``signature_words`` is ``None`` when no
        signature is detected). When ``part`` is outside ``1..len(thread)``,
        ``error`` is a message and the four other entries are ``None``.
    """
    doc = Email(text)
    list_emails = doc.list_emails
    nb_emails = len(list_emails)

    # Guard the lower bound as well: without it, part=0 would index -1 and
    # silently return the last email of the thread.
    if part < 1:
        return f"Email number {int(part)} was requested but email numbers start at 1", \
               None, None, None, None
    if part > nb_emails:
        return f"Email number {int(part)} was requested but only {nb_emails} emails was found in this thread", \
               None, None, None, None

    email = list_emails[int(part) - 1]
    text = email["body"]
    header = email["header"]
    lang = nlp.f_detect_language(text)

    if len(header) > 0:
        df_results_header = nlp.f_ner(header, lang=lang)
        df_results_header = Email.f_find_person_in_header(header, df_result=df_results_header)
        list_words_headers = print_highlighted_text(header, df_results_header)
    else:
        list_words_headers = None

    df_result = nlp.f_ner(text, lang=lang)
    logging.debug(f"NER results for text '{text}' are: {df_result}")

    df_signature = nlp.f_detect_email_signature(text, df_ner=df_result)
    if df_signature is not None and len(df_signature) > 0:
        # The body is split at the "start" value of the first signature row.
        start_signature_position = df_signature["start"].values[0]
        text_body = text[:start_signature_position]
        text_signature = text[start_signature_position:]
        # The entity spans refer to the full body text, so the signature slice
        # needs its starting position as offset.
        list_words_signature = print_highlighted_text(text_signature, df_result,
                                                      offset=start_signature_position)
    else:
        text_body = text
        list_words_signature = None

    list_words_body = print_highlighted_text(text_body, df_result)
    return None, lang, list_words_headers, list_words_body, list_words_signature
# Configure logging through the project helper before the interface is built.
utils.f_setup_logger(level_sysout=logging.INFO, level_file=logging.INFO, folder_path="logs")

# Thread used by the last two examples; they differ only in which email of
# the thread is requested.
# NOTE(review): the "[email protected]" values look like addresses masked by a
# web page -- confirm against the original file.
_EXAMPLE_THREAD = """Hello Jack,
I hope you had nice holiday as well.
Please find attached the requested documents,
Best Regards,
George
Vice president of Something
email: [email protected]
tel: 512-222-5555
On Mon, Jan 7, 2022 at 12:39 PM, Jack <[email protected]> wrote:
Hello George,
I wish you a happy new year. I hope you had nice holidays.
Did you see Garry during your vacation?
Do you have the documents I requested earlier?
Thanks,
Jack
"""

# Web UI: a text box for the thread and the email number as inputs; outputs
# are wired in the order of the 5-tuple returned by display_email
# (error, language, header, body, signature).
iface = gradio.Interface(
    title="Parser of email",
    description="Small application that can extract a specific email in a thread of email,"
                " highlights the entities found in the text (person, organization, date,...)"
                " and extract email signature if any.",
    article="*The model used to detect signature is described in detail here: "
            "<a href=\"https://medium.com/@jean-baptiste.polle/lstm-model-for-email-signature-detection-8e990384fefa\">"
            "https://medium.com/@jean-baptiste.polle/lstm-model-for-email-signature-detection-8e990384fefa"
            "</a>",
    fn=display_email,
    inputs=["textbox",
            gradio.inputs.Number(default=1, label="Email number in thread")],
    outputs=[
        gradio.outputs.Textbox(type="str", label="Error"),
        gradio.outputs.Textbox(type="str", label="Language"),
        gradio.outputs.HighlightedText(label="Header"),
        gradio.outputs.HighlightedText(label="Body"),
        gradio.outputs.HighlightedText(label="Signature")],
    examples=[
        ["""Bonjour Vincent,
Merci de m’avoir rappelé hier.
Seriez vous disponible pour un rendez vous la semaine prochaine?
Merci,
Jean-Baptiste""", 1],
        [_EXAMPLE_THREAD, 1],
        [_EXAMPLE_THREAD, 2],
    ])

# Start the web server for the interface.
iface.launch()