# Hugging Face Space -- status shown on the hosting page when this file was captured: Runtime error
import gradio as gr | |
import re | |
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast, pipeline | |
# Both the translation model and its tokenizer come from the same mBART-50 checkpoint.
MBART_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"

# Loaded once at import time so every request reuses the same objects.
model = MBartForConditionalGeneration.from_pretrained(MBART_CHECKPOINT)
# src_lang="en_XX" marks the text fed to the tokenizer as English.
tokenizer = MBart50TokenizerFast.from_pretrained(MBART_CHECKPOINT, src_lang="en_XX")

# Summarization pipeline used by summarize() before translation.
pipe2 = pipeline(task="summarization", model="Tiju1996/conversation-summ")
# Cleanup patterns, compiled once at import time. process_text applies them
# in the order they are listed here; that order matters (see its docstring).
_MD_IMAGE_RE = re.compile(r'!\[[^\]]*\]\([^)]*\)')       # ![alt](src)
_MD_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')      # [label](target)
_NUMERIC_MARKER_RE = re.compile(r'\[[0-9]*\]')            # [12], []
_BRACKETED_RE = re.compile(r'(\[[^\]]*\])?\[[^\]]*\]')    # [a][b] or [a]
_NON_ASCII_RE = re.compile(r'[^\x00-\x7F]+')
_HTML_TAG_RE = re.compile(r'<.*?>')
_URL_RE = re.compile(r'http\S+')


def process_text(text):
    """Strip markup and noise from a raw document before summarization.

    Steps, in order:

    1. Markdown inline images (``![alt](src)``) are removed entirely.
    2. Markdown links (``[label](target)``) are replaced by their label.
    3. Numeric citation/footnote markers such as ``[12]`` are removed.
    4. Any remaining bracketed span (``[a]`` or ``[a][b]``) is removed.
    5. Every run of non-ASCII characters is removed. This also removes
       emojis, so no separate emoji pass is needed.
    6. HTML tags are removed.
    7. Anything starting with ``http`` up to the next whitespace is removed.
    8. Leading and trailing whitespace is stripped.

    Links are unwrapped *before* the generic bracket removal. Doing it the
    other way round deletes the ``[label]`` part first, so the link rule can
    never match and the URL rule then leaves a stray ``(`` behind.

    Args:
        text: Raw input text. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned text.
    """
    if not text:
        return ""

    text = _MD_IMAGE_RE.sub('', text)
    text = _MD_LINK_RE.sub(r'\1', text)
    text = _NUMERIC_MARKER_RE.sub('', text)
    text = _BRACKETED_RE.sub('', text)
    text = _NON_ASCII_RE.sub('', text)
    text = _HTML_TAG_RE.sub('', text)
    text = _URL_RE.sub('', text)
    # strip() rather than strip(" "): newlines and tabs at the edges are
    # whitespace too.
    return text.strip()
def summarize(article_en_raw):
    """Summarize an English document and return the summary in Hindi.

    The raw text is cleaned with ``process_text``, summarized with the
    ``pipe2`` summarization pipeline, and the resulting summary is passed
    through the mBART-50 model with ``hi_IN`` as the forced target language.

    Args:
        article_en_raw: The document text as entered by the user.

    Returns:
        The translated summary, or ``""`` when nothing but whitespace is
        left after cleaning.
    """
    article_en = process_text(article_en_raw)
    if not article_en.strip():
        # Nothing to summarize; skip both model calls.
        return ""

    # truncation=True clips inputs longer than the summarizer accepts
    # instead of letting the model fail on an over-long document.
    summary_en = pipe2(article_en, truncation=True)
    summary_text = summary_en[0]['summary_text']

    model_inputs = tokenizer(summary_text, return_tensors="pt")
    generated_tokens = model.generate(
        **model_inputs,
        # Forcing the first generated token to the Hindi language code
        # selects the output language.
        forced_bos_token_id=tokenizer.lang_code_to_id["hi_IN"],
    )
    translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return translation[0]
# Components are created with gr.Textbox directly: the legacy gr.inputs /
# gr.outputs namespaces no longer exist in current Gradio releases, and
# accessing them fails at import time.
input_text = gr.Textbox(lines=20, label="Enter text document to be summarized")
output_text = gr.Textbox(label="Summarized Text")

# One text box in, one text box out, backed by summarize().
gradio_interface = gr.Interface(
    fn=summarize,
    inputs=input_text,
    outputs=output_text,
    title="DistilBART Text Summarization App",
    description="Enter a text document and get its summarized version.",
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    gradio_interface.launch()