import gradio as gr
import subprocess

# Install runtime dependencies at startup (this Space installs them here rather than via a requirements file).
subprocess.check_call(["pip", "install", "transformers"])
subprocess.check_call(["pip", "install", "torch"])
subprocess.check_call(["pip", "install", "sentencepiece"])

from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import pipeline

# English summarisation model.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# mBART-50 many-to-many model and tokenizer, used to translate the English summary into an Indic language.
model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
# Map the UI language names to mBART-50 target-language codes.
LANG_CODES = {
    "Hindi": "hi_IN",
    "Gujarati": "gu_IN",
    "Bengali": "bn_IN",
    "Tamil": "ta_IN",
}

def summariser(text_en, lang):
    # Summarise the English paragraph first.
    summ = summarizer(text_en, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]

    # Then translate the summary from English into the selected Indic language.
    tokenizer.src_lang = "en_XX"
    encoded_en = tokenizer(summ, return_tensors="pt")
    generated_tokens = model.generate(
        **encoded_en,
        forced_bos_token_id=tokenizer.lang_code_to_id[LANG_CODES[lang]],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
iface = gr.Interface(
    fn=summariser,
    inputs=[
        gr.Textbox(label="Enter the paragraph in English", placeholder="Type here..."),
        gr.Radio(["Hindi", "Gujarati", "Bengali", "Tamil"], label="Language to summarise into:"),
    ],
    outputs=gr.Textbox(label="Summarised Text"),
    title="English to Indic Summariser",
)

iface.launch()
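
# A minimal sketch of calling the function directly (bypassing the Gradio UI), assuming the models
# above have loaded; the sample text and language choice are made-up for illustration:
#
#   demo_text = ("Gradio is an open-source Python library that makes it easy to build "
#                "web demos for machine learning models and share them with anyone.")
#   print(summariser(demo_text, "Hindi"))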