asFrants's picture
finalized auth
d0f5098
raw
history blame
9.2 kB
import re
import gradio as gr
from pydantic import BaseModel
from transformers import pipeline
from loguru import logger
# from pydantic import BaseModel
# Model identifiers for the summarization and sentiment pipelines, one pair per
# supported language. They are also displayed in the demo UI.
# Commented-out assignments are alternative checkpoints kept for reference.
# RU_SUMMARY_MODEL = "IlyaGusev/rubart-large-sum"
# RU_SUMMARY_MODEL = "IlyaGusev/mbart_ru_sum_gazeta"
RU_SUMMARY_MODEL = "csebuetnlp/mT5_multilingual_XLSum"
# RU_SENTIMENT_MODEL = "IlyaGusev/rubart-large-sentiment"
RU_SENTIMENT_MODEL = "blanchefort/rubert-base-cased-sentiment"
# Currently the same checkpoint as RU_SUMMARY_MODEL.
EN_SUMMARY_MODEL = "csebuetnlp/mT5_multilingual_XLSum"
EN_SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
# Sample English news text used as the initial value (and placeholder) of the
# English input box in the demo UI.
DEFAULT_EN_TEXT = """Flags on official buildings are being flown at half-mast and a minute's silence will be observed at midday.
Fourteen people were shot dead at the Faculty of Arts building of Charles University in the capital by a student who then killed himself.
Police are working to uncover the motive behind the attack.
It is one of the deadliest assaults by a lone gunman in Europe this century.
Those killed in Thursday's attack included Lenka Hlavkova, head of the Institute of Musicology at the university.
Other victims were named as translator and Finnish literature expert Jan Dlask and student Lucie Spindlerova.
The shooting began at around 15:00 local time (14:00 GMT) at the Faculty of Arts building off Jan Palach Square in the centre of the Czech capital.
The gunman opened fire in the corridors and classrooms of the building, before shooting himself as security forces closed in on him, police say.
US tourist Hannah Mallicoat told the BBC that she and her family had been on Jan Palach Square during the attack.
"A crowd of people were crossing the street when the first shot hit. I thought it was something like a firecracker or a car backfire until I heard the second shot and people started running," she said.
"I saw a bullet hit the ground on the other side of the square about 30ft [9m] away before ducking into a store. The whole area was blocked off and dozens of police cars and ambulances were going towards the university."
In a statement, Czech Prime Minister Petr Fiala said the country had been shocked by this "horrendous act".
"It is hard to find the words to express condemnation on the one hand and, on the other, the pain and sorrow that our entire society is feeling in these days before Christmas."
The gunman is thought to have killed his father at a separate location. He is also suspected in the killing of a young man and his two-month-old daughter who were found dead in a forest on the outskirts of Prague on 15 December.
"""
# Sample Russian news text used as the initial value (and placeholder) of the
# Russian input box in the demo UI.
DEFAULT_RU_TEXT = """В результате взрыва на заправке, который произошел накануне вечером,
пострадали 56 человек, 13 из них — дети, сообщил минздрав Дагестана.
Погибли 12 человек, в том числе двое несовершеннолетних. На место происшествия
приехала глава минздрава республики Татьяна Беляева, она держит под личным контролем
оказание помощи пострадавшим. В Махачкалу вылетел первый заместитель министра здравоохранения России Виктор Фисенко.
Врачам и пострадавшим помогают волонтеры Всероссийского студенческого корпуса спасателей
и сотрудники некоммерческой организации «Добровольцы Дагестана», сообщило министерство молодежи Дагестана.
Жители республики массово пришли сдавать кровь, заявил региональный минздрав.
«Просим отложить визит на станцию переливания на завтра. Запасы крови есть,
доноров для их пополнения на данный час тоже уже немало», — написало ведомство.
"""
class TextRequest(BaseModel):
    """Request model holding the raw text to be processed."""

    text: str
class Result(BaseModel):
    """Summary of a text together with its sentiment label and score."""

    sentiment_score: float
    sentiment_label: str
    summary: str

    def to_str(self):
        """Return a two-line, human-readable rendering of the result.

        The first line carries the summary, the second the sentiment label
        followed by the score rounded to three decimals in parentheses.
        """
        summary_line = f"Summary: {self.summary}"
        sentiment_line = (
            f"Sentiment: {self.sentiment_label} ({self.sentiment_score:.3f})"
        )
        return "\n".join((summary_line, sentiment_line))
# class Response(BaseModel):
# results: List[Result] # list of Result objects
class Summarizer:
    """Summarization and sentiment analysis for English and Russian text.

    Attributes:
        ru_summary_pipe: summarization pipeline used for ``lang="ru"``.
        ru_sentiment_pipe: sentiment pipeline used for ``lang="ru"``.
        en_summary_pipe: summarization pipeline used for ``lang="en"``.
        en_sentiment_pipe: sentiment pipeline used for ``lang="en"``.
    """

    def __init__(self) -> None:
        """Load all pipelines eagerly so requests do not pay the loading cost."""
        self.ru_summary_pipe = self._load_summary_pipe(RU_SUMMARY_MODEL)
        self.ru_sentiment_pipe = self._load_sentiment_pipe(RU_SENTIMENT_MODEL)
        # Share one pipeline object when both languages use the same
        # checkpoint; otherwise honour the configured English model.
        if EN_SUMMARY_MODEL == RU_SUMMARY_MODEL:
            self.en_summary_pipe = self.ru_summary_pipe
        else:
            self.en_summary_pipe = self._load_summary_pipe(EN_SUMMARY_MODEL)
        self.en_sentiment_pipe = self._load_sentiment_pipe(EN_SENTIMENT_MODEL)

    @staticmethod
    def _load_summary_pipe(model_name: str):
        """Build a summarization pipeline for *model_name*."""
        return pipeline(
            "summarization", model=model_name, max_length=100, truncation=True
        )

    @staticmethod
    def _load_sentiment_pipe(model_name: str):
        """Build a sentiment pipeline for *model_name*.

        ``truncation=True`` asks the tokenizer to cut over-long inputs, the
        same way the summarization pipeline is configured.
        """
        return pipeline("sentiment-analysis", model=model_name, truncation=True)

    @staticmethod
    def _extract_text(req) -> str:
        """Return the plain text of *req*.

        Accepts either a plain string or an object exposing a ``text``
        attribute (such as ``TextRequest``).
        """
        return getattr(req, "text", req)

    def mT5_summarize(self, text: str) -> str:
        """Summarize *text* by calling the tokenizer and model directly.

        Uses the tokenizer and model held by ``en_summary_pipe`` instead of
        invoking the pipeline itself.

        Args:
            text: the text to summarize.

        Returns:
            The decoded summary string.
        """
        tokenizer = self.en_summary_pipe.tokenizer
        model = self.en_summary_pipe.model

        # Collapse every whitespace run (newlines included) to one space.
        normalized = re.sub(r"\s+", " ", text.strip())

        input_ids = tokenizer(
            [normalized],
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=512,
        )["input_ids"]
        output_ids = model.generate(
            input_ids=input_ids, max_length=84, no_repeat_ngram_size=2, num_beams=4
        )[0]
        return tokenizer.decode(
            output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

    def get_pipe(self, lang: str):
        """Return the ``(summary_pipe, sentiment_pipe)`` pair for *lang*.

        Args:
            lang: language code, ``"en"`` or ``"ru"``.

        Raises:
            ValueError: if *lang* is neither ``"en"`` nor ``"ru"``.
        """
        logger.info(f"Pipe language: {lang}")
        if lang == "en":
            return self.en_summary_pipe, self.en_sentiment_pipe
        if lang == "ru":
            return self.ru_summary_pipe, self.ru_sentiment_pipe
        raise ValueError(f"Language {lang} is not supported")

    def summarize(self, req: "TextRequest | str", lang: str = "en") -> "Result":
        """Summarize *req* and classify its sentiment.

        Args:
            req: the text to process, as a plain string or as an object with
                a ``text`` attribute.
            lang: language code selecting the pipelines (``"en"`` or ``"ru"``).

        Returns:
            A ``Result`` with the summary, the sentiment label and its score.

        Raises:
            ValueError: if *lang* is not supported.
        """
        sum_pipe, sent_pipe = self.get_pipe(lang)
        text = self._extract_text(req)

        response_summary = sum_pipe(text)
        logger.info(response_summary)
        response_sentiment = sent_pipe(text)
        logger.info(response_sentiment)

        # Only the first entry of each pipeline response is used.
        return Result(
            summary=response_summary[0]["summary_text"],
            sentiment_label=response_sentiment[0]["label"],
            sentiment_score=response_sentiment[0]["score"],
        )

    def get_summary(self, req: "TextRequest | str", lang: str = "en") -> str:
        """Return the result of :meth:`summarize` rendered as display text."""
        return self.summarize(req, lang).to_str()
if __name__ == "__main__":
    pipe = Summarizer()

    # One entry per UI column:
    # (language code, summary model, sentiment model, default text,
    #  output placeholder, button caption)
    column_specs = (
        (
            "en",
            EN_SUMMARY_MODEL,
            EN_SENTIMENT_MODEL,
            DEFAULT_EN_TEXT,
            "Summary and Sentiment would be here...",
            "Proceed",
        ),
        (
            "ru",
            RU_SUMMARY_MODEL,
            RU_SENTIMENT_MODEL,
            DEFAULT_RU_TEXT,
            "Здесь будет обобщение и эмоциональный окрас текста...",
            "Запустить",
        ),
    )

    with gr.Blocks() as demo:
        wiring = []
        with gr.Row():
            for (
                lang_code,
                summary_model,
                sentiment_model,
                default_text,
                output_hint,
                button_caption,
            ) in column_specs:
                with gr.Column(scale=2, min_width=600):
                    gr.Markdown(value=f"Model for Summary: {summary_model}")
                    gr.Markdown(value=f"Model for Sentiment: {sentiment_model}")
                    text_in = gr.Textbox(
                        label=f"{lang_code}_input",
                        lines=5,
                        value=default_text,
                        placeholder=default_text,
                    )
                    # Hidden field that feeds the language code to the handler.
                    lang_field = gr.Textbox(value=lang_code, visible=False)
                    text_out = gr.Textbox(
                        label=f"{lang_code}_output",
                        lines=5,
                        placeholder=output_hint,
                    )
                    trigger = gr.Button(button_caption)
                wiring.append((trigger, [text_in, lang_field], [text_out]))

        # Register the click handlers once every component exists.
        for trigger, handler_inputs, handler_outputs in wiring:
            trigger.click(pipe.get_summary, handler_inputs, handler_outputs)

    demo.launch(show_api=False)