Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ summarization_model_name = 'sarahai/ruT5-base-summarizer'
|
|
13 |
summarization_model = T5ForConditionalGeneration.from_pretrained(summarization_model_name)
|
14 |
summarization_tokenizer = T5Tokenizer.from_pretrained(summarization_model_name)
|
15 |
|
16 |
-
def translate(text, translation_model, translation_tokenizer, src_lang='uzb_Cyrl', tgt_lang='rus_Cyrl', a=16, b=1.5, max_input_length=
|
17 |
translation_tokenizer.src_lang = src_lang
|
18 |
translation_tokenizer.tgt_lang = tgt_lang
|
19 |
inputs = translation_tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length)
|
@@ -25,8 +25,8 @@ def translate(text, translation_model, translation_tokenizer, src_lang='uzb_Cyrl
|
|
25 |
translated_text = translation_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
26 |
return translated_text
|
27 |
|
28 |
-
def summarize(translated_text, summarization_model, summarization_tokenizer, max_length=250
|
29 |
-
input_ids = summarization_tokenizer.encode("summarize: " + translated_text, return_tensors="pt", max_length=
|
30 |
summary_ids = summarization_model.generate(
|
31 |
input_ids,
|
32 |
max_length=max_length,
|
@@ -49,7 +49,7 @@ if st.button("Перевести и суммаризировать"):
|
|
49 |
st.text_area("Переведенный текст (на русском):", value=translated_text, height=200)
|
50 |
|
51 |
with st.spinner('Суммаризируем...'):
|
52 |
-
summary_text = summarize(translated_text, summarization_model, summarization_tokenizer, max_length=250
|
53 |
st.text_area("Суммаризация (на русском):", value=summary_text, height=100)
|
54 |
else:
|
55 |
st.warning("Пожалуйста, введите текст на узбекском языке для перевода.")
|
|
|
13 |
summarization_model = T5ForConditionalGeneration.from_pretrained(summarization_model_name)
|
14 |
summarization_tokenizer = T5Tokenizer.from_pretrained(summarization_model_name)
|
15 |
|
16 |
+
def translate(text, translation_model, translation_tokenizer, src_lang='uzb_Cyrl', tgt_lang='rus_Cyrl', a=16, b=1.5, max_input_length=2048):
|
17 |
translation_tokenizer.src_lang = src_lang
|
18 |
translation_tokenizer.tgt_lang = tgt_lang
|
19 |
inputs = translation_tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length)
|
|
|
25 |
translated_text = translation_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
26 |
return translated_text
|
27 |
|
28 |
+
def summarize(translated_text, summarization_model, summarization_tokenizer, max_length=250):
|
29 |
+
input_ids = summarization_tokenizer.encode("summarize: " + translated_text, return_tensors="pt", max_length=2048, truncation=True)
|
30 |
summary_ids = summarization_model.generate(
|
31 |
input_ids,
|
32 |
max_length=max_length,
|
|
|
49 |
st.text_area("Переведенный текст (на русском):", value=translated_text, height=200)
|
50 |
|
51 |
with st.spinner('Суммаризируем...'):
|
52 |
+
summary_text = summarize(translated_text, summarization_model, summarization_tokenizer, max_length=250)
|
53 |
st.text_area("Суммаризация (на русском):", value=summary_text, height=100)
|
54 |
else:
|
55 |
st.warning("Пожалуйста, введите текст на узбекском языке для перевода.")
|