sarahai committed on
Commit
bf19ee0
·
verified ·
1 Parent(s): 60ce1fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -13,7 +13,7 @@ summarization_model_name = 'sarahai/ruT5-base-summarizer'
13
  summarization_model = T5ForConditionalGeneration.from_pretrained(summarization_model_name)
14
  summarization_tokenizer = T5Tokenizer.from_pretrained(summarization_model_name)
15
 
16
- def translate(text, translation_model, translation_tokenizer, src_lang='uzb_Cyrl', tgt_lang='rus_Cyrl', a=16, b=1.5, max_input_length=1024):
17
  translation_tokenizer.src_lang = src_lang
18
  translation_tokenizer.tgt_lang = tgt_lang
19
  inputs = translation_tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length)
@@ -25,8 +25,8 @@ def translate(text, translation_model, translation_tokenizer, src_lang='uzb_Cyrl
25
  translated_text = translation_tokenizer.decode(outputs[0], skip_special_tokens=True)
26
  return translated_text
27
 
28
- def summarize(translated_text, summarization_model, summarization_tokenizer, max_length=250, min_length=150):
29
- input_ids = summarization_tokenizer.encode("summarize: " + translated_text, return_tensors="pt", max_length=1024, truncation=True)
30
  summary_ids = summarization_model.generate(
31
  input_ids,
32
  max_length=max_length,
@@ -49,7 +49,7 @@ if st.button("Перевести и суммаризировать"):
49
  st.text_area("Переведенный текст (на русском):", value=translated_text, height=200)
50
 
51
  with st.spinner('Суммаризируем...'):
52
- summary_text = summarize(translated_text, summarization_model, summarization_tokenizer, max_length=250, min_length=150)
53
  st.text_area("Суммаризация (на русском):", value=summary_text, height=100)
54
  else:
55
  st.warning("Пожалуйста, введите текст на узбекском языке для перевода.")
 
13
  summarization_model = T5ForConditionalGeneration.from_pretrained(summarization_model_name)
14
  summarization_tokenizer = T5Tokenizer.from_pretrained(summarization_model_name)
15
 
16
+ def translate(text, translation_model, translation_tokenizer, src_lang='uzb_Cyrl', tgt_lang='rus_Cyrl', a=16, b=1.5, max_input_length=2048):
17
  translation_tokenizer.src_lang = src_lang
18
  translation_tokenizer.tgt_lang = tgt_lang
19
  inputs = translation_tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length)
 
25
  translated_text = translation_tokenizer.decode(outputs[0], skip_special_tokens=True)
26
  return translated_text
27
 
28
+ def summarize(translated_text, summarization_model, summarization_tokenizer, max_length=250):
29
+ input_ids = summarization_tokenizer.encode("summarize: " + translated_text, return_tensors="pt", max_length=2048, truncation=True)
30
  summary_ids = summarization_model.generate(
31
  input_ids,
32
  max_length=max_length,
 
49
  st.text_area("Переведенный текст (на русском):", value=translated_text, height=200)
50
 
51
  with st.spinner('Суммаризируем...'):
52
+ summary_text = summarize(translated_text, summarization_model, summarization_tokenizer, max_length=250)
53
  st.text_area("Суммаризация (на русском):", value=summary_text, height=100)
54
  else:
55
  st.warning("Пожалуйста, введите текст на узбекском языке для перевода.")