amirhoseinsedaghati committed on
Commit
1a4bd79
·
verified ·
1 Parent(s): 4ca5e8f

Update pages/Translate_Text.py

Browse files
Files changed (1) hide show
  1. pages/Translate_Text.py +62 -19
pages/Translate_Text.py CHANGED
@@ -4,18 +4,26 @@ from configs.download_files import FileDownloader
4
  from configs.db_configs import add_one_item
5
  from streamlit.components.v1 import html
6
  from configs.html_features import set_image
 
 
7
 
8
- def translate_text_to_text(text, source_lang, target_lang):
9
- prefix = f'translate {source_lang} to {target_lang}: '
 
 
10
  text = prefix + text
11
- tokenizer = AutoTokenizer.from_pretrained('stevhliu/my_awesome_opus_books_model')
12
- input_ids = tokenizer(text, return_tensors='pt').input_ids
13
- model = AutoModelForSeq2SeqLM.from_pretrained('stevhliu/my_awesome_opus_books_model')
14
- output_ids = model.generate(input_ids, max_new_tokens=len(input_ids[0]) * 3, do_sample=False)
15
- translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
16
  return translated_text
17
 
18
 
 
 
 
 
19
  def main():
20
  st.title('Text Translator')
21
  im1, im2, im3 = st.columns([1, 5.3, 1])
@@ -26,26 +34,61 @@ def main():
26
  html(set_image(url), height=500, width=500)
27
  with im3:
28
  pass
29
- languages = ['English', 'French']
30
- source_lang = st.sidebar.selectbox('Source Language', languages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  target_lang = st.sidebar.selectbox('Target Language', languages, index=1)
32
  text = st.text_area('Text Translator', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')
 
33
  if st.button('translate it'):
34
  if text != '':
35
- if (source_lang == 'English' and target_lang == 'English') or (source_lang == 'French' and target_lang == 'French'):
36
- st.error('Expected different values for source and target languages, but got the same values!')
37
-
38
- else:
39
- with st.expander('Original Text'):
40
- st.write(text)
41
- add_one_item(text, 'Text Translator')
42
 
43
- with st.expander('Translated Text'):
44
- translated_text = translate_text_to_text(text, source_lang, target_lang)
45
- st.write(translated_text)
 
46
 
 
 
 
 
 
 
47
  with st.expander('Download Translated Text'):
48
  FileDownloader(translated_text, 'txt').download()
 
 
 
 
 
 
 
 
 
 
 
 
49
  else:
50
  st.error('Please enter a non-empty text.')
51
 
 
4
  from configs.db_configs import add_one_item
5
  from streamlit.components.v1 import html
6
  from configs.html_features import set_image
7
+ from sacrebleu.compat import corpus_bleu
8
+ import pandas as pd
9
 
10
+
11
+
12
def translate_text_to_text(text, target_lang):
    """Translate *text* into *target_lang* using the CohereForAI/aya-101 model.

    The input is prefixed with the instruction ``translate to <target_lang>: ``
    before being tokenized and handed to the seq2seq model.

    Args:
        text: The text to translate.
        target_lang: Name of the target language, interpolated verbatim into
            the instruction prefix.

    Returns:
        The decoded model output with special tokens stripped.
    """
    prompt = f'translate to {target_lang}: ' + text

    # NOTE(review): tokenizer and model are re-loaded on every call; consider
    # caching them at the application level if this proves too slow.
    tokenizer = AutoTokenizer.from_pretrained('CohereForAI/aya-101')
    model = AutoModelForSeq2SeqLM.from_pretrained('CohereForAI/aya-101')

    inputs = tokenizer(prompt, return_tensors='pt')

    # generate() expects tensors, not the tokenizer's BatchEncoding wrapper:
    # unpack it so input_ids and attention_mask are passed as keyword args.
    outputs = model.generate(
        **inputs,
        # Allow the output to be up to three times the input token count.
        max_new_tokens=len(inputs.input_ids[0]) * 3,
        do_sample=False,
    )
    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return translated_text
21
 
22
 
23
def validate_translation(original_text, translated_text):
    """Score *translated_text* against *original_text* with sacreBLEU.

    The translated text is used as the single hypothesis and the original
    text as its single reference.

    Args:
        original_text: The text that was submitted for translation.
        translated_text: The text produced by the translator.

    Returns:
        The score object returned by ``corpus_bleu``.
    """
    # corpus_bleu takes a sequence of hypothesis segments and a sequence of
    # reference streams (each itself a sequence of segments); a bare string
    # hypothesis is rejected, so wrap both in the expected list shapes.
    hypotheses = [translated_text]
    references = [[original_text]]
    return corpus_bleu(hypotheses, references)
25
+
26
+
27
  def main():
28
  st.title('Text Translator')
29
  im1, im2, im3 = st.columns([1, 5.3, 1])
 
34
  html(set_image(url), height=500, width=500)
35
  with im3:
36
  pass
37
+
38
+ languages = ['Afrikaans', 'Amharic', 'Arabic', 'Azerbaijani', 'Belarusian',
39
+ 'Bengali', 'Bulgarian', 'Catalan', 'Cebuano', 'Czech', 'Welsh',
40
+ 'Danish', 'German', 'Greek', 'English', 'Esperanto', 'Estonian',
41
+ 'Basque', 'Finnish', 'Filipino', 'French', 'Western Frisian',
42
+ 'Scottish Gaelic', 'Irish', 'Galician', 'Gujarati', 'Haitian',
43
+ 'Hausa', 'Hebrew', 'Hindi', 'Hungarian', 'Armenian', 'Igbo',
44
+ 'Indonesian', 'Icelandic', 'Italian', 'Javanese', 'Japanese',
45
+ 'Kannada', 'Georgian', 'Kazakh', 'Khmer', 'Kirghiz', 'Korean',
46
+ 'Kurdish', 'Lao', 'Latvian', 'Latin', 'Lithuanian', 'Luxembourgish',
47
+ 'Malayalam', 'Marathi', 'Macedonian', 'Malagasy', 'Maltese',
48
+ 'Mongolian', 'Maori', 'Malay', 'Burmese', 'Nepali', 'Dutch',
49
+ 'Norwegian', 'Pedi', 'Nyanja', 'Odia', 'Panjabi', 'Persian',
50
+ 'Polish', 'Portuguese', 'Pushto', 'Romanian', 'Russian', 'Sinhala',
51
+ 'Slovak', 'Slovenian', 'Samoan', 'Shona', 'Sindhi', 'Somali',
52
+ 'Southern Sotho', 'Spanish', 'Albanian', 'Serbian', 'Sundanese',
53
+ 'Swahili', 'Swedish', 'Tamil', 'Telugu', 'Tajik', 'Thai', 'Turkish',
54
+ 'Twi', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Xhosa', 'Yiddish',
55
+ 'Yoruba', 'Chinese','Zulu'
56
+ ]
57
+ # languages = ['English', 'French']
58
+ # source_lang = st.sidebar.selectbox('Source Language', languages)
59
  target_lang = st.sidebar.selectbox('Target Language', languages, index=1)
60
  text = st.text_area('Text Translator', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')
61
+
62
  if st.button('translate it'):
63
  if text != '':
64
+ # if (source_lang == 'English' and target_lang == 'English') or (source_lang == 'French' and target_lang == 'French'):
65
+ # st.error('Expected different values for source and target languages, but got the same values!')
 
 
 
 
 
66
 
67
+ # else:
68
+ with st.expander('Original Text'):
69
+ st.write(text)
70
+ add_one_item(text, 'Text Translator')
71
 
72
+ with st.expander('Translated Text'):
73
+ translated_text = translate_text_to_text(text, target_lang)
74
+ st.write(translated_text)
75
+
76
+ col1, col2 = st.columns(2)
77
+ with col1:
78
  with st.expander('Download Translated Text'):
79
  FileDownloader(translated_text, 'txt').download()
80
+
81
+ with col2:
82
+ with st.expander('Translated Text Validation'):
83
+ bleu_score = validate_translation(text, translated_text)
84
+ df = pd.DataFrame({
85
+ 'Brevity Penalty' : bleu_score.bp,
86
+ 'the length of the original text' : bleu_score.ref_len,
87
+ 'the length of the translated text' : bleu_score.sys_len,
88
+ 'Ratio' : bleu_score.ratio
89
+ }, index=1)
90
+ st.dataframe(df)
91
+
92
  else:
93
  st.error('Please enter a non-empty text.')
94