File size: 8,702 Bytes
dc149ba
 
 
 
 
 
 
 
 
 
 
 
 
 
3ad5346
 
 
dc149ba
8c5885d
 
158e38e
8c5885d
 
dc149ba
 
 
e939d89
93baa69
dc149ba
e939d89
 
 
 
b0ae3fd
 
 
b2d474f
7239594
 
 
e939d89
93baa69
9e1c3ab
 
e939d89
9e1c3ab
dc149ba
8c5885d
 
 
 
46abd0a
8c5885d
46abd0a
8c5885d
 
 
 
 
 
5e8469e
 
 
8c5885d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98b9978
31097f0
3ad5346
5e8469e
98b9978
 
 
 
 
 
 
5e8469e
98b9978
8c5885d
 
98b9978
dc149ba
 
 
 
 
 
 
 
 
 
 
 
e939d89
592978b
dc149ba
 
2866119
dc149ba
 
 
 
 
2866119
dc149ba
 
2866119
dc149ba
de3cada
b52f918
8112e48
f010b24
 
 
 
31097f0
592978b
 
 
3ad5346
 
 
 
 
592978b
9215493
dc149ba
 
cc4bac3
a71436a
bccf7be
 
33e3967
 
 
5b11a3e
bccf7be
33e3967
 
bccf7be
33e3967
bccf7be
 
 
 
 
 
 
 
 
 
 
33e3967
bccf7be
a71436a
 
93baa69
d895aa7
 
 
 
93baa69
c603514
 
 
 
93baa69
c603514
93baa69
765a96d
 
 
93baa69
e3a8bf2
765a96d
 
9e1c3ab
765a96d
d895aa7
 
9e1c3ab
 
 
 
d895aa7
 
 
 
9e1c3ab
f64ffeb
9c13f88
f64ffeb
9c13f88
765a96d
9c13f88
ea13813
765a96d
 
 
ea13813
765a96d
 
 
9e1c3ab
765a96d
d895aa7
 
9e1c3ab
93baa69
 
de3cada
c603514
 
72801d9
 
 
93baa69
9e1c3ab
 
33e3967
 
de3cada
 
8c5885d
 
 
90e8b1d
76ef102
27f72d1
76ef102
90e8b1d
 
65fbb2d
de3cada
 
 
d895aa7
de3cada
8c5885d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
"""
Translation program for simple text.
1. Detect the language of the input text with langid.
2. Translate the text to the target language chosen by the user.

Example from
https://www.thepythoncode.com/article/machine-translation-using-huggingface-transformers-in-python 

user_input:
    string: string to be translated
    target_lang: language to be translated to

Returns:
    string: translated string of text

try this : https://pypi.org/project/EasyNMT/
and this : https://huggingface.co/IDEA-CCNL/Randeng-Deltalm-362M-En-Zh
"""
from __future__ import annotations
from typing import Iterable
import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
import argparse

import langid
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, logging
from easynmt import EasyNMT

# Initialize logging
# Raise the transformers library verbosity to INFO and reuse its logger for
# this module's own messages.
logging.set_verbosity_info()
logger = logging.get_logger("transformers")

# NLLB-200 initialization is currently disabled; the lines are kept for reference.
# # Initialize nllb-200 models
# tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
# model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")

# Initialize mbart50 models
# Both EasyNMT models are constructed at import time, before the UI is built.
# Judging by the model names, "m2en" is many-to-English and "en2m" is
# English-to-many -- TODO confirm against the EasyNMT model list.
mbart_m2en_model = EasyNMT("mbart50_m2en")
mbart_en2m_model = EasyNMT("mbart50_en2m")
logger.info("mbart50 models initialized")

# Initialize m2m_100 models
# One m2m_100 instance is shared by every m2m translation request.
m2m_model = EasyNMT("m2m_100_1.2B")
logger.info("m2m_100 models initialized")


class myTheme(Base):
    """Custom Gradio theme used by the translation demo.

    The constructor forwards the hue, size and font choices to ``Base`` and
    then overrides a handful of theme variables (page background, primary
    button, slider and block styling).
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.red,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.orange,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("handjet"),
            "cursive",
        ),
        font_mono: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        # Palette, sizing and typography are passed straight to the parent theme.
        base_options = dict(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        super().__init__(**base_options)

        # Theme variable overrides. Values starting with "*" presumably refer
        # to other theme variables -- TODO confirm against the Gradio theming guide.
        overrides = {
            "body_background_fill": "repeating-linear-gradient(135deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
            "button_primary_background_fill": "linear-gradient(90deg, *primary_600, *secondary_800)",
            "button_primary_background_fill_hover": "linear-gradient(45deg, *primary_200, *secondary_300)",
            "button_primary_text_color": "white",
            "slider_color": "*secondary_300",
            "slider_color_dark": "*secondary_600",
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_shadow": "*shadow_drop_lg",
            "button_large_padding": "24px",
        }
        super().set(**overrides)


def detect_lang(article):
    """
    Identify the language of a piece of text with the langid library.

    Args:
        article (string): text whose language should be identified

    Returns:
        string: first element of the langid classification result, i.e. the
            detected language in short form
    """
    classification = langid.classify(article)
    # The whole classification result is logged, not only the language code.
    logger.info(f"language detected as {classification}")
    detected_code = classification[0]
    return detected_code


def opus_trans(article, target_language):
    """
    Translate an article with a Helsinki-NLP OPUS-MT model.

    The source language is detected from the article and the matching
    ``Helsinki-NLP/opus-mt-<source>-<target>`` checkpoint is loaded on demand.

    Args:
        article (string): article that user wishes to translate
        target_language (string): language that user wishes to translate the
            article into; "English" and "Chinese" are supported

    Returns:
        string: translated article, or a message starting with "Error:" when
            the target language is unsupported, equals the detected source
            language, or the model could not be loaded or run
    """
    target_codes = {"English": "en", "Chinese": "zh"}
    target_lang = target_codes.get(target_language)
    if target_lang is None:
        # An unsupported or missing selection used to leave target_lang
        # unbound and crash with UnboundLocalError further down.
        return "Error: Unsupported target language. Please select English or Chinese and try again."

    result_lang = detect_lang(article)
    if result_lang == target_lang:
        return "Error: You chose the same language as the article detected language. Please reselect language and try again."

    task_name = f"translation_{result_lang}_to_{target_lang}"
    model_name = f"Helsinki-NLP/opus-mt-{result_lang}-{target_lang}"
    try:
        translator = pipeline(task_name, model=model_name, tokenizer=model_name)
        return translator(article)[0]["translation_text"]
    except Exception:
        # Keep the user-facing message, but record the real cause instead of
        # silently swallowing it (and let KeyboardInterrupt/SystemExit through).
        logger.exception("OPUS translation with %s failed", model_name)
        return "Error: Model doesn't exist"


_NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
_nllb_components = {}


def _load_nllb():
    """Load the NLLB-200 tokenizer and model on first use and reuse them afterwards."""
    if not _nllb_components:
        # Load both before caching so a failed model download does not leave a
        # half-filled cache behind.
        tokenizer = AutoTokenizer.from_pretrained(_NLLB_CHECKPOINT)
        model = AutoModelForSeq2SeqLM.from_pretrained(_NLLB_CHECKPOINT)
        _nllb_components.update(tokenizer=tokenizer, model=model)
    return _nllb_components["tokenizer"], _nllb_components["model"]


def nllb_trans(article, target_language, max_length=30):
    """
    Translate an article with the NLLB-200 distilled 600M model.

    The tokenizer and model are loaded lazily on the first call, so this
    function does not depend on module-level ``tokenizer`` / ``model`` objects.

    Args:
        article (string): article that user wishes to translate
        target_language (string): language that user wishes to translate the
            article into; "English" and "Chinese" are supported
        max_length (int): maximum length passed to ``model.generate``; the
            default of 30 matches the previous hard-coded value and will
            truncate long translations

    Returns:
        string: translated article, or a message starting with "Error:" when
            the target language is unsupported or equals the detected
            source language
    """
    # display name -> (short code as returned by detect_lang, NLLB language code)
    targets = {
        "English": ("en", "eng_Latn"),
        "Chinese": ("zh", "zho_Hans"),
    }
    if target_language not in targets:
        # An unsupported or missing selection used to leave target_lang unbound.
        return "Error: Unsupported target language. Please select English or Chinese and try again."
    short_code, nllb_code = targets[target_language]

    # Check the language pair before doing any tokenization work.
    result_lang = detect_lang(article)
    if result_lang == short_code:
        return "Error: You chose the same language as the article detected language. Please reselect language and try again."

    tokenizer, model = _load_nllb()

    # Tell the tokenizer which language the input is in when we know its NLLB
    # code; otherwise reset to eng_Latn (the tokenizer default per the
    # Transformers NLLB docs) so a previous call's setting does not leak.
    source_codes = {short: nllb for short, nllb in targets.values()}
    tokenizer.src_lang = source_codes.get(result_lang, "eng_Latn")

    inputs = tokenizer(article, return_tensors="pt")
    translated_tokens = model.generate(
        **inputs,
        # convert_tokens_to_ids replaces the deprecated lang_code_to_id mapping.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(nllb_code),
        max_length=max_length,
    )
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]


def mbart_trans(article, target_language):
    """
    Translate an article with the EasyNMT mbart50 models.

    Args:
        article (string): article that user wishes to translate
        target_language (string): language that user wishes to translate the
            article into; "English" and "Chinese" are supported

    Returns:
        translated article as returned by ``EasyNMT.translate``, or a message
        starting with "Error:" when the target language is unsupported or
        equals the detected source language
    """
    logger.info(f"Article to translate : {article}")
    logger.info("Chose which translation model: mbart model")
    logger.info(f"Language selected: {target_language}")

    target_codes = {"English": "en", "Chinese": "zh"}
    target_lang = target_codes.get(target_language)
    if target_lang is None:
        # An unsupported or missing selection (e.g. None when nothing is
        # chosen in the UI) used to crash with UnboundLocalError.
        message = "Error: Unsupported target language. Please select English or Chinese and try again."
        logger.warning(message)
        return message

    result_lang = detect_lang(article)
    if result_lang == target_lang:
        message = "Error: You chose the same language as the article detected language. Please reselect language and try again."
        logger.warning(message)
        return message

    # English targets use the m2en model; everything else uses the en2m model.
    # NOTE(review): en2m presumably expects English input, so a non-English
    # article translated to Chinese may give poor results -- TODO confirm.
    model = mbart_m2en_model if target_lang == "en" else mbart_en2m_model
    translated = model.translate(article, target_lang=target_lang)
    logger.info(f"Translated Result: {translated}")
    return translated


def m2m_trans(article, target_language):
    """
    Translate an article with the EasyNMT m2m_100 model.

    Args:
        article (string): article that user wishes to translate
        target_language (string): language that user wishes to translate the
            article into; "English" and "Chinese" are supported

    Returns:
        translated article as returned by ``EasyNMT.translate``, or a message
        starting with "Error:" when the target language is unsupported or
        equals the detected source language
    """
    logger.info(f"Article to translate : {article}")
    logger.info("Chose which translation model: m2m model")
    logger.info(f"Language selected: {target_language}")

    target_codes = {"English": "en", "Chinese": "zh"}
    target_lang = target_codes.get(target_language)
    if target_lang is None:
        # An unsupported or missing selection (e.g. None when nothing is
        # chosen in the UI) used to crash with UnboundLocalError.
        message = "Error: Unsupported target language. Please select English or Chinese and try again."
        logger.warning(message)
        return message

    result_lang = detect_lang(article)
    if result_lang == target_lang:
        message = "Error: You chose the same language as the article detected language. Please reselect language and try again."
        logger.warning(message)
        return message

    # The same model serves every target, so no per-language branching is needed.
    translated = m2m_model.translate(article, target_lang=target_lang)
    logger.info(f"Translation Result: {translated}")
    return translated


def translate(article, toolkit, target_language):
    """
    Dispatch a translation request to the selected model.

    Args:
        article (string): text to translate
        toolkit (string): translation model to use, "MBART" or "M2M"
        target_language (string): target language name, forwarded unchanged
            to the chosen translation function

    Returns:
        result of the chosen translation function, or a message starting with
        "Error:" when ``toolkit`` is not recognised
    """
    # opus_trans and nllb_trans are defined in this module but are not wired
    # into this dispatch.
    if toolkit == "MBART":
        return mbart_trans(article, target_language)
    if toolkit == "M2M":
        return m2m_trans(article, target_language)

    # Any other value used to fall through to `return translated` with the
    # name unbound, raising UnboundLocalError.
    return f"Error: Unsupported translation model {toolkit!r}. Please select MBART or M2M and try again."


# Instantiate the custom theme.
# NOTE(review): this rebinds the module-level name `myTheme` from the class to
# an instance, so the class is no longer reachable under that name afterwards.
myTheme = myTheme()

# Build the UI: an input textbox, a model selector, a target-language
# selector, an output textbox and a button that triggers the translation.
with gr.Blocks(theme=myTheme) as demo:
    article = gr.Textbox(label="Article")
    toolkit_select = gr.Radio(
        ["MBART", "M2M"], label="Select Translation Model", value="MBART"
    )
    # NOTE(review): unlike the model selector, no `value=` is given here;
    # presumably nothing is pre-selected and the handler can receive None
    # -- TODO confirm against the gr.Radio documentation.
    lang_select = gr.Radio(["English", "Chinese"], label="Select Desired Language")
    result = gr.Textbox(label="Translated Result")
    trans_btn = gr.Button("Translate")
    # On click, call translate(article, toolkit, language) and show its return
    # value in the result textbox.
    trans_btn.click(
        fn=translate, inputs=[article, toolkit_select, lang_select], outputs=result
    )


# Start the Gradio app.
# NOTE(review): this runs unconditionally whenever the module is executed or
# imported (there is no `if __name__ == "__main__":` guard).
demo.launch()