Spaces:
Sleeping
Sleeping
Good transcription and translation, but text2speech doesn't work well
Browse files- lang_list.py +172 -357
- requirements.txt +4 -1
- translatube.py +108 -60
lang_list.py
CHANGED
@@ -1,360 +1,175 @@
|
|
1 |
-
#
|
2 |
-
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"
|
9 |
-
"
|
10 |
-
"
|
11 |
-
"
|
12 |
-
"
|
13 |
-
"
|
14 |
-
"
|
15 |
-
"
|
16 |
-
"
|
17 |
-
"
|
18 |
-
"
|
19 |
-
"
|
20 |
-
"
|
21 |
-
"
|
22 |
-
"
|
23 |
-
"
|
24 |
-
"
|
25 |
-
"
|
26 |
-
"
|
27 |
-
"
|
28 |
-
"
|
29 |
-
"
|
30 |
-
"
|
31 |
-
"
|
32 |
-
"
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
36 |
-
"
|
37 |
-
"
|
38 |
-
"
|
39 |
-
"
|
40 |
-
"
|
41 |
-
"
|
42 |
-
"
|
43 |
-
"
|
44 |
-
"
|
45 |
-
"
|
46 |
-
"
|
47 |
-
"
|
48 |
-
"
|
49 |
-
"
|
50 |
-
"
|
51 |
-
"
|
52 |
-
"
|
53 |
-
"
|
54 |
-
"
|
55 |
-
"ltz": "Luxembourgish",
|
56 |
-
"lug": "Ganda",
|
57 |
-
"luo": "Luo",
|
58 |
-
"lvs": "Standard Latvian",
|
59 |
-
"mai": "Maithili",
|
60 |
-
"mal": "Malayalam",
|
61 |
-
"mar": "Marathi",
|
62 |
-
"mkd": "Macedonian",
|
63 |
-
"mlt": "Maltese",
|
64 |
-
"mni": "Meitei",
|
65 |
-
"mya": "Burmese",
|
66 |
-
"nld": "Dutch",
|
67 |
-
"nno": "Norwegian Nynorsk",
|
68 |
-
"nob": "Norwegian Bokm\u00e5l",
|
69 |
-
"npi": "Nepali",
|
70 |
-
"nya": "Nyanja",
|
71 |
-
"oci": "Occitan",
|
72 |
-
"ory": "Odia",
|
73 |
-
"pan": "Punjabi",
|
74 |
-
"pbt": "Southern Pashto",
|
75 |
-
"pes": "Western Persian",
|
76 |
-
"pol": "Polish",
|
77 |
-
"por": "Portuguese",
|
78 |
-
"ron": "Romanian",
|
79 |
-
"rus": "Russian",
|
80 |
-
"slk": "Slovak",
|
81 |
-
"slv": "Slovenian",
|
82 |
-
"sna": "Shona",
|
83 |
-
"snd": "Sindhi",
|
84 |
-
"som": "Somali",
|
85 |
-
"spa": "Spanish",
|
86 |
-
"srp": "Serbian",
|
87 |
-
"swe": "Swedish",
|
88 |
-
"swh": "Swahili",
|
89 |
-
"tam": "Tamil",
|
90 |
-
"tel": "Telugu",
|
91 |
-
"tgk": "Tajik",
|
92 |
-
"tgl": "Tagalog",
|
93 |
-
"tha": "Thai",
|
94 |
-
"tur": "Turkish",
|
95 |
-
"ukr": "Ukrainian",
|
96 |
-
"urd": "Urdu",
|
97 |
-
"uzn": "Northern Uzbek",
|
98 |
-
"vie": "Vietnamese",
|
99 |
-
"xho": "Xhosa",
|
100 |
-
"yor": "Yoruba",
|
101 |
-
"yue": "Cantonese",
|
102 |
-
"zlm": "Colloquial Malay",
|
103 |
-
"zsm": "Standard Malay",
|
104 |
-
"zul": "Zulu",
|
105 |
}
|
106 |
-
original_language_code_to_name = {
|
107 |
-
"afr": "Afrikaans",
|
108 |
-
"amh": "አማርኛ",
|
109 |
-
"arb": "العربية",
|
110 |
-
"ary": "الدارجة المغربية",
|
111 |
-
"arz": "العامية المصرية",
|
112 |
-
"asm": "অসমীয়া",
|
113 |
-
"ast": "Asturianu",
|
114 |
-
"azj": "Azərbaycanca",
|
115 |
-
"bel": "Беларуская",
|
116 |
-
"ben": "বাংলা",
|
117 |
-
"bos": "Bosanski",
|
118 |
-
"bul": "Български",
|
119 |
-
"cat": "Català",
|
120 |
-
"ceb": "Cebuano",
|
121 |
-
"ces": "Čeština",
|
122 |
-
"ckb": "کوردی ناوەندی",
|
123 |
-
"cmn": "普通话",
|
124 |
-
"cym": "Cymraeg",
|
125 |
-
"dan": "Dansk",
|
126 |
-
"deu": "Deutsch",
|
127 |
-
"ell": "Ελληνικά",
|
128 |
-
"eng": "English",
|
129 |
-
"est": "Eesti",
|
130 |
-
"eus": "Euskara",
|
131 |
-
"fin": "Suomi",
|
132 |
-
"fra": "Français",
|
133 |
-
"gaz": "Afaan Oromoo",
|
134 |
-
"gle": "Gaeilge",
|
135 |
-
"glg": "Galego",
|
136 |
-
"guj": "ગુજરાતી",
|
137 |
-
"heb": "עברית",
|
138 |
-
"hin": "हिंदी",
|
139 |
-
"hrv": "Hrvatski",
|
140 |
-
"hun": "Magyar",
|
141 |
-
"hye": "Հայերեն",
|
142 |
-
"ibo": "Igbo",
|
143 |
-
"ind": "Bahasa Indonesia",
|
144 |
-
"isl": "Íslenska",
|
145 |
-
"ita": "Italiano",
|
146 |
-
"jav": "Basa Jawa",
|
147 |
-
"jpn": "日本語",
|
148 |
-
"kam": "Kikamba",
|
149 |
-
"kan": "ಕನ್ನಡ",
|
150 |
-
"kat": "ქართული",
|
151 |
-
"kaz": "Қазақ тілі",
|
152 |
-
"kea": "Kriolu di Kabuverdianu",
|
153 |
-
"khk": "Халх",
|
154 |
-
"khm": "ខ្មែរ",
|
155 |
-
"kir": "Кыргызча",
|
156 |
-
"kor": "한국어",
|
157 |
-
"lao": "ລາວ",
|
158 |
-
"lit": "Lietuvių",
|
159 |
-
"ltz": "Lëtzebuergesch",
|
160 |
-
"lug": "Luganda",
|
161 |
-
"luo": "Dholuo",
|
162 |
-
"lvs": "Latviešu",
|
163 |
-
"mai": "मैथिली",
|
164 |
-
"mal": "മലയാളം",
|
165 |
-
"mar": "मराठी",
|
166 |
-
"mkd": "Македонски",
|
167 |
-
"mlt": "Malti",
|
168 |
-
"mni": "মৈতৈলোন",
|
169 |
-
"mya": "မြန်မာ",
|
170 |
-
"nld": "Nederlands",
|
171 |
-
"nno": "Nynorsk",
|
172 |
-
"nob": "Bokmål",
|
173 |
-
"npi": "नेपाली",
|
174 |
-
"nya": "Chichewa",
|
175 |
-
"oci": "Occitan",
|
176 |
-
"ory": "ଓଡ଼ିଆ",
|
177 |
-
"pan": "ਪੰਜਾਬੀ",
|
178 |
-
"pbt": "پښتو",
|
179 |
-
"pes": "فارسی",
|
180 |
-
"pol": "Polski",
|
181 |
-
"por": "Português",
|
182 |
-
"ron": "Română",
|
183 |
-
"rus": "Русский",
|
184 |
-
"slk": "Slovenčina",
|
185 |
-
"slv": "Slovenščina",
|
186 |
-
"sna": "ChiShona",
|
187 |
-
"snd": "سنڌي",
|
188 |
-
"som": "Soomaali",
|
189 |
-
"spa": "Español",
|
190 |
-
"srp": "Српски",
|
191 |
-
"swe": "Svenska",
|
192 |
-
"swh": "Kiswahili",
|
193 |
-
"tam": "தமிழ்",
|
194 |
-
"tel": "తెలుగు",
|
195 |
-
"tgk": "Тоҷикӣ",
|
196 |
-
"tgl": "Tagalog",
|
197 |
-
"tha": "ไทย",
|
198 |
-
"tur": "Türkçe",
|
199 |
-
"ukr": "Українська",
|
200 |
-
"urd": "اردو",
|
201 |
-
"uzn": "O‘zbekcha",
|
202 |
-
"vie": "Tiếng Việt",
|
203 |
-
"xho": "IsiXhosa",
|
204 |
-
"yor": "Yorùbá",
|
205 |
-
"yue": "粤语",
|
206 |
-
"zlm": "Bahasa Melayu",
|
207 |
-
"zsm": "Bahasa Melayu",
|
208 |
-
"zul": "IsiZulu",
|
209 |
-
}
|
210 |
-
LANGUAGE_NAME_TO_CODE = {v: k for k, v in language_code_to_name.items()}
|
211 |
-
ORIGINAL_LANGUAGE_NAME_TO_CODE = {v: k for k, v in original_language_code_to_name.items()}
|
212 |
-
|
213 |
-
# Source langs: S2ST / S2TT / ASR don't need source lang
|
214 |
-
# T2TT / T2ST use this
|
215 |
-
text_source_language_codes = [
|
216 |
-
"afr",
|
217 |
-
"amh",
|
218 |
-
"arb",
|
219 |
-
"ary",
|
220 |
-
"arz",
|
221 |
-
"asm",
|
222 |
-
"azj",
|
223 |
-
"bel",
|
224 |
-
"ben",
|
225 |
-
"bos",
|
226 |
-
"bul",
|
227 |
-
"cat",
|
228 |
-
"ceb",
|
229 |
-
"ces",
|
230 |
-
"ckb",
|
231 |
-
"cmn",
|
232 |
-
"cym",
|
233 |
-
"dan",
|
234 |
-
"deu",
|
235 |
-
"ell",
|
236 |
-
"eng",
|
237 |
-
"est",
|
238 |
-
"eus",
|
239 |
-
"fin",
|
240 |
-
"fra",
|
241 |
-
"gaz",
|
242 |
-
"gle",
|
243 |
-
"glg",
|
244 |
-
"guj",
|
245 |
-
"heb",
|
246 |
-
"hin",
|
247 |
-
"hrv",
|
248 |
-
"hun",
|
249 |
-
"hye",
|
250 |
-
"ibo",
|
251 |
-
"ind",
|
252 |
-
"isl",
|
253 |
-
"ita",
|
254 |
-
"jav",
|
255 |
-
"jpn",
|
256 |
-
"kan",
|
257 |
-
"kat",
|
258 |
-
"kaz",
|
259 |
-
"khk",
|
260 |
-
"khm",
|
261 |
-
"kir",
|
262 |
-
"kor",
|
263 |
-
"lao",
|
264 |
-
"lit",
|
265 |
-
"lug",
|
266 |
-
"luo",
|
267 |
-
"lvs",
|
268 |
-
"mai",
|
269 |
-
"mal",
|
270 |
-
"mar",
|
271 |
-
"mkd",
|
272 |
-
"mlt",
|
273 |
-
"mni",
|
274 |
-
"mya",
|
275 |
-
"nld",
|
276 |
-
"nno",
|
277 |
-
"nob",
|
278 |
-
"npi",
|
279 |
-
"nya",
|
280 |
-
"ory",
|
281 |
-
"pan",
|
282 |
-
"pbt",
|
283 |
-
"pes",
|
284 |
-
"pol",
|
285 |
-
"por",
|
286 |
-
"ron",
|
287 |
-
"rus",
|
288 |
-
"slk",
|
289 |
-
"slv",
|
290 |
-
"sna",
|
291 |
-
"snd",
|
292 |
-
"som",
|
293 |
-
"spa",
|
294 |
-
"srp",
|
295 |
-
"swe",
|
296 |
-
"swh",
|
297 |
-
"tam",
|
298 |
-
"tel",
|
299 |
-
"tgk",
|
300 |
-
"tgl",
|
301 |
-
"tha",
|
302 |
-
"tur",
|
303 |
-
"ukr",
|
304 |
-
"urd",
|
305 |
-
"uzn",
|
306 |
-
"vie",
|
307 |
-
"yor",
|
308 |
-
"yue",
|
309 |
-
"zsm",
|
310 |
-
"zul",
|
311 |
-
]
|
312 |
-
TEXT_SOURCE_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in text_source_language_codes])
|
313 |
|
314 |
-
#
|
315 |
-
|
316 |
-
|
317 |
-
"
|
318 |
-
"
|
319 |
-
"
|
320 |
-
"
|
321 |
-
"
|
322 |
-
"
|
323 |
-
"
|
324 |
-
"
|
325 |
-
"
|
326 |
-
"
|
327 |
-
"
|
328 |
-
"
|
329 |
-
"
|
330 |
-
"
|
331 |
-
"
|
332 |
-
"
|
333 |
-
"
|
334 |
-
"
|
335 |
-
"
|
336 |
-
"
|
337 |
-
"
|
338 |
-
"
|
339 |
-
"
|
340 |
-
"
|
341 |
-
"
|
342 |
-
"
|
343 |
-
"
|
344 |
-
"
|
345 |
-
"
|
346 |
-
"
|
347 |
-
"
|
348 |
-
"
|
349 |
-
"
|
350 |
-
"
|
351 |
-
"
|
352 |
-
"
|
353 |
-
|
354 |
-
|
355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Native-language display name -> translator language code (xx_YY form, as used by the mBART-50 model)
|
2 |
+
LANGUAGE_NAME_TO_CODE = {
|
3 |
+
"العربية": "ar_AR",
|
4 |
+
"Čeština": "cs_CZ",
|
5 |
+
"Deutsch": "de_DE",
|
6 |
+
"English": "en_XX",
|
7 |
+
"Español": "es_XX",
|
8 |
+
"Eesti": "et_EE",
|
9 |
+
"Suomi": "fi_FI",
|
10 |
+
"Français": "fr_XX",
|
11 |
+
"ગુજરાતી": "gu_IN",
|
12 |
+
"हिन्दी": "hi_IN",
|
13 |
+
"Italiano": "it_IT",
|
14 |
+
"日本語": "ja_XX",
|
15 |
+
"Қазақ": "kk_KZ",
|
16 |
+
"한국어": "ko_KR",
|
17 |
+
"Lietuvių": "lt_LT",
|
18 |
+
"Latviešu": "lv_LV",
|
19 |
+
"ဗမာ": "my_MM",
|
20 |
+
"नेपाली": "ne_NP",
|
21 |
+
"Nederlands": "nl_XX",
|
22 |
+
"Română": "ro_RO",
|
23 |
+
"Русский": "ru_RU",
|
24 |
+
"සිංහල": "si_LK",
|
25 |
+
"Türkçe": "tr_TR",
|
26 |
+
"Tiếng Việt": "vi_VN",
|
27 |
+
"中文": "zh_CN",
|
28 |
+
"Afrikaans": "af_ZA",
|
29 |
+
"Azərbaycan": "az_AZ",
|
30 |
+
"বাংলা": "bn_IN",
|
31 |
+
"فارسی": "fa_IR",
|
32 |
+
"עברית": "he_IL",
|
33 |
+
"Hrvatski": "hr_HR",
|
34 |
+
"Indonesia": "id_ID",
|
35 |
+
"ქართული": "ka_GE",
|
36 |
+
"ខ្មែរ": "km_KH",
|
37 |
+
"Македонски": "mk_MK",
|
38 |
+
"മലയാളം": "ml_IN",
|
39 |
+
"Монгол": "mn_MN",
|
40 |
+
"मराठी": "mr_IN",
|
41 |
+
"Polski": "pl_PL",
|
42 |
+
"پښتو": "ps_AF",
|
43 |
+
"Português": "pt_XX",
|
44 |
+
"Svenska": "sv_SE",
|
45 |
+
"Kiswahili": "sw_KE",
|
46 |
+
"தமிழ்": "ta_IN",
|
47 |
+
"తెలుగు": "te_IN",
|
48 |
+
"ไทย": "th_TH",
|
49 |
+
"Tagalog": "tl_XX",
|
50 |
+
"Українська": "uk_UA",
|
51 |
+
"اردو": "ur_PK",
|
52 |
+
"isiXhosa": "xh_ZA",
|
53 |
+
"Galego": "gl_ES",
|
54 |
+
"Slovenščina": "sl_SI"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
+
# Whisper language code -> lowercase English language name
|
58 |
+
WHISPER_LANGUAGES = {
|
59 |
+
"en": "english",
|
60 |
+
"zh": "chinese",
|
61 |
+
"de": "german",
|
62 |
+
"es": "spanish",
|
63 |
+
"ru": "russian",
|
64 |
+
"ko": "korean",
|
65 |
+
"fr": "french",
|
66 |
+
"ja": "japanese",
|
67 |
+
"pt": "portuguese",
|
68 |
+
"tr": "turkish",
|
69 |
+
"pl": "polish",
|
70 |
+
"ca": "catalan",
|
71 |
+
"nl": "dutch",
|
72 |
+
"ar": "arabic",
|
73 |
+
"sv": "swedish",
|
74 |
+
"it": "italian",
|
75 |
+
"id": "indonesian",
|
76 |
+
"hi": "hindi",
|
77 |
+
"fi": "finnish",
|
78 |
+
"vi": "vietnamese",
|
79 |
+
"he": "hebrew",
|
80 |
+
"uk": "ukrainian",
|
81 |
+
"el": "greek",
|
82 |
+
"ms": "malay",
|
83 |
+
"cs": "czech",
|
84 |
+
"ro": "romanian",
|
85 |
+
"da": "danish",
|
86 |
+
"hu": "hungarian",
|
87 |
+
"ta": "tamil",
|
88 |
+
"no": "norwegian",
|
89 |
+
"th": "thai",
|
90 |
+
"ur": "urdu",
|
91 |
+
"hr": "croatian",
|
92 |
+
"bg": "bulgarian",
|
93 |
+
"lt": "lithuanian",
|
94 |
+
"la": "latin",
|
95 |
+
"mi": "maori",
|
96 |
+
"ml": "malayalam",
|
97 |
+
"cy": "welsh",
|
98 |
+
"sk": "slovak",
|
99 |
+
"te": "telugu",
|
100 |
+
"fa": "persian",
|
101 |
+
"lv": "latvian",
|
102 |
+
"bn": "bengali",
|
103 |
+
"sr": "serbian",
|
104 |
+
"az": "azerbaijani",
|
105 |
+
"sl": "slovenian",
|
106 |
+
"kn": "kannada",
|
107 |
+
"et": "estonian",
|
108 |
+
"mk": "macedonian",
|
109 |
+
"br": "breton",
|
110 |
+
"eu": "basque",
|
111 |
+
"is": "icelandic",
|
112 |
+
"hy": "armenian",
|
113 |
+
"ne": "nepali",
|
114 |
+
"mn": "mongolian",
|
115 |
+
"bs": "bosnian",
|
116 |
+
"kk": "kazakh",
|
117 |
+
"sq": "albanian",
|
118 |
+
"sw": "swahili",
|
119 |
+
"gl": "galician",
|
120 |
+
"mr": "marathi",
|
121 |
+
"pa": "punjabi",
|
122 |
+
"si": "sinhala",
|
123 |
+
"km": "khmer",
|
124 |
+
"sn": "shona",
|
125 |
+
"yo": "yoruba",
|
126 |
+
"so": "somali",
|
127 |
+
"af": "afrikaans",
|
128 |
+
"oc": "occitan",
|
129 |
+
"ka": "georgian",
|
130 |
+
"be": "belarusian",
|
131 |
+
"tg": "tajik",
|
132 |
+
"sd": "sindhi",
|
133 |
+
"gu": "gujarati",
|
134 |
+
"am": "amharic",
|
135 |
+
"yi": "yiddish",
|
136 |
+
"lo": "lao",
|
137 |
+
"uz": "uzbek",
|
138 |
+
"fo": "faroese",
|
139 |
+
"ht": "haitian creole",
|
140 |
+
"ps": "pashto",
|
141 |
+
"tk": "turkmen",
|
142 |
+
"nn": "nynorsk",
|
143 |
+
"mt": "maltese",
|
144 |
+
"sa": "sanskrit",
|
145 |
+
"lb": "luxembourgish",
|
146 |
+
"my": "myanmar",
|
147 |
+
"bo": "tibetan",
|
148 |
+
"tl": "tagalog",
|
149 |
+
"mg": "malagasy",
|
150 |
+
"as": "assamese",
|
151 |
+
"tt": "tatar",
|
152 |
+
"haw": "hawaiian",
|
153 |
+
"ln": "lingala",
|
154 |
+
"ha": "hausa",
|
155 |
+
"ba": "bashkir",
|
156 |
+
"jw": "javanese",
|
157 |
+
"su": "sundanese",
|
158 |
+
}
|
159 |
|
160 |
+
def union_language_dict():
    """Build the language table shared by the transcriber and the translator.

    Walks ``LANGUAGE_NAME_TO_CODE`` and keeps only the languages whose short
    code (the lower-cased text before the first underscore of the translator
    code) is also a key of ``WHISPER_LANGUAGES``.

    Returns:
        dict: maps each retained language name to a dict with two entries,
        ``"transcriber"`` (the short code) and ``"translator"`` (the full
        code taken from ``LANGUAGE_NAME_TO_CODE``).
    """
    supported = {}
    for name, translator_code in LANGUAGE_NAME_TO_CODE.items():
        # Everything before the first underscore, e.g. "en_XX" -> "en"
        short_code, _, _ = translator_code.partition("_")
        short_code = short_code.lower()

        # Skip languages that have no matching key in WHISPER_LANGUAGES
        if short_code not in WHISPER_LANGUAGES:
            continue

        supported[name] = {
            "transcriber": short_code,
            "translator": translator_code,
        }
    return supported
|
requirements.txt
CHANGED
@@ -10,4 +10,7 @@ twitch-dl
|
|
10 |
pytube
|
11 |
pyperclip
|
12 |
transformers
|
13 |
-
git+https://github.com/openai/whisper.git
|
|
|
|
|
|
|
|
10 |
pytube
|
11 |
pyperclip
|
12 |
transformers
|
13 |
+
git+https://github.com/openai/whisper.git
|
14 |
+
sentencepiece
|
15 |
+
protobuf
|
16 |
+
git+https://github.com/suno-ai/bark.git
|
translatube.py
CHANGED
@@ -4,16 +4,21 @@ import urllib.parse as urlparse
|
|
4 |
from pytube import YouTube
|
5 |
import re
|
6 |
import subprocess
|
7 |
-
import
|
8 |
-
from
|
9 |
-
|
10 |
import torch
|
11 |
import whisper
|
|
|
|
|
|
|
12 |
|
13 |
# get device
|
14 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
15 |
# device = torch.device("cpu")
|
16 |
-
|
|
|
|
|
17 |
|
18 |
YOUTUBE = "youtube"
|
19 |
TWITCH = "twitch"
|
@@ -25,22 +30,19 @@ def copy_url_from_clipboard():
|
|
25 |
def clear_video_url():
|
26 |
visible = False
|
27 |
image = gr.Image(visible=visible, scale=1)
|
28 |
-
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=
|
29 |
-
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=
|
30 |
-
|
31 |
-
transcribe_audio_button = gr.Button(size="lg", value="transcribe audio", min_width="10px", scale=0, visible=visible)
|
32 |
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
33 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
34 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
35 |
translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
|
36 |
-
transcribe_audio_button = gr.Button(size="lg", value="transcribe audio", min_width="10px", scale=0, visible=visible)
|
37 |
return (
|
38 |
"",
|
39 |
image,
|
40 |
source_languaje,
|
41 |
target_languaje,
|
42 |
-
|
43 |
-
transcribe_audio_button,
|
44 |
original_audio,
|
45 |
original_audio_transcribed,
|
46 |
translated_audio,
|
@@ -66,14 +68,13 @@ def get_youtube_video_id(url):
|
|
66 |
return None
|
67 |
|
68 |
def is_valid_url(url):
|
69 |
-
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=
|
70 |
-
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=
|
71 |
-
|
72 |
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False)
|
73 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=True)
|
74 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True)
|
75 |
translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=True)
|
76 |
-
transcribe_audio_button = gr.Button(size="lg", value="transcribe audio", min_width="10px", scale=0, visible=True)
|
77 |
if "youtube" in url.lower() or "youtu.be" in url.lower():
|
78 |
thumbnail = get_youtube_video_id(url)
|
79 |
if thumbnail:
|
@@ -81,12 +82,11 @@ def is_valid_url(url):
|
|
81 |
gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
|
82 |
source_languaje,
|
83 |
target_languaje,
|
84 |
-
|
85 |
gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
|
86 |
original_audio,
|
87 |
original_audio_transcribed,
|
88 |
translated_audio,
|
89 |
-
transcribe_audio_button,
|
90 |
original_audio_translated,
|
91 |
)
|
92 |
else:
|
@@ -94,12 +94,11 @@ def is_valid_url(url):
|
|
94 |
gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
|
95 |
source_languaje,
|
96 |
target_languaje,
|
97 |
-
|
98 |
gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
|
99 |
original_audio,
|
100 |
original_audio_transcribed,
|
101 |
translated_audio,
|
102 |
-
transcribe_audio_button,
|
103 |
original_audio_translated,
|
104 |
)
|
105 |
elif "twitch" in url.lower() or "twitch.tv" in url.lower():
|
@@ -107,36 +106,33 @@ def is_valid_url(url):
|
|
107 |
gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
|
108 |
source_languaje,
|
109 |
target_languaje,
|
110 |
-
|
111 |
gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False),
|
112 |
original_audio,
|
113 |
original_audio_transcribed,
|
114 |
translated_audio,
|
115 |
-
transcribe_audio_button,
|
116 |
original_audio_translated,
|
117 |
)
|
118 |
else:
|
119 |
visible = False
|
120 |
image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
|
121 |
-
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=
|
122 |
-
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=
|
123 |
-
|
124 |
stream_page = gr.Textbox(value=ERROR, label="Stream page", elem_id="stream_page", visible=visible)
|
125 |
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
126 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
127 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
128 |
translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
|
129 |
-
transcribe_audio_button = gr.Button(size="lg", value="transcribe audio", min_width="10px", scale=0, visible=visible)
|
130 |
return (
|
131 |
image,
|
132 |
source_languaje,
|
133 |
target_languaje,
|
134 |
-
|
135 |
stream_page,
|
136 |
original_audio,
|
137 |
original_audio_transcribed,
|
138 |
translated_audio,
|
139 |
-
transcribe_audio_button,
|
140 |
original_audio_translated,
|
141 |
)
|
142 |
|
@@ -175,16 +171,20 @@ def get_audio_from_video(url, stream_page):
|
|
175 |
gr.Textbox(value=filename, label="Stream page", elem_id="stream_page", visible=False)
|
176 |
)
|
177 |
|
178 |
-
def trascribe_audio(audio_path):
|
|
|
|
|
|
|
|
|
179 |
audio = whisper.load_audio(audio_path)
|
180 |
audio = whisper.pad_or_trim(audio)
|
181 |
|
182 |
-
|
183 |
-
|
184 |
-
_, probs = model.detect_language(mel)
|
185 |
|
186 |
-
|
187 |
-
|
|
|
188 |
|
189 |
# Save the result to a file
|
190 |
filename = "result.txt"
|
@@ -192,7 +192,16 @@ def trascribe_audio(audio_path):
|
|
192 |
f.write(result.text)
|
193 |
|
194 |
# Remove audio file
|
195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
|
197 |
return (
|
198 |
result.text,
|
@@ -200,18 +209,42 @@ def trascribe_audio(audio_path):
|
|
200 |
)
|
201 |
|
202 |
def translate(original_audio_transcribed_path, source_languaje, target_languaje):
|
203 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
with open(original_audio_transcribed_path, "r") as f:
|
205 |
-
|
206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
|
208 |
# Save the result to a file
|
209 |
filename = "translated_text.txt"
|
210 |
with open(filename, "w") as f:
|
211 |
-
f.write(
|
212 |
|
213 |
-
# Remove
|
214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
|
216 |
return (
|
217 |
translated,
|
@@ -219,14 +252,26 @@ def translate(original_audio_transcribed_path, source_languaje, target_languaje)
|
|
219 |
)
|
220 |
|
221 |
def tex2speech(original_audio_translated_path):
|
222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
with gr.Blocks() as demo:
|
|
|
225 |
with gr.Row(variant="panel"):
|
226 |
url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
|
227 |
copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
|
228 |
delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
|
229 |
-
copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
|
230 |
|
231 |
stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
|
232 |
visible = False
|
@@ -234,11 +279,10 @@ with gr.Blocks() as demo:
|
|
234 |
image = gr.Image(visible=visible, scale=1)
|
235 |
with gr.Column():
|
236 |
with gr.Row():
|
237 |
-
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=
|
238 |
-
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=
|
239 |
with gr.Row():
|
240 |
-
|
241 |
-
transcribe_audio_button = gr.Button(size="lg", value="transcribe audio", min_width="10px", scale=0, visible=visible)
|
242 |
|
243 |
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
244 |
original_audio_path = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
|
@@ -247,40 +291,44 @@ with gr.Blocks() as demo:
|
|
247 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
248 |
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
249 |
translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
|
250 |
-
|
251 |
-
|
252 |
-
|
|
|
|
|
|
|
253 |
outputs=[
|
|
|
254 |
image,
|
255 |
source_languaje,
|
256 |
target_languaje,
|
257 |
-
|
258 |
-
stream_page,
|
259 |
original_audio,
|
260 |
original_audio_transcribed,
|
261 |
translated_audio,
|
262 |
-
transcribe_audio_button,
|
263 |
original_audio_translated,
|
264 |
]
|
265 |
)
|
266 |
-
|
267 |
-
fn=
|
|
|
268 |
outputs=[
|
269 |
-
url_textbox,
|
270 |
image,
|
271 |
source_languaje,
|
272 |
target_languaje,
|
273 |
-
|
274 |
-
|
275 |
original_audio,
|
276 |
original_audio_transcribed,
|
277 |
translated_audio,
|
278 |
original_audio_translated,
|
279 |
]
|
280 |
)
|
281 |
-
|
282 |
-
original_audio.change(fn=trascribe_audio, inputs=original_audio_path, outputs=[original_audio_transcribed, original_audio_transcribed_path])
|
283 |
original_audio_transcribed.change(fn=translate, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
|
284 |
-
original_audio_translated.change(fn=tex2speech, inputs=original_audio_translated_path, outputs=translated_audio)
|
|
|
|
|
285 |
|
286 |
demo.launch()
|
|
|
4 |
from pytube import YouTube
|
5 |
import re
|
6 |
import subprocess
|
7 |
+
import torch
|
8 |
+
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast, pipeline
|
9 |
+
from lang_list import union_language_dict
|
10 |
import torch
|
11 |
import whisper
|
12 |
+
from bark import SAMPLE_RATE, generate_audio, preload_models
|
13 |
+
from scipy.io.wavfile import write as write_wav
|
14 |
+
import gc
|
15 |
|
16 |
# get device
|
17 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
18 |
# device = torch.device("cpu")
|
19 |
+
|
20 |
+
# Create a dictionary to store the language codes
|
21 |
+
language_dict = union_language_dict()
|
22 |
|
23 |
YOUTUBE = "youtube"
|
24 |
TWITCH = "twitch"
|
|
|
30 |
def clear_video_url():
|
31 |
visible = False
|
32 |
image = gr.Image(visible=visible, scale=1)
|
33 |
+
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
34 |
+
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
35 |
+
translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
|
|
|
36 |
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
37 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
38 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
39 |
translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
|
|
|
40 |
return (
|
41 |
"",
|
42 |
image,
|
43 |
source_languaje,
|
44 |
target_languaje,
|
45 |
+
translate_button,
|
|
|
46 |
original_audio,
|
47 |
original_audio_transcribed,
|
48 |
translated_audio,
|
|
|
68 |
return None
|
69 |
|
70 |
def is_valid_url(url):
|
71 |
+
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
72 |
+
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
73 |
+
translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=True)
|
74 |
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False)
|
75 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=True)
|
76 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True)
|
77 |
translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=True)
|
|
|
78 |
if "youtube" in url.lower() or "youtu.be" in url.lower():
|
79 |
thumbnail = get_youtube_video_id(url)
|
80 |
if thumbnail:
|
|
|
82 |
gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
|
83 |
source_languaje,
|
84 |
target_languaje,
|
85 |
+
translate_button,
|
86 |
gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
|
87 |
original_audio,
|
88 |
original_audio_transcribed,
|
89 |
translated_audio,
|
|
|
90 |
original_audio_translated,
|
91 |
)
|
92 |
else:
|
|
|
94 |
gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
|
95 |
source_languaje,
|
96 |
target_languaje,
|
97 |
+
translate_button,
|
98 |
gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
|
99 |
original_audio,
|
100 |
original_audio_transcribed,
|
101 |
translated_audio,
|
|
|
102 |
original_audio_translated,
|
103 |
)
|
104 |
elif "twitch" in url.lower() or "twitch.tv" in url.lower():
|
|
|
106 |
gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
|
107 |
source_languaje,
|
108 |
target_languaje,
|
109 |
+
translate_button,
|
110 |
gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False),
|
111 |
original_audio,
|
112 |
original_audio_transcribed,
|
113 |
translated_audio,
|
|
|
114 |
original_audio_translated,
|
115 |
)
|
116 |
else:
|
117 |
visible = False
|
118 |
image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
|
119 |
+
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
120 |
+
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
121 |
+
translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
|
122 |
stream_page = gr.Textbox(value=ERROR, label="Stream page", elem_id="stream_page", visible=visible)
|
123 |
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
124 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
125 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
126 |
translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
|
|
|
127 |
return (
|
128 |
image,
|
129 |
source_languaje,
|
130 |
target_languaje,
|
131 |
+
translate_button,
|
132 |
stream_page,
|
133 |
original_audio,
|
134 |
original_audio_transcribed,
|
135 |
translated_audio,
|
|
|
136 |
original_audio_translated,
|
137 |
)
|
138 |
|
|
|
171 |
gr.Textbox(value=filename, label="Stream page", elem_id="stream_page", visible=False)
|
172 |
)
|
173 |
|
174 |
+
def trascribe_audio(audio_path, source_lang):
|
175 |
+
# Load the model
|
176 |
+
trascribe_model = whisper.load_model("large-v2", device=device)
|
177 |
+
|
178 |
+
# load audio and pad/trim it to fit 30 seconds
|
179 |
audio = whisper.load_audio(audio_path)
|
180 |
audio = whisper.pad_or_trim(audio)
|
181 |
|
182 |
+
# make log-Mel spectrogram and move to the same device as the model
|
183 |
+
mel = whisper.log_mel_spectrogram(audio).to(trascribe_model.device)
|
|
|
184 |
|
185 |
+
# Decode the result
|
186 |
+
options = whisper.DecodingOptions(fp16 = False, language = language_dict[source_lang]['transcriber'])
|
187 |
+
result = whisper.decode(trascribe_model, mel, options)
|
188 |
|
189 |
# Save the result to a file
|
190 |
filename = "result.txt"
|
|
|
192 |
f.write(result.text)
|
193 |
|
194 |
# Remove audio file
|
195 |
+
subprocess.run(["rm", audio_path])
|
196 |
+
|
197 |
+
# free gpu memory
|
198 |
+
del trascribe_model
|
199 |
+
del audio
|
200 |
+
del mel
|
201 |
+
del options
|
202 |
+
if device == "cuda":
|
203 |
+
torch.cuda.empty_cache()
|
204 |
+
gc.collect()
|
205 |
|
206 |
return (
|
207 |
result.text,
|
|
|
209 |
)
|
210 |
|
211 |
def translate(original_audio_transcribed_path, source_languaje, target_languaje):
|
212 |
+
# model
|
213 |
+
translate_model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt").to(device)
|
214 |
+
translate_tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
|
215 |
+
|
216 |
+
# Get source and target languaje codes
|
217 |
+
source_languaje_code = language_dict[source_languaje]["translator"]
|
218 |
+
target_languaje_code = language_dict[target_languaje]["translator"]
|
219 |
+
|
220 |
+
# Get the transcribed text
|
221 |
with open(original_audio_transcribed_path, "r") as f:
|
222 |
+
transcribed_text = f.read()
|
223 |
+
|
224 |
+
# Translate the text
|
225 |
+
encoded = translate_tokenizer(transcribed_text, return_tensors="pt").to(device)
|
226 |
+
generated_tokens = translate_model.generate(
|
227 |
+
**encoded,
|
228 |
+
forced_bos_token_id=translate_tokenizer.lang_code_to_id[target_languaje_code]
|
229 |
+
)
|
230 |
+
translated = translate_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
231 |
|
232 |
# Save the result to a file
|
233 |
filename = "translated_text.txt"
|
234 |
with open(filename, "w") as f:
|
235 |
+
f.write(translated)
|
236 |
|
237 |
+
# Remove transcribed file
|
238 |
+
subprocess.run(["rm", original_audio_transcribed_path])
|
239 |
+
|
240 |
+
# free gpu memory
|
241 |
+
del translate_model
|
242 |
+
del translate_tokenizer
|
243 |
+
del encoded
|
244 |
+
del generated_tokens
|
245 |
+
if device == "cuda":
|
246 |
+
torch.cuda.empty_cache()
|
247 |
+
gc.collect()
|
248 |
|
249 |
return (
|
250 |
translated,
|
|
|
252 |
)
|
253 |
|
254 |
def tex2speech(original_audio_translated_path, history_prompt="v2/es_speaker_1"):
    """Synthesize speech for the translated text and save it as a WAV file.

    Args:
        original_audio_translated_path: Path of the text file that holds the
            translated transcription.
        history_prompt: Voice preset forwarded to ``generate_audio``. The
            default is the value that used to be hard-coded here, so existing
            callers behave exactly as before; pass another preset to match a
            different target language.

    Returns:
        The path of the audio file that was written
        (``"translated_audio.wav"``).
    """
    with open(original_audio_translated_path, "r") as f:
        translated_text = f.read()

    # NOTE(review): preload_models, generate_audio, SAMPLE_RATE and write_wav
    # are imported outside the visible part of the file (presumably Bark and
    # scipy.io.wavfile) -- confirm against the top-of-file imports.
    preload_models()
    speech_array = generate_audio(translated_text, history_prompt=history_prompt)

    translated_audio_path = "translated_audio.wav"
    write_wav(translated_audio_path, SAMPLE_RATE, speech_array)

    return translated_audio_path
|
265 |
+
|
266 |
+
def delete_translated_audio(translated_audio_path):
    """Delete the generated translated-audio file, if there is one.

    Cleanup is best-effort: an empty or missing path is a no-op, and a file
    that cannot be removed (already deleted, permission problem, ...) does not
    raise.

    Args:
        translated_audio_path: Path of the audio file to remove. May be
            ``None`` or empty, in which case nothing is done.

    Returns:
        None.
    """
    # Imported locally because the file's import block is outside this view.
    import os

    if not translated_audio_path:
        return

    try:
        # os.remove replaces the external `rm` call: it is portable and never
        # interprets a path that starts with "-" as a command-line option.
        os.remove(translated_audio_path)
    except OSError:
        # The previous `rm` call ignored its exit status, so filesystem
        # failures stay non-fatal here as well.
        pass
|
268 |
|
269 |
with gr.Blocks() as demo:
|
270 |
+
# Layout
|
271 |
with gr.Row(variant="panel"):
|
272 |
url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
|
273 |
copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
|
274 |
delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
|
|
|
275 |
|
276 |
stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
|
277 |
visible = False
|
|
|
279 |
image = gr.Image(visible=visible, scale=1)
|
280 |
with gr.Column():
|
281 |
with gr.Row():
|
282 |
+
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
283 |
+
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
284 |
with gr.Row():
|
285 |
+
translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
|
|
|
286 |
|
287 |
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
288 |
original_audio_path = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
|
|
|
291 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
292 |
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
293 |
translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
|
294 |
+
translated_audio_translated_path = gr.Textbox(label="translated audio translated", elem_id="translated_audio_translated", visible=False)
|
295 |
+
|
296 |
+
# Events
|
297 |
+
copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
|
298 |
+
delete_button.click(
|
299 |
+
fn=clear_video_url,
|
300 |
outputs=[
|
301 |
+
url_textbox,
|
302 |
image,
|
303 |
source_languaje,
|
304 |
target_languaje,
|
305 |
+
translate_button,
|
|
|
306 |
original_audio,
|
307 |
original_audio_transcribed,
|
308 |
translated_audio,
|
|
|
309 |
original_audio_translated,
|
310 |
]
|
311 |
)
|
312 |
+
url_textbox.change(
|
313 |
+
fn=is_valid_url,
|
314 |
+
inputs=url_textbox,
|
315 |
outputs=[
|
|
|
316 |
image,
|
317 |
source_languaje,
|
318 |
target_languaje,
|
319 |
+
translate_button,
|
320 |
+
stream_page,
|
321 |
original_audio,
|
322 |
original_audio_transcribed,
|
323 |
translated_audio,
|
324 |
original_audio_translated,
|
325 |
]
|
326 |
)
|
327 |
+
translate_button.click(fn=get_audio_from_video, inputs=[url_textbox, stream_page], outputs=[original_audio, original_audio_path])
|
328 |
+
original_audio.change(fn=trascribe_audio, inputs=[original_audio_path, source_languaje], outputs=[original_audio_transcribed, original_audio_transcribed_path])
|
329 |
original_audio_transcribed.change(fn=translate, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
|
330 |
+
# original_audio_translated.change(fn=tex2speech, inputs=original_audio_translated_path, outputs=translated_audio)
|
331 |
+
# translated_audio.change(fn=delete_translated_audio, inputs=translated_audio)
|
332 |
+
|
333 |
|
334 |
demo.launch()
|