Ruslan Magana Vsevolodovna committed on
Commit
a216bdd
1 Parent(s): 3162e54

Add application file

Browse files
Files changed (5) hide show
  1. README.md +2 -1
  2. app.py +229 -0
  3. demo/tryagain.mp4 +0 -0
  4. requirements.txt +8 -0
  5. utils.py +37 -0
README.md CHANGED
@@ -1,8 +1,9 @@
1
  ---
2
  title: Youtube Video Translator
3
- emoji: 🐠
4
  colorFrom: yellow
5
  colorTo: purple
 
6
  sdk: gradio
7
  sdk_version: 3.2
8
  app_file: app.py
 
1
  ---
2
  title: Youtube Video Translator
3
+ emoji: 🐨
4
  colorFrom: yellow
5
  colorTo: purple
6
+ python_version: 3.8.9
7
  sdk: gradio
8
  sdk_version: 3.2
9
  app_file: app.py
app.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf8
2
+ # Youtube Video Translator
3
+ # Developed by Ruslan Magana Vsevolodovna
4
+ # https://ruslanmv.com/
5
+
6
+ # importing all necessary libraries
7
+ import pathlib
8
+ import sys, os
9
+ from gtts import gTTS
10
+ import gradio as gr
11
+ import os
12
+ import speech_recognition as sr
13
+ from googletrans import Translator, constants
14
+ from pprint import pprint
15
+ from moviepy.editor import *
16
+ from pytube import YouTube
17
+ from youtube_transcript_api import YouTubeTranscriptApi
18
+ from utils import *
19
+
20
def download_video(url):
    """Download a YouTube video as an MP4 file into the current directory.

    Args:
        url: Address of the YouTube video.

    Returns:
        The path of the downloaded file, as returned by pytube's ``download()``.

    Raises:
        ValueError: If the video offers no stream matching the
            ``progressive=True, file_extension="mp4"`` filter.
    """
    print("Downloading...")
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.download().
    if stream is None:
        raise ValueError("No progressive MP4 stream is available for " + str(url))
    local_file = stream.download()
    print("Downloaded")
    return local_file
30
+
31
def validate_url(url):
    """Check whether *url* looks like a usable web address.

    A URL is accepted when it has an ``http`` or ``https`` scheme and a
    non-empty host part. A notice is printed for rejected values.

    Args:
        url: The text to check.

    Returns:
        True if the URL looks valid, False otherwise.
    """
    # Local import keeps this helper self-contained (standard library only).
    from urllib.parse import urlparse

    try:
        parts = urlparse(url)
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a malformed address.
        parts = None

    is_valid = bool(
        parts is not None
        and parts.scheme in ("http", "https")
        and parts.netloc
    )
    if not is_valid:
        print("Hi there URL seems invalid ")
    return is_valid
35
+
36
+
37
def cleanup():
    """Delete every ``*.mp4`` and ``*.wav`` file in the current directory.

    Deletion is best effort: a file that cannot be removed is reported and
    skipped, and the remaining files are still processed.
    """
    import glob
    import pathlib

    patterns = ('*.mp4', '*.wav')  # the tuple of file types
    for pattern in patterns:
        for junk in glob.glob(pattern):
            print("Deleting", junk)
            try:
                pathlib.Path(junk).unlink()
            except OSError:
                # Handle the failure per file so one locked or undeletable
                # entry does not stop the rest of the cleanup.
                print("I cannot delete the file because it is being used by another process")
55
+
56
def getSize(filename):
    """Return the size in bytes of the file at *filename*."""
    return os.path.getsize(filename)
59
+
60
+
61
def generate_transcript(url, lang_api):
    """Fetch the transcript of a YouTube video as one string.

    Args:
        url: Address of the YouTube video.
        lang_api: Language code requested from the transcript API.

    Returns:
        The transcript entries joined together, each followed by a space;
        ``[Music]`` markers are left out.

    Raises:
        ValueError: If no video ID can be located in *url*.
        Exception: Whatever ``YouTubeTranscriptApi.get_transcript`` raises
            (for example when no transcript exists) is propagated.
    """
    from urllib.parse import parse_qs, urlparse

    parts = urlparse(url)
    query_ids = parse_qs(parts.query).get("v")
    if query_ids:
        # Regular watch URL: read only the "v" parameter so that extra
        # parameters such as "&list" or "&ab_channel=..." are ignored.
        video_id = query_ids[0]
    elif parts.netloc.lower().endswith("youtu.be") and parts.path.strip("/"):
        # Short link of the form https://youtu.be/<id>
        video_id = parts.path.strip("/").split("/")[0]
    else:
        # Previous behaviour: everything after the first "=".
        # Raises ValueError when the URL contains no "=" at all.
        video_id = url[url.index("=") + 1:]

    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_api])
    return "".join(
        entry["text"] + " "
        for entry in transcript
        if entry["text"] != '[Music]'
    )
70
+
71
+
72
# Maps each language offered in the UI to
# (speech-recognition locale, two-letter language code).
# The two-letter code is used for transcripts, translation and speech synthesis.
_LANGUAGE_CODES = {
    "English": ("en-US", "en"),
    "Italian": ("it-IT", "it"),
    "Spanish": ("es-MX", "es"),
    "Russian": ("ru-RU", "ru"),
    "German": ("de-DE", "de"),
    "Japanese": ("ja-JP", "ja"),
}

# Clip handed back to the UI whenever the video cannot be processed.
_FALLBACK_VIDEO = "./demo/tryagain.mp4"

# WAV files larger than this many bytes are recognized chunk by chunk.
_MAX_WAV_BYTES = 50000000


def _language_codes(language):
    """Return the (recognition locale, language code) pair for a UI language."""
    try:
        return _LANGUAGE_CODES[language]
    except KeyError:
        raise ValueError("Unsupported language: {!r}".format(language)) from None


def _recognize_wav(recognizer, wav_path, lang_in):
    """Load one WAV file into memory and convert its speech to text."""
    with sr.AudioFile(wav_path) as source:
        audio_data = recognizer.record(source)
    return recognizer.recognize_google(audio_data, language=lang_in)


def _speech_to_text(videoclip, lang_in):
    """Extract the clip's audio track to audio.wav and transcribe it."""
    videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
    recognizer = sr.Recognizer()
    print("Recognize from ", lang_in)
    if getSize("audio.wav") <= _MAX_WAV_BYTES:
        return _recognize_wav(recognizer, "audio.wav", lang_in)

    print("The wav is too large")
    text = ""
    for chunk in split_audio_wav("audio.wav"):
        print("Converting audio to text", chunk)
        # Recognize the chunk file itself, not the complete recording again.
        text = text + _recognize_wav(recognizer, chunk, lang_in) + " "
    return text


def video_to_translate(url, initial_language, final_language):
    """Produce a copy of a YouTube video dubbed in another language.

    The video is downloaded into a ``temp`` sub-directory. Its text comes
    from the YouTube transcript when available, otherwise from speech
    recognition on the audio track. The text is translated, synthesized
    with gTTS and set as the audio track of the video.

    Args:
        url: Address of the YouTube video.
        initial_language: Spoken language of the video, e.g. ``"English"``.
        final_language: Language to translate into, e.g. ``"Italian"``.

    Returns:
        The file name of the translated video, written to the directory
        that was current when the function was called, or
        ``"./demo/tryagain.mp4"`` when recognition or translation fails.

    Raises:
        ValueError: If either language is not one of the supported names.
    """
    lang_in, lang_api = _language_codes(initial_language)
    lang = _language_codes(final_language)[1]

    # Initial directory
    home_dir = os.getcwd()
    print('Initial directory:', home_dir)
    cleanup()
    # Temporal directory
    temp_dir = os.path.join(home_dir, "temp")
    print('Temporal directory:', temp_dir)
    pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)
    os.chdir(temp_dir)
    print('Changing temporal directory', os.getcwd())
    # Cleaning previous files
    cleanup()

    videoclip = None
    audioclip = None
    try:
        file_obj = download_video(url)
        print(file_obj)
        videoclip = VideoFileClip(file_obj)

        try:
            # Trying to get transcripts
            text = generate_transcript(url, lang_api)
            print("Transcript Found")
        except Exception:
            print("No Transcript Found")
            try:
                text = _speech_to_text(videoclip, lang_in)
            except Exception:
                print("This video cannot be recognized")
                text = None

        translation = None
        if text is not None:
            print("Destination language ", lang)
            try:
                translation = Translator().translate(text, dest=lang)
            except Exception:
                print("This text cannot be translated")

        if translation is None:
            # Release the downloaded file before trying to delete it.
            videoclip.close()
            videoclip = None
            cleanup()
            return _FALLBACK_VIDEO

        # Absolute path, so the file is still found after changing back
        # to the initial directory below.
        # NOTE(review): gTTS is expected to write MP3 data despite the .wav
        # name; the name is kept so cleanup() removes the file - confirm.
        audio_path = os.path.join(temp_dir, "audio.wav")
        gTTS(text=translation.text, lang=lang, slow=False).save(audio_path)
        audioclip = AudioFileClip(audio_path)

        # Replace the original audio track with the synthesized speech.
        videoclip.audio = CompositeAudioClip([audioclip])
        new_video = "video_translated_" + lang + ".mp4"

        # Return back to main directory so the result is written there.
        os.chdir(home_dir)
        print('Final directory', os.getcwd())
        videoclip.write_videofile(new_video)
        return new_video
    finally:
        # Always release the media readers and restore the working
        # directory, even when an exception escapes.
        if audioclip is not None:
            audioclip.close()
        if videoclip is not None:
            videoclip.close()
        os.chdir(home_dir)
202
+
203
# Input widgets: source language, target language and the video address.
initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German"])
final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese"])
url = gr.inputs.Textbox(label = "Enter the YouTube URL below:")


# Build the web interface around video_to_translate and start serving it.
# Example URLs carry only the "v" parameter so that every example resolves
# to a plain video ID.
gr.Interface(fn = video_to_translate,
             inputs = [url,initial_language,final_language],
             outputs = 'video',
             verbose = True,
             title = 'Video Youtube Translator',
             description = 'A simple application that translates Youtube videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, German, English and Japanese. Wait one minute to process.',
             article =
             '''<div>
             <p style="text-align: center"> All you need to do is to paste the Youtube link and hit submit, then wait for compiling. After that click on Play/Pause for listening to the video. The video is saved in an mp4 format.
             For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
             </p>
             </div>''',

             examples = [
                 ["https://www.youtube.com/watch?v=Cu3R5it4cQs", "English","Italian"],
                 ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Spanish"],
                 ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
                 ["https://www.youtube.com/watch?v=_5YeX8eCLgA", "Russian","English"],
                 ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
                 ["https://www.youtube.com/watch?v=eo17uDr2_XA", "German","Spanish"]
             ]
             ).launch()
demo/tryagain.mp4 ADDED
Binary file (307 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pip==22.2.2
2
+ gradio==3.0.24
3
+ googletrans==4.0.0rc1
4
+ moviepy
5
+ SpeechRecognition
6
+ gTTS
7
+ youtube_transcript_api
8
+ pytube
utils.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydub import AudioSegment
2
+ #from pydub.utils import mediainfo
3
+ from pydub.utils import make_chunks
4
+ import math
5
+ #flac_audio = AudioSegment.from_file("sample.flac", "flac")
6
+ #flac_audio.export("audio.wav", format="wav")
7
def split_audio_wav(filename, file_split_size=40000000):
    """Split a WAV file into consecutive chunk files of a target size.

    The chunks are written to the current directory as ``chunk0.wav``,
    ``chunk1.wav`` and so on.

    Args:
        filename: Path of the WAV file to split.
        file_split_size: Target size of one chunk in bytes (40 MB by default).
            The chunk duration is rounded up to a whole second, so a chunk
            may be slightly larger than this value.

    Returns:
        The list of chunk file names in playback order; empty when the
        audio has no duration.

    Raises:
        ValueError: If *file_split_size* is not positive.
    """
    if file_split_size <= 0:
        raise ValueError("file_split_size must be a positive number of bytes")

    myaudio = AudioSegment.from_file(filename, "wav")
    channel_count = myaudio.channels  # Get channels
    sample_width = myaudio.sample_width  # Get sample width
    duration_in_sec = len(myaudio) / 1000  # Length of audio in sec
    sample_rate = myaudio.frame_rate
    print("sample_width=", sample_width)
    print("channel_count=", channel_count)
    print("duration_in_sec=", duration_in_sec)
    print("frame_rate=", sample_rate)

    # Use the sample width reported by the file rather than assuming 16 bit.
    # assumes sample_width is expressed in bytes per sample - TODO confirm
    bytes_per_second = sample_rate * sample_width * channel_count
    wav_file_size = bytes_per_second * duration_in_sec
    print("wav_file_size = ", wav_file_size)
    if wav_file_size <= 0:
        # Nothing to split; also avoids dividing by zero below.
        return []

    # Seconds of audio that fit into file_split_size bytes, rounded up.
    chunk_length_in_sec = math.ceil(file_split_size / bytes_per_second)
    chunk_length_ms = chunk_length_in_sec * 1000

    chunks_list = []
    # Export all of the individual chunks as wav files
    for i, chunk in enumerate(make_chunks(myaudio, chunk_length_ms)):
        chunk_name = "chunk{0}.wav".format(i)
        print("exporting", chunk_name)
        chunk.export(chunk_name, format="wav")
        chunks_list.append(chunk_name)
    return chunks_list