|
|
|
|
|
|
|
|
|
|
|
|
|
import pathlib |
|
import sys, os |
|
from gtts import gTTS |
|
import gradio as gr |
|
import os |
|
import speech_recognition as sr |
|
from googletrans import Translator, constants |
|
from pprint import pprint |
|
from moviepy.editor import * |
|
from pytube import YouTube |
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
from utils import * |
|
|
|
def download_video(url):
    """Download the first progressive MP4 stream of a YouTube video.

    Args:
        url: YouTube URL handed to ``pytube.YouTube``.

    Returns:
        Whatever ``Stream.download()`` returns for the saved file (it is
        called without arguments, so pytube's default output location is used).

    Raises:
        RuntimeError: if the video offers no progressive MP4 stream.
    """
    print("Downloading...")
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    # An empty query yields None from first(); fail with an explicit message
    # instead of an AttributeError on ``None.download()``.
    if stream is None:
        raise RuntimeError(
            "No progressive mp4 stream available for: {}".format(url)
        )
    local_file = stream.download()
    print("Downloaded")
    return local_file
|
|
|
def validate_url(url):
    """Check ``url`` with ``validators.url`` and report the outcome.

    A warning is printed when the URL is rejected.

    Args:
        url: The string to validate.

    Returns:
        True when ``validators.url`` accepts the URL, False otherwise.
    """
    import validators

    # validators.url returns a falsy failure object on invalid input, so
    # normalise the result to a plain bool for callers.
    is_valid = bool(validators.url(url))
    if not is_valid:
        print("Hi there URL seems invalid ")
    return is_valid
|
|
|
|
|
def cleanup():
    """Delete every ``*.mp4`` and ``*.wav`` entry in the current directory.

    Deletion is best-effort: an entry that cannot be removed is reported on
    stdout and skipped, and the remaining entries are still processed.
    """
    import glob
    import pathlib

    patterns = ('*.mp4', '*.wav')
    junks = [name for pattern in patterns for name in glob.glob(pattern)]

    for junk in junks:
        print("Deleting", junk)
        # Guard each removal on its own so one locked or undeletable entry
        # does not prevent the rest from being cleaned up.
        try:
            pathlib.Path(junk).unlink()
        except OSError:
            print("I cannot delete the file because it is being used by another process")
|
|
|
def getSize(filename):
    """Return the size of ``filename`` in bytes."""
    return os.path.getsize(filename)
|
|
|
|
|
def generate_transcript(url, lang_api):
    """Fetch a YouTube transcript and flatten it into one string.

    The video id is read from the ``v`` query parameter, so extra parameters
    such as ``&list=...`` or ``&ab_channel=...`` do not leak into the id.
    ``youtu.be/<id>`` and ``/embed|shorts|live|v/<id>`` paths are accepted too.

    Args:
        url: YouTube URL of the video.
        lang_api: Language code passed to ``YouTubeTranscriptApi`` in the
            ``languages`` list.

    Returns:
        The transcript texts, each followed by a single space, with
        ``[Music]`` entries left out.

    Raises:
        ValueError: if no video id can be found in ``url``.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    video_id = parse_qs(parsed.query).get("v", [""])[0]
    if not video_id:
        segments = [part for part in parsed.path.split("/") if part]
        if parsed.netloc.endswith("youtu.be") and segments:
            video_id = segments[0]
        elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            video_id = segments[1]
    if not video_id:
        raise ValueError("Cannot find a YouTube video id in URL: {!r}".format(url))

    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_api])
    return "".join(
        entry["text"] + " " for entry in transcript if entry["text"] != '[Music]'
    )
|
|
|
|
|
# UI language name -> (speech-recognition locale, transcript language code).
_SOURCE_LANGUAGES = {
    "English": ("en-US", "en"),
    "Italian": ("it-IT", "it"),
    "Spanish": ("es-MX", "es"),
    "Russian": ("ru-RU", "ru"),
    "German": ("de-DE", "de"),
    "Japanese": ("ja-JP", "ja"),
}

# UI language name -> language code used for translation and speech synthesis.
_TARGET_LANGUAGES = {
    "English": "en",
    "Italian": "it",
    "Spanish": "es",
    "Russian": "ru",
    "German": "de",
    "Japanese": "ja",
}

# Clip handed back to the UI when recognition or translation fails.
_FALLBACK_VIDEO = "./demo/tryagain.mp4"

# WAV files larger than this many bytes take the chunked recognition path.
_MAX_WAV_BYTES = 50000000


def _recognize_speech(videoclip, lang_in):
    """Transcribe the clip's audio with Google speech recognition.

    Writes the audio track to ``audio.wav`` in the current directory.
    Returns the recognised text, or None when recognition is not possible.
    """
    if videoclip.audio is None:
        # Nothing to transcribe; let the caller fall back.
        return None

    videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')

    recognizer = sr.Recognizer()
    with sr.AudioFile("audio.wav") as source:
        audio_data = recognizer.record(source)

    print("Recognize from ", lang_in)

    if getSize("audio.wav") > _MAX_WAV_BYTES:
        print("The wav is too large")
        pieces = []
        for chunk in split_audio_wav("audio.wav"):
            print("Converting audio to text", chunk)
            # NOTE(review): this recognises the full ``audio_data`` for every
            # chunk and never reads ``chunk`` itself; kept as in the original
            # because split_audio_wav's return type is not visible here.
            try:
                pieces.append(
                    recognizer.recognize_google(audio_data, language=lang_in)
                )
            except Exception:
                return None
        return "".join(piece + " " for piece in pieces)

    try:
        return recognizer.recognize_google(audio_data, language=lang_in)
    except Exception:
        return None


def video_to_translate(url, initial_language, final_language):
    """Dub a YouTube video into another language and return the new file.

    Steps: download the video into ``./temp``, obtain its text (YouTube
    transcript first, speech recognition as fallback), translate the text,
    synthesise it with gTTS, replace the clip's audio and write
    ``video_translated_<lang>.mp4`` into the starting directory.

    Args:
        url: YouTube URL of the video.
        initial_language: Spoken language of the video; one of the keys of
            ``_SOURCE_LANGUAGES``.
        final_language: Target language; one of the keys of
            ``_TARGET_LANGUAGES``.

    Returns:
        The name of the translated video file, or ``"./demo/tryagain.mp4"``
        when the audio cannot be recognised or the text cannot be translated.

    Raises:
        ValueError: if either language name is not supported.
    """
    # Validate before touching the filesystem or the network.
    if initial_language not in _SOURCE_LANGUAGES:
        raise ValueError(
            "Unsupported initial language: {!r}".format(initial_language)
        )
    if final_language not in _TARGET_LANGUAGES:
        raise ValueError(
            "Unsupported final language: {!r}".format(final_language)
        )
    lang_in, lang_api = _SOURCE_LANGUAGES[initial_language]
    lang = _TARGET_LANGUAGES[final_language]

    home_dir = os.getcwd()
    print('Initial directory:', home_dir)
    cleanup()

    temp_dir = os.path.join(home_dir, "temp")
    print('Temporal directory:', temp_dir)
    pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)

    os.chdir(temp_dir)
    try:
        print('Changing temporal directory', os.getcwd())
        cleanup()

        file_obj = download_video(url)
        print(file_obj)

        videoclip = VideoFileClip(file_obj)
        try:
            try:
                text = generate_transcript(url, lang_api)
                print("Transcript Found")
            except Exception:
                # Any transcript failure falls back to speech recognition.
                print("No Transcript Found")
                text = _recognize_speech(videoclip, lang_in)
                if text is None:
                    print("This video cannot be recognized")
                    cleanup()
                    return _FALLBACK_VIDEO

            print("Destination language ", lang)

            try:
                translation = Translator().translate(text, dest=lang)
            except Exception:
                print("This text cannot be translated")
                cleanup()
                return _FALLBACK_VIDEO

            # Synthesise the translated speech and swap it in as the audio.
            gTTS(text=translation.text, lang=lang, slow=False).save("audio.wav")
            audioclip = AudioFileClip("audio.wav")
            videoclip.audio = CompositeAudioClip([audioclip])
            new_video = "video_translated_" + lang + ".mp4"

            # The result is written into the starting directory, not ./temp.
            os.chdir(home_dir)
            print('Final directory', os.getcwd())
            videoclip.write_videofile(new_video)
            return new_video
        finally:
            videoclip.close()
    finally:
        # Restore the working directory on every exit path.
        os.chdir(home_dir)
|
|
|
# Input widgets of the web UI: source language, target language, video URL.
initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German"])
final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese"])
url = gr.inputs.Textbox(label = "Enter the YouTube URL below:")

# Build the interface around video_to_translate and start serving it.
gr.Interface(
    fn = video_to_translate,
    inputs = [url, initial_language, final_language],
    outputs = 'video',
    verbose = True,
    title = 'Video Youtube Translator',
    # The target list names every language offered by the final_language dropdown.
    description = 'A simple application that translates Youtube videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, German, English and Japanese. Wait one minute to process.',
    article =
        '''<div>
<p style="text-align: center"> All you need to do is to paste the Youtube link and hit submit, then wait for compiling. After that click on Play/Pause for listening to the video. The video is saved in an mp4 format.
For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
</p>
</div>''',
    examples = [
        ["https://www.youtube.com/watch?v=Cu3R5it4cQs&list", "English","Italian"],
        ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Spanish"],
        ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
        ["https://www.youtube.com/watch?v=_5YeX8eCLgA&ab_channel=TheTelegraph", "Russian","English"],
        ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
        ["https://www.youtube.com/watch?v=eo17uDr2_XA", "German","Spanish"]
    ]
).launch()