Spaces:

ruslanmv
/

Youtube-Video-Translator

Build error

App Files Files Community

Ruslan Magana Vsevolodovna committed on Sep 3, 2022

Commit

a216bdd

•

1 Parent(s): 3162e54

Add application file

Browse files

Files changed (5) hide show

README.md +2 -1
app.py +229 -0
demo/tryagain.mp4 +0 -0
requirements.txt +8 -0
utils.py +37 -0

README.md CHANGED Viewed

@@ -1,8 +1,9 @@
 ---
 title: Youtube Video Translator
-emoji: 🐠
 colorFrom: yellow
 colorTo: purple
 sdk: gradio
 sdk_version: 3.2
 app_file: app.py

 ---
 title: Youtube Video Translator
+emoji: 🐨
 colorFrom: yellow
 colorTo: purple
+python_version: 3.8.9
 sdk: gradio
 sdk_version: 3.2
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,229 @@

+# coding=utf8
+# Youtube Video Translator
+# Developed by Ruslan Magana Vsevolodovna
+# https://ruslanmv.com/
+# importing all necessary libraries
+import pathlib
+import sys, os
+from gtts import gTTS
+import gradio as gr
+import os
+import speech_recognition as sr
+from googletrans import Translator, constants
+from pprint import pprint
+from moviepy.editor import *
+from pytube import YouTube
+from youtube_transcript_api import YouTubeTranscriptApi
+from utils import *
def download_video(url):
    """Download the first progressive MP4 stream of a YouTube video.

    Args:
        url: Address of the YouTube video to fetch.

    Returns:
        Whatever pytube's ``Stream.download()`` returns for the saved file.
        ``download()`` is called without arguments, so pytube's default
        output location is used.

    Raises:
        RuntimeError: If the video exposes no progressive MP4 stream.
    """
    print("Downloading...")
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    # ``first()`` gives back None when the filter matches nothing; fail with
    # a readable message instead of an AttributeError on ``None.download()``.
    if stream is None:
        raise RuntimeError("No progressive mp4 stream available for " + str(url))
    local_file = stream.download()
    print("Downloaded")
    return local_file
def validate_url(url):
    """Report whether *url* looks like an absolute URL.

    The check uses only the standard library: the value must be a string
    without whitespace that parses into a non-empty scheme and a non-empty
    network location. A warning is printed when the check fails.

    Args:
        url: Candidate URL (any object; non-strings are rejected).

    Returns:
        True when the URL passes the check, False otherwise.
    """
    from urllib.parse import urlparse

    is_valid = False
    if isinstance(url, str) and url and not any(ch.isspace() for ch in url):
        try:
            parts = urlparse(url)
        except ValueError:
            # urlparse rejects a few malformed inputs (e.g. a broken IPv6 host).
            parts = None
        is_valid = bool(parts and parts.scheme and parts.netloc)
    if not is_valid:
        print("Hi there URL seems invalid ")
    return is_valid
def cleanup():
    """Delete every ``*.mp4`` and ``*.wav`` file in the current directory.

    Deletion is best-effort: a file that cannot be removed is reported and
    skipped, and the remaining files are still processed.
    """
    import glob
    import pathlib

    for pattern in ('*.mp4', '*.wav'):
        for junk in glob.glob(pattern):
            print("Deleting", junk)
            try:
                pathlib.Path(junk).unlink()
            except OSError:
                # Handle the failure per file so one locked file does not
                # prevent the others from being removed.
                print("I cannot delete the file because it is being used by another process")
def getSize(filename):
    """Return the size, in bytes, reported by ``os.stat`` for *filename*."""
    return os.stat(filename).st_size
def extract_video_id(url):
    """Extract the YouTube video id from *url*.

    Handles ``watch?v=ID`` links (with or without extra query parameters),
    ``youtu.be/ID`` short links and ``/embed/ID``, ``/shorts/ID``,
    ``/live/ID`` paths. Anything else falls back to the text after the first
    ``=`` sign.

    Raises:
        ValueError: If no id can be located (the URL contains no ``=``).
    """
    from urllib.parse import parse_qs, urlparse

    parts = urlparse(url.strip())
    from_query = parse_qs(parts.query).get("v")
    if from_query:
        return from_query[0]
    segments = [segment for segment in parts.path.split("/") if segment]
    if parts.netloc.lower().endswith("youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]
    # Legacy fallback; ``index`` raises ValueError when there is no "=".
    return url[url.index("=") + 1:]


def generate_transcript(url, lang_api):
    """Fetch the transcript of a YouTube video as one string.

    Args:
        url: Address of the YouTube video.
        lang_api: Language code passed to ``YouTubeTranscriptApi``.

    Returns:
        All transcript snippets except ``[Music]`` markers, each followed by
        a single space.

    Raises:
        ValueError: If no video id can be extracted from *url*.
        Exception: Whatever ``YouTubeTranscriptApi.get_transcript`` raises
            when the transcript cannot be fetched.
    """
    video_id = extract_video_id(url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_api])
    snippets = (entry["text"] for entry in transcript)
    return "".join(snippet + " " for snippet in snippets if snippet != '[Music]')
# Source language name -> (speech-recognition locale, transcript API code).
_SOURCE_LANGUAGES = {
    "English": ("en-US", "en"),
    "Italian": ("it-IT", "it"),
    "Spanish": ("es-MX", "es"),
    "Russian": ("ru-RU", "ru"),
    "German": ("de-DE", "de"),
    "Japanese": ("ja-JP", "ja"),
}

# Destination language name -> code used for translation and text-to-speech.
_TARGET_LANGUAGES = {
    "English": "en",
    "Italian": "it",
    "Spanish": "es",
    "Russian": "ru",
    "German": "de",
    "Japanese": "ja",
}

# Clip returned when the video cannot be recognized or translated.
_TRY_AGAIN_VIDEO = "./demo/tryagain.mp4"

# WAV files larger than this are split before recognition.
# NOTE(review): the original comment cited a 10 MB per-request limit while the
# threshold is 50 MB; the value is kept as it was -- confirm which is intended.
_MAX_SINGLE_REQUEST_BYTES = 50000000


def _recognize_wav(recognizer, wav_path, lang_in):
    """Transcribe one WAV file with ``recognize_google``."""
    with sr.AudioFile(wav_path) as source:
        # Load the audio into memory before sending it for recognition.
        audio_data = recognizer.record(source)
    return recognizer.recognize_google(audio_data, language=lang_in)


def _speech_to_text(videoclip, lang_in):
    """Extract the clip's audio to ``audio.wav`` and convert it to text."""
    videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
    recognizer = sr.Recognizer()
    print("Recognize from ", lang_in)
    if getSize("audio.wav") <= _MAX_SINGLE_REQUEST_BYTES:
        return _recognize_wav(recognizer, "audio.wav", lang_in)
    print("The wav is too large")
    pieces = []
    for chunk in split_audio_wav("audio.wav"):
        print("Converting audio to text", chunk)
        # Recognize the chunk itself, not the whole recording again.
        pieces.append(_recognize_wav(recognizer, chunk, lang_in))
    return "".join(piece + " " for piece in pieces)


def video_to_translate(url, initial_language, final_language):
    """Produce a copy of a YouTube video dubbed in another language.

    The video is downloaded into a ``temp`` sub-directory. Its text comes
    from the YouTube transcript when one exists, otherwise from speech
    recognition on the audio track. The text is translated, turned into
    speech and set as the audio of the video, which is then written to the
    directory that was current when the function was called.

    Args:
        url: Address of the YouTube video.
        initial_language: Spoken language of the video (e.g. "English").
        final_language: Language to translate into (e.g. "Italian").

    Returns:
        The file name of the translated video, or ``"./demo/tryagain.mp4"``
        when the audio cannot be recognized or the text cannot be translated.

    Raises:
        ValueError: If either language name is not supported.
    """
    try:
        lang_in, lang_api = _SOURCE_LANGUAGES[initial_language]
        lang = _TARGET_LANGUAGES[final_language]
    except KeyError as err:
        raise ValueError("Unsupported language: {0!r}".format(err.args[0])) from err

    # Initial directory
    home_dir = os.getcwd()
    print('Initial directory:', home_dir)
    cleanup()
    # Temporal directory
    temp_dir = os.path.join(home_dir, "temp")
    print('Temporal directory:', temp_dir)
    pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)
    os.chdir(temp_dir)
    print('Changing temporal directory', os.getcwd())

    videoclip = None
    finished = False
    try:
        # Cleaning previous files
        cleanup()
        file_obj = download_video(url)
        print(file_obj)
        videoclip = VideoFileClip(file_obj)
        try:
            # Prefer the published transcript when it exists.
            text = generate_transcript(url, lang_api)
            print("Transcript Found")
        except Exception:
            print("No Transcript Found")
            try:
                text = _speech_to_text(videoclip, lang_in)
            except Exception:
                print("This video cannot be recognized")
                return _TRY_AGAIN_VIDEO
        print("Destination language ", lang)
        # init the Google API translator
        translator = Translator()
        try:
            translation = translator.translate(text, dest=lang)
        except Exception:
            print("This text cannot be translated")
            return _TRY_AGAIN_VIDEO
        # Absolute paths keep the clips readable whatever the working
        # directory is while the video is encoded.
        speech_path = os.path.join(temp_dir, "audio.wav")
        gTTS(text=translation.text, lang=lang, slow=False).save(speech_path)
        audioclip = AudioFileClip(speech_path)
        # adding audio to the video clip
        videoclip.audio = CompositeAudioClip([audioclip])
        new_video = "video_translated_" + lang + ".mp4"
        videoclip.write_videofile(os.path.join(home_dir, new_video))
        finished = True
        return new_video
    finally:
        # Release the clip first so the temporary files can be deleted.
        if videoclip is not None:
            videoclip.close()
        if not finished:
            cleanup()
        # Always return to the starting directory, even on failure, so the
        # next request does not nest another ``temp`` directory.
        os.chdir(home_dir)
        print('Final directory', os.getcwd())
# Gradio widgets: source language, destination language and the video URL.
initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German"])
final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese"])
url =gr.inputs.Textbox(label = "Enter the YouTube URL below:")
# Build the web interface around video_to_translate and start serving it.
gr.Interface(fn = video_to_translate,
            inputs = [url,initial_language,final_language],
            outputs = 'video',
            verbose = True,
            title = 'Video Youtube Translator',
            description = 'A simple application that translates Youtube videos from English, Italian, Japanese, Russian, Spanish, and German  to  Italian, Spanish, Russian, German, English and Japanese.  Wait one minute to process.',
            article =
                        '''<div>
                            <p style="text-align: center"> All you need to do is to paste the Youtube link  and hit submit, then wait for compiling. After that click on Play/Pause for listening to the video. The video is saved in an mp4 format.
                            For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
                            </p>
                        </div>''',
           examples = [
                        ["https://www.youtube.com/watch?v=Cu3R5it4cQs&list", "English","Italian"],
                        ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Spanish"],
                        ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
                        ["https://www.youtube.com/watch?v=_5YeX8eCLgA&ab_channel=TheTelegraph", "Russian","English"],
                        ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
                        ["https://www.youtube.com/watch?v=eo17uDr2_XA", "German","Spanish"]
                        ]
            ).launch()

demo/tryagain.mp4 ADDED Viewed

Binary file (307 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+pip==22.2.2
+gradio==3.0.24
+googletrans==4.0.0rc1
+moviepy
+SpeechRecognition
+gTTS
+youtube_transcript_api
+pytube

utils.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from pydub import AudioSegment
+#from pydub.utils import mediainfo
+from pydub.utils import make_chunks
+import math
+#flac_audio = AudioSegment.from_file("sample.flac", "flac")
+#flac_audio.export("audio.wav", format="wav")
def split_audio_wav(filename, file_split_size=40000000):
    """Split a WAV file into chunks of roughly ``file_split_size`` bytes.

    Each chunk is exported to the current directory as ``chunk<i>.wav``.

    Args:
        filename: Path of the WAV file to split.
        file_split_size: Target chunk size in bytes (default 40 MB).

    Returns:
        The list of exported chunk file names, in playback order. The list
        is empty when the audio has no duration.

    Raises:
        ValueError: If ``file_split_size`` is not positive.
    """
    if file_split_size <= 0:
        raise ValueError("file_split_size must be a positive number of bytes")
    myaudio = AudioSegment.from_file(filename, "wav")
    channel_count = myaudio.channels
    sample_width = myaudio.sample_width  # bytes per sample, per pydub
    duration_in_sec = len(myaudio) / 1000  # len() of a segment is in ms
    sample_rate = myaudio.frame_rate
    print("sample_width=", sample_width)
    print("channel_count=", channel_count)
    print("duration_in_sec=", duration_in_sec)
    print("frame_rate=", sample_rate)
    # Use the real sample width rather than assuming 16-bit audio.
    bytes_per_sec = sample_rate * sample_width * channel_count
    wav_file_size = bytes_per_sec * duration_in_sec
    print("wav_file_size = ", wav_file_size)
    if bytes_per_sec <= 0 or duration_in_sec <= 0:
        # Nothing to split; also avoids dividing by zero below.
        return []
    # Seconds of audio that fit in one chunk, rounded up to a whole second
    # (so a chunk may exceed the target by less than one second of audio).
    chunk_length_in_sec = math.ceil(file_split_size / bytes_per_sec)
    chunk_length_ms = chunk_length_in_sec * 1000
    chunks_list = []
    # Export all of the individual chunks as wav files
    for i, chunk in enumerate(make_chunks(myaudio, chunk_length_ms)):
        chunk_name = "chunk{0}.wav".format(i)
        print("exporting", chunk_name)
        chunk.export(chunk_name, format="wav")
        chunks_list.append(chunk_name)
    return chunks_list