# coding=utf8
# Youtube Video Translator
# Developed by Ruslan Magana Vsevolodovna
# https://ruslanmv.com/

# importing all necessary libraries
import httpcore
#setattr(httpcore, 'SyncHTTPTransport', Any)
import pathlib
import sys, os
from gtts import gTTS
import gradio as gr
import os
import speech_recognition as sr
from googletrans import Translator, constants
from pprint import pprint
from moviepy.editor import *
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from utils import *
import json
import re
from pytube import YouTube
from yt_dlp import YoutubeDL
from yt_dlp import YoutubeDL
import os

import yt_dlp

def download_video(url):
    """
    Download a YouTube video with yt-dlp and return the path of the saved file.

    The best available video and audio streams are requested and merged into
    an MP4 whose name is the video title, written to the current working
    directory. If a ``youtube_cookies.txt`` export sits next to the script it
    is handed to yt-dlp for authentication.

    :param url: str - YouTube video URL
    :return: str or None - path of the downloaded file, or None when the
             download failed for any reason (the error is printed)
    """
    print("Starting download...")

    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',  # Ensures best quality
        'merge_output_format': 'mp4',  # Ensures final output is MP4
        'outtmpl': '%(title)s.%(ext)s',  # Saves file with video title
        'quiet': False,  # Shows progress
        # Request headers are passed to the Python API through 'http_headers';
        # a top-level 'user-agent' key is not an option YoutubeDL reads.
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',  # Mimic browser
        },
    }

    # The Python API option for an exported cookie file is 'cookiefile'
    # ('cookies' is not read). Only pass it when the export really exists.
    cookie_path = 'youtube_cookies.txt'
    if os.path.isfile(cookie_path):
        ydl_opts['cookiefile'] = cookie_path

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            # Prefer the path yt-dlp reports after merging/post-processing;
            # prepare_filename() is only the name derived from the template.
            downloads = info.get('requested_downloads') or []
            local_file = downloads[0].get('filepath') if downloads else None
            if not local_file:
                local_file = ydl.prepare_filename(info)
            print(f"✅ Downloaded: {local_file}")
            return local_file
    except Exception as e:
        print(f"❌ Download failed: {str(e)}")
        return None

# Example Usage
# NOTE(review): these statements are at module level, so the sample video is
# downloaded every time this file is executed or imported, before the UI is
# built. The name `url` is rebound to a Gradio Textbox further down the file.
url = "https://www.youtube.com/watch?v=uLVRZE8OAI4"
download_video(url)


def validate_youtube(url):
    """
    Tell whether a YouTube URL has to be rejected.

    The video metadata is looked up with yt-dlp without downloading anything.
    The URL is rejected when the lookup fails, when no duration is reported,
    or when the video runs for more than 10 minutes.

    :param url: str - YouTube video URL
    :return: bool - True when the URL must be rejected, False when the video
             is reachable and at most 10 minutes long
    """
    limit_seconds = 600
    probe_options = {'quiet': True, 'no_warnings': True}

    try:
        with YoutubeDL(probe_options) as probe:
            metadata = probe.extract_info(url, download=False)
            seconds = metadata.get('duration')  # Length of the video in seconds

            # No duration reported: treated the same as an invalid video.
            if seconds is None:
                print("Could not determine video length.")
                return True

            if seconds > limit_seconds:
                print("Your video is longer than 10 minutes.")
                return True

            print("Your video is 10 minutes or shorter.")
            return False

    except Exception as e:
        print(f"Error: The provided URL is invalid or not accessible. ({e})")
        return True  # An unreachable URL counts as rejected
        
def validate_url(url):
    """
    Check the syntax of a URL with the ``validators`` package.

    :param url: str - URL to check
    :return: bool - True when the URL is NOT valid, False when it is valid
    """
    import validators

    # validators.url() yields a falsy result for a malformed URL.
    if validators.url(url):
        return False

    print("Hi there URL seems invalid ")
    return True
def cleanup():
    """
    Delete every ``*.mp4`` and ``*.wav`` file in the current working directory.

    Deletion is best effort and handled per file: an entry that cannot be
    removed (for example because another process still holds it open) is
    reported and skipped, and the remaining files are still deleted.
    """
    import pathlib
    import glob

    types = ('*.mp4', '*.wav')  # the tuple of file types
    # Finding mp4 and wave files
    junks = []
    for pattern in types:
        junks.extend(glob.glob(pattern))

    for junk in junks:
        print("Deleting", junk)
        try:
            pathlib.Path(junk).unlink()
        except OSError:
            # Keep going: one locked entry must not block the other deletions.
            print("I cannot delete the file because it is being used by another process:", junk)

def getSize(filename):
    """Return the size of ``filename`` in bytes."""
    return os.path.getsize(filename)


def clean_transcript(transcript_list):
    """
    Flatten transcript entries into one string, dropping music markers.

    Each entry is read through its ``"text"`` key. Entries whose text is
    exactly one of the known "[music]" markers are skipped; every kept text
    is followed by a single space (so a non-empty result ends with a space).

    :param transcript_list: iterable of mappings with a ``"text"`` key
    :return: str - the concatenated transcript text
    """
    music_markers = frozenset((
        '[music]', '[Music]',
        '[музыка]', '[Музыка]',
        '[musik]', '[Musik]',
        '[musica]', '[Musica]',
        '[música]', '[Música]',
        '[音楽]',
        '[音乐]',
    ))
    lines = (entry["text"] for entry in transcript_list)
    return "".join(line + " " for line in lines if line not in music_markers)
    
    
def _extract_video_id(url):
    """
    Pull the video id out of a YouTube URL.

    Understands ``watch?v=<id>`` (with or without further query parameters),
    ``youtu.be/<id>`` short links and ``/embed/``, ``/shorts/``, ``/live/``
    and ``/v/`` paths. Anything else falls back to the text after the first
    ``=`` (the previous behaviour) or, failing that, to the input itself.
    """
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    # urlparse only recognises the host when a scheme (or leading //) is there.
    parsed = urlparse(candidate if "://" in candidate else "//" + candidate)

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = (parsed.hostname or "").lower()
    if segments and (host == "youtu.be" or host.endswith(".youtu.be")):
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    if "=" in candidate:
        return candidate[candidate.index("=") + 1:]
    return candidate


def get_transcript(url,desired_language):
    """
    Fetch the transcript of a YouTube video and, if offered, its translation.

    Transcript preference: a manually created translatable transcript, then an
    automatically generated translatable one, then the first transcript
    listed. When the chosen transcript offers ``desired_language`` as a
    translation language, the translated text is fetched as well.

    :param url: str - YouTube video URL
    :param desired_language: str - language code of the wanted translation
    :return: tuple (script, script_translated, is_translated). When the
             transcripts cannot be listed the result is ``(" ", " ", False)``.
             ``script_translated`` is ``""`` when no translation was obtained.
    """
    id_you = _extract_video_id(url)
    try:
        # retrieve the available transcripts
        transcript_list = YouTubeTranscriptApi.list_transcripts(id_you)
        available = list(transcript_list)
    except Exception:
        print('TranscriptsDisabled:')
        return " ", " ", False

    print([(t.language_code, t.is_generated, t.is_translatable) for t in available])
    print("There are {} avialable scripts".format(len(available)))
    if not available:
        return " ", " ", False

    manual = [t for t in available if not t.is_generated and t.is_translatable]
    generated = [t for t in available if t.is_generated and t.is_translatable]

    # Use the matching transcript object itself, so the language that was
    # detected is the language that is fetched.
    if manual:
        print("Script found manually generated")
        print('We extract manually created transcripts')
        transcript = manual[0]
    elif generated:
        print("Script found automatic generated")
        print('We  extract generated transcript')
        transcript = generated[0]
    else:
        print('We try find the transcript')
        transcript = available[0]

    is_translated = False
    script_translated = ""
    if transcript.is_translatable:
        # Translation languages are read from the transcript that was chosen.
        offered = {entry['language_code'] for entry in transcript.translation_languages}
        if desired_language in offered:
            print("It was found the translation for lang:",desired_language)
            print('We translate directly the transcript')
            try:
                translated = transcript.translate(desired_language).fetch()
                script_translated = clean_transcript(translated)
                is_translated = True
            except Exception as err:
                # The caller falls back to its own translation when this is False.
                print("The translated transcript could not be fetched:", err)

    try:
        script = clean_transcript(transcript.fetch())
    except Exception as err:
        print("The transcript could not be fetched:", err)
        script = " "

    return script, script_translated, is_translated

# Remember the start-up directory and make sure a "temp" folder exists in it.
home_dir = os.getcwd()
temp_dir = os.path.join(home_dir, "temp")
os.makedirs(temp_dir, exist_ok=True)
# Publish both locations through the process environment.
os.environ.update(home_dir=home_dir, temp_dir=temp_dir)

# Speech-recognition locale for each supported spoken (source) language.
_RECOGNITION_LOCALES = {
    "English": "en-US",
    "Italian": "it-IT",
    "Chinese": "zh-CN",
    "Spanish": "es-MX",
    "Russian": "ru-RU",
    "German": "de-DE",
    "Japanese": "ja-JP",
}

# Translation / text-to-speech language code for each supported target language.
_TARGET_CODES = {
    "English": "en",
    "Italian": "it",
    "Spanish": "es",
    "Russian": "ru",
    "German": "de",
    "Vietnamese": "vi",
    "Japanese": "ja",
}

_FAILURE_VIDEO = "./demo/tryagain.mp4"


def _recognize_speech(wav_path, locale):
    """Transcribe ``wav_path`` with Google speech recognition; raises on failure."""
    recognizer = sr.Recognizer()

    def transcribe(audio_file):
        # load the audio into memory, then convert speech to text
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data, language=locale)

    # NOTE(review): the original comment mentions a 10 MB request limit while
    # the threshold used is 50,000,000 bytes; the threshold is kept as it was.
    if getSize(wav_path) <= 50000000:
        return transcribe(wav_path)

    print("The wav is too large")
    pieces = []
    for chunk in split_audio_wav(wav_path):
        print("Converting audio to text", chunk)
        if not isinstance(chunk, (str, os.PathLike)):
            # NOTE(review): assumes split_audio_wav() returns chunk file
            # paths - confirm against utils. For anything else the whole
            # recording is recognised once instead of once per chunk.
            return transcribe(wav_path)
        pieces.append(transcribe(chunk))
    return "".join(piece + " " for piece in pieces)


def _dub_video(videoclip, url, lang_in, lang, home_dir):
    """Replace the clip's audio with the spoken translation; return the output file name or None."""
    text, trans, is_traduc = get_transcript(url, desired_language=lang)

    if is_traduc:
        print("Transcript Found")
    else:
        print("No Transcript Found")
        if lang_in is None:
            print("The initial language is not supported")
            return None

        # No translated transcript: recognise the speech of the audio track.
        try:
            videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
            print("Recognize from ", lang_in)
            text = _recognize_speech("audio.wav", lang_in)
        except Exception:
            print("This video cannot be recognized")
            return None

        print("Destination language ", lang)
        try:
            # init the Google API translator and translate the recognised text
            trans = Translator().translate(text, dest=lang).text
        except Exception:
            print("This text cannot be translated")
            return None

    if not trans or not trans.strip():
        # The text-to-speech step cannot work with empty text.
        print("This text cannot be translated")
        return None

    # NOTE(review): the synthesised speech is saved under a .wav name, as
    # before, so that cleanup() removes it on the next run.
    gTTS(text=trans, lang=lang, slow=False).save("audio.wav")

    audioclip = AudioFileClip("audio.wav")
    try:
        # adding audio to the video clip
        videoclip.audio = CompositeAudioClip([audioclip])
        new_video = "video_translated_" + lang + ".mp4"

        # Return back to main directory
        os.chdir(home_dir)
        print('Final directory', os.getcwd())

        videoclip.write_videofile(new_video)
    finally:
        audioclip.close()
    return new_video


def video_to_translate(url,initial_language,final_language):
    """
    Download a YouTube video and dub it into another language.

    Steps: validate the URL, download the video, obtain the translated text
    (from the YouTube transcript when available, otherwise by speech
    recognition followed by machine translation), synthesise speech from it
    and write a new video that carries the synthesised audio track.

    :param url: str - YouTube video URL
    :param initial_language: str - spoken language of the video, e.g. "English"
    :param final_language: str - language to translate to, e.g. "Spanish"
    :return: str - path of the translated video; "./demo/tryagain2.mp4" when
             the URL is rejected, "./demo/tryagain.mp4" when processing fails
    """
    print('Checking the url')
    if validate_youtube(url) is True:
        return "./demo/tryagain2.mp4"

    # Internal definitions
    lang_in = _RECOGNITION_LOCALES.get(initial_language)  # only needed for speech recognition
    lang = _TARGET_CODES.get(final_language)
    if lang is None:
        print("The final language is not supported")
        return _FAILURE_VIDEO

    # Initial directory
    home_dir = os.getenv('home_dir') or os.getcwd()
    print('Initial directory:', home_dir)
    # Cleaning previous files
    cleanup()

    file_obj = download_video(url)
    print(file_obj)
    if file_obj is None:
        return _FAILURE_VIDEO

    try:
        videoclip = VideoFileClip(file_obj)
    except Exception as err:
        print("The downloaded video cannot be opened:", err)
        cleanup()
        return _FAILURE_VIDEO

    # Close the clip before cleanup() so the downloaded file is not still held
    # open when it gets deleted.
    try:
        new_video = _dub_video(videoclip, url, lang_in, lang, home_dir)
    finally:
        videoclip.close()

    if new_video is None:
        cleanup()
        return _FAILURE_VIDEO
    return new_video

# User-facing texts of the web page.
_APP_DESCRIPTION = "A simple application that translates YouTube small videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, English, and Japanese. Wait one minute to process."
_APP_ARTICLE = """<div>
                <p style="text-align: center"> All you need to do is to paste the YouTube link and hit submit, then wait for compiling. After that, click on Play/Pause to listen to the video. The video is saved in an MP4 format.
                The length video limit is 10 minutes. For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>.
                </p>
               </div>"""

# Input widgets: source language, target language and the video link.
initial_language = gr.Dropdown(
    label="Initial Language",
    choices=["English", "Italian", "Japanese", "Russian", "Spanish", "German"],
)
final_language = gr.Dropdown(
    label="Final Language",
    choices=["Russian", "Italian", "Spanish", "German", "English", "Japanese"],
)
url = gr.Textbox(label="Enter the YouTube URL below:")

# Build the interface around video_to_translate and start serving it.
gr.Interface(
    fn=video_to_translate,
    inputs=[url, initial_language, final_language],
    outputs="video",
    title="Video YouTube Translator",
    description=_APP_DESCRIPTION,
    article=_APP_ARTICLE,
    examples=[
        ["https://youtu.be/uLVRZE8OAI4?si=LA08t9hUJHLYg8K_", "English", "Spanish"],
    ],
).launch()