"""Transcribe a local video or a YouTube URL with Whisper, optionally
translate the transcript with M2M100, and export it as SRT, Word, PDF,
or PowerPoint.  A Gradio UI (see ``main``) drives the pipeline.
"""

import os
import subprocess
import time

import arabic_reshaper
import gradio as gr
import whisper
import yt_dlp
from bidi.algorithm import get_display
from docx import Document
from pptx import Presentation
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from webdriver_manager.chrome import ChromeDriverManager

# Whisper ASR model -- "tiny" trades accuracy for transcription speed.
model = whisper.load_model("tiny")

# M2M100 target-language codes this app offers (source is always English).
SUPPORTED_TARGET_LANGUAGES = {"fa", "es", "fr", "de", "it", "pt"}


def load_translation_model(target_language):
    """Load M2M100 configured to translate English into *target_language*.

    Returns a ``(tokenizer, model)`` pair.
    Raises ValueError if the language is not supported.
    """
    if target_language not in SUPPORTED_TARGET_LANGUAGES:
        raise ValueError(f"Translation model for {target_language} not supported")
    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
    translation_model = M2M100ForConditionalGeneration.from_pretrained(
        "facebook/m2m100_418M"
    )
    tokenizer.src_lang = "en"
    tokenizer.tgt_lang = target_language
    return tokenizer, translation_model


def translate_text(text, tokenizer, model):
    """Translate *text* with a loaded M2M100 pair; wrap failures in RuntimeError."""
    try:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        # forced_bos_token_id steers generation toward tokenizer.tgt_lang.
        translated = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.get_lang_id(tokenizer.tgt_lang),
        )
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError(f"Error during translation: {e}") from e


def format_timestamp(seconds):
    """Format a float second offset as an SRT timestamp ``HH:MM:SS,mmm``."""
    milliseconds = int((seconds % 1) * 1000)
    seconds = int(seconds)
    hours = seconds // 3600
    minutes = (seconds % 3600) // 60
    seconds = seconds % 60
    return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"


def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write Whisper segments to *output_file* in SRT format.

    If *translation_model* is given, each segment is translated first.
    """
    # UTF-8 is required: translated text (e.g. Persian) is non-ASCII.
    with open(output_file, "w", encoding="utf-8") as f:
        for i, segment in enumerate(transcription["segments"]):
            text = segment["text"]
            if translation_model:
                text = translate_text(text, tokenizer, translation_model)
            start_time = format_timestamp(segment["start"])
            end_time = format_timestamp(segment["end"])
            f.write(f"{i + 1}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text.strip()}\n\n")


def embed_hardsub_in_video(video_file, srt_file, output_video):
    """Burn the subtitles in *srt_file* into *video_file* using ffmpeg.

    Raises RuntimeError on ffmpeg failure or timeout.
    """
    # Argument list (shell=False): no quoting pitfalls for paths with
    # spaces/quotes; -y avoids ffmpeg blocking on an overwrite prompt.
    command = [
        "ffmpeg", "-y",
        "-i", video_file,
        "-vf", f"subtitles={srt_file}",
        "-c:v", "libx264", "-crf", "23", "-preset", "medium",
        output_video,
    ]
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("ffmpeg process timed out.") from e
    except OSError as e:
        raise RuntimeError(f"Error running ffmpeg: {e}") from e
    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")


def write_word(transcription, output_file, tokenizer=None, translation_model=None,
               target_language=None):
    """Write (optionally translated) segments to a .docx file.

    Persian output is marked right-to-left at the paragraph level.
    """
    doc = Document()
    rtl = target_language == "fa"
    for i, segment in enumerate(transcription["segments"]):
        text = segment["text"]
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        para = doc.add_paragraph(f"{i + 1}. {text.strip()}")
        if rtl:
            para.paragraph_format.right_to_left = True
    doc.save(output_file)


def reverse_text_for_rtl(text):
    """Reverse the characters of each word (legacy naive RTL shaping helper)."""
    return " ".join(word[::-1] for word in text.split())


def _register_pdf_font(font_name, font_path):
    """Register a TrueType font with reportlab, failing loudly if missing."""
    if not os.path.exists(font_path):
        raise FileNotFoundError(
            f"{font_name} font file not found at {font_path}. "
            "Please ensure it is available."
        )
    try:
        pdfmetrics.registerFont(TTFont(font_name, font_path))
    except Exception as e:
        raise RuntimeError(f"Error registering {font_name} font: {e}.") from e


def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
    """Render (optionally translated) segments into a PDF.

    RTL targets (fa/ar) are reshaped with arabic_reshaper + python-bidi and
    drawn right-aligned in B-Nazanin; everything else is drawn in Arial.
    Returns *output_file*.
    """
    c = canvas.Canvas(output_file, pagesize=A4)
    app_dir = os.path.dirname(os.path.abspath(__file__))
    _register_pdf_font("B-Nazanin", os.path.join(app_dir, "B-NAZANIN.TTF"))
    _register_pdf_font("Arial", os.path.join(app_dir, "Arial.ttf"))

    target_language = tokenizer.tgt_lang if translation_model else None
    is_rtl = target_language in ("fa", "ar")

    y_position = A4[1] - 50
    line_height = 20
    for i, segment in enumerate(transcription["segments"]):
        text = segment["text"]
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        line = f"{i + 1}. {text.strip()}"

        # Page-break BEFORE drawing so no line lands below the margin.
        if y_position < 50:
            c.showPage()
            y_position = A4[1] - 50

        if is_rtl:
            # Shape Arabic-script glyphs and apply the bidi algorithm so the
            # text renders connected and right-to-left.
            bidi_text = get_display(arabic_reshaper.reshape(line))
            c.setFont("B-Nazanin", 12)
            c.drawRightString(A4[0] - 50, y_position, bidi_text)
        else:
            c.setFont("Arial", 12)
            c.drawString(50, y_position, line)
        y_position -= line_height
    c.save()
    return output_file


def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write (optionally translated) segments to a .pptx, ~400 chars per slide."""
    ppt = Presentation()
    slide = ppt.slides.add_slide(ppt.slide_layouts[5])
    text_buffer = ""
    max_chars_per_slide = 400
    for i, segment in enumerate(transcription["segments"]):
        text = segment["text"]
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        line = f"{i + 1}. {text.strip()}\n"
        if len(text_buffer) + len(line) > max_chars_per_slide:
            # Flush the current buffer to the current slide, start a new one.
            slide.shapes.title.text = "Transcription"
            textbox = slide.shapes.add_textbox(
                left=0, top=0, width=ppt.slide_width, height=ppt.slide_height
            )
            textbox.text = text_buffer.strip()
            slide = ppt.slides.add_slide(ppt.slide_layouts[5])
            text_buffer = line
        else:
            text_buffer += line
    if text_buffer:
        slide.shapes.title.text = ""
        textbox = slide.shapes.add_textbox(
            left=0, top=0, width=ppt.slide_width, height=ppt.slide_height
        )
        textbox.text = text_buffer.strip()
    ppt.save(output_file)


def download_from_ssyoutube(modified_url):
    """Legacy browser-driven download via an ss-youtube mirror (Selenium).

    NOTE(review): this saves to the browser's default download directory and
    returns only a status message, not a file path -- unusable as input to
    transcription.  Kept for reference; ``download_youtube_video`` uses
    yt_dlp instead.
    """
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
    driver.get(modified_url)
    try:
        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Low quality"))
        ).click()
        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Download"))
        ).click()
        time.sleep(10)  # crude wait for the browser download to finish
        driver.quit()
        return "Video downloaded successfully!"
    except Exception as e:
        driver.quit()
        raise RuntimeError(f"Failed to download video: {e}") from e


def modify_youtube_url(url):
    """Prefix the youtube host with 'ss' to form an ss-youtube mirror URL."""
    youtube_pos = url.find("youtube")
    if youtube_pos == -1:
        raise ValueError("Invalid YouTube URL.")
    return "https://ss" + url[youtube_pos:]


def download_youtube_video(url):
    """Download *url* with yt_dlp and return the local video file path.

    Fix: the previous Selenium flow returned a status string, which the
    caller then treated as a file path.  yt_dlp gives us the real path.
    """
    try:
        ydl_opts = {
            # Prefer an mp4 container so ffmpeg/whisper can read it directly.
            "format": "mp4/best",
            "outtmpl": "downloaded_video.%(ext)s",
            "quiet": True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            return ydl.prepare_filename(info)
    except Exception as e:
        raise RuntimeError(f"Error downloading YouTube video: {e}") from e


def transcribe_video(video_file, video_url, language, target_language, output_format):
    """Run the full pipeline and return the path of the generated file.

    Exactly one of *video_file* / *video_url* must be provided; *video_url*
    wins when both are set.
    """
    if video_url:
        video_file_path = download_youtube_video(video_url)
    else:
        video_file_path = video_file
    if not video_file_path:
        raise ValueError("Provide either a video file or a YouTube URL.")

    # Pass the declared source language as a decoding hint to Whisper.
    transcription = model.transcribe(video_file_path, language=language)

    if target_language != "en":
        tokenizer, translation_model = load_translation_model(target_language)
    else:
        tokenizer, translation_model = None, None

    output_file = None
    if output_format == "SRT":
        output_file = "output.srt"
        write_srt(transcription, output_file, tokenizer, translation_model)
    elif output_format == "Word":
        output_file = "output.docx"
        write_word(transcription, output_file, tokenizer, translation_model,
                   target_language)
    elif output_format == "PDF":
        output_file = "output.pdf"
        write_pdf(transcription, output_file, tokenizer, translation_model)
    elif output_format == "PPT":
        output_file = "output.pptx"
        write_ppt(transcription, output_file, tokenizer, translation_model)
    return output_file


def main():
    """Build and launch the Gradio UI."""
    with gr.Blocks() as app:
        gr.Markdown("# Transcribe, Translate and Format YouTube Video Content")
        video_url_input = gr.Textbox(
            label="YouTube Video URL (or leave blank for video file upload)"
        )
        video_file_input = gr.File(
            label="Upload Video File (leave blank for YouTube URL)"
        )
        language_input = gr.Dropdown(
            choices=["en"], label="Video Language", value="en"
        )
        target_language_input = gr.Dropdown(
            choices=["en", "fa", "es", "fr", "de", "it", "pt"],
            label="Target Language", value="en",
        )
        output_format_input = gr.Dropdown(
            choices=["SRT", "Word", "PDF", "PPT"],
            label="Output Format", value="SRT",
        )
        output_file = gr.File(label="Download Transcription", interactive=False)
        transcribe_button = gr.Button("Transcribe & Translate")

        def transcribe_and_translate(video_file, video_url, language,
                                     target_language, output_format):
            # Gradio hands us a tempfile object for uploads; pass its path.
            return transcribe_video(
                video_file.name if video_file else None,
                video_url, language, target_language, output_format,
            )

        transcribe_button.click(
            transcribe_and_translate,
            inputs=[video_file_input, video_url_input, language_input,
                    target_language_input, output_format_input],
            outputs=output_file,
        )
    app.launch()


if __name__ == "__main__":
    main()