Spaces:

visionaries666
/

younes_edition

Sleeping

App Files Files Community

younes21000 committed on Oct 23, 2024

Commit

32f4242

verified ·

1 Parent(s): c1510f0

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -114

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import whisper
-import os
 from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
 from docx import Document
 from reportlab.pdfgen import canvas
@@ -13,6 +13,13 @@ from pptx import Presentation
 import subprocess
 import shlex
 import yt_dlp
 # Load the Whisper model (smaller model for faster transcription)
 model = whisper.load_model("tiny")
@@ -26,12 +33,6 @@ def load_translation_model(target_language):
         "de": "de",  # German
         "it": "it",  # Italian
         "pt": "pt",  # Portuguese
-        "ar": "ar",  # Arabic
-        "zh": "zh",  # Chinese
-        "hi": "hi",  # Hindi
-        "ja": "ja",  # Japanese
-        "ko": "ko",  # Korean
-        "ru": "ru",  # Russian
     }
     target_lang_code = lang_codes.get(target_language)
     if not target_lang_code:
@@ -62,7 +63,6 @@ def format_timestamp(seconds):
     seconds = seconds % 60
     return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
-# Corrected write_srt function
 def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
     with open(output_file, "w") as f:
         for i, segment in enumerate(transcription['segments']):
@@ -80,7 +80,6 @@ def write_srt(transcription, output_file, tokenizer=None, translation_model=None
             f.write(f"{start_time} --> {end_time}\n")
             f.write(f"{text.strip()}\n\n")
-# Embedding subtitles into video (hardsub)
 def embed_hardsub_in_video(video_file, srt_file, output_video):
     command = f'ffmpeg -i "{video_file}" -vf "subtitles=\'{srt_file}\'" -c:v libx264 -crf 23 -preset medium "{output_video}"'
     try:
@@ -92,7 +91,6 @@ def embed_hardsub_in_video(video_file, srt_file, output_video):
     except Exception as e:
         raise RuntimeError(f"Error running ffmpeg: {e}")
-# Helper function to write Word documents
 def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
     doc = Document()
     rtl = target_language == "fa"
@@ -105,22 +103,15 @@ def write_word(transcription, output_file, tokenizer=None, translation_model=Non
             para.paragraph_format.right_to_left = True
     doc.save(output_file)
-# Helper function to reverse text for RTL
 def reverse_text_for_rtl(text):
     return ' '.join([word[::-1] for word in text.split()])
-# Helper function to write PDF documents
 def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
-    # Create PDF with A4 page size
     c = canvas.Canvas(output_file, pagesize=A4)
-    # Get the directory where app.py is located
     app_dir = os.path.dirname(os.path.abspath(__file__))
-    # Construct the full path to the font files
     nazanin_font_path = os.path.join(app_dir, 'B-NAZANIN.TTF')
     arial_font_path = os.path.join(app_dir, 'Arial.ttf')
-    # Register B-Nazanin font
     if os.path.exists(nazanin_font_path):
         try:
             pdfmetrics.registerFont(TTFont('B-Nazanin', nazanin_font_path))
@@ -129,7 +120,6 @@ def write_pdf(transcription, output_file, tokenizer=None, translation_model=None
     else:
         raise FileNotFoundError(f"B-Nazanin font file not found at {nazanin_font_path}. Please ensure it is available.")
-    # Register Arial font
     if os.path.exists(arial_font_path):
         try:
             pdfmetrics.registerFont(TTFont('Arial', arial_font_path))
@@ -138,170 +128,163 @@ def write_pdf(transcription, output_file, tokenizer=None, translation_model=None
     else:
         raise FileNotFoundError(f"Arial font file not found at {arial_font_path}. Please ensure it is available.")
-    # Initialize y position from top of page
-    y_position = A4[1] - 50  # Start 50 points from top
     line_height = 20
-    # Process each segment
     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
-        # Translate if translation model is provided
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
-        # Format the line with segment number
         line = f"{i + 1}. {text.strip()}"
-        # Determine target language for font and text direction
         target_language = None
         if translation_model:
-            # Assuming target language can be inferred from the tokenizer
             target_language = tokenizer.tgt_lang
-        # Reshape and reorder the text for correct RTL display if necessary
         if target_language in ['fa', 'ar']:
             reshaped_text = arabic_reshaper.reshape(line)
             bidi_text = get_display(reshaped_text)
-            # Set font for RTL languages
             c.setFont('B-Nazanin', 12)
-            # Draw the text right-aligned
-            c.drawRightString(A4[0] - 50, y_position, bidi_text)  # 50 points margin from right
         else:
-            c.setFont('Arial', 12)  # Use Arial for other languages
-            c.drawString(50, y_position, line)  # Left aligned
-        # Add new page if needed
-        if y_position < 50:  # Leave 50 points margin at bottom
             c.showPage()
-            y_position = A4[1] - 50  # Reset y position for new page
-        # Update y position for next line
         y_position -= line_height
-    # Save the PDF
     c.save()
     return output_file
-# Helper function to write PowerPoint slides
 def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
     ppt = Presentation()
-    slide = ppt.slides.add_slide(ppt.slide_layouts[5])  # Create the first slide
-    text_buffer = ""  # Initialize an empty buffer to accumulate text
-    max_chars_per_slide = 400  # Set a character limit for each slide
     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
-        # Translate if translation model is provided
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
-        # Format the line with segment number
         line = f"{i + 1}. {text.strip()}\n"
-        # Check if adding this line exceeds the character limit
         if len(text_buffer) + len(line) > max_chars_per_slide:
-            # If so, add the accumulated text to the current slide
-            slide.shapes.title.text = "Transcription"  # Set the title for the slide
             textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
             textbox.text = text_buffer.strip()
-            # Create a new slide and reset the buffer
             slide = ppt.slides.add_slide(ppt.slide_layouts[5])
-            text_buffer = line  # Start the new slide with the current line
         else:
-            # Otherwise, keep accumulating text
             text_buffer += line
-    # Add any remaining text in the buffer to the last slide
     if text_buffer:
-        slide.shapes.title.text = ""  # Set the title for the last slide
         textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
         textbox.text = text_buffer.strip()
     ppt.save(output_file)
-# Function to download YouTube video
 def download_youtube_video(url):
-    ydl_opts = {
-        'format': 'mp4',
-        'outtmpl': 'downloaded_video.mp4',
-        'nocheckcertificate': True,  # Disable certificate check
-    }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        ydl.download([url])
-    return 'downloaded_video.mp4'
-# Transcribing video and generating output
 def transcribe_video(video_file, video_url, language, target_language, output_format):
     if video_url:
         video_file_path = download_youtube_video(video_url)
     else:
-        video_file_path = video_file.name
-    result = model.transcribe(video_file_path, language=language)
-    video_name = os.path.splitext(video_file_path)[0]
     if target_language != "en":
-        try:
-            tokenizer, translation_model = load_translation_model(target_language)
-        except Exception as e:
-            raise RuntimeError(f"Error loading translation model: {e}")
     else:
         tokenizer, translation_model = None, None
-    srt_file = f"{video_name}.srt"
-    write_srt(result, srt_file, tokenizer, translation_model)
     if output_format == "SRT":
-        return srt_file
-    elif output_format == "Video with Hardsub":
-        output_video = f"{video_name}_with_subtitles.mp4"
-        try:
-            embed_hardsub_in_video(video_file_path, srt_file, output_video)
-            return output_video
-        except Exception as e:
-            raise RuntimeError(f"Error embedding subtitles in video: {e}")
     elif output_format == "Word":
-        word_file = f"{video_name}.docx"
-        write_word(result, word_file, tokenizer, translation_model, target_language)
-        return word_file
     elif output_format == "PDF":
-        pdf_file = f"{video_name}.pdf"
-        write_pdf(result, pdf_file, tokenizer, translation_model)
-        return pdf_file
-    elif output_format == "PowerPoint":
-        ppt_file = f"{video_name}.pptx"
-        write_ppt(result, ppt_file, tokenizer, translation_model)
-        return ppt_file
-# Gradio interface with YouTube URL
-iface = gr.Interface(
-    fn=transcribe_video,
-    inputs=[
-        gr.File(label="Upload Video File (or leave empty for YouTube link)"),  # Removed 'optional=True'
-        gr.Textbox(label="YouTube Video URL (optional)", placeholder="https://www.youtube.com/watch?v=..."),
-        gr.Dropdown(label="Select Original Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
-        gr.Dropdown(label="Select Subtitle Translation Language", choices=["en", "fa", "es", "de", "fr", "it", "pt"], value="fa"),
-        gr.Radio(label="Choose Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub")
-    ],
-    outputs=gr.File(label="Download File"),
-    title="Video Subtitle Generator with Translation & Multi-Format Output (Supports YouTube)",
-    description=(
-        "This tool allows you to generate subtitles from a video file or YouTube link using Whisper, "
-        "translate the subtitles into multiple languages using M2M100, and export them "
-        "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint."
-    ),
-    theme="compact",
-    live=False
-)
-if __name__ == "__main__":
-    iface.launch()

+import os
 import gradio as gr
 import whisper
 from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
 from docx import Document
 from reportlab.pdfgen import canvas
 import subprocess
 import shlex
 import yt_dlp
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service as ChromeService
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
 # Load the Whisper model (smaller model for faster transcription)
 model = whisper.load_model("tiny")
         "de": "de",  # German
         "it": "it",  # Italian
         "pt": "pt",  # Portuguese
     }
     target_lang_code = lang_codes.get(target_language)
     if not target_lang_code:
     seconds = seconds % 60
     return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
 def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
     with open(output_file, "w") as f:
         for i, segment in enumerate(transcription['segments']):
             f.write(f"{start_time} --> {end_time}\n")
             f.write(f"{text.strip()}\n\n")
 def embed_hardsub_in_video(video_file, srt_file, output_video):
     command = f'ffmpeg -i "{video_file}" -vf "subtitles=\'{srt_file}\'" -c:v libx264 -crf 23 -preset medium "{output_video}"'
     try:
     except Exception as e:
         raise RuntimeError(f"Error running ffmpeg: {e}")
 def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
     doc = Document()
     rtl = target_language == "fa"
             para.paragraph_format.right_to_left = True
     doc.save(output_file)
 def reverse_text_for_rtl(text):
     """Reverse the characters of every whitespace-separated word in *text*.

     Word order is kept. The text is split on whitespace and re-joined with a
     single space, so leading/trailing/repeated whitespace is not preserved.
     """
     flipped_words = []
     for token in text.split():
         flipped_words.append("".join(reversed(token)))
     return ' '.join(flipped_words)
 def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
     c = canvas.Canvas(output_file, pagesize=A4)
     app_dir = os.path.dirname(os.path.abspath(__file__))
     nazanin_font_path = os.path.join(app_dir, 'B-NAZANIN.TTF')
     arial_font_path = os.path.join(app_dir, 'Arial.ttf')
     if os.path.exists(nazanin_font_path):
         try:
             pdfmetrics.registerFont(TTFont('B-Nazanin', nazanin_font_path))
     else:
         raise FileNotFoundError(f"B-Nazanin font file not found at {nazanin_font_path}. Please ensure it is available.")
     if os.path.exists(arial_font_path):
         try:
             pdfmetrics.registerFont(TTFont('Arial', arial_font_path))
     else:
         raise FileNotFoundError(f"Arial font file not found at {arial_font_path}. Please ensure it is available.")
+    y_position = A4[1] - 50
     line_height = 20
     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
         line = f"{i + 1}. {text.strip()}"
         target_language = None
         if translation_model:
             target_language = tokenizer.tgt_lang
         if target_language in ['fa', 'ar']:
             reshaped_text = arabic_reshaper.reshape(line)
             bidi_text = get_display(reshaped_text)
             c.setFont('B-Nazanin', 12)
+            c.drawRightString(A4[0] - 50, y_position, bidi_text)
         else:
+            c.setFont('Arial', 12)
+            c.drawString(50, y_position, line)
+        if y_position < 50:
             c.showPage()
+            y_position = A4[1] - 50
         y_position -= line_height
     c.save()
     return output_file
 def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
     """Write the transcription to a PowerPoint file, paginated by character count.

     Each segment becomes a numbered line ("1. text"). Lines are accumulated
     until adding the next one would push the slide past 400 characters, at
     which point the accumulated text is placed on the current slide and a new
     slide is started.

     Args:
         transcription: Mapping with a 'segments' list; each segment is read
             through its 'text' key.
         output_file: Path the presentation is saved to.
         tokenizer: Passed to ``translate_text`` when ``translation_model`` is set.
         translation_model: When truthy, every segment is translated first.
     """
     ppt = Presentation()
     slide = ppt.slides.add_slide(ppt.slide_layouts[5])
     text_buffer = ""
     max_chars_per_slide = 400

     def fill_slide(target, title, body):
         # Title plus a single textbox spanning the whole slide.
         target.shapes.title.text = title
         textbox = target.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
         textbox.text = body.strip()

     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
         line = f"{i + 1}. {text.strip()}\n"
         # Flush only a non-empty buffer: a single line longer than the limit
         # used to flush an empty buffer and leave a blank slide behind.
         if text_buffer and len(text_buffer) + len(line) > max_chars_per_slide:
             fill_slide(slide, "Transcription", text_buffer)
             slide = ppt.slides.add_slide(ppt.slide_layouts[5])
             text_buffer = line
         else:
             text_buffer += line
     if text_buffer:
         # NOTE(review): the last slide gets an empty title while earlier slides
         # get "Transcription"; kept as-is, confirm whether that is intended.
         fill_slide(slide, "", text_buffer)
     ppt.save(output_file)
# Download a YouTube video through the ssyoutube page, driven by Selenium.
def download_from_ssyoutube(modified_url):
    """Drive Chrome through the ssyoutube page and return the downloaded file path.

    The browser is pointed at a fresh temporary download directory so the
    finished file can be located afterwards. The function clicks the
    "Low quality" link, then the "Download" link, and polls the directory
    until a file without an in-progress suffix appears.

    Args:
        modified_url: The ssyoutube URL to open.

    Returns:
        Path of the downloaded file inside the temporary download directory.

    Raises:
        RuntimeError: If the page interaction fails or no finished download
            shows up before the timeout.
    """
    import tempfile

    download_timeout_seconds = 300
    download_dir = tempfile.mkdtemp(prefix="ssyoutube_")

    options = webdriver.ChromeOptions()
    options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": download_dir,
            "download.prompt_for_download": False,
        },
    )
    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager().install()),
        options=options,
    )
    try:
        driver.get(modified_url)
        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Low quality"))
        ).click()
        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Download"))
        ).click()

        # Poll instead of a fixed sleep: Chrome keeps a temporary suffix on the
        # file while the download is still in progress.
        deadline = time.monotonic() + download_timeout_seconds
        while time.monotonic() < deadline:
            finished = [
                name
                for name in os.listdir(download_dir)
                if not name.endswith((".crdownload", ".tmp"))
            ]
            if finished:
                return os.path.join(download_dir, finished[0])
            time.sleep(1)
        raise TimeoutError(
            f"no completed download after {download_timeout_seconds} seconds"
        )
    except Exception as e:
        raise RuntimeError(f"Failed to download video: {e}") from e
    finally:
        # Always release the browser, on success and on failure alike.
        driver.quit()
def modify_youtube_url(url):
    """Rewrite a YouTube URL so that it points at the matching ssyoutube URL.

    Everything before the first occurrence of "youtube" is replaced by
    "https://ss"; the rest of the URL is kept unchanged.

    Raises:
        ValueError: If "youtube" does not occur in *url*.
    """
    _, marker, tail = url.partition("youtube")
    if not marker:
        raise ValueError("Invalid YouTube URL.")
    return f"https://ss{marker}{tail}"
 def download_youtube_video(url):
     """Download the video behind a YouTube URL via the ssyoutube flow.

     Rewrites *url* with ``modify_youtube_url`` and hands the result to
     ``download_from_ssyoutube``, returning whatever that call returns.

     Raises:
         RuntimeError: If rewriting the URL or the download itself fails; the
             original exception is attached as the cause.
     """
     try:
         modified_url = modify_youtube_url(url)
         return download_from_ssyoutube(modified_url)
     except Exception as e:
         # Chain the cause so the underlying traceback is not lost.
         raise RuntimeError(f"Error downloading YouTube video: {e}") from e
 def transcribe_video(video_file, video_url, language, target_language, output_format):
     """Transcribe a video, optionally translate it, and write the chosen output file.

     Args:
         video_file: Path of an uploaded video; used only when ``video_url`` is empty.
         video_url: YouTube URL; when truthy the video is fetched with
             ``download_youtube_video`` and ``video_file`` is ignored.
         language: Source language passed to the Whisper model; an empty value
             is passed as ``None``.
         target_language: Language code for the output; anything other than
             "en" loads a translation model via ``load_translation_model``.
         output_format: One of "SRT", "Word", "PDF" or "PPT".

     Returns:
         The name of the written file, or ``None`` when ``output_format`` is
         not one of the supported values.

     Raises:
         ValueError: If neither a URL nor a video file was supplied.
     """
     if video_url:
         video_file_path = download_youtube_video(video_url)
     else:
         video_file_path = video_file
     if not video_file_path:
         # Fail early with a clear message instead of an opaque error from Whisper.
         raise ValueError("Provide either a YouTube URL or a video file.")

     # The selected source language was previously accepted but never used.
     transcription = model.transcribe(video_file_path, language=language or None)

     if target_language != "en":
         tokenizer, translation_model = load_translation_model(target_language)
     else:
         tokenizer, translation_model = None, None

     output_file = None
     if output_format == "SRT":
         output_file = "output.srt"
         write_srt(transcription, output_file, tokenizer, translation_model)
     elif output_format == "Word":
         output_file = "output.docx"
         write_word(transcription, output_file, tokenizer, translation_model, target_language)
     elif output_format == "PDF":
         output_file = "output.pdf"
         write_pdf(transcription, output_file, tokenizer, translation_model)
     elif output_format == "PPT":
         output_file = "output.pptx"
         write_ppt(transcription, output_file, tokenizer, translation_model)
     return output_file
def main():
    """Build the Gradio Blocks interface, wire up the button, and launch it."""
    with gr.Blocks() as app:
        gr.Markdown("# Transcribe, Translate and Format YouTube Video Content")

        # Input widgets, created in the order they appear on the page.
        url_box = gr.Textbox(label="YouTube Video URL (or leave blank for video file upload)")
        upload_box = gr.File(label="Upload Video File (leave blank for YouTube URL)")
        source_lang_box = gr.Dropdown(label="Video Language", value="en", choices=["en"])
        target_lang_box = gr.Dropdown(
            label="Target Language",
            value="en",
            choices=["en", "fa", "es", "fr", "de", "it", "pt"],
        )
        format_box = gr.Dropdown(
            label="Output Format",
            value="SRT",
            choices=["SRT", "Word", "PDF", "PPT"],
        )

        # Output widget and the trigger button.
        result_box = gr.File(label="Download Transcription", interactive=False)
        run_button = gr.Button("Transcribe & Translate")

        def transcribe_and_translate(video_file, video_url, language, target_language, output_format):
            # Hand transcribe_video the upload's path, or None when nothing was uploaded.
            if video_file:
                upload_path = video_file.name
            else:
                upload_path = None
            return transcribe_video(upload_path, video_url, language, target_language, output_format)

        run_button.click(
            fn=transcribe_and_translate,
            inputs=[upload_box, url_box, source_lang_box, target_lang_box, format_box],
            outputs=result_box,
        )
    app.launch()


if __name__ == "__main__":
    main()