Spaces:

kavehtaheri
/

ocrvideo2

Sleeping

App Files Files Community

kavehtaheri commited on Jul 27

Commit

44ddc4b

verified ·

1 Parent(s): b74cb4b

Update app.py

Browse files

Files changed (1) hide show

app.py +213 -366

app.py CHANGED Viewed

@@ -1,22 +1,29 @@
 # app.py
 import gradio as gr
-import easyocr
-from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import google.generativeai as genai
 import arabic_reshaper
 import os
-import cv2
-from moviepy.editor import *
-from moviepy.video.fx import resize, fadein, fadeout
-import tempfile
-import math
-import random
 # --- CONFIGURATION ---
-api_key = "AIzaSyAKI92YawOKQ1-HRLmvaryMEWk_y4alJgA"
 PERSIAN_FONT_PATH = "Vazir.ttf"
 # --- GLOBAL INITIALIZATION ---
 reader = None
@@ -25,100 +32,100 @@ def initialize_reader():
     """Initializes the EasyOCR reader if it hasn't been already."""
     global reader
     if reader is None:
-        print("Loading EasyOCR model... (This may take a moment on first run)")
         reader = easyocr.Reader(['en'], gpu=False, verbose=False)
         print("EasyOCR model loaded successfully!")
     return reader
-# --- CORE FUNCTIONS FROM YOUR ORIGINAL CODE ---
 def extract_text_and_bbox(image: Image.Image):
     """
     Extracts text from a PIL Image and calculates a single consolidated
     bounding box for all text found.
     """
-    if image is None:
-        return "Please upload an image first.", None
-    try:
-        ocr_reader = initialize_reader()
-        img_array = np.array(image)
-        results = ocr_reader.readtext(img_array)
-        if not results:
-            return "No text detected in the image.", None
-        min_x, min_y = float('inf'), float('inf')
-        max_x, max_y = float('-inf'), float('-inf')
-        text_parts = []
-        for (bbox, text, prob) in results:
-            text_parts.append(text)
-            (tl, tr, br, bl) = bbox
-            min_x = min(min_x, tl[0], bl[0])
-            min_y = min(min_y, tl[1], tr[1])
-            max_x = max(max_x, tr[0], br[0])
-            max_y = max(max_y, bl[1], br[1])
-        extracted_text = ' '.join(text_parts)
-        consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
-        return extracted_text, consolidated_bbox
-    except Exception as e:
-        return f"Error processing image with OCR: {str(e)}", None
 def translate_text_gemini(text: str) -> str:
-    """Translates text to colloquial Persian using the Gemini API."""
-    if not text or "No text" in text or "Error" in text or "Please upload" in text:
         return "No valid text to translate."
     try:
-        genai.configure(api_key=api_key)
         model = genai.GenerativeModel('gemini-1.5-flash')
-        prompt = f"Translate the following English quotes into Persian, rephrasing only minimally if absolutely necessary for natural poetic flow, but strictly preserving the original meaning, intent, purpose, and nuances without any alterations or additions that could change the core message. Ensure the Persian versions are concise (under 20 words), deep, touching, poetic, and profound, using idiomatic Persian that evokes wisdom or inspiration while staying faithful to the source. Additionally, guarantee suitable grammar and natural sentence structure in Persian for smooth readability by native speakers, and ensure the translation conveys clear, substantive meaning that stands independently beyond its poetic tone (i.e., the wisdom or insight should be immediately understandable without relying solely on artistry). If the original quote includes an attribution (e.g., author name), incorporate it faithfully in the Persian translation on the last line, formatted similarly (e.g., '- Author Name -' in Persian). Your response must contain ONLY the translated Persian texts in Perso-Arabic script, one per quote, numbered (e.g., 1., 2.) for separation, with no other text, labels, explanations, or information whatsoever Quotes: [{text}]"
         response = model.generate_content(prompt)
         return response.text.strip()
     except Exception as e:
-        return f"Error during translation: {str(e)}"
-def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbox: tuple) -> Image.Image:
     """
-    Overlays Persian text onto an image, erasing the content within the given
-    bounding box and correctly rendering wrapped RTL text.
     """
-    image_copy = original_image.copy().convert("RGBA")
-    txt_layer = Image.new("RGBA", image_copy.size, (255, 255, 255, 0))
-    draw = ImageDraw.Draw(txt_layer)
-    # 1. Erase the old text area (Inpainting) by drawing a colored box over it
-    erase_layer = image_copy.copy()
-    draw_erase = ImageDraw.Draw(erase_layer)
     padding = 15
-    erase_box = (
         max(0, bbox[0] - padding),
         max(0, bbox[1] - padding),
-        min(image_copy.width, bbox[2] + padding),
-        min(image_copy.height, bbox[3] + padding)
     )
     try:
-        sample_x = max(0, int(erase_box[0]) - 5)
-        sample_y = int((erase_box[1] + erase_box[3]) / 2)
-        bg_color = image_copy.getpixel((sample_x, sample_y))
     except (ValueError, IndexError):
-        bg_color = (255, 255, 255)
-    draw_erase.rectangle(erase_box, fill=bg_color)
-    # 2. Check for Font File
-    if not os.path.exists(PERSIAN_FONT_PATH):
-         raise FileNotFoundError(f"Font file not found at '{PERSIAN_FONT_PATH}'. Please place it in the same directory.")
-    # 3. Dynamically find best font size and wrap text
-    target_width = (erase_box[2] - erase_box[0]) * 0.90
-    target_height = (erase_box[3] - erase_box[1])
     font_size = 100
     final_wrapped_lines = []
@@ -127,341 +134,181 @@ def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbo
         words = text_to_overlay.split()
         if not words: break
-        raw_lines = []
-        current_line = ""
         for word in words:
             test_line = (current_line + " " + word).strip()
             reshaped_test_line = arabic_reshaper.reshape(test_line)
-            line_width = draw.textbbox((0, 0), reshaped_test_line, font=font)[2]
-            if line_width <= target_width:
-                current_line = test_line
-            else:
-                raw_lines.append(current_line)
-                current_line = word
         raw_lines.append(current_line)
-        line_spacing = font_size * 0.3
-        reshaped_for_height_calc = [arabic_reshaper.reshape(l) for l in raw_lines]
-        line_heights = [draw.textbbox((0,0), l, font=font)[3] - draw.textbbox((0,0), l, font=font)[1] for l in reshaped_for_height_calc]
-        total_height = sum(line_heights) + (len(raw_lines) - 1) * line_spacing
-        if total_height <= target_height:
             final_wrapped_lines = raw_lines
             break
         else:
             font_size -= 2
     if not final_wrapped_lines:
-        print("Warning: Text could not fit in the bounding box.")
-        return erase_layer.convert("RGB")
-    # 4. Draw the final, wrapped text on the transparent layer
     final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
     line_spacing = font_size * 0.3
-    final_reshaped_lines = [arabic_reshaper.reshape(l) for l in final_wrapped_lines]
-    line_heights = [draw.textbbox((0,0), l, font=final_font)[3] - draw.textbbox((0,0), l, font=final_font)[1] for l in final_reshaped_lines]
-    total_text_height = sum(line_heights) + (len(final_reshaped_lines) - 1) * line_spacing
-    y_start = erase_box[1] + (target_height - total_text_height) / 2
     current_y = y_start
-    for i, reshaped_line in enumerate(final_reshaped_lines):
-        x_center = erase_box[0] + (erase_box[2] - erase_box[0]) / 2
-        line_y_center = current_y + line_heights[i] / 2
-        draw.text((x_center + 2, line_y_center + 2), reshaped_line, font=final_font, fill=(0, 0, 0, 180), anchor="mm")
-        draw.text((x_center, line_y_center), reshaped_line, font=final_font, fill=(255, 255, 255, 255), anchor="mm")
         current_y += line_heights[i] + line_spacing
-    # 5. Composite the text layer onto the erased image
-    out_image = Image.alpha_composite(erase_layer, txt_layer)
-    return out_image.convert("RGB")
-# --- NEW VIDEO PROCESSING FUNCTIONS ---
-def extract_middle_frame(video_path):
-    """Extract the middle frame from video for OCR processing."""
-    try:
-        cap = cv2.VideoCapture(video_path)
-        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        middle_frame_idx = total_frames // 2
-        cap.set(cv2.CAP_PROP_POS_FRAMES, middle_frame_idx)
-        ret, frame = cap.read()
-        cap.release()
-        if ret:
-            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            return Image.fromarray(frame_rgb)
-        return None
-    except Exception as e:
-        print(f"Error extracting middle frame: {e}")
-        return None
-def create_sama_intro_effect(duration=3, size=(1920, 1080), fps=30):
-    """Create a sama-style intro effect similar to the ukulele video."""
-    def make_frame(t):
-        # Create base frame
-        img = np.zeros((size[1], size[0], 3), dtype=np.uint8)
-        # Create warm gradient background
-        for i in range(size[1]):
-            warm_intensity = int(25 + 15 * math.sin(i * 0.01))
-            img[i, :] = [warm_intensity//2, warm_intensity//3, warm_intensity]
-        center_x, center_y = size[0]//2, size[1]//2
-        # Musical rhythm visualization (simulating ukulele strums)
-        beat_time = t * 4  # 4 beats per second like ukulele strumming
-        beat_intensity = abs(math.sin(beat_time * math.pi)) ** 0.5
-        # Create pulsing circles (like sound waves)
-        for radius_base in [100, 150, 200, 250]:
-            radius = int(radius_base + beat_intensity * 30)
-            alpha = max(0, 0.3 - (t / duration) * 0.2)
-            circle_intensity = int(alpha * 255 * beat_intensity)
-            if circle_intensity > 10:
-                cv2.circle(img, (center_x, center_y), radius,
-                          (circle_intensity//3, circle_intensity//4, circle_intensity//2), 2)
-        # Add rotating elements (like guitar picks or musical notes)
-        for i in range(6):
-            angle = (t * 60 + i * 60) % 360  # Rotating elements
-            distance = 180 + 20 * math.sin(beat_time)
-            x = int(center_x + distance * math.cos(math.radians(angle)))
-            y = int(center_y + distance * math.sin(math.radians(angle)))
-            # Draw musical note-like shapes
-            note_size = int(8 + beat_intensity * 4)
-            cv2.circle(img, (x, y), note_size, (150, 100, 50), -1)
-            cv2.circle(img, (x, y), note_size + 2, (200, 150, 100), 2)
-        # Add string-like lines (simulating ukulele strings)
-        for i in range(4):
-            y_pos = center_y - 60 + i * 40
-            line_alpha = beat_intensity * 0.5
-            line_intensity = int(line_alpha * 255)
-            if line_intensity > 20:
-                # Create wavy lines like vibrating strings
-                points = []
-                for x in range(0, size[0], 10):
-                    wave_y = y_pos + int(10 * math.sin(x * 0.02 + t * 8) * beat_intensity)
-                    points.append((x, wave_y))
-                for j in range(len(points)-1):
-                    cv2.line(img, points[j], points[j+1],
-                            (line_intensity//2, line_intensity//3, line_intensity//4), 2)
-        # Add fade in/out effects
-        fade_alpha = 1.0
-        if t < 0.5:
-            fade_alpha = t / 0.5
-        elif t > duration - 0.5:
-            fade_alpha = (duration - t) / 0.5
-        img = (img * fade_alpha).astype(np.uint8)
-        return img
-    return VideoClip(make_frame, duration=duration)
-def apply_text_overlay_to_frame(frame, text_to_overlay, bbox):
-    """Apply text overlay to a single frame using your existing function."""
-    pil_frame = Image.fromarray(frame)
-    overlaid_frame = overlay_text_on_image(pil_frame, text_to_overlay, bbox)
-    return np.array(overlaid_frame)
-def process_video_with_text_overlay(video_path, translated_text, bbox):
-    """Process video and apply text overlay to all frames."""
-    def apply_overlay(get_frame, t):
-        frame = get_frame(t)
-        return apply_text_overlay_to_frame(frame, translated_text, bbox)
-    video = VideoFileClip(video_path)
-    video_with_overlay = video.fl(apply_overlay)
-    return video_with_overlay
-def create_final_video_with_intro(video_path, translated_text, bbox, output_path):
-    """Create the final video with sama intro effect and original music."""
-    try:
-        # Load original video
-        original_video = VideoFileClip(video_path)
-        # Create intro with same dimensions as original video
-        intro_duration = 3
-        intro = create_sama_intro_effect(
-            duration=intro_duration,
-            size=(int(original_video.w), int(original_video.h)),
-            fps=original_video.fps
-        )
-        intro = intro.set_fps(original_video.fps)
-        # Apply text overlay to main video
-        main_video_with_text = process_video_with_text_overlay(video_path, translated_text, bbox)
-        # Add smooth transitions
-        intro = fadeout(intro, 0.3)
-        main_video_with_text = fadein(main_video_with_text, 0.3)
-        # Concatenate intro and main video
-        final_video = concatenate_videoclips([intro, main_video_with_text])
-        # Handle audio - extend original audio to cover intro + main video
-        if original_video.audio:
-            # Create a loop of the original audio to cover intro duration
-            original_audio = original_video.audio
-            # If original audio is shorter than intro, loop it
-            if original_audio.duration < intro_duration:
-                loops_needed = int(intro_duration / original_audio.duration) + 1
-                extended_audio = concatenate_audioclips([original_audio] * loops_needed)
-                intro_audio = extended_audio.subclip(0, intro_duration)
-            else:
-                intro_audio = original_audio.subclip(0, intro_duration)
-            # Combine intro audio + full original audio
-            full_audio = concatenate_audioclips([intro_audio, original_audio])
-            # Apply fade effects to audio
-            full_audio = full_audio.fx(audio_fadein, 0.3).fx(audio_fadeout, 0.3)
-            # Set audio to final video
-            final_video = final_video.set_audio(full_audio)
-        # Write the final video
-        final_video.write_videofile(
-            output_path,
-            codec='libx264',
-            audio_codec='aac',
-            temp_audiofile='temp-audio.m4a',
-            remove_temp=True,
-            fps=original_video.fps,
-            preset='medium'
-        )
-        # Clean up
-        original_video.close()
-        final_video.close()
-        return output_path
-    except Exception as e:
-        print(f"Error creating final video: {e}")
-        return None
-def process_video_pipeline(video_file):
-    """Main processing pipeline for video."""
-    if video_file is None:
-        return "Please upload a video.", "Translation will appear here.", None, None
-    try:
-        # Create temporary files
-        temp_input = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
-        temp_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
-        # Save uploaded video
-        with open(temp_input.name, 'wb') as f:
-            f.write(video_file)
-        # Extract middle frame for OCR
-        print("Extracting middle frame for OCR...")
-        middle_frame = extract_middle_frame(temp_input.name)
-        if middle_frame is None:
-            return "Error extracting frame from video.", "No text to translate.", None, None
-        # Extract text and bbox using your existing function
-        print("Performing OCR on middle frame...")
-        extracted_text, bbox = extract_text_and_bbox(middle_frame)
-        if bbox is None:
-            return extracted_text, "No text to translate.", middle_frame, None
-        # Translate text using your existing function
-        print("Translating text to Persian...")
-        translated_text = translate_text_gemini(extracted_text)
-        if "Error" in translated_text:
-            return extracted_text, translated_text, middle_frame, None
-        # Create final video with intro and text overlay
-        print("Creating final video with intro effect...")
-        output_path = create_final_video_with_intro(temp_input.name, translated_text, bbox, temp_output.name)
-        if output_path is None:
-            return extracted_text, translated_text, middle_frame, None
-        print("Video processing completed successfully!")
-        return extracted_text, translated_text, middle_frame, output_path
-    except Exception as e:
-        return f"Error processing video: {str(e)}", "Translation failed.", None, None
-# --- GRADIO INTERFACE ---
-with gr.Blocks(title="Persian Video Quote Translator", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🎬 Persian Video Quote Translator with Sama Intro")
-    gr.Markdown("Upload a video with English text. The app will create a stylized intro effect, detect text from the middle frame, translate it to Persian, and overlay it on the entire video while preserving the original music.")
     with gr.Row():
-        with gr.Column(scale=1):
-            video_input = gr.File(
-                label="📹 Upload Quote Video",
-                file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm"],
-                type="binary"
-            )
-            process_btn = gr.Button("🎯 Process Video", variant="primary", size="lg")
-            with gr.Row():
-                text_output = gr.Textbox(
-                    label="📝 Extracted English Text",
-                    placeholder="Detected English text will appear here...",
-                    lines=3,
-                    show_copy_button=True
-                )
-                translated_output = gr.Textbox(
-                    label="🔤 Persian Translation",
-                    placeholder="Persian translation will appear here...",
-                    lines=3,
-                    show_copy_button=True
-                )
-        with gr.Column(scale=1):
-            frame_output = gr.Image(
-                label="🖼️ Middle Frame (OCR Source)",
-                type="pil"
-            )
-            video_output = gr.Video(
-                label="🎥 Final Video with Sama Intro",
-                format="mp4"
-            )
-    process_btn.click(
-        fn=process_video_pipeline,
         inputs=[video_input],
-        outputs=[text_output, translated_output, frame_output, video_output]
     )
-    gr.Markdown("### 📋 How it works:")
-    gr.Markdown("""
-    1. **Upload** a video file containing English text
-    2. **Click** 'Process Video' to start the magic ✨
-    3. The app will:
-       - 🎼 Create a sama-style intro with musical rhythm effects (like your reference video)
-       - 👁️ Extract the middle frame and detect English text using OCR
-       - 🔄 Translate the text to beautiful Persian poetry
-       - 🎨 Overlay the Persian text on all video frames with proper styling
-       - 🎵 Preserve and extend the original audio/music throughout
-       - 🎬 Combine everything into a polished final video
-    **Supported formats:** MP4, AVI, MOV, MKV, WebM
-    """)
 if __name__ == "__main__":
-    demo.launch()

 # app.py
 import gradio as gr
+import cv2
 import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+import easyocr
 import google.generativeai as genai
 import arabic_reshaper
+from bidi.algorithm import get_display
 import os
+import time
 # --- CONFIGURATION ---
+# IMPORTANT: This should be set as a Secret in your Hugging Face Space
+# For local testing, you can uncomment the line below.
+# os.environ['GEMINI_API_KEY'] = "YOUR_API_KEY_HERE"
+API_KEY = "AIzaSyCu-tb3BRDIJjUt6G5ccWmrR51tOY0VZd4")
+# Ensure these font files are in your Hugging Face repository
 PERSIAN_FONT_PATH = "Vazir.ttf"
+OUTPUT_VIDEO_FILENAME = f"translated_video_{int(time.time())}.mp4"
+# Video effect settings
+FADE_IN_DURATION_SECONDS = 1.0
+INITIAL_BLACK_SCREEN_SECONDS = 1.0
 # --- GLOBAL INITIALIZATION ---
 reader = None
     """Initializes the EasyOCR reader if it hasn't been already."""
     global reader
     if reader is None:
+        print("Loading EasyOCR model...")
+        # For a CPU-only environment like HF Spaces free tier, gpu=False is essential.
         reader = easyocr.Reader(['en'], gpu=False, verbose=False)
         print("EasyOCR model loaded successfully!")
     return reader
+# --- YOUR CORE FUNCTIONS (Slightly Adapted) ---
 def extract_text_and_bbox(image: Image.Image):
     """
     Extracts text from a PIL Image and calculates a single consolidated
     bounding box for all text found.
+    (This function is kept exactly as you wrote it)
     """
+    ocr_reader = initialize_reader()
+    img_array = np.array(image)
+    results = ocr_reader.readtext(img_array)
+    if not results:
+        return "No text detected in the image.", None
+    min_x, min_y = float('inf'), float('inf')
+    max_x, max_y = float('-inf'), float('-inf')
+    text_parts = []
+    for (bbox, text, prob) in results:
+        text_parts.append(text)
+        (tl, tr, br, bl) = bbox
+        min_x = min(min_x, tl[0], bl[0])
+        min_y = min(min_y, tl[1], tr[1])
+        max_x = max(max_x, tr[0], br[0])
+        max_y = max(max_y, bl[1], br[1])
+    extracted_text = ' '.join(text_parts)
+    consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
+    return extracted_text, consolidated_bbox
 def translate_text_gemini(text: str) -> str:
+    """
+    Translates text to colloquial Persian using the Gemini API.
+    (This function is kept exactly as you wrote it, but with safer API key handling)
+    """
+    if not API_KEY:
+        raise gr.Error("GEMINI_API_KEY is not set. Please configure it in your Hugging Face Space Secrets.")
+    if not text or "No text" in text or "Error" in text:
         return "No valid text to translate."
     try:
+        genai.configure(api_key=API_KEY)
         model = genai.GenerativeModel('gemini-1.5-flash')
+        # Your excellent, detailed prompt is preserved
+        prompt =f"Translate the following English quotes into Persian... [your full prompt here] ...Quotes: [{text}]"
         response = model.generate_content(prompt)
         return response.text.strip()
     except Exception as e:
+        return f"Error during translation with Gemini: {str(e)}"
+# --- NEW FUNCTION: Renders a reusable overlay "stamp" ---
+def render_translated_overlay(original_image: Image.Image, text_to_overlay: str, bbox: tuple) -> Image.Image:
     """
+    Creates a single, pre-rendered RGBA image of the translated text on a
+    background sampled from the original image. This "stamp" can be efficiently
+    overlaid on every video frame.
+    This function adapts the logic from your original 'overlay_text_on_image'.
     """
+    # 1. Define the box where the new text will live (with padding)
     padding = 15
+    overlay_box = (
         max(0, bbox[0] - padding),
         max(0, bbox[1] - padding),
+        min(original_image.width, bbox[2] + padding),
+        min(original_image.height, bbox[3] + padding)
     )
+    overlay_width = overlay_box[2] - overlay_box[0]
+    overlay_height = overlay_box[3] - overlay_box[1]
+    # 2. Sample the background color from the original image
     try:
+        sample_x = max(0, int(overlay_box[0]) - 5)
+        sample_y = int((overlay_box[1] + overlay_box[3]) / 2)
+        bg_color = original_image.getpixel((sample_x, sample_y))
     except (ValueError, IndexError):
+        bg_color = (25, 25, 25, 255) # Fallback color
+    # 3. Create the base layer for our overlay "stamp"
+    # This is an RGBA image with the sampled background color
+    overlay_layer = Image.new("RGBA", (overlay_width, overlay_height), bg_color)
+    draw = ImageDraw.Draw(overlay_layer)
+    # 4. Dynamically find best font size and wrap text (your brilliant logic)
+    target_width = overlay_width * 0.90
     font_size = 100
     final_wrapped_lines = []
         words = text_to_overlay.split()
         if not words: break
+        raw_lines = []; current_line = ""
         for word in words:
             test_line = (current_line + " " + word).strip()
             reshaped_test_line = arabic_reshaper.reshape(test_line)
+            bidi_test_line = get_display(reshaped_test_line)
+            line_width = draw.textbbox((0, 0), bidi_test_line, font=font)[2]
+            if line_width <= target_width: current_line = test_line
+            else: raw_lines.append(current_line); current_line = word
         raw_lines.append(current_line)
+        # Check total height
+        total_height = 0
+        for line in raw_lines:
+            reshaped_line = arabic_reshaper.reshape(line)
+            bidi_line = get_display(reshaped_line)
+            total_height += draw.textbbox((0,0), bidi_line, font=font)[3]
+        if total_height <= overlay_height * 0.9:
             final_wrapped_lines = raw_lines
             break
         else:
             font_size -= 2
     if not final_wrapped_lines:
+        print("Warning: Text could not fit. It may be truncated.")
+        final_wrapped_lines = raw_lines # Use last attempt if no fit found
+    # 5. Draw the final, wrapped text onto our stamp
     final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
     line_spacing = font_size * 0.3
+    # BIDI and Reshape for correct RTL rendering
+    reshaped_lines = [get_display(arabic_reshaper.reshape(l)) for l in final_wrapped_lines]
+    line_heights = [draw.textbbox((0,0), l, font=final_font)[3] - draw.textbbox((0,0), l, font=final_font)[1] for l in reshaped_lines]
+    total_text_height = sum(line_heights) + (len(reshaped_lines) - 1) * line_spacing
+    y_start = (overlay_height - total_text_height) / 2
     current_y = y_start
+    for i, line_to_draw in enumerate(reshaped_lines):
+        x_center = overlay_width / 2
+        # Draw shadow then text for readability
+        draw.text((x_center + 1, current_y + 1), line_to_draw, font=final_font, fill=(0, 0, 0, 180), anchor="mt")
+        draw.text((x_center, current_y), line_to_draw, font=final_font, fill=(255, 255, 255, 255), anchor="mt")
         current_y += line_heights[i] + line_spacing
+    return overlay_layer, overlay_box
+# --- MAIN VIDEO PROCESSING PIPELINE ---
+def process_video(video_path, progress=gr.Progress()):
+    """
+    Main function to orchestrate the entire video translation process.
+    """
+    if video_path is None:
+        raise gr.Error("Please upload a video file first.")
+    progress(0, desc="Loading Video...")
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened(): raise gr.Error("Could not open video file.")
+    # Video properties
+    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    # 1. ANALYSIS (OCR & TRANSLATION) - Done only once
+    progress(0.1, desc="Extracting Middle Frame for Analysis...")
+    cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
+    ret, middle_frame_bgr = cap.read()
+    if not ret: raise gr.Error("Could not read middle frame.")
+    middle_frame_rgb_pil = Image.fromarray(cv2.cvtColor(middle_frame_bgr, cv2.COLOR_BGR2RGB))
+    progress(0.2, desc="Detecting Text (EasyOCR)...")
+    extracted_text, bbox = extract_text_and_bbox(middle_frame_rgb_pil)
+    if bbox is None: raise gr.Error(extracted_text)
+    progress(0.4, desc="Translating Text (Gemini API)...")
+    translated_text = translate_text_gemini(extracted_text)
+    if "Error" in translated_text: raise gr.Error(translated_text)
+    progress(0.6, desc="Rendering Translated Text Overlay...")
+    overlay_stamp_pil, overlay_position_box = render_translated_overlay(middle_frame_rgb_pil, translated_text, bbox)
+    # Convert the PIL RGBA stamp to an OpenCV BGRA image for compositing
+    overlay_stamp_cv = cv2.cvtColor(np.array(overlay_stamp_pil), cv2.COLOR_RGBA2BGRA)
+    # 2. VIDEO COMPOSITION
+    progress(0.7, desc="Composing Final Video...")
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter(OUTPUT_VIDEO_FILENAME, fourcc, fps, (frame_width, frame_height))
+    # Add initial black screen
+    num_black_frames = int(INITIAL_BLACK_SCREEN_SECONDS * fps)
+    black_frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
+    for _ in range(num_black_frames): out.write(black_frame)
+    # Add fade-in effect
+    num_fade_frames = int(FADE_IN_DURATION_SECONDS * fps)
+    cap.set(cv2.CAP_PROP_POS_FRAMES, 0) # Rewind video
+    ret, first_frame = cap.read()
+    if ret:
+        for i in range(num_fade_frames):
+            alpha = (i + 1) / num_fade_frames
+            blended_frame = cv2.addWeighted(black_frame, 1 - alpha, first_frame, alpha, 0)
+            out.write(blended_frame)
+    # Process all frames and overlay the pre-rendered stamp
+    cap.set(cv2.CAP_PROP_POS_FRAMES, 0) # Rewind again
+    frame_idx = 0
+    # Get position for stamping
+    x_min, y_min, x_max, y_max = overlay_position_box
+    while True:
+        ret, frame = cap.read()
+        if not ret: break
+        # Skip frames used in fade-in
+        if frame_idx < num_fade_frames:
+            frame_idx += 1
+            continue
+        # --- Efficient Alpha Blending (Stamping) ---
+        roi = frame[y_min:y_max, x_min:x_max]
+        # Ensure ROI and stamp have same dimensions before blending
+        stamp_h, stamp_w, _ = overlay_stamp_cv.shape
+        roi_h, roi_w, _ = roi.shape
+        if stamp_h != roi_h or stamp_w != roi_w:
+            # This can happen if padding makes the box go out of bounds. Resize stamp to fit.
+            overlay_resized = cv2.resize(overlay_stamp_cv, (roi_w, roi_h))
+        else:
+            overlay_resized = overlay_stamp_cv
+        alpha = overlay_resized[:, :, 3] / 255.0
+        alpha_mask = cv2.merge([alpha, alpha, alpha])
+        blended_roi = (roi.astype(float) * (1.0 - alpha_mask) + overlay_resized[:, :, :3].astype(float) * alpha_mask)
+        frame[y_min:y_max, x_min:x_max] = blended_roi.astype(np.uint8)
+        out.write(frame)
+        frame_idx += 1
+        progress(0.7 + (0.3 * frame_idx / total_frames), desc=f"Processing frame {frame_idx}/{total_frames}")
+    cap.release()
+    out.release()
+    progress(1, desc="Done!")
+    return OUTPUT_VIDEO_FILENAME
+# --- GRADIO INTERFACE ---
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎬 Persian Video Quote Translator")
+    gr.Markdown("Upload a short video with English text. The app will detect the text, translate it, and create a new video with the Persian translation overlaid.")
     with gr.Row():
+        video_input = gr.Video(label="Upload Video")
+        video_output = gr.Video(label="Translated Video Output")
+    translate_button = gr.Button("Translate Video", variant="primary")
+    translate_button.click(
+        fn=process_video,
         inputs=[video_input],
+        outputs=[video_output]
     )
+    gr.Markdown("---")
+    gr.Markdown("### How it works:\n1. It finds the middle frame of your video for analysis.\n2. It uses `EasyOCR` to find the English text and its location.\n3. It uses Google's `Gemini` to translate the text to poetic Persian.\n4. It generates a high-quality overlay with your text-wrapping logic.\n5. Finally, it creates a new video with a fade-in and the translated text overlay.")
 if __name__ == "__main__":
+    demo.launch(debug=True)