kavehtaheri committed
Commit 32860f3 · verified · 1 Parent(s): 6daf130

Update app.py

Files changed (1)
  1. app.py +240 -176
app.py CHANGED
@@ -1,207 +1,271 @@
 
 
  import gradio as gr
- import requests
  import os
- import json
  import time
- # CORRECT, HIGH-LEVEL IMPORT for MoviePy. This brings in all editing functions.
- from moviepy.editor import VideoFileClip
-
- # --- 1. CONFIGURATION & CONSTANTS ---
- # Securely load API key from Hugging Face Space secrets.
- ONE_API_KEY = os.environ.get("ONE_API_KEY", "268976:66f4f58a2a905") # Using your key for now, secrets are better.
- # The custom endpoint for the one-api.ir service.
- ONE_API_URL = "https://api.one-api.ir/chatbot/v1/gpt4o/"
-
- # --- MASTER PROMPTS ---
- PROMPT_SHORTS_MODE = """
- You are an expert producer of viral short-form content. Analyze the provided SRT transcript to find the single most impactful, hook-worthy segment for a video under 3 minutes (ideally 60-90 seconds).
-
- PRIORITIES: Strong Hook, Single Clear Point, High Energy, Clear Payoff.
-
- **Input SRT Content:**
- {transcript_content}
-
- **Instructions:**
- Your output MUST be a single, valid JSON object and nothing else. Do not include any text, code blocks, or explanations before or after the JSON object.
-
- {{
- "clip_title_suggestion": "A catchy, clickbait-style title for this short clip.",
- "reasoning": "Briefly explain why this segment is perfect for a short video, referencing the hook and payoff.",
- "final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
- "final_clip_end_seconds": <The precise end time in total seconds from the SRT>
- }}
- """
-
- PROMPT_NARRATIVE_MODE = """
- You are an expert video editor and storyteller. Analyze the provided transcript to find the most compelling narrative segment between 5 and 12 minutes long.
-
- METHODOLOGY: Identify the peak moment, then find the corresponding setup and resolution to create a complete narrative arc.
-
- **Input SRT Content:**
- {transcript_content}
-
- **Instructions:**
- Your output MUST be a single, valid JSON object and nothing else. Do not include any text, code blocks, or explanations before or after the JSON object.
-
- {{
- "narrative_summary": "A one-sentence summary of the story told in the extracted clip.",
- "reasoning": "Explain why this segment works as a standalone narrative, mentioning the peak moment and how the start/end points provide a full arc.",
- "final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
- "final_clip_end_seconds": <The precise end time in total seconds from the SRT>
- }}
- """
-
- # --- 2. LLM AGENT WRAPPER ---
-
- def call_gpt4o_oneapi(transcript_content, prompt_template):
-     """Makes a robust, custom API call to the one-api.ir service."""
-     if not ONE_API_KEY or not ONE_API_URL:
-         raise ValueError("ONE_API_KEY and ONE_API_URL secrets are not set correctly.")
-
-     headers = {
-         "one-api-token": ONE_API_KEY,
-         "Content-Type": "application/json"
-     }
-     final_prompt = prompt_template.format(transcript_content=transcript_content)
-     payload = [{"role": "user", "content": final_prompt}]
-
-     try:
-         response = requests.post(ONE_API_URL, headers=headers, json=payload, timeout=180)
-         response.raise_for_status()
-         result = response.json()
-
-         if "result" not in result or not isinstance(result.get("result"), list) or len(result["result"]) == 0:
-             return f"Error: Unexpected JSON structure from API. 'result' list not found or empty.\nFull response: {json.dumps(result)}"
-
-         first_item_in_result = result["result"][0]
-
-         if isinstance(first_item_in_result, dict):
-             message_content = first_item_in_result.get("content")
-             if message_content:
-                 return message_content
-             else:
-                 return f"Error: 'content' key not found in API response dictionary.\nFull response: {json.dumps(result)}"
-         elif isinstance(first_item_in_result, str):
-             return first_item_in_result
-         else:
-             return f"Error: Unknown item type in API 'result' list.\nFull response: {json.dumps(result)}"
-
-     except requests.exceptions.HTTPError as e:
-         return f"HTTP Error calling API: {e}\nResponse Body: {e.response.text}"
-     except requests.exceptions.RequestException as e:
-         return f"Error connecting to API: {str(e)}"
-     except json.JSONDecodeError:
-         return f"Error: Failed to decode JSON from API response.\nResponse Body: {response.text}"
-
- # --- 3. CORE ORCHESTRATOR FUNCTION ---
-
- def generate_viral_clip(video_file, srt_file, analysis_mode, progress=gr.Progress()):
-     if not video_file or not srt_file:
-         return "Error: Please upload both a video file and an SRT file.", None
-     if not ONE_API_KEY or not ONE_API_URL:
-         return "Error: API keys for OneAPI are not configured correctly in the Space secrets.", None
-
-     video = None
-     new_clip = None
-     try:
-         progress(0.1, desc="Reading SRT file...")
-         with open(srt_file, 'r', encoding='utf-8') as f:
-             transcript_content = f.read()
-
-         progress(0.2, desc="Preparing analysis prompt...")
-         prompt_template = PROMPT_SHORTS_MODE if analysis_mode == "Viral Spot for Shorts (< 3 mins)" else PROMPT_NARRATIVE_MODE
-
-         progress(0.4, desc="Calling AI for analysis...")
-         llm_response_str = call_gpt4o_oneapi(transcript_content, prompt_template)
-
-         progress(0.7, desc="Parsing AI response...")
-         if llm_response_str.startswith("Error") or llm_response_str.startswith("HTTP Error"):
-             return llm_response_str, None
-
-         try:
-             cleaned_response = llm_response_str.strip()
-             if cleaned_response.startswith("```json"):
-                 cleaned_response = cleaned_response[7:]
-             if cleaned_response.endswith("```"):
-                 cleaned_response = cleaned_response[:-3]
-
-             parsed_response = json.loads(cleaned_response)
-
-             if not isinstance(parsed_response, dict):
-                 raise TypeError(f"AI did not return a valid JSON object. It returned a {type(parsed_response).__name__}.")
-
-             start_time = float(parsed_response['final_clip_start_seconds'])
-             end_time = float(parsed_response['final_clip_end_seconds'])
-             reasoning = parsed_response.get('reasoning', 'No reasoning provided.')
-
-             summary = (f"✅ Analysis Complete!\n\n"
-                        f"Reasoning: {reasoning}\n\n"
-                        f"Title Suggestion: {parsed_response.get('clip_title_suggestion', 'N/A')}\n"
-                        f"Narrative Summary: {parsed_response.get('narrative_summary', 'N/A')}\n\n"
-                        f"Clipping video from {time.strftime('%H:%M:%S', time.gmtime(start_time))} to {time.strftime('%H:%M:%S', time.gmtime(end_time))}.")
-
-         except (json.JSONDecodeError, KeyError, TypeError) as e:
-             error_msg = f"Error: Failed to parse AI response. Details: {e}\n\nRaw AI Response:\n---\n{llm_response_str}"
-             return error_msg, None
-
-         progress(0.8, desc="Clipping video...")
-         output_filename = "viral_clip.mp4"
-
-         # FIX: Load video using the correct high-level object
-         video = VideoFileClip(video_file)
-
-         if end_time > video.duration:
-             end_time = video.duration
-             summary += f"\n\n⚠️ Warning: End time was beyond video duration, adjusted to {end_time:.2f}s."
-
-         # Now .subclip() will exist because we imported from moviepy.editor
-         new_clip = video.subclip(start_time, end_time)
-         new_clip.write_videofile(output_filename, codec="libx264", audio_codec="aac")
-
-         progress(1.0, desc="Done!")
-         return summary, output_filename
-
-     except Exception as e:
-         import traceback
-         tb_str = traceback.format_exc()
-         return f"An unexpected error occurred in the main process: {str(e)}\n\nTraceback:\n{tb_str}", None
-
      finally:
-         # Manually close the clips to release file resources, preventing locks.
-         if new_clip:
-             new_clip.close()
-         if video:
-             video.close()
-
- # --- 4. GRADIO UI DEFINITION ---
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     gr.Markdown(
-         """
-         # 🎬 AI Viral Video Extractor
-         This tool uses an AI agent to analyze a video transcript and automatically clip the most viral segment.
-         **⚠️ Important Setup:** For best security, configure `ONE_API_KEY` in your Hugging Face **Space Settings > Secrets**.
-         """
-     )
      with gr.Row():
-         with gr.Column(scale=1):
-             video_input = gr.Video(label="1. Upload Original Video")
-             srt_input = gr.File(label="2. Upload English SRT File", file_types=['.srt'])
-             mode_input = gr.Radio(
-                 label="3. Select Analysis Mode",
-                 choices=["Viral Spot for Shorts (< 3 mins)", "Viral Narrative Clip (5-12 mins)"],
-                 value="Viral Narrative Clip (5-12 mins)"
-             )
-             submit_button = gr.Button("🚀 Generate Viral Clip", variant="primary")
-
-         with gr.Column(scale=2):
-             summary_output = gr.Textbox(label="Analysis Summary", lines=12, interactive=False)
-             video_output = gr.Video(label="Generated Clip", interactive=False)
-
-     submit_button.click(
-         fn=generate_viral_clip,
-         inputs=[video_input, srt_input, mode_input],
-         outputs=[summary_output, video_output],
-     )

  if __name__ == "__main__":
      demo.launch(debug=True)
+ # app.py
+
  import gradio as gr
+ import cv2
+ import numpy as np
+ from PIL import Image, ImageDraw, ImageFont
+ import easyocr
+ import google.generativeai as genai
+ import arabic_reshaper
  import os
  import time
+ import ffmpeg
+
+ # --- CONFIGURATION ---
+ API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyCu-tb3BRDIJjUt6G5ccWmrR51tOY0VZd4")
+ PERSIAN_FONT_PATH = "Vazir.ttf"
+ FADE_IN_DURATION_SECONDS = 1.0
+
+ # --- GLOBAL INITIALIZATION ---
+ reader = None
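+ # The OCR model is cached in this module-level variable so the slow EasyOCR
+ # initialization happens at most once per process.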
+ def initialize_reader():
+     """Initializes the EasyOCR reader if it hasn't been already."""
+     global reader
+     if reader is None:
+         print("Loading EasyOCR model...")
+         reader = easyocr.Reader(['en'], gpu=False, verbose=False)
+         print("EasyOCR model loaded successfully!")
+     return reader
+
+ # --- CORE PROCESSING FUNCTIONS ---
+
+ ### NEW ###: This function now also returns the average height of the original text.
+ def extract_text_and_bbox(image: Image.Image):
+     """Extracts text, a consolidated bounding box, and the average original text height."""
+     ocr_reader = initialize_reader()
+     img_array = np.array(image)
+     results = ocr_reader.readtext(img_array)
+     if not results: return "No text detected in the image.", None, None
+
+     min_x, min_y = float('inf'), float('inf')
+     max_x, max_y = float('-inf'), float('-inf')
+     text_parts = []
+     original_heights = [] ### NEW ###: List to store heights of each detected text box.
+
+     for (bbox, text, prob) in results:
+         text_parts.append(text)
+         (tl, tr, br, bl) = bbox
+         min_x = min(min_x, tl[0], bl[0])
+         min_y = min(min_y, tl[1], tr[1])
+         max_x = max(max_x, tr[0], br[0])
+         max_y = max(max_y, bl[1], br[1])
+
+         # ### NEW ###: Calculate the height of this specific text box and add it.
+         # This is a direct measure of the original font's pixel size.
+         original_heights.append(br[1] - tr[1])
+
+     extracted_text = ' '.join(text_parts)
+     consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
+
+     # ### NEW ###: Calculate the average height from all detected text parts.
+     average_original_height = sum(original_heights) / len(original_heights) if original_heights else 30 # Fallback
+
+     return extracted_text, consolidated_bbox, average_original_height
+
+ def translate_text_gemini(text: str) -> str:
+     """Translates text to colloquial Persian using the Gemini API."""
+     if not API_KEY or "YOUR_GEMINI_API_KEY_HERE" in API_KEY:
+         raise gr.Error("GEMINI_API_KEY is not set. Please add it as a Secret in your Hugging Face Space.")
+     if not text or "No text" in text:
+         return "No valid text to translate."
+
+     try:
+         genai.configure(api_key=API_KEY)
+         model = genai.GenerativeModel('gemini-1.5-flash')
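+         # The prompt asks Gemini to return only the translated text, so the
+         # response can be used directly without stripping extra commentary.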
+         prompt = f"Translate the following English quotes into Persian. The translation should be colloquial, poetic, concise, and meaningful. Preserve the original message and tone. Avoid literal translations. Provide only the translated Persian text. Quotes: [{text}]"
+         response = model.generate_content(prompt)
+         return response.text.strip()
+     except Exception as e:
+         return f"Error during translation with Gemini: {str(e)}"
+
+ ### NEW ###: This function now accepts `average_original_height` to guide its font sizing.
+ def render_translated_overlay(original_image: Image.Image, text_to_overlay: str, bbox: tuple, average_original_height: float) -> (Image.Image, tuple):
+     """
+     Creates an overlay layer with correctly rendered, wrapped Persian text,
+     sized to match the original text's height.
+     """
+     padding = 15
+     overlay_box = (
+         max(0, bbox[0] - padding),
+         max(0, bbox[1] - padding),
+         min(original_image.width, bbox[2] + padding),
+         min(original_image.height, bbox[3] + padding)
+     )
+     overlay_width = overlay_box[2] - overlay_box[0]
+     overlay_height = overlay_box[3] - overlay_box[1]
+
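+     # Estimate the background color by sampling a pixel just left of the text
+     # box, so the opaque overlay card blends into its surroundings.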
+     try:
+         sample_x = max(0, int(overlay_box[0]) - 5)
+         sample_y = int((overlay_box[1] + overlay_box[3]) / 2)
+         bg_color = original_image.getpixel((sample_x, sample_y))
+     except (ValueError, IndexError):
+         bg_color = (25, 25, 25)
+
+     overlay_layer = Image.new("RGBA", (overlay_width, overlay_height), bg_color)
+     draw = ImageDraw.Draw(overlay_layer)
+
+     if not os.path.exists(PERSIAN_FONT_PATH):
+         raise FileNotFoundError(f"Font file not found at '{PERSIAN_FONT_PATH}'. Please upload it to your Space.")
+
+     target_width = overlay_width * 0.90
+     target_height = overlay_height * 0.90
+
+     # ### NEW ###: This is the key change! We start the font size based on the original text's measured height.
+     # The 0.95 multiplier accounts for typical font padding, giving a closer visual match.
+     font_size = int(average_original_height * 0.95)
+
+     final_wrapped_lines = []
+
+     # This loop now starts with an intelligent font size and only shrinks if the wrapped
+     # text is too tall for the bounding box (a necessary fallback).
+     while font_size > 10:
+         font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
+         words = text_to_overlay.split()
+         if not words: break
+
+         raw_lines = []
+         current_line = ""
+         for word in words:
+             test_line = (current_line + " " + word).strip()
+             reshaped_test_line = arabic_reshaper.reshape(test_line)
+             line_width = draw.textbbox((0, 0), reshaped_test_line, font=font)[2]
+
+             if line_width <= target_width:
+                 current_line = test_line
+             else:
+                 raw_lines.append(current_line)
+                 current_line = word
+         raw_lines.append(current_line)
+
+         line_spacing = font_size * 0.3
+         reshaped_for_height_calc = [arabic_reshaper.reshape(l) for l in raw_lines]
+         line_heights = [draw.textbbox((0,0), l, font=font)[3] - draw.textbbox((0,0), l, font=font)[1] for l in reshaped_for_height_calc]
+         total_height = sum(line_heights) + (len(raw_lines) - 1) * line_spacing
+
+         if total_height <= target_height:
+             final_wrapped_lines = raw_lines
+             break
+         else:
+             font_size -= 2 # Shrink font and try again if it doesn't fit
+
+     if not final_wrapped_lines:
+         final_wrapped_lines = [text_to_overlay]
+
+     final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
+     line_spacing = font_size * 0.3
+     final_reshaped_lines = [arabic_reshaper.reshape(l) for l in final_wrapped_lines]
+     line_heights = [draw.textbbox((0,0), l, font=final_font)[3] - draw.textbbox((0,0), l, font=final_font)[1] for l in final_reshaped_lines]
+     total_text_height = sum(line_heights) + (len(final_reshaped_lines) - 1) * line_spacing
+
+     y_start = (overlay_height - total_text_height) / 2
+     current_y = y_start
+
+     for i, reshaped_line in enumerate(final_reshaped_lines):
+         x_center = overlay_width / 2
+         line_y_center = current_y + line_heights[i] / 2
+
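+         # Draw a 1px dark shadow first, then the white text on top, so the
+         # text stays legible on light backgrounds.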
+         draw.text((x_center + 1, line_y_center + 1), reshaped_line, font=final_font, fill=(0, 0, 0, 180), anchor="mm")
+         draw.text((x_center, line_y_center), reshaped_line, font=final_font, fill=(255, 255, 255, 255), anchor="mm")
+
+         current_y += line_heights[i] + line_spacing
+
+     return overlay_layer, overlay_box
+
+ # --- MAIN VIDEO PROCESSING PIPELINE ---
+ def process_video(video_path, progress=gr.Progress()):
+     if video_path is None: raise gr.Error("Please upload a video file first.")
+
+     progress(0, desc="Loading Video & Analyzing...")
+     cap = cv2.VideoCapture(video_path)
+     if not cap.isOpened(): raise gr.Error("Could not open video file.")
+
+     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+     fps = cap.get(cv2.CAP_PROP_FPS)
+     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
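+     # OCR a single frame from the middle of the clip; this assumes the
+     # on-screen text is static for the whole video.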
+     cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
+     ret, middle_frame_bgr = cap.read()
+     if not ret: raise gr.Error("Could not read middle frame.")
+     middle_frame_rgb_pil = Image.fromarray(cv2.cvtColor(middle_frame_bgr, cv2.COLOR_BGR2RGB))
+
+     progress(0.2, desc="Detecting & Measuring Text (EasyOCR)...")
+     # ### NEW ###: Capture the average_original_height from our updated function.
+     extracted_text, bbox, avg_height = extract_text_and_bbox(middle_frame_rgb_pil)
+     if bbox is None: raise gr.Error(extracted_text)
+
+     progress(0.4, desc="Translating Text (Gemini API)...")
+     translated_text = translate_text_gemini(extracted_text)
+     if "Error" in translated_text: raise gr.Error(translated_text)
+
+     progress(0.5, desc="Rendering Translated Text Overlay...")
+     # ### NEW ###: Pass the measured average height to the rendering function.
+     overlay_stamp_pil, overlay_position_box = render_translated_overlay(middle_frame_rgb_pil, translated_text, bbox, avg_height)
+
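+     # Convert the PIL RGBA overlay to OpenCV's BGRA channel order so it can be
+     # composited onto the BGR video frames.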
+     overlay_stamp_cv = cv2.cvtColor(np.array(overlay_stamp_pil), cv2.COLOR_RGBA2BGRA)
+
+     timestamp = int(time.time())
+     temp_silent_path = f"temp_silent_{timestamp}.mp4"
+     final_output_path = f"translated_video_{timestamp}.mp4"
+
+     progress(0.6, desc="Composing Silent Video with Overlay...")
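+     # Frames are written to a temporary, silent MP4 first; the original audio
+     # track is muxed back in with ffmpeg below.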
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+     out = cv2.VideoWriter(temp_silent_path, fourcc, fps, (frame_width, frame_height))
+
+     cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
+     frame_idx = 0
+     x_min, y_min, x_max, y_max = overlay_position_box
+
+     while True:
+         ret, frame = cap.read()
+         if not ret: break
+
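+         # Alpha-composite the pre-rendered overlay onto the region of interest:
+         # blended = frame * (1 - alpha) + overlay * alpha, per pixel and channel.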
+         roi = frame[y_min:y_max, x_min:x_max]
+         alpha = overlay_stamp_cv[:, :, 3] / 255.0
+         alpha_mask = cv2.merge([alpha, alpha, alpha])
+         blended_roi = (roi.astype(float) * (1.0 - alpha_mask) + overlay_stamp_cv[:, :, :3].astype(float) * alpha_mask)
+         frame[y_min:y_max, x_min:x_max] = blended_roi.astype(np.uint8)
+
+         out.write(frame)
+         frame_idx += 1
+         progress(0.6 + (0.3 * frame_idx / total_frames), desc=f"Processing frame {frame_idx}/{total_frames}")
+
+     cap.release(); out.release()
+
+     progress(0.95, desc="Merging Audio and Applying Fade (ffmpeg)...")
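+     # Apply the fade-in to the video stream only and stream-copy the original
+     # audio track; shortest=None emits ffmpeg's -shortest flag so the output
+     # ends with the shorter of the two streams.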
+     try:
+         input_video = ffmpeg.input(temp_silent_path)
+         input_audio = ffmpeg.input(video_path).audio
+
+         (ffmpeg.output(
+             input_video.video.filter('fade', type='in', start_time=0, duration=FADE_IN_DURATION_SECONDS),
+             input_audio, final_output_path, vcodec='libx264', acodec='copy', shortest=None
+         ).run(overwrite_output=True, quiet=True))
+     except ffmpeg.Error as e:
+         print('ffmpeg stdout:', e.stdout.decode('utf8', errors='ignore'))
+         print('ffmpeg stderr:', e.stderr.decode('utf8', errors='ignore'))
+         raise gr.Error(f"ffmpeg error: {e.stderr.decode('utf8', errors='ignore')}")
      finally:
+         if os.path.exists(temp_silent_path): os.remove(temp_silent_path)
+
+     progress(1, desc="Done!")
+     return final_output_path
+
+ # --- GRADIO INTERFACE ---
+ with gr.Blocks(theme=gr.themes.Soft(), title="Persian Video Quote Translator") as demo:
+     gr.Markdown("# 🎬 Persian Video Quote Translator")
+     gr.Markdown("Upload a short video containing English text. The app will detect the text, replace it with a poetic Persian translation, and preserve the original audio and video duration.")
+
      with gr.Row():
+         video_input = gr.Video(label="Upload Video")
+         video_output = gr.Video(label="Translated Video Output")
+     translate_button = gr.Button("Translate Video", variant="primary")
+
+     translate_button.click(fn=process_video, inputs=[video_input], outputs=[video_output])
+
+     gr.Markdown("---")
+     gr.Markdown("### How it works:\n1. It analyzes the middle frame to **measure the original text's height** and find its location.\n2. It uses the Gemini API to get a high-quality, poetic Persian translation.\n3. It renders the Persian text at a size that **matches the original**, wrapping it smartly to fit.\n4. It composites this new text overlay onto every frame of the video.\n5. Finally, it uses `ffmpeg` to merge the new video with the **original audio** and add a 1-second fade-in effect.")

  if __name__ == "__main__":
      demo.launch(debug=True)