kavehtaheri commited on
Commit
2194011
·
verified ·
1 Parent(s): 24d53ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -333
app.py CHANGED
@@ -11,18 +11,18 @@ from bidi.algorithm import get_display
11
  import os
12
  import time
13
  import ffmpeg
14
- import tempfile
15
- import shutil
16
 
17
# --- CONFIGURATION ---
# SECURITY: a real Gemini API key was hard-coded here. Any key committed to a
# repository must be considered leaked and revoked. Read it from the
# environment instead (e.g. a Hugging Face Space "Secret" named GEMINI_API_KEY).
API_KEY = os.environ.get("GEMINI_API_KEY", "")
PERSIAN_FONT_PATH = "Vazir.ttf"  # font file must ship alongside this script
FADE_IN_DURATION_SECONDS = 1.0  # length of the ffmpeg fade-in applied to output video
21
 
22
  # --- GLOBAL INITIALIZATION ---
23
  reader = None
24
-
25
  def initialize_reader():
 
26
  global reader
27
  if reader is None:
28
  print("Loading EasyOCR model...")
@@ -30,106 +30,54 @@ def initialize_reader():
30
  print("EasyOCR model loaded successfully!")
31
  return reader
32
 
33
- # --- CORE FUNCTIONS ---
34
def extract_text_and_bbox(image: Image.Image):
    """Run EasyOCR over *image* and consolidate the detections.

    Returns a pair ``(text, bbox)`` where ``text`` is every detected string
    joined with spaces and ``bbox`` is the ``(min_x, min_y, max_x, max_y)``
    envelope of all detections. On a missing image, no detections, or an OCR
    failure, a human-readable message is returned with ``bbox`` set to None.
    """
    if image is None:
        return "Please upload an image first.", None

    try:
        detections = initialize_reader().readtext(np.array(image))
        if not detections:
            return "No text detected in the image.", None

        words = []
        min_x = min_y = float('inf')
        max_x = max_y = float('-inf')

        for quad, detected_text, _confidence in detections:
            words.append(detected_text)
            tl, tr, br, bl = quad
            # Grow the envelope using the corners that bound each edge.
            min_x = min(min_x, tl[0], bl[0])
            min_y = min(min_y, tl[1], tr[1])
            max_x = max(max_x, tr[0], br[0])
            max_y = max(max_y, bl[1], br[1])

        envelope = (int(min_x), int(min_y), int(max_x), int(max_y))
        return ' '.join(words), envelope

    except Exception as e:
        return f"Error processing image with OCR: {str(e)}", None
66
 
67
def translate_text_gemini(text: str) -> str:
    """Translate English quote text into colloquial Persian via Gemini.

    Raises gr.Error when no API key is configured. Sentinel messages coming
    from the OCR stage are passed through as "No valid text to translate.";
    API failures are returned as an error string rather than raised.
    """
    if not API_KEY:
        raise gr.Error("GEMINI_API_KEY is not set.")

    sentinels = ("No text", "Error", "Please upload")
    if not text or any(marker in text for marker in sentinels):
        return "No valid text to translate."

    try:
        genai.configure(api_key=API_KEY)
        gemini = genai.GenerativeModel('gemini-1.5-flash')
        prompt = f"""Translate the following English quotes into Persian. The translation should be:
- Colloquial and natural
- Poetic and meaningful
- Concise (under 20 words)
- Preserving the original meaning and tone
- Using proper Persian grammar

Provide only the translated Persian text. Quotes: [{text}]"""
        reply = gemini.generate_content(prompt)
        return reply.text.strip()
    except Exception as e:
        return f"Error during translation with Gemini: {str(e)}"
91
 
92
def wrap_persian_text_properly(text, font, max_width, draw):
    """Greedily wrap *text* into lines no wider than *max_width* pixels.

    Width is measured after RTL shaping (arabic_reshaper + python-bidi) so
    wrap points match what will actually be drawn; when shaping fails the raw
    string is measured as a fallback.

    Args:
        text: the logical-order Persian string to wrap.
        font: PIL ImageFont used for measurement.
        max_width: maximum allowed pixel width per line.
        draw: PIL ImageDraw providing textbbox() for measurement.

    Returns:
        List of wrapped line strings (logical order, unshaped).
    """
    words = text.split()
    lines = []
    current_line_words = []

    for word in words:
        # Measure the candidate line (current line + new word).
        test_line = ' '.join(current_line_words + [word])
        try:
            shaped = get_display(arabic_reshaper.reshape(test_line))
            test_width = draw.textbbox((0, 0), shaped, font=font)[2]
        except Exception:
            # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit
            # are not swallowed. Fallback: measure the unshaped string.
            test_width = draw.textbbox((0, 0), test_line, font=font)[2]

        if test_width <= max_width:
            current_line_words.append(word)
        else:
            # Flush the current line and start a new one with this word.
            if current_line_words:
                lines.append(' '.join(current_line_words))
            current_line_words = [word]

    # Flush the final line.
    if current_line_words:
        lines.append(' '.join(current_line_words))

    return lines
125
-
126
- def render_translated_overlay(original_image: Image.Image, text_to_overlay: str, bbox: tuple) -> tuple:
127
- """Render Persian text overlay with proper RTL support"""
128
-
129
- # Check for font file
130
- if not os.path.exists(PERSIAN_FONT_PATH):
131
- raise FileNotFoundError(f"Font file not found at '{PERSIAN_FONT_PATH}'. Please ensure Vazir.ttf is in the repository.")
132
-
133
  padding = 15
134
  overlay_box = (
135
  max(0, bbox[0] - padding),
@@ -137,232 +85,166 @@ def render_translated_overlay(original_image: Image.Image, text_to_overlay: str,
137
  min(original_image.width, bbox[2] + padding),
138
  min(original_image.height, bbox[3] + padding)
139
  )
140
-
141
  overlay_width = overlay_box[2] - overlay_box[0]
142
  overlay_height = overlay_box[3] - overlay_box[1]
143
-
144
- # Sample background color
145
  try:
 
146
  sample_x = max(0, int(overlay_box[0]) - 5)
147
  sample_y = int((overlay_box[1] + overlay_box[3]) / 2)
148
  bg_color = original_image.getpixel((sample_x, sample_y))
149
  except (ValueError, IndexError):
150
- bg_color = (25, 25, 25, 255)
151
-
152
  overlay_layer = Image.new("RGBA", (overlay_width, overlay_height), bg_color)
153
  draw = ImageDraw.Draw(overlay_layer)
154
-
 
 
 
 
 
155
  target_width = overlay_width * 0.90
 
156
  font_size = 100
157
  final_wrapped_lines = []
158
-
159
- # Find optimal font size
160
  while font_size > 10:
161
  font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
162
-
163
- # Wrap text with current font size
164
- wrapped_lines = wrap_persian_text_properly(text_to_overlay, font, target_width, draw)
165
-
166
- # Calculate total height needed
167
- total_height = 0
168
- for line in wrapped_lines:
169
- try:
170
- # Process each line for RTL to get accurate height
171
- reshaped_line = arabic_reshaper.reshape(line)
172
- display_line = get_display(reshaped_line)
173
- line_bbox = draw.textbbox((0, 0), display_line, font=font)
174
- line_height = line_bbox[3] - line_bbox[1]
175
- total_height += line_height
176
- except:
177
- # Fallback height calculation
178
- line_bbox = draw.textbbox((0, 0), line, font=font)
179
- line_height = line_bbox[3] - line_bbox[1]
180
- total_height += line_height
181
-
182
- # Add spacing between lines
183
- total_height += (len(wrapped_lines) - 1) * (font_size * 0.3)
184
-
185
- if total_height <= overlay_height * 0.9:
186
- final_wrapped_lines = wrapped_lines
187
  break
188
  else:
189
  font_size -= 2
190
-
191
  if not final_wrapped_lines:
192
- final_wrapped_lines = [text_to_overlay]
193
-
194
- # Render text with proper RTL processing
195
  final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
196
  line_spacing = font_size * 0.3
197
-
198
- # Process each line for RTL and calculate positions
199
- processed_lines = []
200
- line_heights = []
201
-
202
- for line in final_wrapped_lines:
203
- try:
204
- # CRITICAL: Process each line individually for RTL
205
- reshaped_line = arabic_reshaper.reshape(line)
206
- display_line = get_display(reshaped_line)
207
- processed_lines.append(display_line)
208
-
209
- line_bbox = draw.textbbox((0, 0), display_line, font=final_font)
210
- line_height = line_bbox[3] - line_bbox[1]
211
- line_heights.append(line_height)
212
- except Exception as e:
213
- print(f"RTL processing failed for line '{line}': {e}")
214
- # Fallback to original line
215
- processed_lines.append(line)
216
- line_bbox = draw.textbbox((0, 0), line, font=final_font)
217
- line_height = line_bbox[3] - line_bbox[1]
218
- line_heights.append(line_height)
219
-
220
- total_text_height = sum(line_heights) + (len(processed_lines) - 1) * line_spacing
221
  y_start = (overlay_height - total_text_height) / 2
222
-
223
- # Draw each line
224
  current_y = y_start
225
- for i, display_line in enumerate(processed_lines):
226
- # Calculate line width and center position
227
- line_bbox = draw.textbbox((0, 0), display_line, font=final_font)
228
- line_width = line_bbox[2] - line_bbox[0]
229
- x_position = (overlay_width - line_width) / 2
230
-
231
- # Draw shadow for better readability
232
- draw.text((x_position + 1, current_y + 1), display_line, font=final_font, fill=(0, 0, 0, 180))
233
- # Draw main text
234
- draw.text((x_position, current_y), display_line, font=final_font, fill=(255, 255, 255, 255))
235
 
 
 
 
 
 
 
236
  current_y += line_heights[i] + line_spacing
237
-
238
- return overlay_layer, overlay_box
239
 
240
def process_image(image):
    """Full image pipeline: OCR -> Gemini translation -> Persian overlay.

    Returns (extracted_text, translated_text, output_image); the image slot
    is None whenever any stage fails.
    """
    if image is None:
        return "Please upload an image.", "Translation will appear here.", None

    # Stage 1: locate and read the English text.
    extracted_text, bbox = extract_text_and_bbox(image)
    if bbox is None:
        return extracted_text, "No text to translate.", None

    # Stage 2: translate it to Persian.
    translated_text = translate_text_gemini(extracted_text)
    if "Error" in translated_text:
        return extracted_text, translated_text, None

    # Stage 3: render the translation and composite it over the original.
    overlay_layer, overlay_box = render_translated_overlay(image, translated_text, bbox)

    composed = image.copy().convert("RGBA")
    # Blank the original text area with a sampled background color first.
    ImageDraw.Draw(composed).rectangle(
        overlay_box, fill=image.getpixel((overlay_box[0], overlay_box[1]))
    )
    composed.paste(overlay_layer, (overlay_box[0], overlay_box[1]), overlay_layer)

    return extracted_text, translated_text, composed.convert("RGB")
269
 
 
270
  def process_video(video_path, progress=gr.Progress()):
271
- """Process video: detect text in middle frame, translate, and overlay on all frames"""
272
- if video_path is None:
273
- raise gr.Error("Please upload a video file first.")
274
-
275
  progress(0, desc="Loading Video & Analyzing...")
276
  cap = cv2.VideoCapture(video_path)
277
- if not cap.isOpened():
278
- raise gr.Error("Could not open video file.")
279
-
280
- # Get video properties
281
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
282
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
283
  fps = cap.get(cv2.CAP_PROP_FPS)
284
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
285
-
286
- # Extract middle frame for text detection
287
  cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
288
  ret, middle_frame_bgr = cap.read()
289
- if not ret:
290
- raise gr.Error("Could not read middle frame.")
291
-
292
  middle_frame_rgb_pil = Image.fromarray(cv2.cvtColor(middle_frame_bgr, cv2.COLOR_BGR2RGB))
293
-
294
  progress(0.2, desc="Detecting Text (EasyOCR)...")
295
  extracted_text, bbox = extract_text_and_bbox(middle_frame_rgb_pil)
296
- if bbox is None:
297
- raise gr.Error(extracted_text)
298
-
299
  progress(0.4, desc="Translating Text (Gemini API)...")
300
  translated_text = translate_text_gemini(extracted_text)
301
- if "Error" in translated_text:
302
- raise gr.Error(translated_text)
303
-
304
  progress(0.5, desc="Rendering Translated Text Overlay...")
305
  overlay_stamp_pil, overlay_position_box = render_translated_overlay(middle_frame_rgb_pil, translated_text, bbox)
 
306
  overlay_stamp_cv = cv2.cvtColor(np.array(overlay_stamp_pil), cv2.COLOR_RGBA2BGRA)
307
-
308
- # Create temporary files
309
  timestamp = int(time.time())
310
- temp_dir = tempfile.mkdtemp()
311
- temp_silent_path = os.path.join(temp_dir, f"temp_silent_{timestamp}.mp4")
312
  final_output_path = f"translated_video_{timestamp}.mp4"
313
-
314
- try:
315
- progress(0.6, desc="Composing Silent Video with Overlay...")
316
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
317
- out = cv2.VideoWriter(temp_silent_path, fourcc, fps, (frame_width, frame_height))
318
-
319
- cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
320
- frame_idx = 0
321
- x_min, y_min, x_max, y_max = overlay_position_box
322
-
323
- while True:
324
- ret, frame = cap.read()
325
- if not ret:
326
- break
327
-
328
- # Extract ROI and resize overlay if needed
329
- roi = frame[y_min:y_max, x_min:x_max]
330
- stamp_h, stamp_w, _ = overlay_stamp_cv.shape
331
- roi_h, roi_w, _ = roi.shape
332
-
333
- if stamp_h != roi_h or stamp_w != roi_w:
334
- overlay_resized = cv2.resize(overlay_stamp_cv, (roi_w, roi_h))
335
- else:
336
- overlay_resized = overlay_stamp_cv
337
-
338
- # Alpha blend
339
- alpha = overlay_resized[:, :, 3] / 255.0
340
- alpha_mask = cv2.merge([alpha, alpha, alpha])
341
-
342
- blended_roi = (roi.astype(float) * (1.0 - alpha_mask) +
343
- overlay_resized[:, :, :3].astype(float) * alpha_mask)
344
-
345
- frame[y_min:y_max, x_min:x_max] = blended_roi.astype(np.uint8)
346
-
347
- out.write(frame)
348
- frame_idx += 1
349
- progress(0.6 + (0.3 * frame_idx / total_frames),
350
- desc=f"Processing frame {frame_idx}/{total_frames}")
351
-
352
- cap.release()
353
- out.release()
354
 
355
- progress(0.95, desc="Merging Audio and Applying Fade (ffmpeg)...")
 
 
356
 
357
- # Use ffmpeg to merge video and audio
 
 
 
 
 
 
 
 
 
 
 
358
  input_video = ffmpeg.input(temp_silent_path)
359
- input_audio = ffmpeg.input(video_path)
360
-
361
  (
362
  ffmpeg
363
  .output(
364
  input_video.video.filter('fade', type='in', start_time=0, duration=FADE_IN_DURATION_SECONDS),
365
- input_audio.audio,
366
  final_output_path,
367
  vcodec='libx264',
368
  acodec='copy',
@@ -370,79 +252,34 @@ def process_video(video_path, progress=gr.Progress()):
370
  )
371
  .run(overwrite_output=True, quiet=True)
372
  )
373
-
374
- progress(1, desc="Done!")
375
- return final_output_path
376
-
377
  except ffmpeg.Error as e:
378
- print('ffmpeg stdout:', e.stdout.decode('utf8') if e.stdout else 'None')
379
- print('ffmpeg stderr:', e.stderr.decode('utf8') if e.stderr else 'None')
380
- raise gr.Error(f"ffmpeg error: {e.stderr.decode('utf8') if e.stderr else 'Unknown error'}")
381
-
382
  finally:
383
- # Clean up temporary directory
384
- if os.path.exists(temp_dir):
385
- shutil.rmtree(temp_dir)
 
 
386
 
387
  # --- GRADIO INTERFACE ---
388
- with gr.Blocks(theme=gr.themes.Soft(), title="Persian Text Translator") as demo:
389
- gr.Markdown("# 🎬📝 Persian Text Translator")
390
- gr.Markdown("Upload an image or video with English text. The app will detect, translate to Persian, and overlay the text back properly.")
391
-
392
- with gr.Tabs():
393
- with gr.TabItem("📝 Image Translation"):
394
- gr.Markdown("Upload an image with English text for Persian translation overlay.")
395
-
396
- with gr.Row():
397
- with gr.Column(scale=1):
398
- image_input = gr.Image(label="Upload Quote Image", type="pil", sources=["upload", "clipboard"])
399
- img_text_output = gr.Textbox(label="Extracted English Text", lines=3, show_copy_button=True)
400
- img_translated_output = gr.Textbox(label="Persian Translation", lines=3, show_copy_button=True)
401
-
402
- with gr.Column(scale=1):
403
- image_output = gr.Image(label="Translated Image Output", type="pil")
404
-
405
- image_input.change(
406
- fn=process_image,
407
- inputs=[image_input],
408
- outputs=[img_text_output, img_translated_output, image_output]
409
- )
410
-
411
- with gr.TabItem("🎬 Video Translation"):
412
- gr.Markdown("Upload a video with English text. The app will preserve audio and add Persian translation overlay.")
413
-
414
- with gr.Row():
415
- video_input = gr.Video(label="Upload Video")
416
- video_output = gr.Video(label="Translated Video Output")
417
-
418
- translate_button = gr.Button("Translate Video", variant="primary")
419
- translate_button.click(
420
- fn=process_video,
421
- inputs=[video_input],
422
- outputs=[video_output]
423
- )
424
 
425
  gr.Markdown("---")
426
- gr.Markdown("""
427
- ### How it works:
428
- **Image Mode:**
429
- 1. Detects English text using OCR
430
- 2. Translates to Persian using Gemini AI
431
- 3. Overlays properly formatted RTL Persian text
432
-
433
- **Video Mode:**
434
- 1. Analyzes middle frame to detect text location
435
- 2. Translates English text to Persian
436
- 3. Applies Persian overlay to all frames with background
437
- 4. Merges with original audio and adds fade-in effect
438
-
439
- **Features:**
440
- - Proper RTL (Right-to-Left) Persian text rendering
441
- - Automatic font sizing to fit available space
442
- - Text wrapping for longer translations
443
- - Background color sampling for natural overlay
444
- - Audio preservation in video mode
445
- """)
446
 
447
  if __name__ == "__main__":
448
  demo.launch(debug=True)
 
11
  import os
12
  import time
13
  import ffmpeg
 
 
14
 
15
# --- CONFIGURATION ---
# IMPORTANT: For deployment on Hugging Face, set GEMINI_API_KEY as a "Secret".
# SECURITY: never paste a real key into source control — a committed key is
# leaked and must be revoked; it is read from the environment instead.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
PERSIAN_FONT_PATH = "Vazir.ttf"  # Make sure this font file is in your repository
FADE_IN_DURATION_SECONDS = 1.0  # length of the ffmpeg fade-in on the output video
21
 
22
  # --- GLOBAL INITIALIZATION ---
23
  reader = None
 
24
  def initialize_reader():
25
+ """Initializes the EasyOCR reader if it hasn't been already."""
26
  global reader
27
  if reader is None:
28
  print("Loading EasyOCR model...")
 
30
  print("EasyOCR model loaded successfully!")
31
  return reader
32
 
33
+ # --- CORE PROCESSING FUNCTIONS ---
34
def extract_text_and_bbox(image: Image.Image):
    """Extracts text and a consolidated bounding box from a PIL Image.

    Returns:
        (text, bbox): all detected strings joined with spaces and the
        (min_x, min_y, max_x, max_y) envelope of every detection, or a
        human-readable message and None when there is nothing to report
        or OCR fails.
    """
    # Guard against a missing upload instead of crashing inside EasyOCR.
    if image is None:
        return "Please upload an image first.", None

    try:
        ocr_reader = initialize_reader()
        img_array = np.array(image)
        results = ocr_reader.readtext(img_array)
        if not results:
            return "No text detected in the image.", None

        min_x, min_y = float('inf'), float('inf')
        max_x, max_y = float('-inf'), float('-inf')
        text_parts = []

        for (bbox, text, prob) in results:
            text_parts.append(text)
            (tl, tr, br, bl) = bbox
            # Grow the envelope using the corners that bound each edge.
            min_x = min(min_x, tl[0], bl[0])
            min_y = min(min_y, tl[1], tr[1])
            max_x = max(max_x, tr[0], br[0])
            max_y = max(max_y, bl[1], br[1])

        extracted_text = ' '.join(text_parts)
        consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
        return extracted_text, consolidated_bbox

    except Exception as e:
        # Surface OCR failures as a message so callers can show them in the UI
        # (process_video raises gr.Error with this text when bbox is None).
        return f"Error processing image with OCR: {str(e)}", None
 
56
 
57
def translate_text_gemini(text: str) -> str:
    """Translates text to colloquial Persian using the Gemini API.

    Raises gr.Error when no usable API key is configured; returns a
    placeholder for unusable input and an error string when the API
    call itself fails.
    """
    if not API_KEY or "YOUR_GEMINI_API_KEY_HERE" in API_KEY:
        raise gr.Error("GEMINI_API_KEY is not set. Please add it as a Secret in your Hugging Face Space.")
    if not text or "No text" in text:
        return "No valid text to translate."

    prompt = (
        "Translate the following English quotes into Persian. The translation "
        "should be colloquial, poetic, concise, and meaningful. Preserve the "
        "original message and tone. Avoid literal translations. "
        f"Provide only the translated Persian text. Quotes: [{text}]"
    )

    try:
        genai.configure(api_key=API_KEY)
        gemini = genai.GenerativeModel('gemini-1.5-flash')
        reply = gemini.generate_content(prompt)
        return reply.text.strip()
    except Exception as e:
        return f"Error during translation with Gemini: {str(e)}"
72
 
73
+ # ### --- THE NEW AND CORRECTED TEXT OVERLAY FUNCTION --- ###
74
+ # This function is now based on the superior logic from your textoverimage.txt script.
75
+ def render_translated_overlay(original_image: Image.Image, text_to_overlay: str, bbox: tuple) -> Image.Image:
76
+ """
77
+ Creates an overlay layer with correctly rendered, wrapped Persian text.
78
+ This function erases the background area defined by the bbox and draws new text.
79
+ """
80
+ # 1. Define the area to work with, adding padding
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  padding = 15
82
  overlay_box = (
83
  max(0, bbox[0] - padding),
 
85
  min(original_image.width, bbox[2] + padding),
86
  min(original_image.height, bbox[3] + padding)
87
  )
 
88
  overlay_width = overlay_box[2] - overlay_box[0]
89
  overlay_height = overlay_box[3] - overlay_box[1]
90
+
91
+ # 2. Create the background layer by sampling a color from the original image
92
  try:
93
+ # Sample color from just outside the original text box to get a clean background
94
  sample_x = max(0, int(overlay_box[0]) - 5)
95
  sample_y = int((overlay_box[1] + overlay_box[3]) / 2)
96
  bg_color = original_image.getpixel((sample_x, sample_y))
97
  except (ValueError, IndexError):
98
+ bg_color = (25, 25, 25) # Fallback color
99
+
100
  overlay_layer = Image.new("RGBA", (overlay_width, overlay_height), bg_color)
101
  draw = ImageDraw.Draw(overlay_layer)
102
+
103
+ # 3. Check for Font File
104
+ if not os.path.exists(PERSIAN_FONT_PATH):
105
+ raise FileNotFoundError(f"Font file not found at '{PERSIAN_FONT_PATH}'. Please upload it to your Space.")
106
+
107
+ # 4. Dynamically find the best font size and wrap the text
108
  target_width = overlay_width * 0.90
109
+ target_height = overlay_height * 0.90
110
  font_size = 100
111
  final_wrapped_lines = []
112
+
 
113
  while font_size > 10:
114
  font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
115
+ words = text_to_overlay.split()
116
+ if not words: break
117
+
118
+ raw_lines = []
119
+ current_line = ""
120
+ for word in words:
121
+ test_line = (current_line + " " + word).strip()
122
+ # To measure width correctly, we MUST reshape it first. This is the key.
123
+ reshaped_test_line = get_display(arabic_reshaper.reshape(test_line))
124
+ line_width = draw.textbbox((0, 0), reshaped_test_line, font=font)[2]
125
+
126
+ if line_width <= target_width:
127
+ current_line = test_line
128
+ else:
129
+ raw_lines.append(current_line)
130
+ current_line = word
131
+ raw_lines.append(current_line)
132
+
133
+ line_spacing = font_size * 0.3
134
+ reshaped_for_height_calc = [get_display(arabic_reshaper.reshape(l)) for l in raw_lines]
135
+ line_heights = [draw.textbbox((0,0), l, font=font)[3] - draw.textbbox((0,0), l, font=font)[1] for l in reshaped_for_height_calc]
136
+ total_height = sum(line_heights) + (len(raw_lines) - 1) * line_spacing
137
+
138
+ if total_height <= target_height:
139
+ final_wrapped_lines = raw_lines
140
  break
141
  else:
142
  font_size -= 2
143
+
144
  if not final_wrapped_lines:
145
+ final_wrapped_lines = [text_to_overlay] # Fallback
146
+
147
+ # 5. Draw the final, wrapped, and correctly shaped text
148
  final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
149
  line_spacing = font_size * 0.3
150
+
151
+ # Reshape final lines for drawing and calculate total height
152
+ final_display_lines = [get_display(arabic_reshaper.reshape(l)) for l in final_wrapped_lines]
153
+ line_heights = [draw.textbbox((0,0), l, font=final_font)[3] - draw.textbbox((0,0), l, font=final_font)[1] for l in final_display_lines]
154
+ total_text_height = sum(line_heights) + (len(final_display_lines) - 1) * line_spacing
155
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  y_start = (overlay_height - total_text_height) / 2
 
 
157
  current_y = y_start
158
+
159
+ for i, display_line in enumerate(final_display_lines):
160
+ x_center = overlay_width / 2
161
+ line_y_center = current_y + line_heights[i] / 2
 
 
 
 
 
 
162
 
163
+ # Use anchor="mm" to perfectly center the text block horizontally and vertically
164
+ # Draw a subtle shadow for better readability
165
+ draw.text((x_center + 1, line_y_center + 1), display_line, font=final_font, fill=(0, 0, 0, 180), anchor="mm")
166
+ # Draw the main text
167
+ draw.text((x_center, line_y_center), display_line, font=final_font, fill=(255, 255, 255, 255), anchor="mm")
168
+
169
  current_y += line_heights[i] + line_spacing
 
 
170
 
171
+ return overlay_layer, overlay_box
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
+ # --- MAIN VIDEO PROCESSING PIPELINE ---
174
  def process_video(video_path, progress=gr.Progress()):
175
+ if video_path is None: raise gr.Error("Please upload a video file first.")
176
+
 
 
177
  progress(0, desc="Loading Video & Analyzing...")
178
  cap = cv2.VideoCapture(video_path)
179
+ if not cap.isOpened(): raise gr.Error("Could not open video file.")
180
+
 
 
181
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
182
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
183
  fps = cap.get(cv2.CAP_PROP_FPS)
184
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
185
+
186
+ # Analyze the middle frame for text
187
  cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
188
  ret, middle_frame_bgr = cap.read()
189
+ if not ret: raise gr.Error("Could not read middle frame.")
 
 
190
  middle_frame_rgb_pil = Image.fromarray(cv2.cvtColor(middle_frame_bgr, cv2.COLOR_BGR2RGB))
191
+
192
  progress(0.2, desc="Detecting Text (EasyOCR)...")
193
  extracted_text, bbox = extract_text_and_bbox(middle_frame_rgb_pil)
194
+ if bbox is None: raise gr.Error(extracted_text)
195
+
 
196
  progress(0.4, desc="Translating Text (Gemini API)...")
197
  translated_text = translate_text_gemini(extracted_text)
198
+ if "Error" in translated_text: raise gr.Error(translated_text)
199
+
 
200
  progress(0.5, desc="Rendering Translated Text Overlay...")
201
  overlay_stamp_pil, overlay_position_box = render_translated_overlay(middle_frame_rgb_pil, translated_text, bbox)
202
+ # Convert the overlay to a format OpenCV can use (BGRA)
203
  overlay_stamp_cv = cv2.cvtColor(np.array(overlay_stamp_pil), cv2.COLOR_RGBA2BGRA)
204
+
 
205
  timestamp = int(time.time())
206
+ temp_silent_path = f"temp_silent_{timestamp}.mp4"
 
207
  final_output_path = f"translated_video_{timestamp}.mp4"
208
+
209
+ progress(0.6, desc="Composing Silent Video with Overlay...")
210
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
211
+ out = cv2.VideoWriter(temp_silent_path, fourcc, fps, (frame_width, frame_height))
212
+
213
+ cap.set(cv2.CAP_PROP_POS_FRAMES, 0) # Rewind video to the beginning
214
+ frame_idx = 0
215
+ x_min, y_min, x_max, y_max = overlay_position_box
216
+
217
+ while True:
218
+ ret, frame = cap.read()
219
+ if not ret: break
220
+
221
+ # Define the region of interest (ROI) where the overlay will go
222
+ roi = frame[y_min:y_max, x_min:x_max]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
+ # Simple alpha blending
225
+ alpha = overlay_stamp_cv[:, :, 3] / 255.0
226
+ alpha_mask = cv2.merge([alpha, alpha, alpha])
227
 
228
+ blended_roi = (roi.astype(float) * (1.0 - alpha_mask) + overlay_stamp_cv[:, :, :3].astype(float) * alpha_mask)
229
+ frame[y_min:y_max, x_min:x_max] = blended_roi.astype(np.uint8)
230
+
231
+ out.write(frame)
232
+ frame_idx += 1
233
+ progress(0.6 + (0.3 * frame_idx / total_frames), desc=f"Processing frame {frame_idx}/{total_frames}")
234
+
235
+ cap.release()
236
+ out.release()
237
+
238
+ progress(0.95, desc="Merging Audio and Applying Fade (ffmpeg)...")
239
+ try:
240
  input_video = ffmpeg.input(temp_silent_path)
241
+ input_audio = ffmpeg.input(video_path).audio # Select audio stream only
242
+
243
  (
244
  ffmpeg
245
  .output(
246
  input_video.video.filter('fade', type='in', start_time=0, duration=FADE_IN_DURATION_SECONDS),
247
+ input_audio,
248
  final_output_path,
249
  vcodec='libx264',
250
  acodec='copy',
 
252
  )
253
  .run(overwrite_output=True, quiet=True)
254
  )
 
 
 
 
255
  except ffmpeg.Error as e:
256
+ print('ffmpeg stdout:', e.stdout.decode('utf8', errors='ignore'))
257
+ print('ffmpeg stderr:', e.stderr.decode('utf8', errors='ignore'))
258
+ raise gr.Error(f"ffmpeg error: {e.stderr.decode('utf8', errors='ignore')}")
 
259
  finally:
260
+ if os.path.exists(temp_silent_path):
261
+ os.remove(temp_silent_path)
262
+
263
+ progress(1, desc="Done!")
264
+ return final_output_path
265
 
266
# --- GRADIO INTERFACE ---
# Top-level UI definition: a single video-in / video-out page driven by
# process_video. Built at import time so `demo` exists when launched below.
with gr.Blocks(theme=gr.themes.Soft(), title="Persian Video Quote Translator") as demo:
    gr.Markdown("# 🎬 Persian Video Quote Translator")
    gr.Markdown("Upload a short video containing English text. The app will detect the text, replace it with a poetic Persian translation, and preserve the original audio and video duration.")
    with gr.Row():
        video_input = gr.Video(label="Upload Video")
        video_output = gr.Video(label="Translated Video Output")
    translate_button = gr.Button("Translate Video", variant="primary")

    # Wire the button to the full OCR -> translate -> overlay -> mux pipeline.
    translate_button.click(
        fn=process_video,
        inputs=[video_input],
        outputs=[video_output]
    )

    gr.Markdown("---")
    gr.Markdown("### How it works:\n1. It analyzes the middle frame to find the text and its location.\n2. It uses the Gemini API to get a high-quality, poetic Persian translation.\n3. It renders the Persian text correctly onto a background that matches the original video.\n4. It composites this new text overlay onto every frame of the video.\n5. Finally, it uses `ffmpeg` to merge the new video with the **original audio** and add a 1-second fade-in effect.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the Gradio UI; disable for production.
    demo.launch(debug=True)