Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,57 +7,42 @@ import numpy as np
|
|
7 |
import google.generativeai as genai
|
8 |
import arabic_reshaper
|
9 |
import os
|
10 |
-
import
|
|
|
|
|
11 |
import tempfile
|
12 |
-
import
|
13 |
-
import
|
14 |
|
15 |
# --- CONFIGURATION ---
|
16 |
-
# IMPORTANT: Replace with your actual Gemini API key
|
17 |
api_key = "AIzaSyAKI92YawOKQ1-HRLmvaryMEWk_y4alJgA"
|
18 |
-
# Ensure the Vazir.ttf font file is in the same directory as this script
|
19 |
PERSIAN_FONT_PATH = "Vazir.ttf"
|
20 |
|
21 |
# --- GLOBAL INITIALIZATION ---
|
22 |
-
# Lazily initialize the OCR reader to avoid loading it on script import
|
23 |
reader = None
|
24 |
-
translation_cache =- The text "Output video paths need to persist outside the temporary directory for Gradio display" seems like a note accidentally included.
|
25 |
-
{}
|
26 |
|
27 |
def initialize_reader():
|
28 |
"""Initializes the EasyOCR reader if it hasn't been already."""
|
29 |
global reader
|
30 |
-
# Output video paths need to persist outside the temporary directory for Gradio display.
|
31 |
-
|
32 |
if reader is None:
|
33 |
print("Loading EasyOCR model... (This may take a moment on first run)")
|
34 |
-
# We only need to detect English, as we are translating from it.
|
35 |
reader = easyocr.Reader(['en'], gpu=False, verbose=False)
|
36 |
print("EasyOCR model loaded successfully!")
|
37 |
return reader
|
38 |
|
39 |
-
# --- CORE FUNCTIONS ---
|
40 |
|
41 |
def extract_text_and_bbox(image: Image.Image):
|
42 |
"""
|
43 |
Extracts text from a PIL Image and calculates a single consolidated
|
44 |
-
bounding box for all text found.
|
45 |
"""
|
46 |
if image is None:
|
47 |
return "Please upload an image first.", None
|
48 |
|
49 |
try:
|
50 |
-
# Resize for faster OCR (max width 640)
|
51 |
-
ocr_width = 640
|
52 |
-
if image.width > ocr_width:
|
53 |
-
scale_factor = ocr_width / image.width
|
54 |
-
ocr_image = image.resize((ocr_width, int(image.height * scale_factor)))
|
55 |
-
else:
|
56 |
-
ocr_image = image
|
57 |
-
scale_factor = 1.0
|
58 |
-
|
59 |
ocr_reader = initialize_reader()
|
60 |
-
img_array = np.array(
|
61 |
results = ocr_reader.readtext(img_array)
|
62 |
|
63 |
if not results:
|
@@ -76,18 +61,13 @@ def extract_text_and_bbox(image: Image.Image):
|
|
76 |
max_y = max(max_y, bl[1], br[1])
|
77 |
|
78 |
extracted_text = ' '.join(text_parts)
|
79 |
-
|
80 |
-
consolidated_bbox = (
|
81 |
-
int(min_x / scale_factor), int(min_y / scale_factor),
|
82 |
-
int(max_x / scale_factor), int(max_y / scale_factor)
|
83 |
-
)
|
84 |
|
85 |
return extracted_text, consolidated_bbox
|
86 |
|
87 |
except Exception as e:
|
88 |
return f"Error processing image with OCR: {str(e)}", None
|
89 |
|
90 |
-
|
91 |
def translate_text_gemini(text: str) -> str:
|
92 |
"""Translates text to colloquial Persian using the Gemini API."""
|
93 |
if not text or "No text" in text or "Error" in text or "Please upload" in text:
|
@@ -96,18 +76,13 @@ def translate_text_gemini(text: str) -> str:
|
|
96 |
try:
|
97 |
genai.configure(api_key=api_key)
|
98 |
model = genai.GenerativeModel('gemini-1.5-flash')
|
99 |
-
prompt =f"Translate the following English quotes into Persian, rephrasing only minimally if absolutely necessary for natural poetic flow, but strictly preserving the original meaning, intent, purpose, and nuances without any alterations or additions that could change the core message. Ensure the Persian versions are concise (under 20 words), deep, touching, poetic, and profound, using idiomatic Persian that evokes wisdom or inspiration while staying faithful to the source. Additionally, guarantee suitable grammar and natural sentence structure in Persian for smooth readability by native speakers, and ensure the translation conveys clear, substantive meaning that stands independently beyond its poetic tone (i.e., the wisdom or insight should be immediately understandable without relying solely on artistry). If the original quote includes an attribution (e.g., author name), incorporate it faithfully in the Persian translation on the last line, formatted similarly (e.g.,
|
100 |
|
101 |
response = model.generate_content(prompt)
|
102 |
-
|
103 |
-
# Strip numbering if present (assuming single quote)
|
104 |
-
if translated.startswith('1. '):
|
105 |
-
translated = translated[3:].strip()
|
106 |
-
return translated
|
107 |
except Exception as e:
|
108 |
return f"Error during translation: {str(e)}"
|
109 |
|
110 |
-
# --- THE NEW AND CORRECTED IMAGE OVERLAY FUNCTION ---
|
111 |
def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbox: tuple) -> Image.Image:
|
112 |
"""
|
113 |
Overlays Persian text onto an image, erasing the content within the given
|
@@ -133,7 +108,7 @@ def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbo
|
|
133 |
sample_y = int((erase_box[1] + erase_box[3]) / 2)
|
134 |
bg_color = image_copy.getpixel((sample_x, sample_y))
|
135 |
except (ValueError, IndexError):
|
136 |
-
bg_color = (255, 255, 255)
|
137 |
|
138 |
draw_erase.rectangle(erase_box, fill=bg_color)
|
139 |
|
@@ -156,9 +131,7 @@ def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbo
|
|
156 |
current_line = ""
|
157 |
for word in words:
|
158 |
test_line = (current_line + " " + word).strip()
|
159 |
-
# To measure width, we MUST reshape it first. This is the key.
|
160 |
reshaped_test_line = arabic_reshaper.reshape(test_line)
|
161 |
-
# Use textbbox for more accurate size calculation
|
162 |
line_width = draw.textbbox((0, 0), reshaped_test_line, font=font)[2]
|
163 |
|
164 |
if line_width <= target_width:
|
@@ -187,7 +160,6 @@ def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbo
|
|
187 |
final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
|
188 |
line_spacing = font_size * 0.3
|
189 |
|
190 |
-
# Reshape the final lines for drawing
|
191 |
final_reshaped_lines = [arabic_reshaper.reshape(l) for l in final_wrapped_lines]
|
192 |
line_heights = [draw.textbbox((0,0), l, font=final_font)[3] - draw.textbbox((0,0), l, font=final_font)[1] for l in final_reshaped_lines]
|
193 |
total_text_height = sum(line_heights) + (len(final_reshaped_lines) - 1) * line_spacing
|
@@ -199,9 +171,7 @@ def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbo
|
|
199 |
x_center = erase_box[0] + (erase_box[2] - erase_box[0]) / 2
|
200 |
line_y_center = current_y + line_heights[i] / 2
|
201 |
|
202 |
-
# Draw a subtle shadow for better readability
|
203 |
draw.text((x_center + 2, line_y_center + 2), reshaped_line, font=final_font, fill=(0, 0, 0, 180), anchor="mm")
|
204 |
-
# Draw the main text
|
205 |
draw.text((x_center, line_y_center), reshaped_line, font=final_font, fill=(255, 255, 255, 255), anchor="mm")
|
206 |
|
207 |
current_y += line_heights[i] + line_spacing
|
@@ -210,120 +180,288 @@ def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbo
|
|
210 |
out_image = Image.alpha_composite(erase_layer, txt_layer)
|
211 |
return out_image.convert("RGB")
|
212 |
|
|
|
213 |
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
progress(0.4, desc="Processing frames...")
|
254 |
-
num_frames = len(frames)
|
255 |
-
if num_frames == 0:
|
256 |
-
return None
|
257 |
-
|
258 |
-
prev_translated_text = None
|
259 |
-
prev_bbox = None
|
260 |
-
|
261 |
-
for i, frame_path in enumerate(frames):
|
262 |
-
progress(0.4 + (i / num_frames) * 0.4, desc=f"Processing frame {i+1}/{num_frames}")
|
263 |
|
264 |
-
|
265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
275 |
-
#
|
276 |
-
if
|
277 |
-
|
|
|
|
|
278 |
else:
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
continue
|
284 |
-
translation_cache[extracted_text] = translated_text
|
285 |
|
286 |
-
#
|
287 |
-
|
288 |
-
# To further optimize, perhaps skip OCR if previous had no text, but for simplicity, keep as is.
|
289 |
|
290 |
-
#
|
291 |
-
|
292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
298 |
|
299 |
-
|
300 |
-
|
301 |
-
|
|
|
|
|
302 |
|
303 |
-
|
|
|
|
|
|
|
|
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
|
309 |
# --- GRADIO INTERFACE ---
|
310 |
|
311 |
-
with gr.Blocks(title="Persian Quote
|
312 |
-
gr.Markdown("#
|
313 |
-
gr.Markdown("Upload a video with English text. The app will
|
314 |
-
gr.Markdown("**Note:** For best performance on free tier, use short videos (<30s). Longer videos may take time and could reconnect.")
|
315 |
|
316 |
with gr.Row():
|
317 |
with gr.Column(scale=1):
|
318 |
-
video_input = gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
with gr.Column(scale=1):
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
)
|
327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
if __name__ == "__main__":
|
329 |
demo.launch()
|
|
|
7 |
import google.generativeai as genai
|
8 |
import arabic_reshaper
|
9 |
import os
|
10 |
+
import cv2
|
11 |
+
from moviepy.editor import *
|
12 |
+
from moviepy.video.fx import resize, fadein, fadeout
|
13 |
import tempfile
|
14 |
+
import math
|
15 |
+
import random
|
16 |
|
17 |
# --- CONFIGURATION ---
# SECURITY(review): this API key was previously committed in plain text and
# must be considered compromised — rotate it. Prefer supplying the key via
# the GEMINI_API_KEY environment variable; the literal fallback only keeps
# existing deployments working until rotation.
api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyAKI92YawOKQ1-HRLmvaryMEWk_y4alJgA")
# Ensure the Vazir.ttf font file is in the same directory as this script.
PERSIAN_FONT_PATH = "Vazir.ttf"

# --- GLOBAL INITIALIZATION ---
# EasyOCR reader is created lazily by initialize_reader() so importing this
# module does not trigger the (slow) model load.
reader = None
|
|
|
|
|
23 |
|
24 |
def initialize_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    The reader is cached in the module-level ``reader`` global so the
    (expensive) model load happens at most once per process.
    """
    global reader
    if reader is None:
        print("Loading EasyOCR model... (This may take a moment on first run)")
        # English only — the app translates *from* English, so no other
        # recognition languages are needed.
        reader = easyocr.Reader(['en'], gpu=False, verbose=False)
        print("EasyOCR model loaded successfully!")
    return reader
|
32 |
|
33 |
+
# --- CORE FUNCTIONS FROM YOUR ORIGINAL CODE ---
|
34 |
|
35 |
def extract_text_and_bbox(image: Image.Image):
|
36 |
"""
|
37 |
Extracts text from a PIL Image and calculates a single consolidated
|
38 |
+
bounding box for all text found.
|
39 |
"""
|
40 |
if image is None:
|
41 |
return "Please upload an image first.", None
|
42 |
|
43 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
ocr_reader = initialize_reader()
|
45 |
+
img_array = np.array(image)
|
46 |
results = ocr_reader.readtext(img_array)
|
47 |
|
48 |
if not results:
|
|
|
61 |
max_y = max(max_y, bl[1], br[1])
|
62 |
|
63 |
extracted_text = ' '.join(text_parts)
|
64 |
+
consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
|
|
|
|
|
|
|
|
|
65 |
|
66 |
return extracted_text, consolidated_bbox
|
67 |
|
68 |
except Exception as e:
|
69 |
return f"Error processing image with OCR: {str(e)}", None
|
70 |
|
|
|
71 |
def translate_text_gemini(text: str) -> str:
|
72 |
"""Translates text to colloquial Persian using the Gemini API."""
|
73 |
if not text or "No text" in text or "Error" in text or "Please upload" in text:
|
|
|
76 |
try:
|
77 |
genai.configure(api_key=api_key)
|
78 |
model = genai.GenerativeModel('gemini-1.5-flash')
|
79 |
+
prompt = f"Translate the following English quotes into Persian, rephrasing only minimally if absolutely necessary for natural poetic flow, but strictly preserving the original meaning, intent, purpose, and nuances without any alterations or additions that could change the core message. Ensure the Persian versions are concise (under 20 words), deep, touching, poetic, and profound, using idiomatic Persian that evokes wisdom or inspiration while staying faithful to the source. Additionally, guarantee suitable grammar and natural sentence structure in Persian for smooth readability by native speakers, and ensure the translation conveys clear, substantive meaning that stands independently beyond its poetic tone (i.e., the wisdom or insight should be immediately understandable without relying solely on artistry). If the original quote includes an attribution (e.g., author name), incorporate it faithfully in the Persian translation on the last line, formatted similarly (e.g., '- Author Name -' in Persian). Your response must contain ONLY the translated Persian texts in Perso-Arabic script, one per quote, numbered (e.g., 1., 2.) for separation, with no other text, labels, explanations, or information whatsoever Quotes: [{text}]"
|
80 |
|
81 |
response = model.generate_content(prompt)
|
82 |
+
return response.text.strip()
|
|
|
|
|
|
|
|
|
83 |
except Exception as e:
|
84 |
return f"Error during translation: {str(e)}"
|
85 |
|
|
|
86 |
def overlay_text_on_image(original_image: Image.Image, text_to_overlay: str, bbox: tuple) -> Image.Image:
|
87 |
"""
|
88 |
Overlays Persian text onto an image, erasing the content within the given
|
|
|
108 |
sample_y = int((erase_box[1] + erase_box[3]) / 2)
|
109 |
bg_color = image_copy.getpixel((sample_x, sample_y))
|
110 |
except (ValueError, IndexError):
|
111 |
+
bg_color = (255, 255, 255)
|
112 |
|
113 |
draw_erase.rectangle(erase_box, fill=bg_color)
|
114 |
|
|
|
131 |
current_line = ""
|
132 |
for word in words:
|
133 |
test_line = (current_line + " " + word).strip()
|
|
|
134 |
reshaped_test_line = arabic_reshaper.reshape(test_line)
|
|
|
135 |
line_width = draw.textbbox((0, 0), reshaped_test_line, font=font)[2]
|
136 |
|
137 |
if line_width <= target_width:
|
|
|
160 |
final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
|
161 |
line_spacing = font_size * 0.3
|
162 |
|
|
|
163 |
final_reshaped_lines = [arabic_reshaper.reshape(l) for l in final_wrapped_lines]
|
164 |
line_heights = [draw.textbbox((0,0), l, font=final_font)[3] - draw.textbbox((0,0), l, font=final_font)[1] for l in final_reshaped_lines]
|
165 |
total_text_height = sum(line_heights) + (len(final_reshaped_lines) - 1) * line_spacing
|
|
|
171 |
x_center = erase_box[0] + (erase_box[2] - erase_box[0]) / 2
|
172 |
line_y_center = current_y + line_heights[i] / 2
|
173 |
|
|
|
174 |
draw.text((x_center + 2, line_y_center + 2), reshaped_line, font=final_font, fill=(0, 0, 0, 180), anchor="mm")
|
|
|
175 |
draw.text((x_center, line_y_center), reshaped_line, font=final_font, fill=(255, 255, 255, 255), anchor="mm")
|
176 |
|
177 |
current_y += line_heights[i] + line_spacing
|
|
|
180 |
out_image = Image.alpha_composite(erase_layer, txt_layer)
|
181 |
return out_image.convert("RGB")
|
182 |
|
183 |
+
# --- NEW VIDEO PROCESSING FUNCTIONS ---
|
184 |
|
185 |
+
def extract_middle_frame(video_path):
    """Return the middle frame of a video as a PIL Image, or None on failure."""
    try:
        capture = cv2.VideoCapture(video_path)
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        # Seek halfway through the clip before grabbing a frame.
        capture.set(cv2.CAP_PROP_POS_FRAMES, frame_count // 2)
        ok, bgr_frame = capture.read()
        capture.release()

        if ok:
            # OpenCV decodes to BGR; PIL expects RGB.
            return Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))
        return None
    except Exception as e:
        print(f"Error extracting middle frame: {e}")
        return None
|
203 |
+
|
204 |
+
def create_sama_intro_effect(duration=3, size=(1920, 1080), fps=30):
    """Create a sama-style intro effect similar to the ukulele video.

    Returns a moviepy VideoClip of the given duration whose frames are
    procedurally rendered: a warm gradient, pulsing circles, rotating
    note-like dots and vibrating string lines, all synced to a 4-beats/sec
    rhythm, with a 0.5s fade in and out.
    """

    def make_frame(t):
        width, height = size
        frame = np.zeros((height, width, 3), dtype=np.uint8)

        # Warm gradient background, gently varying per row.
        for row in range(height):
            warm = int(25 + 15 * math.sin(row * 0.01))
            frame[row, :] = [warm // 2, warm // 3, warm]

        cx, cy = width // 2, height // 2

        # Musical rhythm visualization (simulating ukulele strums):
        # 4 beats per second, with a softened pulse curve.
        beat_time = t * 4
        beat = abs(math.sin(beat_time * math.pi)) ** 0.5

        # Pulsing concentric circles, like sound waves, fading over time.
        for base_radius in (100, 150, 200, 250):
            radius = int(base_radius + beat * 30)
            alpha = max(0, 0.3 - (t / duration) * 0.2)
            strength = int(alpha * 255 * beat)
            if strength > 10:
                cv2.circle(frame, (cx, cy), radius,
                           (strength // 3, strength // 4, strength // 2), 2)

        # Six rotating note-like dots orbiting the center.
        for k in range(6):
            angle = (t * 60 + k * 60) % 360
            orbit = 180 + 20 * math.sin(beat_time)
            x = int(cx + orbit * math.cos(math.radians(angle)))
            y = int(cy + orbit * math.sin(math.radians(angle)))

            dot = int(8 + beat * 4)
            cv2.circle(frame, (x, y), dot, (150, 100, 50), -1)
            cv2.circle(frame, (x, y), dot + 2, (200, 150, 100), 2)

        # Four wavy horizontal lines, like vibrating ukulele strings.
        for k in range(4):
            y_base = cy - 60 + k * 40
            line_alpha = beat * 0.5
            line_strength = int(line_alpha * 255)

            if line_strength > 20:
                wave_points = []
                for x in range(0, width, 10):
                    wave_y = y_base + int(10 * math.sin(x * 0.02 + t * 8) * beat)
                    wave_points.append((x, wave_y))

                for j in range(len(wave_points) - 1):
                    cv2.line(frame, wave_points[j], wave_points[j + 1],
                             (line_strength // 2, line_strength // 3, line_strength // 4), 2)

        # Global fade in (first 0.5s) and fade out (last 0.5s).
        fade = 1.0
        if t < 0.5:
            fade = t / 0.5
        elif t > duration - 0.5:
            fade = (duration - t) / 0.5

        return (frame * fade).astype(np.uint8)

    # NOTE(review): the `fps` parameter is not applied to the clip here;
    # callers are expected to use set_fps() on the returned clip.
    return VideoClip(make_frame, duration=duration)
|
274 |
+
|
275 |
+
def apply_text_overlay_to_frame(frame, text_to_overlay, bbox):
    """Apply the Persian text overlay to one numpy frame.

    Thin adapter: converts the frame to PIL, delegates to
    overlay_text_on_image(), and converts the result back to a numpy array.
    """
    as_pil = Image.fromarray(frame)
    return np.array(overlay_text_on_image(as_pil, text_to_overlay, bbox))
|
280 |
+
|
281 |
+
def process_video_with_text_overlay(video_path, translated_text, bbox):
    """Return the video at `video_path` with the translated text drawn on every frame."""

    def _overlay(get_frame, t):
        # Per-frame filter used by moviepy's fl(): fetch the frame at time t
        # and stamp the same translated text / bbox onto it.
        return apply_text_overlay_to_frame(get_frame(t), translated_text, bbox)

    clip = VideoFileClip(video_path)
    return clip.fl(_overlay)
|
290 |
+
|
291 |
+
def create_final_video_with_intro(video_path, translated_text, bbox, output_path):
    """Create the final video: sama intro + text-overlaid main video + original music.

    Args:
        video_path: path of the source video.
        translated_text: Persian text to overlay on every frame.
        bbox: (min_x, min_y, max_x, max_y) region of the original English text.
        output_path: where to write the rendered mp4.

    Returns:
        output_path on success, or None on any failure (error is printed).
    """
    try:
        original_video = VideoFileClip(video_path)

        # Build the intro with the same dimensions/fps as the original video.
        intro_duration = 3
        intro = create_sama_intro_effect(
            duration=intro_duration,
            size=(int(original_video.w), int(original_video.h)),
            fps=original_video.fps,
        )
        intro = intro.set_fps(original_video.fps)

        # Apply the Persian text overlay to the main video.
        main_video_with_text = process_video_with_text_overlay(video_path, translated_text, bbox)

        # Smooth transitions.
        # BUGFIX: `from moviepy.video.fx import fadein, fadeout` binds the fx
        # *modules*, not functions, so `fadeout(intro, 0.3)` raised
        # "TypeError: 'module' object is not callable". Use the clip methods
        # that `moviepy.editor` installs on VideoClip instead.
        intro = intro.fadeout(0.3)
        main_video_with_text = main_video_with_text.fadein(0.3)

        # Concatenate intro and main video.
        final_video = concatenate_videoclips([intro, main_video_with_text])

        # Extend the original audio so it also covers the intro.
        if original_video.audio:
            original_audio = original_video.audio

            # If the original audio is shorter than the intro, loop it.
            if original_audio.duration < intro_duration:
                loops_needed = int(intro_duration / original_audio.duration) + 1
                extended_audio = concatenate_audioclips([original_audio] * loops_needed)
                intro_audio = extended_audio.subclip(0, intro_duration)
            else:
                intro_audio = original_audio.subclip(0, intro_duration)

            # Intro audio followed by the full original track.
            full_audio = concatenate_audioclips([intro_audio, original_audio])

            # BUGFIX: bare `audio_fadein` / `audio_fadeout` were never
            # imported (NameError). Use the AudioClip method forms that
            # `moviepy.editor` installs.
            full_audio = full_audio.audio_fadein(0.3).audio_fadeout(0.3)

            final_video = final_video.set_audio(full_audio)

        # Render the result.
        final_video.write_videofile(
            output_path,
            codec='libx264',
            audio_codec='aac',
            temp_audiofile='temp-audio.m4a',
            remove_temp=True,
            fps=original_video.fps,
            preset='medium',
        )

        # Release file handles held by the clips.
        original_video.close()
        final_video.close()

        return output_path

    except Exception as e:
        print(f"Error creating final video: {e}")
        return None
|
358 |
+
|
359 |
+
def process_video_pipeline(video_file):
    """Main processing pipeline for an uploaded video.

    Args:
        video_file: raw bytes of the uploaded video (gr.File with type="binary"),
            or None if nothing was uploaded.

    Returns:
        Tuple of (extracted_text, translated_text, middle_frame, output_video_path),
        matching the four Gradio output components. Trailing items are None on failure.
    """
    if video_file is None:
        return "Please upload a video.", "Translation will appear here.", None, None

    temp_input_path = None
    try:
        # BUGFIX: the original kept both NamedTemporaryFile handles open for
        # the whole call (fd leak, and re-opening by name fails on Windows).
        # Close each handle immediately; delete=False keeps the file on disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input:
            temp_input.write(video_file)
            temp_input_path = temp_input.name

        # The output file must persist after this handler returns so Gradio
        # can serve it — hence delete=False and no cleanup for it here.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_output:
            output_target = temp_output.name

        # Extract middle frame for OCR.
        print("Extracting middle frame for OCR...")
        middle_frame = extract_middle_frame(temp_input_path)
        if middle_frame is None:
            return "Error extracting frame from video.", "No text to translate.", None, None

        # Detect English text and its bounding box.
        print("Performing OCR on middle frame...")
        extracted_text, bbox = extract_text_and_bbox(middle_frame)
        if bbox is None:
            return extracted_text, "No text to translate.", middle_frame, None

        # Translate to Persian via Gemini.
        print("Translating text to Persian...")
        translated_text = translate_text_gemini(extracted_text)
        if "Error" in translated_text:
            return extracted_text, translated_text, middle_frame, None

        # Render final video (intro + overlay + audio).
        print("Creating final video with intro effect...")
        output_path = create_final_video_with_intro(
            temp_input_path, translated_text, bbox, output_target
        )
        if output_path is None:
            return extracted_text, translated_text, middle_frame, None

        print("Video processing completed successfully!")
        return extracted_text, translated_text, middle_frame, output_path

    except Exception as e:
        return f"Error processing video: {str(e)}", "Translation failed.", None, None
    finally:
        # BUGFIX: the original never removed the input temp file.
        if temp_input_path and os.path.exists(temp_input_path):
            try:
                os.remove(temp_input_path)
            except OSError:
                pass
|
402 |
|
403 |
# --- GRADIO INTERFACE ---
# BUGFIX: the emoji in all user-facing labels were cp1253-mojibake
# (e.g. "π¬" for 🎬, "πΉ" for 📹); restored to the intended characters.

with gr.Blocks(title="Persian Video Quote Translator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 Persian Video Quote Translator with Sama Intro")
    gr.Markdown("Upload a video with English text. The app will create a stylized intro effect, detect text from the middle frame, translate it to Persian, and overlay it on the entire video while preserving the original music.")

    with gr.Row():
        with gr.Column(scale=1):
            # type="binary" hands process_video_pipeline the raw bytes.
            video_input = gr.File(
                label="📹 Upload Quote Video",
                file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm"],
                type="binary"
            )

            process_btn = gr.Button("🎯 Process Video", variant="primary", size="lg")

            with gr.Row():
                text_output = gr.Textbox(
                    label="📝 Extracted English Text",
                    placeholder="Detected English text will appear here...",
                    lines=3,
                    show_copy_button=True
                )

                translated_output = gr.Textbox(
                    label="🔤 Persian Translation",
                    placeholder="Persian translation will appear here...",
                    lines=3,
                    show_copy_button=True
                )

        with gr.Column(scale=1):
            frame_output = gr.Image(
                label="🖼️ Middle Frame (OCR Source)",
                type="pil"
            )

            video_output = gr.Video(
                label="🎥 Final Video with Sama Intro",
                format="mp4"
            )

    process_btn.click(
        fn=process_video_pipeline,
        inputs=[video_input],
        outputs=[text_output, translated_output, frame_output, video_output]
    )

    gr.Markdown("### 📋 How it works:")
    gr.Markdown("""
    1. **Upload** a video file containing English text
    2. **Click** 'Process Video' to start the magic ✨
    3. The app will:
       - 🎼 Create a sama-style intro with musical rhythm effects (like your reference video)
       - 🎞️ Extract the middle frame and detect English text using OCR
       - 🌍 Translate the text to beautiful Persian poetry
       - 🎨 Overlay the Persian text on all video frames with proper styling
       - 🎵 Preserve and extend the original audio/music throughout
       - 🎬 Combine everything into a polished final video

    **Supported formats:** MP4, AVI, MOV, MKV, WebM
    """)

if __name__ == "__main__":
    demo.launch()
|