kavehtaheri committed
Commit b12b399 · verified · 1 Parent(s): 32860f3

Update app.py

Files changed (1): app.py +176 -252
app.py CHANGED
@@ -1,270 +1,194 @@
-# app.py
-
 import gradio as gr
-import cv2
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-import easyocr
-import google.generativeai as genai
-import arabic_reshaper
 import os
 import time
-import ffmpeg
-
-# --- CONFIGURATION ---
-API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyCu-tb3BRDIJjUt6G5ccWmrR51tOY0VZd4")
-PERSIAN_FONT_PATH = "Vazir.ttf"
-FADE_IN_DURATION_SECONDS = 1.0
-
-# --- GLOBAL INITIALIZATION ---
-reader = None
-def initialize_reader():
-    """Initializes the EasyOCR reader if it hasn't been already."""
-    global reader
-    if reader is None:
-        print("Loading EasyOCR model...")
-        reader = easyocr.Reader(['en'], gpu=False, verbose=False)
-        print("EasyOCR model loaded successfully!")
-    return reader
-
-# --- CORE PROCESSING FUNCTIONS ---
-
-### NEW ###: This function now also returns the average height of the original text.
-def extract_text_and_bbox(image: Image.Image):
-    """Extracts text, a consolidated bounding box, and the average original text height."""
-    ocr_reader = initialize_reader()
-    img_array = np.array(image)
-    results = ocr_reader.readtext(img_array)
-    if not results: return "No text detected in the image.", None, None
-
-    min_x, min_y = float('inf'), float('inf')
-    max_x, max_y = float('-inf'), float('-inf')
-    text_parts = []
-    original_heights = []  ### NEW ###: List to store heights of each detected text box.
-
-    for (bbox, text, prob) in results:
-        text_parts.append(text)
-        (tl, tr, br, bl) = bbox
-        min_x = min(min_x, tl[0], bl[0])
-        min_y = min(min_y, tl[1], tr[1])
-        max_x = max(max_x, tr[0], br[0])
-        max_y = max(max_y, bl[1], br[1])
-
-        # ### NEW ###: Calculate the height of this specific text box and add it.
-        # This is a direct measure of the original font's pixel size.
-        original_heights.append(br[1] - tr[1])
-
-    extracted_text = ' '.join(text_parts)
-    consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
-
-    # ### NEW ###: Calculate the average height from all detected text parts.
-    average_original_height = sum(original_heights) / len(original_heights) if original_heights else 30  # Fallback
-
-    return extracted_text, consolidated_bbox, average_original_height
-
-def translate_text_gemini(text: str) -> str:
-    """Translates text to colloquial Persian using the Gemini API."""
-    if not API_KEY or "YOUR_GEMINI_API_KEY_HERE" in API_KEY:
-        raise gr.Error("GEMINI_API_KEY is not set. Please add it as a Secret in your Hugging Face Space.")
-    if not text or "No text" in text:
-        return "No valid text to translate."
-
     try:
-        genai.configure(api_key=API_KEY)
-        model = genai.GenerativeModel('gemini-1.5-flash')
-        prompt = f"Translate the following English quotes into Persian. The translation should be colloquial, poetic, concise, and meaningful. Preserve the original message and tone. Avoid literal translations. Provide only the translated Persian text. Quotes: [{text}]"
-        response = model.generate_content(prompt)
-        return response.text.strip()
-    except Exception as e:
-        return f"Error during translation with Gemini: {str(e)}"
-
-### NEW ###: This function now accepts `average_original_height` to guide its font sizing.
-def render_translated_overlay(original_image: Image.Image, text_to_overlay: str, bbox: tuple, average_original_height: float) -> (Image.Image, tuple):
-    """
-    Creates an overlay layer with correctly rendered, wrapped Persian text,
-    sized to match the original text's height.
-    """
-    padding = 15
-    overlay_box = (
-        max(0, bbox[0] - padding),
-        max(0, bbox[1] - padding),
-        min(original_image.width, bbox[2] + padding),
-        min(original_image.height, bbox[3] + padding)
-    )
-    overlay_width = overlay_box[2] - overlay_box[0]
-    overlay_height = overlay_box[3] - overlay_box[1]
-
-    try:
-        sample_x = max(0, int(overlay_box[0]) - 5)
-        sample_y = int((overlay_box[1] + overlay_box[3]) / 2)
-        bg_color = original_image.getpixel((sample_x, sample_y))
-    except (ValueError, IndexError):
-        bg_color = (25, 25, 25)
-
-    overlay_layer = Image.new("RGBA", (overlay_width, overlay_height), bg_color)
-    draw = ImageDraw.Draw(overlay_layer)
-
-    if not os.path.exists(PERSIAN_FONT_PATH):
-        raise FileNotFoundError(f"Font file not found at '{PERSIAN_FONT_PATH}'. Please upload it to your Space.")
-
-    target_width = overlay_width * 0.90
-    target_height = overlay_height * 0.90
-
-    # ### NEW ###: This is the key change! We start the font size based on the original text's measured height.
-    # The 0.95 multiplier accounts for typical font padding, giving a closer visual match.
-    font_size = int(average_original_height * 0.95)
-
-    final_wrapped_lines = []
-
-    # This loop now starts with an intelligent font size and only shrinks if the wrapped
-    # text is too tall for the bounding box (a necessary fallback).
-    while font_size > 10:
-        font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
-        words = text_to_overlay.split()
-        if not words: break
-
-        raw_lines = []
-        current_line = ""
-        for word in words:
-            test_line = (current_line + " " + word).strip()
-            reshaped_test_line = arabic_reshaper.reshape(test_line)
-            line_width = draw.textbbox((0, 0), reshaped_test_line, font=font)[2]
-
-            if line_width <= target_width:
-                current_line = test_line
             else:
-                raw_lines.append(current_line)
-                current_line = word
-        raw_lines.append(current_line)
-
-        line_spacing = font_size * 0.3
-        reshaped_for_height_calc = [arabic_reshaper.reshape(l) for l in raw_lines]
-        line_heights = [draw.textbbox((0,0), l, font=font)[3] - draw.textbbox((0,0), l, font=font)[1] for l in reshaped_for_height_calc]
-        total_height = sum(line_heights) + (len(raw_lines) - 1) * line_spacing
-
-        if total_height <= target_height:
-            final_wrapped_lines = raw_lines
-            break
         else:
-            font_size -= 2  # Shrink font and try again if it doesn't fit
-
-    if not final_wrapped_lines:
-        final_wrapped_lines = [text_to_overlay]
-
-    final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
-    line_spacing = font_size * 0.3
-    final_reshaped_lines = [arabic_reshaper.reshape(l) for l in final_wrapped_lines]
-    line_heights = [draw.textbbox((0,0), l, font=final_font)[3] - draw.textbbox((0,0), l, font=final_font)[1] for l in final_reshaped_lines]
-    total_text_height = sum(line_heights) + (len(final_reshaped_lines) - 1) * line_spacing
-
-    y_start = (overlay_height - total_text_height) / 2
-    current_y = y_start
-
-    for i, reshaped_line in enumerate(final_reshaped_lines):
-        x_center = overlay_width / 2
-        line_y_center = current_y + line_heights[i] / 2
-
-        draw.text((x_center + 1, line_y_center + 1), reshaped_line, font=final_font, fill=(0, 0, 0, 180), anchor="mm")
-        draw.text((x_center, line_y_center), reshaped_line, font=final_font, fill=(255, 255, 255, 255), anchor="mm")
-
-        current_y += line_heights[i] + line_spacing
-
-    return overlay_layer, overlay_box
-
-
-# --- MAIN VIDEO PROCESSING PIPELINE ---
-def process_video(video_path, progress=gr.Progress()):
-    if video_path is None: raise gr.Error("Please upload a video file first.")
-
-    progress(0, desc="Loading Video & Analyzing...")
-    cap = cv2.VideoCapture(video_path)
-    if not cap.isOpened(): raise gr.Error("Could not open video file.")
-
-    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    fps = cap.get(cv2.CAP_PROP_FPS)
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
-    cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
-    ret, middle_frame_bgr = cap.read()
-    if not ret: raise gr.Error("Could not read middle frame.")
-    middle_frame_rgb_pil = Image.fromarray(cv2.cvtColor(middle_frame_bgr, cv2.COLOR_BGR2RGB))
-
-    progress(0.2, desc="Detecting & Measuring Text (EasyOCR)...")
-    # ### NEW ###: Capture the average_original_height from our updated function.
-    extracted_text, bbox, avg_height = extract_text_and_bbox(middle_frame_rgb_pil)
-    if bbox is None: raise gr.Error(extracted_text)
-
-    progress(0.4, desc="Translating Text (Gemini API)...")
-    translated_text = translate_text_gemini(extracted_text)
-    if "Error" in translated_text: raise gr.Error(translated_text)
-
-    progress(0.5, desc="Rendering Translated Text Overlay...")
-    # ### NEW ###: Pass the measured average height to the rendering function.
-    overlay_stamp_pil, overlay_position_box = render_translated_overlay(middle_frame_rgb_pil, translated_text, bbox, avg_height)
-
-    overlay_stamp_cv = cv2.cvtColor(np.array(overlay_stamp_pil), cv2.COLOR_RGBA2BGRA)
-
-    timestamp = int(time.time())
-    temp_silent_path = f"temp_silent_{timestamp}.mp4"
-    final_output_path = f"translated_video_{timestamp}.mp4"
-
-    progress(0.6, desc="Composing Silent Video with Overlay...")
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(temp_silent_path, fourcc, fps, (frame_width, frame_height))
-
-    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
-    frame_idx = 0
-    x_min, y_min, x_max, y_max = overlay_position_box
-
-    while True:
-        ret, frame = cap.read()
-        if not ret: break
-
-        roi = frame[y_min:y_max, x_min:x_max]
-        alpha = overlay_stamp_cv[:, :, 3] / 255.0
-        alpha_mask = cv2.merge([alpha, alpha, alpha])
-        blended_roi = (roi.astype(float) * (1.0 - alpha_mask) + overlay_stamp_cv[:, :, :3].astype(float) * alpha_mask)
-        frame[y_min:y_max, x_min:x_max] = blended_roi.astype(np.uint8)
-
-        out.write(frame)
-        frame_idx += 1
-        progress(0.6 + (0.3 * frame_idx / total_frames), desc=f"Processing frame {frame_idx}/{total_frames}")
-
-    cap.release(); out.release()
-
-    progress(0.95, desc="Merging Audio and Applying Fade (ffmpeg)...")
-    try:
-        input_video = ffmpeg.input(temp_silent_path)
-        input_audio = ffmpeg.input(video_path).audio
-
-        (ffmpeg.output(
-            input_video.video.filter('fade', type='in', start_time=0, duration=FADE_IN_DURATION_SECONDS),
-            input_audio, final_output_path, vcodec='libx264', acodec='copy', shortest=None
-        ).run(overwrite_output=True, quiet=True))
-    except ffmpeg.Error as e:
-        print('ffmpeg stdout:', e.stdout.decode('utf8', errors='ignore'))
-        print('ffmpeg stderr:', e.stderr.decode('utf8', errors='ignore'))
-        raise gr.Error(f"ffmpeg error: {e.stderr.decode('utf8', errors='ignore')}")
     finally:
-        if os.path.exists(temp_silent_path): os.remove(temp_silent_path)
-
-    progress(1, desc="Done!")
-    return final_output_path
-
-# --- GRADIO INTERFACE ---
-with gr.Blocks(theme=gr.themes.Soft(), title="Persian Video Quote Translator") as demo:
-    gr.Markdown("# 🎬 Persian Video Quote Translator")
-    gr.Markdown("Upload a short video containing English text. The app will detect the text, replace it with a poetic Persian translation, and preserve the original audio and video duration.")
     with gr.Row():
-        video_input = gr.Video(label="Upload Video")
-        video_output = gr.Video(label="Translated Video Output")
-        translate_button = gr.Button("Translate Video", variant="primary")
-
-    translate_button.click(fn=process_video, inputs=[video_input], outputs=[video_output])
-
-    gr.Markdown("---")
-    gr.Markdown("### How it works:\n1. It analyzes the middle frame to **measure the original text's height** and find its location.\n2. It uses the Gemini API to get a high-quality, poetic Persian translation.\n3. It renders the Persian text at a size that **matches the original**, wrapping it smartly to fit.\n4. It composites this new text overlay onto every frame of the video.\n5. Finally, it uses `ffmpeg` to merge the new video with the **original audio** and add a 1-second fade-in effect.")

 if __name__ == "__main__":
     demo.launch(debug=True)

 import gradio as gr
+import requests
 import os
+import json
 import time
+# CORRECT, HIGH-LEVEL IMPORT for MoviePy. This brings in all editing functions.
+from moviepy.editor import VideoFileClip
+import traceback  # Import traceback for detailed error logging
+
+# --- 1. CONFIGURATION & CONSTANTS ---
+# Securely load API key from Hugging Face Space secrets.
+ONE_API_KEY = os.environ.get("ONE_API_KEY", "268976:66f4f58a2a905")
+# The custom endpoint for the one-api.ir service.
+ONE_API_URL = "https://api.one-api.ir/chatbot/v1/gpt4o/"
+
+# --- MASTER PROMPTS ---
+PROMPT_SHORTS_MODE = """
+You are an expert producer of viral short-form content. Analyze the provided SRT transcript to find the single most impactful, hook-worthy segment for a video under 3 minutes (ideally 60-90 seconds).
+PRIORITIES: Strong Hook, Single Clear Point, High Energy, Clear Payoff.
+**Input SRT Content:**
+{transcript_content}
+**Instructions:**
+Your output MUST be a single, valid JSON object and nothing else. Do not include any text, code blocks, or explanations before or after the JSON object.
+{{
+    "clip_title_suggestion": "A catchy, clickbait-style title for this short clip.",
+    "reasoning": "Briefly explain why this segment is perfect for a short video, referencing the hook and payoff.",
+    "final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
+    "final_clip_end_seconds": <The precise end time in total seconds from the SRT>
+}}
+"""
+
+PROMPT_NARRATIVE_MODE = """
+You are an expert video editor and storyteller. Analyze the provided transcript to find the most compelling narrative segment between 5 and 12 minutes long.
+METHODOLOGY: Identify the peak moment, then find the corresponding setup and resolution to create a complete narrative arc.
+**Input SRT Content:**
+{transcript_content}
+**Instructions:**
+Your output MUST be a single, valid JSON object and nothing else. Do not include any text, code blocks, or explanations before or after the JSON object.
+{{
+    "narrative_summary": "A one-sentence summary of the story told in the extracted clip.",
+    "reasoning": "Explain why this segment works as a standalone narrative, mentioning the peak moment and how the start/end points provide a full arc.",
+    "final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
+    "final_clip_end_seconds": <The precise end time in total seconds from the SRT>
+}}
+"""
+
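Both prompts ask the model to return timestamps as total seconds from the SRT. For reference, a minimal sketch (not part of the committed file; the helper name is hypothetical) of how an SRT timestamp maps onto those values:

def srt_timestamp_to_seconds(ts: str) -> float:
    """Convert an SRT 'HH:MM:SS,mmm' timestamp to total seconds, e.g. '00:01:23,500' -> 83.5."""
    hms, millis = ts.split(",")
    hours, minutes, seconds = (int(part) for part in hms.split(":"))
    return hours * 3600 + minutes * 60 + seconds + int(millis) / 1000.0

assert srt_timestamp_to_seconds("00:01:23,500") == 83.5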
+# --- 2. LLM AGENT WRAPPER ---
+def call_gpt4o_oneapi(transcript_content, prompt_template):
+    """Makes a robust, custom API call to the one-api.ir service."""
+    if not ONE_API_KEY or not ONE_API_URL:
+        raise ValueError("ONE_API_KEY and ONE_API_URL secrets are not set correctly.")
+
+    headers = {
+        "one-api-token": ONE_API_KEY,
+        "Content-Type": "application/json"
+    }
+    final_prompt = prompt_template.format(transcript_content=transcript_content)
+    payload = [{"role": "user", "content": final_prompt}]
+
     try:
+        response = requests.post(ONE_API_URL, headers=headers, json=payload, timeout=180)
+        response.raise_for_status()
+        result = response.json()
+
+        if "result" not in result or not isinstance(result.get("result"), list) or len(result["result"]) == 0:
+            return f"Error: Unexpected JSON structure from API. 'result' list not found or empty.\nFull response: {json.dumps(result)}"
+
+        first_item_in_result = result["result"][0]
+
+        if isinstance(first_item_in_result, dict):
+            message_content = first_item_in_result.get("content")
+            if message_content:
+                return message_content
             else:
+                return f"Error: 'content' key not found in API response dictionary.\nFull response: {json.dumps(result)}"
+        elif isinstance(first_item_in_result, str):
+            return first_item_in_result
         else:
+            return f"Error: Unknown item type in API 'result' list.\nFull response: {json.dumps(result)}"
+
+    except requests.exceptions.HTTPError as e:
+        return f"HTTP Error calling API: {e}\nResponse Body: {e.response.text}"
+    except requests.exceptions.RequestException as e:
+        return f"Error connecting to API: {str(e)}"
+    except json.JSONDecodeError:
+        return f"Error: Failed to decode JSON from API response.\nResponse Body: {response.text}"
+
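The wrapper above accepts two shapes for the service's "result" list: a list of message dictionaries or a list of plain strings. A minimal sketch of that parsing contract, not part of the committed file, using invented response bodies (the exact payloads one-api.ir returns are an assumption):

# Invented examples of the two accepted shapes; not captured API output.
dict_shape = {"result": [{"role": "assistant", "content": "..."}]}
str_shape = {"result": ["..."]}

def extract_content(body: dict) -> str:
    # Mirrors the branching in call_gpt4o_oneapi for an already-parsed body.
    first = body["result"][0]
    if isinstance(first, dict):
        return first.get("content", "")
    return first

assert extract_content(dict_shape) == extract_content(str_shape) == "..."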
+# --- 3. CORE ORCHESTRATOR FUNCTION ---
+def generate_viral_clip(video_file, srt_file, analysis_mode, progress=gr.Progress()):
+    if not video_file or not srt_file:
+        return "Error: Please upload both a video file and an SRT file.", None
+    if not ONE_API_KEY or not ONE_API_URL:
+        return "Error: API keys for OneAPI are not configured correctly in the Space secrets.", None
+
+    video = None
+    new_clip = None
+    try:
+        progress(0.1, desc="Reading SRT file...")
+        with open(srt_file, 'r', encoding='utf-8') as f:
+            transcript_content = f.read()
+
+        progress(0.2, desc="Preparing analysis prompt...")
+        prompt_template = PROMPT_SHORTS_MODE if analysis_mode == "Viral Spot for Shorts (< 3 mins)" else PROMPT_NARRATIVE_MODE
+
+        progress(0.4, desc="Calling AI for analysis...")
+        llm_response_str = call_gpt4o_oneapi(transcript_content, prompt_template)
+
+        progress(0.7, desc="Parsing AI response...")
+        if llm_response_str.startswith("Error") or llm_response_str.startswith("HTTP Error"):
+            return llm_response_str, None
+
+        try:
+            cleaned_response = llm_response_str.strip()
+            if cleaned_response.startswith("```json"):
+                cleaned_response = cleaned_response[7:]
+            if cleaned_response.endswith("```"):
+                cleaned_response = cleaned_response[:-3]
+
+            parsed_response = json.loads(cleaned_response)
+
+            if not isinstance(parsed_response, dict):
+                raise TypeError(f"AI did not return a valid JSON object. It returned a {type(parsed_response).__name__}.")
+
+            start_time = float(parsed_response['final_clip_start_seconds'])
+            end_time = float(parsed_response['final_clip_end_seconds'])
+            reasoning = parsed_response.get('reasoning', 'No reasoning provided.')
+
+            summary = (f"✅ Analysis Complete!\n\n"
+                       f"Reasoning: {reasoning}\n\n"
+                       f"Title Suggestion: {parsed_response.get('clip_title_suggestion', 'N/A')}\n"
+                       f"Narrative Summary: {parsed_response.get('narrative_summary', 'N/A')}\n\n"
+                       f"Clipping video from {time.strftime('%H:%M:%S', time.gmtime(start_time))} to {time.strftime('%H:%M:%S', time.gmtime(end_time))}.")
+
+        except (json.JSONDecodeError, KeyError, TypeError) as e:
+            error_msg = f"Error: Failed to parse AI response. Details: {e}\n\nRaw AI Response:\n---\n{llm_response_str}"
+            return error_msg, None
+
+        progress(0.8, desc="Clipping video...")
+        output_filename = "viral_clip.mp4"
+
+        video = VideoFileClip(video_file)
+
+        if end_time > video.duration:
+            end_time = video.duration
+            summary += f"\n\n⚠️ Warning: End time was beyond video duration, adjusted to {end_time:.2f}s."
+
+        new_clip = video.subclip(start_time, end_time)
+        new_clip.write_videofile(output_filename, codec="libx264", audio_codec="aac")
+
+        progress(1.0, desc="Done!")
+        return summary, output_filename
+
+    except Exception as e:
+        tb_str = traceback.format_exc()
+        return f"An unexpected error occurred in the main process: {str(e)}\n\nTraceback:\n{tb_str}", None
+
     finally:
+        if new_clip:
+            new_clip.close()
+        if video:
+            video.close()
+
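Note that `from moviepy.editor import VideoFileClip` and `clip.subclip(...)` are the MoviePy 1.x API; MoviePy 2.x removed the `moviepy.editor` module and renamed `subclip` to `subclipped`. A purely illustrative compatibility shim, not part of this commit, in case the Space's environment is ever upgraded:

try:
    # MoviePy 1.x layout, as used by this commit.
    from moviepy.editor import VideoFileClip

    def cut(clip, start, end):
        return clip.subclip(start, end)
except ImportError:
    # MoviePy 2.x layout: moviepy.editor is gone and subclip was renamed.
    from moviepy import VideoFileClip

    def cut(clip, start, end):
        return clip.subclipped(start, end)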
+# --- 4. GRADIO UI DEFINITION ---
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # 🎬 AI Viral Video Extractor
+        This tool uses an AI agent to analyze a video transcript and automatically clip the most viral segment.
+        **⚠️ Important Setup:** For best security, configure `ONE_API_KEY` in your Hugging Face **Space Settings > Secrets**.
+        """
+    )
     with gr.Row():
+        with gr.Column(scale=1):
+            video_input = gr.Video(label="1. Upload Original Video")
+            srt_input = gr.File(label="2. Upload English SRT File", file_types=['.srt'])
+            mode_input = gr.Radio(
+                label="3. Select Analysis Mode",
+                choices=["Viral Spot for Shorts (< 3 mins)", "Viral Narrative Clip (5-12 mins)"],
+                value="Viral Narrative Clip (5-12 mins)"
+            )
+            submit_button = gr.Button("🚀 Generate Viral Clip", variant="primary")
+
+        with gr.Column(scale=2):
+            summary_output = gr.Textbox(label="Analysis Summary", lines=12, interactive=False)
+            video_output = gr.Video(label="Generated Clip", interactive=False)
+
+    submit_button.click(
+        fn=generate_viral_clip,
+        inputs=[video_input, srt_input, mode_input],
+        outputs=[summary_output, video_output],
+    )
+
 if __name__ == "__main__":
     demo.launch(debug=True)
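For quick testing outside the Gradio UI, a minimal command-line sketch of the same pipeline (assuming the functions above are importable); "episode.mp4" and "episode.srt" are hypothetical local files, and a valid ONE_API_KEY must be available in the environment for the API call to succeed:

import json
from moviepy.editor import VideoFileClip

with open("episode.srt", "r", encoding="utf-8") as f:
    srt_text = f.read()

# Ask the model for a clip spec, then strip any ```json fence before parsing.
answer = call_gpt4o_oneapi(srt_text, PROMPT_SHORTS_MODE)
spec = json.loads(answer.strip().removeprefix("```json").removesuffix("```"))

with VideoFileClip("episode.mp4") as video:
    clip = video.subclip(spec["final_clip_start_seconds"],
                         min(spec["final_clip_end_seconds"], video.duration))
    clip.write_videofile("viral_clip.mp4", codec="libx264", audio_codec="aac")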