Update app.py
app.py
CHANGED
@@ -1,270 +1,194 @@
- # app.py
-
  import gradio as gr
- import cv2  # truncated to a bare "import" in the diff view; cv2 is used throughout
- import numpy as np
- from PIL import Image, ImageDraw, ImageFont
- import easyocr
- import google.generativeai as genai
- import arabic_reshaper
  import os
  import time
-
- # … (old lines 12-30 collapsed in the diff view: the remaining imports, e.g. ffmpeg,
- #     and configuration such as API_KEY, PERSIAN_FONT_PATH, FADE_IN_DURATION_SECONDS,
- #     and the initialize_reader() EasyOCR setup, all referenced below)
-

- ### NEW ###: This function now also returns the average height of the original text.
- def extract_text_and_bbox(image: Image.Image):
-     """Extracts text, a consolidated bounding box, and the average original text height."""
-     ocr_reader = initialize_reader()
-     img_array = np.array(image)
-     results = ocr_reader.readtext(img_array)
-     if not results: return "No text detected in the image.", None, None
-
-     min_x, min_y = float('inf'), float('inf')
-     max_x, max_y = float('-inf'), float('-inf')
-     text_parts = []
-     original_heights = []  ### NEW ###: List to store heights of each detected text box.
-
-     for (bbox, text, prob) in results:
-         text_parts.append(text)
-         (tl, tr, br, bl) = bbox
-         min_x = min(min_x, tl[0], bl[0])
-         min_y = min(min_y, tl[1], tr[1])
-         max_x = max(max_x, tr[0], br[0])
-         max_y = max(max_y, bl[1], br[1])
-
-         # ### NEW ###: Calculate the height of this specific text box and add it.
-         # This is a direct measure of the original font's pixel size.
-         original_heights.append(br[1] - tr[1])
-
-     extracted_text = ' '.join(text_parts)
-     consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
-
-     # ### NEW ###: Calculate the average height from all detected text parts.
-     average_original_height = sum(original_heights) / len(original_heights) if original_heights else 30  # Fallback
-
-     return extracted_text, consolidated_bbox, average_original_height
-
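For context on the height arithmetic: each EasyOCR detection is a (bbox, text, confidence) tuple whose bbox lists four (x, y) corners in top-left, top-right, bottom-right, bottom-left order, so br[1] - tr[1] reads the pixel height off the right edge. A single result might look like this (values made up):

    ([[10, 20], [210, 20], [210, 56], [10, 56]], "HELLO", 0.93)
    # height = br[1] - tr[1] = 56 - 20 = 36 px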
- def translate_text_gemini(text: str) -> str:
-     """Translates text to colloquial Persian using the Gemini API."""
-     if not API_KEY or "YOUR_GEMINI_API_KEY_HERE" in API_KEY:
-         raise gr.Error("GEMINI_API_KEY is not set. Please add it as a Secret in your Hugging Face Space.")
-     if not text or "No text" in text:
-         return "No valid text to translate."
-
      try:
-         # … (old lines 73-78 collapsed in the diff view: the genai call and its except clause)
-         return f"Error during translation with Gemini: {str(e)}"
-
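The collapsed body presumably wrapped a standard google.generativeai call; a minimal sketch, where the model name and prompt wording are assumptions:

    model = genai.GenerativeModel("gemini-1.5-flash")  # model name assumed
    response = model.generate_content(f"Translate this into colloquial Persian: {text}")
    return response.text.strip()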
- ### NEW ###: This function now accepts `average_original_height` to guide its font sizing.
- def render_translated_overlay(original_image: Image.Image, text_to_overlay: str, bbox: tuple, average_original_height: float) -> (Image.Image, tuple):
-     """
-     Creates an overlay layer with correctly rendered, wrapped Persian text,
-     sized to match the original text's height.
-     """
-     padding = 15
-     overlay_box = (
-         max(0, bbox[0] - padding),
-         max(0, bbox[1] - padding),
-         min(original_image.width, bbox[2] + padding),
-         min(original_image.height, bbox[3] + padding)
-     )
-     overlay_width = overlay_box[2] - overlay_box[0]
-     overlay_height = overlay_box[3] - overlay_box[1]
-
-     try:
-         sample_x = max(0, int(overlay_box[0]) - 5)
-         sample_y = int((overlay_box[1] + overlay_box[3]) / 2)
-         bg_color = original_image.getpixel((sample_x, sample_y))
-     except (ValueError, IndexError):
-         bg_color = (25, 25, 25)
-
-     overlay_layer = Image.new("RGBA", (overlay_width, overlay_height), bg_color)
-     draw = ImageDraw.Draw(overlay_layer)
-
-     if not os.path.exists(PERSIAN_FONT_PATH):
-         raise FileNotFoundError(f"Font file not found at '{PERSIAN_FONT_PATH}'. Please upload it to your Space.")
-
-     target_width = overlay_width * 0.90
-     target_height = overlay_height * 0.90
-
-     # ### NEW ###: This is the key change! We start the font size based on the original text's measured height.
-     # The 0.95 multiplier accounts for typical font padding, giving a closer visual match.
-     font_size = int(average_original_height * 0.95)
-
-     final_wrapped_lines = []
-
-     # This loop now starts with an intelligent font size and only shrinks if the wrapped
-     # text is too tall for the bounding box (a necessary fallback).
-     while font_size > 10:
-         font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
-         words = text_to_overlay.split()
-         if not words: break

-         current_line = ""
-         for word in words:
-             test_line = (current_line + " " + word).strip()
-             reshaped_test_line = arabic_reshaper.reshape(test_line)
-             line_width = draw.textbbox((0, 0), reshaped_test_line, font=font)[2]
-             # … (old lines 132-139 collapsed in the diff view: the width test against
-             #     target_width that folds words into `raw_lines`; only a bare `else:` survives)
-
-         line_spacing = font_size * 0.3
-         reshaped_for_height_calc = [arabic_reshaper.reshape(l) for l in raw_lines]
-         line_heights = [draw.textbbox((0,0), l, font=font)[3] - draw.textbbox((0,0), l, font=font)[1] for l in reshaped_for_height_calc]
-         total_height = sum(line_heights) + (len(raw_lines) - 1) * line_spacing
-
-         if total_height <= target_height:
-             final_wrapped_lines = raw_lines
-             break
          else:
-             # … (old lines 149-174 collapsed in the diff view: the font-size reduction step,
-             #     the final text drawing, and the function's return)
-
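One detail the surviving lines leave implicit: arabic_reshaper.reshape only substitutes the joined (contextual) glyph forms, while PIL rasterizes strings left-to-right, so right-to-left text is usually also passed through a bidi reordering step before draw.text. A typical pairing (python-bidi is an assumption; it does not appear in the surviving lines):

    from bidi.algorithm import get_display

    reshaped = arabic_reshaper.reshape("سلام دنیا")  # "Hello, world" in Persian
    display_text = get_display(reshaped)  # reorder RTL runs for LTR drawing
    draw.text((x, y), display_text, font=font, fill=(255, 255, 255))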
- # --- MAIN VIDEO PROCESSING PIPELINE ---
- def process_video(video_path, progress=gr.Progress()):
-     if video_path is None: raise gr.Error("Please upload a video file first.")
-
-     progress(0, desc="Loading Video & Analyzing...")
-     cap = cv2.VideoCapture(video_path)
-     if not cap.isOpened(): raise gr.Error("Could not open video file.")
-
-     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-     fps = cap.get(cv2.CAP_PROP_FPS)
-     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
-     cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
-     ret, middle_frame_bgr = cap.read()
-     if not ret: raise gr.Error("Could not read middle frame.")
-     middle_frame_rgb_pil = Image.fromarray(cv2.cvtColor(middle_frame_bgr, cv2.COLOR_BGR2RGB))
-
-     progress(0.2, desc="Detecting & Measuring Text (EasyOCR)...")
-     # ### NEW ###: Capture the average_original_height from our updated function.
-     extracted_text, bbox, avg_height = extract_text_and_bbox(middle_frame_rgb_pil)
-     if bbox is None: raise gr.Error(extracted_text)
-
-     # … (old lines 198-199 collapsed in the diff view: the translate_text_gemini call
-     #     producing `translated_text`)
-     if "Error" in translated_text: raise gr.Error(translated_text)
-
-     # … (old lines 202-204 collapsed in the diff view: the render_translated_overlay call
-     #     producing `overlay_stamp_pil` and `overlay_position_box`)
-
-     overlay_stamp_cv = cv2.cvtColor(np.array(overlay_stamp_pil), cv2.COLOR_RGBA2BGRA)
-
-     timestamp = int(time.time())
-     temp_silent_path = f"temp_silent_{timestamp}.mp4"
-     final_output_path = f"translated_video_{timestamp}.mp4"
-
-     progress(0.6, desc="Composing Silent Video with Overlay...")
-     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-     out = cv2.VideoWriter(temp_silent_path, fourcc, fps, (frame_width, frame_height))
-
-     cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
-     frame_idx = 0
-     x_min, y_min, x_max, y_max = overlay_position_box
-
-     while True:
-         ret, frame = cap.read()
-         if not ret: break
-
-         # Per-pixel alpha compositing: result = background * (1 - alpha) + overlay * alpha.
-         roi = frame[y_min:y_max, x_min:x_max]
-         alpha = overlay_stamp_cv[:, :, 3] / 255.0
-         alpha_mask = cv2.merge([alpha, alpha, alpha])
-         blended_roi = (roi.astype(float) * (1.0 - alpha_mask) + overlay_stamp_cv[:, :, :3].astype(float) * alpha_mask)
-         frame[y_min:y_max, x_min:x_max] = blended_roi.astype(np.uint8)
-
-         out.write(frame)
-         frame_idx += 1
-         progress(0.6 + (0.3 * frame_idx / total_frames), desc=f"Processing frame {frame_idx}/{total_frames}")
-
-     cap.release(); out.release()
-
-     progress(0.95, desc="Merging Audio and Applying Fade (ffmpeg)...")
-     try:
-         input_video = ffmpeg.input(temp_silent_path)
-         input_audio = ffmpeg.input(video_path).audio
-
-         (ffmpeg.output(
-             input_video.video.filter('fade', type='in', start_time=0, duration=FADE_IN_DURATION_SECONDS),
-             input_audio, final_output_path, vcodec='libx264', acodec='copy', shortest=None
-         ).run(overwrite_output=True, quiet=True))
-     except ffmpeg.Error as e:
-         print('ffmpeg stdout:', e.stdout.decode('utf8', errors='ignore'))
-         print('ffmpeg stderr:', e.stderr.decode('utf8', errors='ignore'))
-         raise gr.Error(f"ffmpeg error: {e.stderr.decode('utf8', errors='ignore')}")
      finally:
-         if …  # (old lines 250-254 collapsed in the diff view: temp-file cleanup and the return)
-
- # --- GRADIO …  (old line 255 cut off: the UI section header)
- with gr.Blocks(theme=gr.themes.Soft()) as demo:  # closing ") as demo:" inferred from line 270
-     gr.Markdown(
-         # … (old line 258 collapsed in the diff view: the Markdown body)
      with gr.Row():
-         # … (old lines 260-267 collapsed in the diff view: the UI components and the click handler)

  if __name__ == "__main__":
      demo.launch(debug=True)
  import gradio as gr
+ import requests
  import os
+ import json
  import time
+ # High-level MoviePy import; `moviepy.editor` pulls in all of the editing classes.
+ from moviepy.editor import VideoFileClip
+ import traceback  # for detailed error logging
+
+ # --- 1. CONFIGURATION & CONSTANTS ---
+ # Load the API key from the Hugging Face Space secrets. Note that the hardcoded
+ # fallback defeats the point of a secret and should be removed before publishing.
+ ONE_API_KEY = os.environ.get("ONE_API_KEY", "268976:66f4f58a2a905")
+ # The custom endpoint for the one-api.ir service.
+ ONE_API_URL = "https://api.one-api.ir/chatbot/v1/gpt4o/"
+
+ # --- MASTER PROMPTS ---
+ PROMPT_SHORTS_MODE = """
+ You are an expert producer of viral short-form content. Analyze the provided SRT transcript to find the single most impactful, hook-worthy segment for a video under 3 minutes (ideally 60-90 seconds).
+ PRIORITIES: Strong Hook, Single Clear Point, High Energy, Clear Payoff.
+ **Input SRT Content:**
+ {transcript_content}
+ **Instructions:**
+ Your output MUST be a single, valid JSON object and nothing else. Do not include any text, code blocks, or explanations before or after the JSON object.
+ {{
+ "clip_title_suggestion": "A catchy, clickbait-style title for this short clip.",
+ "reasoning": "Briefly explain why this segment is perfect for a short video, referencing the hook and payoff.",
+ "final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
+ "final_clip_end_seconds": <The precise end time in total seconds from the SRT>
+ }}
+ """
+
+ PROMPT_NARRATIVE_MODE = """
+ You are an expert video editor and storyteller. Analyze the provided transcript to find the most compelling narrative segment between 5 and 12 minutes long.
+ METHODOLOGY: Identify the peak moment, then find the corresponding setup and resolution to create a complete narrative arc.
+ **Input SRT Content:**
+ {transcript_content}
+ **Instructions:**
+ Your output MUST be a single, valid JSON object and nothing else. Do not include any text, code blocks, or explanations before or after the JSON object.
+ {{
+ "narrative_summary": "A one-sentence summary of the story told in the extracted clip.",
+ "reasoning": "Explain why this segment works as a standalone narrative, mentioning the peak moment and how the start/end points provide a full arc.",
+ "final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
+ "final_clip_end_seconds": <The precise end time in total seconds from the SRT>
+ }}
+ """
+
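Since these templates go through str.format, the literal braces of the JSON skeleton are doubled ({{ and }}) so that only {transcript_content} is substituted. A conforming model reply is a bare JSON object, for example (values illustrative):

    {
      "clip_title_suggestion": "He Finally Admits It, On Camera",
      "reasoning": "Opens on a direct hook and pays it off within 80 seconds.",
      "final_clip_start_seconds": 754.0,
      "final_clip_end_seconds": 834.5
    }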
+ # --- 2. LLM AGENT WRAPPER ---
+ def call_gpt4o_oneapi(transcript_content, prompt_template):
+     """Makes a robust, custom API call to the one-api.ir service."""
+     if not ONE_API_KEY or not ONE_API_URL:
+         raise ValueError("ONE_API_KEY and ONE_API_URL secrets are not set correctly.")
+
+     headers = {
+         "one-api-token": ONE_API_KEY,
+         "Content-Type": "application/json"
+     }
+     final_prompt = prompt_template.format(transcript_content=transcript_content)
+     # Note: this gateway takes the chat message list directly as the request body.
+     payload = [{"role": "user", "content": final_prompt}]
+
      try:
+         response = requests.post(ONE_API_URL, headers=headers, json=payload, timeout=180)
+         response.raise_for_status()
+         result = response.json()
+
+         if "result" not in result or not isinstance(result.get("result"), list) or len(result["result"]) == 0:
+             return f"Error: Unexpected JSON structure from API. 'result' list not found or empty.\nFull response: {json.dumps(result)}"

+         first_item_in_result = result["result"][0]
+
+         if isinstance(first_item_in_result, dict):
+             message_content = first_item_in_result.get("content")
+             if message_content:
+                 return message_content
              else:
+                 return f"Error: 'content' key not found in API response dictionary.\nFull response: {json.dumps(result)}"
+         elif isinstance(first_item_in_result, str):
+             return first_item_in_result
          else:
+             return f"Error: Unknown item type in API 'result' list.\nFull response: {json.dumps(result)}"
+
+     except requests.exceptions.HTTPError as e:
+         return f"HTTP Error calling API: {e}\nResponse Body: {e.response.text}"
+     except requests.exceptions.RequestException as e:
+         return f"Error connecting to API: {str(e)}"
+     except json.JSONDecodeError:
+         return f"Error: Failed to decode JSON from API response.\nResponse Body: {response.text}"
+
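The branching above tolerates two response shapes from the gateway; apart from the result and content keys, which the code itself checks, the field names here are assumptions:

    {"result": [{"role": "assistant", "content": "{ ...json... }"}]}   # dict item
    {"result": ["{ ...json... }"]}                                     # bare string item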
+ # --- 3. CORE ORCHESTRATOR FUNCTION ---
+ def generate_viral_clip(video_file, srt_file, analysis_mode, progress=gr.Progress()):
+     if not video_file or not srt_file:
+         return "Error: Please upload both a video file and an SRT file.", None
+     if not ONE_API_KEY or not ONE_API_URL:
+         return "Error: API keys for OneAPI are not configured correctly in the Space secrets.", None
+
+     video = None
+     new_clip = None
+     try:
+         progress(0.1, desc="Reading SRT file...")
+         with open(srt_file, 'r', encoding='utf-8') as f:
+             transcript_content = f.read()
+
+         progress(0.2, desc="Preparing analysis prompt...")
+         prompt_template = PROMPT_SHORTS_MODE if analysis_mode == "Viral Spot for Shorts (< 3 mins)" else PROMPT_NARRATIVE_MODE
+
+         progress(0.4, desc="Calling AI for analysis...")
+         llm_response_str = call_gpt4o_oneapi(transcript_content, prompt_template)
+
+         progress(0.7, desc="Parsing AI response...")
+         if llm_response_str.startswith("Error") or llm_response_str.startswith("HTTP Error"):
+             return llm_response_str, None
+
+         try:
+             # Strip a Markdown code fence if the model wrapped its JSON anyway.
+             cleaned_response = llm_response_str.strip()
+             if cleaned_response.startswith("```json"):
+                 cleaned_response = cleaned_response[7:]
+             if cleaned_response.endswith("```"):
+                 cleaned_response = cleaned_response[:-3]
+
+             parsed_response = json.loads(cleaned_response)
+
+             if not isinstance(parsed_response, dict):
+                 raise TypeError(f"AI did not return a valid JSON object. It returned a {type(parsed_response).__name__}.")
+
+             start_time = float(parsed_response['final_clip_start_seconds'])
+             end_time = float(parsed_response['final_clip_end_seconds'])
+             reasoning = parsed_response.get('reasoning', 'No reasoning provided.')
+
+             summary = (f"✅ Analysis Complete!\n\n"
+                        f"Reasoning: {reasoning}\n\n"
+                        f"Title Suggestion: {parsed_response.get('clip_title_suggestion', 'N/A')}\n"
+                        f"Narrative Summary: {parsed_response.get('narrative_summary', 'N/A')}\n\n"
+                        f"Clipping video from {time.strftime('%H:%M:%S', time.gmtime(start_time))} to {time.strftime('%H:%M:%S', time.gmtime(end_time))}.")
+
+         except (json.JSONDecodeError, KeyError, TypeError) as e:
+             error_msg = f"Error: Failed to parse AI response. Details: {e}\n\nRaw AI Response:\n---\n{llm_response_str}"
+             return error_msg, None
+
+         progress(0.8, desc="Clipping video...")
+         output_filename = "viral_clip.mp4"
+
+         video = VideoFileClip(video_file)
+
+         if end_time > video.duration:
+             end_time = video.duration
+             summary += f"\n\n⚠️ Warning: End time was beyond video duration, adjusted to {end_time:.2f}s."
+
+         new_clip = video.subclip(start_time, end_time)
+         new_clip.write_videofile(output_filename, codec="libx264", audio_codec="aac")
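+         # Note: `moviepy.editor` and `subclip()` are the MoviePy 1.x API. MoviePy 2.x
+         # removed the editor module and renamed the method `subclipped()`, so the
+         # Space's requirements need a 1.x pin (e.g. moviepy==1.0.3) to run this file.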
+
+         progress(1.0, desc="Done!")
+         return summary, output_filename
+
+     except Exception as e:
+         tb_str = traceback.format_exc()
+         return f"An unexpected error occurred in the main process: {str(e)}\n\nTraceback:\n{tb_str}", None
+
      finally:
+         # Always release the MoviePy handles, even on failure.
+         if new_clip:
+             new_clip.close()
+         if video:
+             video.close()
+
+ # --- 4. GRADIO UI DEFINITION ---
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         """
+         # 🎬 AI Viral Video Extractor
+         This tool uses an AI agent to analyze a video transcript and automatically clip the most viral segment.
+         **⚠️ Important Setup:** For best security, configure `ONE_API_KEY` in your Hugging Face **Space Settings > Secrets**.
+         """
+     )
      with gr.Row():
+         with gr.Column(scale=1):
+             video_input = gr.Video(label="1. Upload Original Video")
+             srt_input = gr.File(label="2. Upload English SRT File", file_types=['.srt'])
+             mode_input = gr.Radio(
+                 label="3. Select Analysis Mode",
+                 choices=["Viral Spot for Shorts (< 3 mins)", "Viral Narrative Clip (5-12 mins)"],
+                 value="Viral Narrative Clip (5-12 mins)"
+             )
+             submit_button = gr.Button("🚀 Generate Viral Clip", variant="primary")
+
+         with gr.Column(scale=2):
+             summary_output = gr.Textbox(label="Analysis Summary", lines=12, interactive=False)
+             video_output = gr.Video(label="Generated Clip", interactive=False)
+
+     submit_button.click(
+         fn=generate_viral_clip,
+         inputs=[video_input, srt_input, mode_input],
+         outputs=[summary_output, video_output],
+     )

  if __name__ == "__main__":
      demo.launch(debug=True)
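For a quick smoke test of the wrapper without the UI, a minimal sketch (assumes this file is saved as app.py and ONE_API_KEY is set; the SRT snippet is made up):

    # smoke_test.py
    from app import call_gpt4o_oneapi, PROMPT_SHORTS_MODE

    srt = "1\n00:00:01,000 --> 00:00:04,500\nWelcome back, today we reveal everything.\n"
    print(call_gpt4o_oneapi(srt, PROMPT_SHORTS_MODE))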