kavehtaheri committed on
Commit
c157612
·
verified ·
1 Parent(s): a9bf664

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +254 -0
app.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import google.generativeai as genai
4
+ import os
5
+ import json
6
+ import time
7
+ from moviepy.editor import VideoFileClip
8
+
9
+ # --- 1. CONFIGURATION & CONSTANTS ---
10
+
11
# Load API credentials from the environment. Hugging Face Space secrets are
# exposed to the running app as environment variables, so no key needs to be
# (or should ever be) committed to the repository.
# NOTE: any key that was previously committed in plain text must be treated as
# leaked and rotated with the provider.
# A missing secret yields None; the checks in the agent wrappers and in
# generate_viral_clip report that to the user.
ONE_API_KEY = os.environ.get("ONE_API_KEY")
# The endpoint is not secret, so the previous value is kept as the default.
ONE_API_URL = os.environ.get("ONE_API_URL", "https://api.one-api.ir/chatbot/v1/gpt4o/")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
15
+
16
+ # --- MASTER PROMPTS ---
17
+
18
# Prompt for Mode 1: "Viral Spot for Shorts (< 3 mins)"
# The template is filled in with str.format(transcript_content=...), so
# {transcript_content} is the only placeholder and the doubled braces {{ }}
# around the example JSON are escapes that render as literal { }.
# The orchestrator reads "final_clip_start_seconds", "final_clip_end_seconds",
# "reasoning" and "clip_title_suggestion" from the model's reply.
PROMPT_SHORTS_MODE = """
You are an expert producer of viral short-form content for platforms like YouTube Shorts, TikTok, and Instagram Reels. Your task is to analyze a transcript and identify the single most impactful, hook-worthy segment that can stand alone as a compelling video under 3 minutes, ideally around 60-90 seconds.

PRIORITIES:
1. **Strong Hook:** The segment must start immediately with a shocking statement, a controversial question, or a highly emotional moment.
2. **Single, Clear Point:** The clip should focus on ONE idea. Do not try to tell a complex story.
3. **High-Energy & Pacing:** Identify a section where the speaker is energetic or the topic is intense.
4. **Clear Payoff:** The segment must have a quick, satisfying conclusion or punchline.

Analyze the provided SRT transcript and find the absolute best segment that fits these criteria. The timestamps in the SRT file are in the format HH:MM:SS,ms.

**Input SRT Content:**
{transcript_content}

**Instructions:**
Your output MUST be a single, valid JSON object and nothing else. Do not include any text before or after the JSON object. The times must be in total seconds.

{{
"clip_title_suggestion": "A catchy, clickbait-style title for this short clip.",
"reasoning": "Briefly explain why this segment is perfect for a short video, referencing the hook and payoff.",
"final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
"final_clip_end_seconds": <The precise end time in total seconds from the SRT>
}}
"""
43
+
44
# Prompt for Mode 2: "Viral Narrative Clip (5-12 mins)"
# The template is filled in with str.format(transcript_content=...), so
# {transcript_content} is the only placeholder and the doubled braces {{ }}
# around the example JSON are escapes that render as literal { }.
# The orchestrator reads "final_clip_start_seconds", "final_clip_end_seconds",
# "reasoning" and "narrative_summary" from the model's reply.
PROMPT_NARRATIVE_MODE = """
You are an expert video editor and storyteller. Your task is to analyze a transcript to find the most compelling and cohesive narrative segment that is between 5 and 12 minutes long.

METHODOLOGY:
1. **Identify the Peak Moment:** First, locate the single most insightful, profound, or climactic moment in the entire conversation. This is your anchor.
2. **Establish the Narrative Arc:**
* **Find the Setup:** Trace backward from the peak moment to find the true beginning of this story or argument. Where is the context given? Where does the thesis for this specific sub-topic begin?
* **Find the Resolution:** Trace forward from the peak moment to find the natural conclusion of this story or argument. Where does the speaker summarize the point or transition away?
3. **Ensure Cohesion & Duration:** The final selected segment MUST flow as a complete, self-contained story. Adjust the start and end boundaries to ensure the final duration is between 5 and 12 minutes while preserving the narrative integrity.

Analyze the provided SRT transcript using this methodology. The timestamps in the SRT file are in the format HH:MM:SS,ms.

**Input SRT Content:**
{transcript_content}

**Instructions:**
Your output MUST be a single, valid JSON object and nothing else. Do not include any text before or after the JSON object. The times must be in total seconds.

{{
"narrative_summary": "A one-sentence summary of the story told in the extracted clip.",
"reasoning": "Explain why this segment works as a standalone narrative, mentioning the peak moment and how the start/end points provide a full arc.",
"final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
"final_clip_end_seconds": <The precise end time in total seconds from the SRT>
}}
"""
70
+
71
+
72
+ # --- 2. LLM AGENT WRAPPER FUNCTIONS ---
73
+
74
def call_gpt4o_agent(transcript_content, prompt_template):
    """Ask GPT-4o, through the OneAPI-compatible endpoint, to analyze a transcript.

    Args:
        transcript_content: Raw SRT text substituted into the prompt.
        prompt_template: Template containing a ``{transcript_content}``
            placeholder; it is filled in with ``str.format``.

    Returns:
        The text of the first choice's message on success, or a string
        starting with ``"Error"`` that describes what went wrong (the caller
        uses that prefix to detect failures).

    Raises:
        ValueError: If ``ONE_API_KEY`` or ``ONE_API_URL`` is not configured.
    """
    if not ONE_API_KEY or not ONE_API_URL:
        raise ValueError("ONE_API_KEY and ONE_API_URL secrets are not set in the Hugging Face Space.")

    headers = {
        "Authorization": f"Bearer {ONE_API_KEY}",
        "Content-Type": "application/json"
    }

    final_prompt = prompt_template.format(transcript_content=transcript_content)

    # Standard OpenAI payload format
    payload = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": final_prompt}],
        "temperature": 0.2,  # Lower temperature for more predictable, structured output
        "response_format": {"type": "json_object"}  # Use JSON mode for reliability
    }

    try:
        response = requests.post(ONE_API_URL, headers=headers, json=payload, timeout=180)  # 3 minute timeout
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        result = response.json()
    except requests.exceptions.RequestException as e:
        return f"Error in API call: {str(e)}"
    except ValueError:
        # Older requests versions signal a non-JSON body with a plain ValueError.
        return f"Error: Unexpected response format from API. Full response: {response.text}"

    # The content is usually in the 'choices' list. An empty list (IndexError)
    # or a payload that is not shaped like a dict (TypeError) is just as
    # "unexpected" as a missing key, so all three take the same path.
    try:
        content = result['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        return f"Error: Unexpected response format from API. Full response: {response.text}"

    # A null/non-text content would break the caller's string handling.
    if not isinstance(content, str):
        return f"Error: Unexpected response format from API. Full response: {response.text}"
    return content
106
+
107
+
108
def call_gemini_agent(transcript_content, prompt_template):
    """Send the filled-in prompt to Google Gemini and return the reply text.

    Args:
        transcript_content: Raw SRT text substituted into the prompt.
        prompt_template: Template containing a ``{transcript_content}``
            placeholder; it is filled in with ``str.format``.

    Returns:
        The stripped response text on success, or a string starting with
        ``"Error calling Gemini API:"`` if anything inside the call fails.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` is not configured.
    """
    if not GEMINI_API_KEY:
        raise ValueError("GEMINI_API_KEY secret is not set in the Hugging Face Space.")

    try:
        genai.configure(api_key=GEMINI_API_KEY)

        # Request JSON output directly and keep sampling close to deterministic.
        gemini = genai.GenerativeModel(
            'gemini-1.5-flash-latest',
            generation_config=genai.GenerationConfig(
                response_mime_type="application/json",
                temperature=0.2
            )
        )

        reply = gemini.generate_content(
            prompt_template.format(transcript_content=transcript_content)
        )
        return reply.text.strip()

    except Exception as exc:
        # Boundary handler: every failure is reported to the caller as text.
        return f"Error calling Gemini API: {exc!s}"
134
+
135
+
136
+ # --- 3. CORE ORCHESTRATOR FUNCTION ---
137
+
138
def _uploaded_path(upload):
    """Return the filesystem path behind a Gradio upload value."""
    # Depending on the component and Gradio version, an upload reaches the
    # handler either as a plain path string or as a temp-file wrapper that
    # exposes the path as `.name`; accept both.
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def generate_viral_clip(video_file, srt_file, analysis_mode, llm_agent, progress=gr.Progress()):
    """Orchestrate the analysis and clipping process.

    Reads the SRT transcript, asks the selected LLM agent for the start/end of
    the best segment, and writes that segment of the video to
    ``viral_clip.mp4``.

    Args:
        video_file: Uploaded video (path string or object with ``.name``).
        srt_file: Uploaded SRT transcript (path string or object with ``.name``).
        analysis_mode: Label of the selected analysis mode; the Shorts label
            selects ``PROMPT_SHORTS_MODE``, anything else the narrative prompt.
        llm_agent: Label of the selected agent; ``"GPT-4o (via OneAPI)"``
            selects GPT-4o, anything else Gemini.
        progress: Gradio progress tracker used to report each step.

    Returns:
        A ``(message, clip_path)`` tuple. On failure ``message`` describes the
        error and ``clip_path`` is ``None``.
    """
    # --- Input Validation ---
    if not video_file or not srt_file:
        return "Error: Please upload both a video file and an SRT file.", None
    if not ONE_API_KEY or not ONE_API_URL or not GEMINI_API_KEY:
        return "Error: API keys are not configured correctly in the Space secrets.", None

    try:
        # --- Step 1: Read SRT Content ---
        progress(0.1, desc="Reading SRT file...")
        # utf-8-sig reads plain UTF-8 unchanged and drops a leading BOM.
        with open(_uploaded_path(srt_file), 'r', encoding='utf-8-sig') as f:
            transcript_content = f.read()

        # --- Step 2: Select Prompt based on Mode ---
        progress(0.2, desc="Preparing analysis prompt...")
        if analysis_mode == "Viral Spot for Shorts (< 3 mins)":
            prompt_template = PROMPT_SHORTS_MODE
        else:
            prompt_template = PROMPT_NARRATIVE_MODE

        # --- Step 3: Call Selected LLM Agent ---
        progress(0.4, desc=f"Calling {llm_agent} for analysis... (This may take a minute)")
        if llm_agent == "GPT-4o (via OneAPI)":
            llm_response_str = call_gpt4o_agent(transcript_content, prompt_template)
        else:  # Google Gemini
            llm_response_str = call_gemini_agent(transcript_content, prompt_template)

        # --- Step 4: Parse Structured JSON Response ---
        progress(0.7, desc="Parsing AI response...")
        if not isinstance(llm_response_str, str):
            return "Error: The AI agent returned an empty response.", None
        # The agent wrappers report failures as strings starting with "Error".
        if llm_response_str.startswith("Error"):
            return llm_response_str, None

        try:
            # Clean the response just in case the LLM adds markdown backticks
            cleaned_response = llm_response_str.strip().replace("```json", "").replace("```", "")
            parsed_response = json.loads(cleaned_response)

            start_time = float(parsed_response['final_clip_start_seconds'])
            end_time = float(parsed_response['final_clip_end_seconds'])
            reasoning = parsed_response.get('reasoning', 'No reasoning provided.')
        except (ValueError, KeyError, TypeError) as e:
            # ValueError covers json.JSONDecodeError and non-numeric timestamps.
            error_msg = f"Error: Failed to parse the AI's response. It may not be valid JSON.\n\nError Details: {str(e)}\n\nRaw AI Response:\n---\n{llm_response_str}"
            return error_msg, None

        # Written as a chained comparison so that NaN is rejected as well.
        if not (0 <= start_time < end_time):
            error_msg = (f"Error: The AI returned an invalid clip range "
                         f"(start={start_time}s, end={end_time}s).\n\n"
                         f"Raw AI Response:\n---\n{llm_response_str}")
            return error_msg, None

        # --- Step 5: Trim Original Video using Timestamps ---
        progress(0.8, desc="Clipping video... (This can be slow for large files)")

        output_filename = "viral_clip.mp4"
        warning = ""
        with VideoFileClip(_uploaded_path(video_file)) as video:
            # Check if times are within video duration
            if start_time >= video.duration:
                return (f"Error: The AI returned a start time ({start_time:.2f}s) beyond "
                        f"the video duration ({video.duration:.2f}s)."), None
            if end_time > video.duration:
                end_time = video.duration
                warning = f"\n\n⚠️ Warning: End time was beyond video duration, adjusted to {end_time:.2f}s."

            new_clip = video.subclip(start_time, end_time)
            new_clip.write_videofile(output_filename, codec="libx264", audio_codec="aac")

        # Format a nice summary for the user. Built after clamping so the
        # reported range is the one that was actually cut.
        summary = (f"✅ Analysis Complete!\n\n"
                   f"Reasoning: {reasoning}\n\n"
                   f"Title Suggestion: {parsed_response.get('clip_title_suggestion', 'N/A')}\n"
                   f"Narrative Summary: {parsed_response.get('narrative_summary', 'N/A')}\n\n"
                   f"Clipping video from {time.strftime('%H:%M:%S', time.gmtime(start_time))} to {time.strftime('%H:%M:%S', time.gmtime(end_time))}.")
        summary += warning

        progress(1.0, desc="Done!")
        return summary, output_filename

    except Exception as e:
        # UI boundary: surface any remaining failure as text instead of crashing.
        return f"An unexpected error occurred: {str(e)}", None
211
+
212
+
213
+ # --- 4. GRADIO UI DEFINITION ---
214
+
215
# Build the page. Components are created inside the gr.Blocks context and
# bound to variables so the click handler below can be wired to them.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# 🎬 Multi-Agent Viral Video Extractor
This tool uses a powerful LLM agent (GPT-4o or Gemini) to analyze a video transcript and automatically clip the most viral segment.

**⚠️ Important Setup:**
1. This Hugging Face Space must have `ONE_API_KEY`, `ONE_API_URL`, and `GEMINI_API_KEY` configured in its **Settings > Secrets**.
2. The process involves uploading large files and intensive AI analysis, so please be patient.
"""
    )
    with gr.Row():
        # Left column: the four inputs plus the submit button.
        with gr.Column(scale=1):
            video_input = gr.Video(label="1. Upload Original Video")
            srt_input = gr.File(label="2. Upload English SRT File", file_types=['.srt'])
            # The choice labels are compared verbatim in generate_viral_clip,
            # so they must stay in sync with the strings used there.
            mode_input = gr.Radio(
                label="3. Select Analysis Mode",
                choices=["Viral Spot for Shorts (< 3 mins)", "Viral Narrative Clip (5-12 mins)"],
                value="Viral Narrative Clip (5-12 mins)"
            )
            agent_input = gr.Radio(
                label="4. Select AI Agent",
                choices=["GPT-4o (via OneAPI)", "Google Gemini"],
                value="Google Gemini"
            )
            submit_button = gr.Button("🚀 Generate Viral Clip", variant="primary")

        # Right column: the text summary and the resulting clip.
        with gr.Column(scale=2):
            summary_output = gr.Textbox(label="Analysis Summary", lines=10, interactive=False)
            video_output = gr.Video(label="Generated Clip", interactive=False)

    # The inputs are passed positionally to generate_viral_clip, and its
    # (message, clip_path) return value fills the two outputs in order.
    submit_button.click(
        fn=generate_viral_clip,
        inputs=[video_input, srt_input, mode_input, agent_input],
        outputs=[summary_output, video_output],
        #api_name="generate_clip" # Uncomment to create an API endpoint
    )

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)