Update app.py
app.py
CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 import asyncio
 from pathlib import Path
 from google import genai
-from google.genai import types
+from google.genai import types
 import os
 from dataclasses import dataclass
 from typing import Dict
@@ -17,11 +17,9 @@ class ContentRequest:
     prompt_key: str

 class ContentGenerator:
-
-    def __init__(self, api_key=None):
+    def __init__(self, api_key):
         self.current_prompts = self._load_default_prompts()
-
-        self.client = genai.Client(api_key=api_key) if api_key else None
+        self.client = genai.Client(api_key=api_key)

     def _load_default_prompts(self) -> Dict[str, str]:
         """Load default prompts and examples from files and CSVs."""
@@ -64,63 +62,42 @@
         # Load base prompts and inject examples
         prompts = {}
         for key in ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]:
-
-
-
-
-
-
-
-
-
-
-
-
-
-                prompts[key] = prompt
-            except FileNotFoundError:
-                print(f"Warning: Prompt file prompts/{key}.txt not found. Using empty prompt.")
-                prompts[key] = ""  # Use empty prompt if file is missing
-            except Exception as e:
-                print(f"Warning: Error loading prompt file prompts/{key}.txt: {e}")
-                prompts[key] = ""
+            prompt = Path(f"prompts/{key}.txt").read_text()
+
+            # Inject relevant examples
+            if key == "timestamps":
+                prompt = prompt.replace("{timestamps_examples}", timestamp_examples)
+            elif key == "titles_and_thumbnails":
+                prompt = prompt.replace("{title_examples}", title_examples)
+            elif key == "description":
+                prompt = prompt.replace("{description_examples}", description_examples)
+            elif key == "clips":
+                prompt = prompt.replace("{clip_examples}", clip_examples)
+
+            prompts[key] = prompt

         return prompts

     async def generate_content(self, request: ContentRequest, transcript: str) -> str:
         """Generate content using Gemini asynchronously."""
-        # Check if client is initialized
-        if not self.client:
-            return f"Error: Google AI Client not initialized. Please provide an API key."
         try:
             print(f"\nFull prompt for {request.prompt_key}:")
             print("=== SYSTEM PROMPT ===")
-
-            system_prompt = self.current_prompts.get(request.prompt_key, "")
-            if not system_prompt:
-                print(f"Warning: Empty system prompt for {request.prompt_key}")
-            print(system_prompt)
+            print(self.current_prompts[request.prompt_key])
             print("=== END SYSTEM PROMPT ===\n")

             response = self.client.models.generate_content(
                 model="gemini-2.5-pro-exp-03-25",
-                config=types.GenerateContentConfig(system_instruction=system_prompt),
+                config=types.GenerateContentConfig(system_instruction=self.current_prompts[request.prompt_key]),
                 contents=transcript
             )

-            if response and hasattr(response, 'candidates'):
+            if response and hasattr(response, 'candidates'):
                 return response.text
             else:
-
-
-                print(f"Unexpected Gemini response structure for {request.prompt_key}. Response: {response}")
-                return f"Error: Unexpected response structure for {request.prompt_key}. Details: {error_details}"
-
-        except types.PermissionDeniedError as e:
-            print(f"Permission Denied Error generating content for {request.prompt_key}: {e}")
-            return f"Error generating content: Permission Denied. Please check your Google API Key. Details: {str(e)}"
+                return f"Error: Unexpected response structure for {request.prompt_key}"
+
         except Exception as e:
-            print(f"Error generating content for {request.prompt_key}: {e}")
             return f"Error generating content: {str(e)}"

 def extract_video_id(url: str) -> str:
@@ -141,8 +118,7 @@ def get_transcript(video_id: str) -> str:

 class TranscriptProcessor:
     def __init__(self):
-
-        self.generator = ContentGenerator(api_key=None)  # No key needed at init
+        self.generator = ContentGenerator(api_key=os.getenv("GOOGLE_API_KEY"))


     def _get_youtube_transcript(self, url: str) -> str:
@@ -154,91 +130,47 @@ class TranscriptProcessor:
         except Exception as e:
             raise Exception(f"Error fetching YouTube transcript: {str(e)}")

-
-    async def process_transcript(self, audio_file, assemblyai_api_key: str):
+    async def process_transcript(self, audio_file):
         """Process input and generate all content."""
-        if not audio_file:
-            raise ValueError("No audio file provided.")
-        if not assemblyai_api_key:
-            raise ValueError("AssemblyAI API Key is required.")
-
-        audio_path = Path(audio_file.name)  # Use Path object
-        if not audio_path.exists():
-            raise FileNotFoundError(f"Audio file not found at path: {audio_path}")
-
+        audio_path = audio_file.name
         try:
-
-            aai.settings.api_key = assemblyai_api_key
-            print(f"Transcribing file: {audio_path}")
+            aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
             config = aai.TranscriptionConfig(speaker_labels=True, language_code="en")
-            transcriber = aai.Transcriber()
-            transcript_iter = transcriber.transcribe(str(audio_path), config=config)  # Ensure path is string
-
-            if transcript_iter.error:
-                raise Exception(f"AssemblyAI Transcription Error: {transcript_iter.error}")
-            if not transcript_iter.text:
-                return "Error: Transcription resulted in empty text."
-
+            transcript_iter = aai.Transcriber().transcribe(str(audio_path), config=config)
             transcript = transcript_iter.text
-            print("Transcription successful.")

             # Process each type sequentially
             sections = {}
-
-
-
-            print("Starting content generation tasks...")
-            # Create concurrent tasks for Gemini generation
-            for key in keys:
-                tasks.append(asyncio.create_task(
-                    self.generator.generate_content(ContentRequest(key), transcript)
-                ))
-
-            # Wait for all tasks to complete
-            results = await asyncio.gather(*tasks)
-            print("Content generation tasks completed.")
-
-            # Assign results back to sections
-            for i, key in enumerate(keys):
-                sections[key] = results[i]
+            for key in ["titles_and_thumbnails", "description", "previews", "clips", "timestamps"]:
+                result = await self.generator.generate_content(ContentRequest(key), transcript)
+                sections[key] = result

             # Combine into markdown with H2 headers
             markdown = f"""
 ## Titles and Thumbnails
-
 {sections['titles_and_thumbnails']}
-
 ## Twitter Description
-
 {sections['description']}
-
 ## Preview Clips
-
 {sections['previews']}
-
 ## Twitter Clips
-
 {sections['clips']}
-
 ## Timestamps
-
 {sections['timestamps']}
 """
             return markdown

         except Exception as e:
-            # Log the full traceback for debugging
-            import traceback
-            print(f"Error during transcript processing: {traceback.format_exc()}")
             return f"Error processing input: {str(e)}"

     def update_prompts(self, *values) -> str:
         """Update the current session's prompts."""
-
-
-
-
-        return
+        self.generator.current_prompts.update(zip(
+            ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"],
+            values
+        ))
+        return "Prompts updated for this session!"
+


 def create_interface():
@@ -249,29 +181,12 @@ def create_interface():
         gr.Markdown(
             """
             # Gemini Podcast Content Generator
-            Generate preview clips, timestamps, descriptions and more from
-
-            **Important:** Enter your API keys below before uploading your audio file.
+            Generate preview clips, timestamps, descriptions and more from an audio file using Gemini.
+            Simply upload an audio file to get started and Gemini handles the rest.
             """
         )

-        with gr.Tab("Generate Content"):
-            # --- ADDED API KEY INPUTS ---
-            google_api_key_input = gr.Textbox(
-                label="Google API Key",
-                placeholder="Enter your Google AI Studio API Key here (e.g., AIza...)",
-                type="password",
-                info="Your GCP account needs to have billing enabled to use the 2.5 pro model.",
-                # value=os.getenv("GOOGLE_API_KEY", "")  # Optionally preload from env if available
-            )
-            assemblyai_api_key_input = gr.Textbox(
-                label="AssemblyAI API Key",
-                placeholder="Enter your AssemblyAI API Key here",
-                type="password",
-                # value=os.getenv("ASSEMBLYAI_API_KEY", "")  # Optionally preload from env if available
-            )
-            # --- END OF ADDED INPUTS ---
-
+        with gr.Tab("Generate Content"):
             input_audio = gr.File(
                 label="Upload Audio File",
                 file_count="single",
@@ -279,103 +194,72 @@ def create_interface():
             )
             submit_btn = gr.Button("Generate Content with Gemini")

-            output = gr.Markdown(
+            output = gr.Markdown()  # Single markdown output

-
-            async def process_wrapper(google_key, assemblyai_key, audio_file_obj):
+            async def process_wrapper(text):
                 print("Process wrapper started")
-
-
-                print(f"Received Google Key: {'*' * (len(google_key) - 4) + google_key[-4:] if len(google_key) > 4 else '****'}")
-                print(f"Received AssemblyAI Key: {'*' * (len(assemblyai_key) - 4) + assemblyai_key[-4:] if len(assemblyai_key) > 4 else '****'}")
-                print(f"Audio file object received: Name='{getattr(audio_file_obj, 'name', 'N/A')}'")
-
-                # Show processing message
-                yield gr.update(value="Processing... Setting up clients and starting transcription...")
+                print(f"Input text: {text[:100]}...")

                 try:
-
-
-
-                    processor.generator.client = genai.Client(api_key=google_key)
-                    print("Google client initialized.")
-
-                    # 3. Call process_transcript, passing the AssemblyAI key and audio object
-                    yield gr.update(value="Processing... Transcribing audio with AssemblyAI...")
-                    result = await processor.process_transcript(audio_file_obj, assemblyai_key)
-                    print("Process completed, returning results.")
-                    yield gr.update(value=result)  # Final update with the result
-
-                except types.PermissionDeniedError as e:
-                    error_msg = f"# Error\n\nPermission Denied: Please check your Google API Key. Details: {str(e)}"
-                    print(error_msg)
-                    yield gr.update(value=error_msg)
+                    result = await processor.process_transcript(text)
+                    print("Process completed, got results")
+                    return result
                 except Exception as e:
-
-
-                    print(f"Error in process_wrapper: {traceback.format_exc()}")
-                    error_msg = f"# Error\n\nAn unexpected error occurred: {str(e)}"
-                    yield gr.update(value=error_msg)  # Update output with error
+                    print(f"Error in process_wrapper: {str(e)}")
+                    return f"# Error\n\n{str(e)}"

-            # Modify the submit_btn.click inputs to include the API key textboxes
             submit_btn.click(
                 fn=process_wrapper,
-
-                inputs=[google_api_key_input, assemblyai_api_key_input, input_audio],
+                inputs=input_audio,
                 outputs=output,
-
+                queue=True
             )

         with gr.Tab("Customize Prompts"):
             gr.Markdown(
                 """
-                ## Customize Generation Prompts
+                ## Customize Generation Prompts
                 Here you can experiment with different prompts during your session.
                 Changes will remain active until you reload the page.
-
                 Tip: Copy your preferred prompts somewhere safe if you want to reuse them later!
                 """
             )

-            # Use the keys defined earlier for consistency
-            prompt_keys = ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]
             prompt_inputs = [
                 gr.Textbox(
                     label=f"{key.replace('_', ' ').title()} Prompt",
                     lines=10,
-                    value=processor.generator.current_prompts
+                    value=processor.generator.current_prompts[key]
                 )
-                for key in prompt_keys
+                for key in [
+                    "previews",
+                    "clips",
+                    "description",
+                    "timestamps",
+                    "titles_and_thumbnails"
+                ]
             ]
             status = gr.Textbox(label="Status", interactive=False)

-            #
-
-
-
-
-
-
-            # --- End Simplified Update Logic ---
-
-
-            # Reset button - fetches defaults again
-            reset_btn = gr.Button("Reset to Default Gemini Prompts")
-            # Define a helper function for reset to avoid complex lambda
-            def reset_prompts_ui():
-                default_prompts_dict = processor.generator._load_default_prompts()
-                processor.generator.current_prompts = default_prompts_dict  # Update internal state
-                # Return values in the correct order for outputs
-                return ["Prompts reset to defaults!"] + [default_prompts_dict.get(key, "") for key in prompt_keys]
+            # Update prompts when they change
+            for prompt in prompt_inputs:
+                prompt.change(
+                    fn=processor.update_prompts,
+                    inputs=prompt_inputs,
+                    outputs=[status]
+                )

+            # Reset button
+            reset_btn = gr.Button("Reset to Default Prompts")
             reset_btn.click(
-                fn=reset_prompts_ui,
-
-
+                fn=lambda: (
+                    processor.update_prompts(*processor.generator.current_prompts.values()),
+                    *processor.generator.current_prompts.values(),
+                ),
+                outputs=[status] + prompt_inputs,
             )

     return app

 if __name__ == "__main__":
-    app = create_interface()
-    app.launch()
+    create_interface().launch()
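
After this change the app no longer collects API keys through the UI: ContentGenerator receives the Gemini key from GOOGLE_API_KEY and process_transcript reads ASSEMBLYAI_API_KEY, both via os.getenv. A minimal local-run sketch under those assumptions follows; the module name app, the helper file name, and the placeholder key values are illustrative, not part of this commit.

# run_local.py -- hypothetical helper, not part of the repo
import os

# Both keys are read from the environment inside app.py
os.environ["GOOGLE_API_KEY"] = "your-google-ai-studio-key"   # consumed by ContentGenerator via genai.Client
os.environ["ASSEMBLYAI_API_KEY"] = "your-assemblyai-key"     # consumed in process_transcript via aai.settings.api_key

from app import create_interface

# Same entry point the __main__ guard uses
create_interface().launch()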