dwarkesh committed on
Commit
6389c61
·
verified ·
1 Parent(s): 17841e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -188
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import asyncio
3
  from pathlib import Path
4
  from google import genai
5
- from google.genai import types # Import types for error handling
6
  import os
7
  from dataclasses import dataclass
8
  from typing import Dict
@@ -17,11 +17,9 @@ class ContentRequest:
17
  prompt_key: str
18
 
19
  class ContentGenerator:
20
- # Modified __init__ slightly - allow api_key=None initially
21
- def __init__(self, api_key=None):
22
  self.current_prompts = self._load_default_prompts()
23
- # Initialize client only if key is provided, otherwise set to None
24
- self.client = genai.Client(api_key=api_key) if api_key else None
25
 
26
  def _load_default_prompts(self) -> Dict[str, str]:
27
  """Load default prompts and examples from files and CSVs."""
@@ -64,63 +62,42 @@ class ContentGenerator:
64
  # Load base prompts and inject examples
65
  prompts = {}
66
  for key in ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]:
67
- try: # Add try-except for file reading
68
- prompt = Path(f"prompts/{key}.txt").read_text()
69
-
70
- # Inject relevant examples
71
- if key == "timestamps":
72
- prompt = prompt.replace("{timestamps_examples}", timestamp_examples)
73
- elif key == "titles_and_thumbnails":
74
- prompt = prompt.replace("{title_examples}", title_examples)
75
- elif key == "description":
76
- prompt = prompt.replace("{description_examples}", description_examples)
77
- elif key == "clips":
78
- prompt = prompt.replace("{clip_examples}", clip_examples)
79
-
80
- prompts[key] = prompt
81
- except FileNotFoundError:
82
- print(f"Warning: Prompt file prompts/{key}.txt not found. Using empty prompt.")
83
- prompts[key] = "" # Use empty prompt if file is missing
84
- except Exception as e:
85
- print(f"Warning: Error loading prompt file prompts/{key}.txt: {e}")
86
- prompts[key] = ""
87
 
88
  return prompts
89
 
90
  async def generate_content(self, request: ContentRequest, transcript: str) -> str:
91
  """Generate content using Gemini asynchronously."""
92
- # Check if client is initialized
93
- if not self.client:
94
- return f"Error: Google AI Client not initialized. Please provide an API key."
95
  try:
96
  print(f"\nFull prompt for {request.prompt_key}:")
97
  print("=== SYSTEM PROMPT ===")
98
- # Ensure prompt exists
99
- system_prompt = self.current_prompts.get(request.prompt_key, "")
100
- if not system_prompt:
101
- print(f"Warning: Empty system prompt for {request.prompt_key}")
102
- print(system_prompt)
103
  print("=== END SYSTEM PROMPT ===\n")
104
 
105
  response = self.client.models.generate_content(
106
  model="gemini-2.5-pro-exp-03-25",
107
- config=types.GenerateContentConfig(system_instruction=system_prompt),
108
  contents=transcript
109
  )
110
 
111
- if response and hasattr(response, 'text'): # Simpler check for Gemini API response
112
  return response.text
113
  else:
114
- # Try to get more details if possible
115
- error_details = getattr(response, 'prompt_feedback', 'Unknown reason')
116
- print(f"Unexpected Gemini response structure for {request.prompt_key}. Response: {response}")
117
- return f"Error: Unexpected response structure for {request.prompt_key}. Details: {error_details}"
118
-
119
- except types.PermissionDeniedError as e:
120
- print(f"Permission Denied Error generating content for {request.prompt_key}: {e}")
121
- return f"Error generating content: Permission Denied. Please check your Google API Key. Details: {str(e)}"
122
  except Exception as e:
123
- print(f"Error generating content for {request.prompt_key}: {e}")
124
  return f"Error generating content: {str(e)}"
125
 
126
  def extract_video_id(url: str) -> str:
@@ -141,8 +118,7 @@ def get_transcript(video_id: str) -> str:
141
 
142
  class TranscriptProcessor:
143
  def __init__(self):
144
- # Initialize generator without API key initially
145
- self.generator = ContentGenerator(api_key=None) # No key needed at init
146
 
147
 
148
  def _get_youtube_transcript(self, url: str) -> str:
@@ -154,91 +130,47 @@ class TranscriptProcessor:
154
  except Exception as e:
155
  raise Exception(f"Error fetching YouTube transcript: {str(e)}")
156
 
157
- # Modify process_transcript to accept the AssemblyAI key
158
- async def process_transcript(self, audio_file, assemblyai_api_key: str):
159
  """Process input and generate all content."""
160
- if not audio_file:
161
- raise ValueError("No audio file provided.")
162
- if not assemblyai_api_key:
163
- raise ValueError("AssemblyAI API Key is required.")
164
-
165
- audio_path = Path(audio_file.name) # Use Path object
166
- if not audio_path.exists():
167
- raise FileNotFoundError(f"Audio file not found at path: {audio_path}")
168
-
169
  try:
170
- # Set AssemblyAI key just before use
171
- aai.settings.api_key = assemblyai_api_key
172
- print(f"Transcribing file: {audio_path}")
173
  config = aai.TranscriptionConfig(speaker_labels=True, language_code="en")
174
- transcriber = aai.Transcriber()
175
- transcript_iter = transcriber.transcribe(str(audio_path), config=config) # Ensure path is string
176
-
177
- if transcript_iter.error:
178
- raise Exception(f"AssemblyAI Transcription Error: {transcript_iter.error}")
179
- if not transcript_iter.text:
180
- return "Error: Transcription resulted in empty text."
181
-
182
  transcript = transcript_iter.text
183
- print("Transcription successful.")
184
 
185
  # Process each type sequentially
186
  sections = {}
187
- tasks = []
188
- keys = ["titles_and_thumbnails", "description", "previews", "clips", "timestamps"]
189
-
190
- print("Starting content generation tasks...")
191
- # Create concurrent tasks for Gemini generation
192
- for key in keys:
193
- tasks.append(asyncio.create_task(
194
- self.generator.generate_content(ContentRequest(key), transcript)
195
- ))
196
-
197
- # Wait for all tasks to complete
198
- results = await asyncio.gather(*tasks)
199
- print("Content generation tasks completed.")
200
-
201
- # Assign results back to sections
202
- for i, key in enumerate(keys):
203
- sections[key] = results[i]
204
 
205
  # Combine into markdown with H2 headers
206
  markdown = f"""
207
  ## Titles and Thumbnails
208
-
209
  {sections['titles_and_thumbnails']}
210
-
211
  ## Twitter Description
212
-
213
  {sections['description']}
214
-
215
  ## Preview Clips
216
-
217
  {sections['previews']}
218
-
219
  ## Twitter Clips
220
-
221
  {sections['clips']}
222
-
223
  ## Timestamps
224
-
225
  {sections['timestamps']}
226
  """
227
  return markdown
228
 
229
  except Exception as e:
230
- # Log the full traceback for debugging
231
- import traceback
232
- print(f"Error during transcript processing: {traceback.format_exc()}")
233
  return f"Error processing input: {str(e)}"
234
 
235
  def update_prompts(self, *values) -> str:
236
  """Update the current session's prompts."""
237
- keys = ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]
238
- self.generator.current_prompts.update(zip(keys, values))
239
- # Check if all keys were updated correctly
240
- updated_keys_str = ", ".join(k for k, v in zip(keys, values) if v is not None)
241
- return f"Prompts updated for this session: {updated_keys_str}"
 
242
 
243
 
244
  def create_interface():
@@ -249,29 +181,12 @@ def create_interface():
249
  gr.Markdown(
250
  """
251
  # Gemini Podcast Content Generator
252
- Generate preview clips, timestamps, descriptions and more from podcast transcripts using Gemini.
253
-
254
- **Important:** Enter your API keys below before uploading your audio file.
255
  """
256
  )
257
 
258
- with gr.Tab("Generate Content with Gemini"):
259
- # --- ADDED API KEY INPUTS ---
260
- google_api_key_input = gr.Textbox(
261
- label="Google API Key",
262
- placeholder="Enter your Google AI Studio API Key here (e.g., AIza...)",
263
- type="password",
264
- info="Your GCP account needs to have billing enabled to use the 2.5 pro model.",
265
- # value=os.getenv("GOOGLE_API_KEY", "") # Optionally preload from env if available
266
- )
267
- assemblyai_api_key_input = gr.Textbox(
268
- label="AssemblyAI API Key",
269
- placeholder="Enter your AssemblyAI API Key here",
270
- type="password",
271
- # value=os.getenv("ASSEMBLYAI_API_KEY", "") # Optionally preload from env if available
272
- )
273
- # --- END OF ADDED INPUTS ---
274
-
275
  input_audio = gr.File(
276
  label="Upload Audio File",
277
  file_count="single",
@@ -279,103 +194,72 @@ def create_interface():
279
  )
280
  submit_btn = gr.Button("Generate Content with Gemini")
281
 
282
- output = gr.Markdown(label="Generated Content") # Added label
283
 
284
- # Modify the wrapper function signature to accept API keys
285
- async def process_wrapper(google_key, assemblyai_key, audio_file_obj):
286
  print("Process wrapper started")
287
- # 1. Validate inputs
288
-
289
- print(f"Received Google Key: {'*' * (len(google_key) - 4) + google_key[-4:] if len(google_key) > 4 else '****'}")
290
- print(f"Received AssemblyAI Key: {'*' * (len(assemblyai_key) - 4) + assemblyai_key[-4:] if len(assemblyai_key) > 4 else '****'}")
291
- print(f"Audio file object received: Name='{getattr(audio_file_obj, 'name', 'N/A')}'")
292
-
293
- # Show processing message
294
- yield gr.update(value="Processing... Setting up clients and starting transcription...")
295
 
296
  try:
297
- # 2. Re-initialize/Update Google client with the provided key *before* processing
298
- # This assumes processor.generator exists and is the correct instance
299
- print("Initializing Google Client...")
300
- processor.generator.client = genai.Client(api_key=google_key)
301
- print("Google client initialized.")
302
-
303
- # 3. Call process_transcript, passing the AssemblyAI key and audio object
304
- yield gr.update(value="Processing... Transcribing audio with AssemblyAI...")
305
- result = await processor.process_transcript(audio_file_obj, assemblyai_key)
306
- print("Process completed, returning results.")
307
- yield gr.update(value=result) # Final update with the result
308
-
309
- except types.PermissionDeniedError as e:
310
- error_msg = f"# Error\n\nPermission Denied: Please check your Google API Key. Details: {str(e)}"
311
- print(error_msg)
312
- yield gr.update(value=error_msg)
313
  except Exception as e:
314
- # Log the full traceback for debugging
315
- import traceback
316
- print(f"Error in process_wrapper: {traceback.format_exc()}")
317
- error_msg = f"# Error\n\nAn unexpected error occurred: {str(e)}"
318
- yield gr.update(value=error_msg) # Update output with error
319
 
320
- # Modify the submit_btn.click inputs to include the API key textboxes
321
  submit_btn.click(
322
  fn=process_wrapper,
323
- # Order matters: matches the function signature (google_key, assemblyai_key, audio_file_obj)
324
- inputs=[google_api_key_input, assemblyai_api_key_input, input_audio],
325
  outputs=output,
326
- # Removed queue=True as yield requires it to be False or None (default)
327
  )
328
 
329
  with gr.Tab("Customize Prompts"):
330
  gr.Markdown(
331
  """
332
- ## Customize Generation Prompts for Gemini
333
  Here you can experiment with different prompts during your session.
334
  Changes will remain active until you reload the page.
335
-
336
  Tip: Copy your preferred prompts somewhere safe if you want to reuse them later!
337
  """
338
  )
339
 
340
- # Use the keys defined earlier for consistency
341
- prompt_keys = ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]
342
  prompt_inputs = [
343
  gr.Textbox(
344
  label=f"{key.replace('_', ' ').title()} Prompt",
345
  lines=10,
346
- value=processor.generator.current_prompts.get(key, "") # Use .get for safety
347
  )
348
- for key in prompt_keys
 
 
 
 
 
 
349
  ]
350
  status = gr.Textbox(label="Status", interactive=False)
351
 
352
- # --- Simplified Update Logic ---
353
- update_btn = gr.Button("Update Session Prompts")
354
- update_btn.click(
355
- fn=processor.update_prompts,
356
- inputs=prompt_inputs,
357
- outputs=[status]
358
- )
359
- # --- End Simplified Update Logic ---
360
-
361
-
362
- # Reset button - fetches defaults again
363
- reset_btn = gr.Button("Reset to Default Gemini Prompts")
364
- # Define a helper function for reset to avoid complex lambda
365
- def reset_prompts_ui():
366
- default_prompts_dict = processor.generator._load_default_prompts()
367
- processor.generator.current_prompts = default_prompts_dict # Update internal state
368
- # Return values in the correct order for outputs
369
- return [ "Prompts reset to defaults!" ] + [ default_prompts_dict.get(key, "") for key in prompt_keys ]
370
 
 
 
371
  reset_btn.click(
372
- fn=reset_prompts_ui,
373
- inputs=None, # No inputs needed
374
- outputs=[status] + prompt_inputs # Update status and all textboxes
 
 
375
  )
376
 
377
  return app
378
 
379
  if __name__ == "__main__":
380
- app = create_interface()
381
- app.launch()
 
2
  import asyncio
3
  from pathlib import Path
4
  from google import genai
5
+ from google.genai import types
6
  import os
7
  from dataclasses import dataclass
8
  from typing import Dict
 
17
  prompt_key: str
18
 
19
  class ContentGenerator:
20
def __init__(self, api_key):
    """Set up the session prompts and the Gemini client.

    Args:
        api_key: Key handed to ``genai.Client``.
    """
    # Prompts are loaded first, exactly as before, so a prompt-loading
    # failure surfaces before any client is constructed.
    default_prompts = self._load_default_prompts()
    self.current_prompts = default_prompts
    self.client = genai.Client(api_key=api_key)
 
23
 
24
  def _load_default_prompts(self) -> Dict[str, str]:
25
  """Load default prompts and examples from files and CSVs."""
 
62
  # Load base prompts and inject examples
63
  prompts = {}
64
  for key in ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]:
65
+ prompt = Path(f"prompts/{key}.txt").read_text()
66
+
67
+ # Inject relevant examples
68
+ if key == "timestamps":
69
+ prompt = prompt.replace("{timestamps_examples}", timestamp_examples)
70
+ elif key == "titles_and_thumbnails":
71
+ prompt = prompt.replace("{title_examples}", title_examples)
72
+ elif key == "description":
73
+ prompt = prompt.replace("{description_examples}", description_examples)
74
+ elif key == "clips":
75
+ prompt = prompt.replace("{clip_examples}", clip_examples)
76
+
77
+ prompts[key] = prompt
 
 
 
 
 
 
 
78
 
79
  return prompts
80
 
81
  async def generate_content(self, request: ContentRequest, transcript: str) -> str:
82
  """Generate content using Gemini asynchronously."""
 
 
 
83
  try:
84
  print(f"\nFull prompt for {request.prompt_key}:")
85
  print("=== SYSTEM PROMPT ===")
86
+ print(self.current_prompts[request.prompt_key])
 
 
 
 
87
  print("=== END SYSTEM PROMPT ===\n")
88
 
89
  response = self.client.models.generate_content(
90
  model="gemini-2.5-pro-exp-03-25",
91
+ config=types.GenerateContentConfig(system_instruction=self.current_prompts[request.prompt_key]),
92
  contents=transcript
93
  )
94
 
95
+ if response and hasattr(response, 'candidates'):
96
  return response.text
97
  else:
98
+ return f"Error: Unexpected response structure for {request.prompt_key}"
99
+
 
 
 
 
 
 
100
  except Exception as e:
 
101
  return f"Error generating content: {str(e)}"
102
 
103
  def extract_video_id(url: str) -> str:
 
118
 
119
  class TranscriptProcessor:
120
  def __init__(self):
121
+ self.generator = ContentGenerator(api_key=os.getenv("GOOGLE_API_KEY"))
 
122
 
123
 
124
  def _get_youtube_transcript(self, url: str) -> str:
 
130
  except Exception as e:
131
  raise Exception(f"Error fetching YouTube transcript: {str(e)}")
132
 
133
async def process_transcript(self, audio_file):
    """Transcribe an uploaded audio file and generate every content section.

    Args:
        audio_file: The upload to transcribe: either an object exposing the
            file path as ``.name`` or the path itself.

    Returns:
        Markdown with one ``##`` section per content type, or a string
        starting with ``"Error processing input:"`` when any step fails.
        Failures are returned as strings, never raised.
    """
    # (prompt key, section heading) in the order the sections are generated
    # and rendered.
    section_layout = (
        ("titles_and_thumbnails", "Titles and Thumbnails"),
        ("description", "Twitter Description"),
        ("previews", "Preview Clips"),
        ("clips", "Twitter Clips"),
        ("timestamps", "Timestamps"),
    )

    try:
        # Validate inside the try so a missing upload is reported the same
        # way as every other failure instead of raising AttributeError.
        if audio_file is None:
            raise ValueError("No audio file provided.")
        # Accept both a file wrapper (path in `.name`) and a plain path.
        audio_path = getattr(audio_file, "name", audio_file)

        aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
        config = aai.TranscriptionConfig(speaker_labels=True, language_code="en")
        transcription = aai.Transcriber().transcribe(str(audio_path), config=config)

        # Stop here on a failed or empty transcription rather than sending
        # nothing to Gemini five times.
        if transcription.error:
            raise RuntimeError(f"AssemblyAI Transcription Error: {transcription.error}")
        transcript = transcription.text
        if not transcript:
            raise RuntimeError("Transcription resulted in empty text.")

        # Process each type sequentially
        sections = {}
        for key, _heading in section_layout:
            sections[key] = await self.generator.generate_content(ContentRequest(key), transcript)

        # Combine into markdown with H2 headers. Lines are left unindented
        # so Markdown does not render the sections as code blocks.
        body = "\n".join(
            f"## {heading}\n{sections[key]}" for key, heading in section_layout
        )
        return f"\n{body}\n"

    except Exception as e:
        return f"Error processing input: {str(e)}"
165
 
166
  def update_prompts(self, *values) -> str:
167
  """Update the current session's prompts."""
168
+ self.generator.current_prompts.update(zip(
169
+ ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"],
170
+ values
171
+ ))
172
+ return "Prompts updated for this session!"
173
+
174
 
175
 
176
  def create_interface():
 
181
  gr.Markdown(
182
  """
183
  # Gemini Podcast Content Generator
184
+ Generate preview clips, timestamps, descriptions and more from an audio file using Gemini.
185
+ Simply upload an audio file to get started and Gemini handles the rest.
 
186
  """
187
  )
188
 
189
+ with gr.Tab("Generate Content"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  input_audio = gr.File(
191
  label="Upload Audio File",
192
  file_count="single",
 
194
  )
195
  submit_btn = gr.Button("Generate Content with Gemini")
196
 
197
+ output = gr.Markdown() # Single markdown output
198
 
199
async def process_wrapper(text):
    """Run the full pipeline for the uploaded file and return Markdown.

    Args:
        text: Value of the ``input_audio`` file component this handler is
            wired to. Despite the name it is the upload, not text.

    Returns:
        The generated Markdown, or a ``# Error`` Markdown message when no
        file was provided or processing raised.
    """
    print("Process wrapper started")

    if text is None:
        return "# Error\n\nNo audio file provided."

    # Slicing the raw value raises on file objects, so log a string preview
    # of the path instead.
    print(f"Input text: {str(getattr(text, 'name', text))[:100]}...")

    try:
        result = await processor.process_transcript(text)
        print("Process completed, got results")
        return result
    except Exception as e:
        print(f"Error in process_wrapper: {str(e)}")
        return f"# Error\n\n{str(e)}"
 
 
 
210
 
 
211
  submit_btn.click(
212
  fn=process_wrapper,
213
+ inputs=input_audio,
 
214
  outputs=output,
215
+ queue=True
216
  )
217
 
218
  with gr.Tab("Customize Prompts"):
219
  gr.Markdown(
220
  """
221
+ ## Customize Generation Prompts
222
  Here you can experiment with different prompts during your session.
223
  Changes will remain active until you reload the page.
 
224
  Tip: Copy your preferred prompts somewhere safe if you want to reuse them later!
225
  """
226
  )
227
 
 
 
228
  prompt_inputs = [
229
  gr.Textbox(
230
  label=f"{key.replace('_', ' ').title()} Prompt",
231
  lines=10,
232
+ value=processor.generator.current_prompts[key]
233
  )
234
+ for key in [
235
+ "previews",
236
+ "clips",
237
+ "description",
238
+ "timestamps",
239
+ "titles_and_thumbnails"
240
+ ]
241
  ]
242
  status = gr.Textbox(label="Status", interactive=False)
243
 
244
+ # Update prompts when they change
245
+ for prompt in prompt_inputs:
246
+ prompt.change(
247
+ fn=processor.update_prompts,
248
+ inputs=prompt_inputs,
249
+ outputs=[status]
250
+ )
 
 
 
 
 
 
 
 
 
 
 
251
 
252
# Reset button: reload the default prompts instead of echoing the current
# session prompts back, which made the reset a no-op.
reset_btn = gr.Button("Reset to Default Prompts")

def reset_prompts():
    """Restore the default prompts and return the status plus textbox values."""
    defaults = processor.generator._load_default_prompts()
    # Same key order as the prompt textboxes so each value lands in its
    # own box.
    ordered_defaults = [
        defaults[key]
        for key in (
            "previews",
            "clips",
            "description",
            "timestamps",
            "titles_and_thumbnails",
        )
    ]
    status_message = processor.update_prompts(*ordered_defaults)
    return (status_message, *ordered_defaults)

reset_btn.click(
    fn=reset_prompts,
    outputs=[status] + prompt_inputs,
)
261
 
262
  return app
263
 
264
  if __name__ == "__main__":
265
+ create_interface().launch()