Brunwo committed on
Commit
276796f
·
1 Parent(s): 001156c
Files changed (5) hide show
  1. .gitignore +2 -0
  2. app.py +149 -131
  3. packages.txt +1 -0
  4. testGradioAPI.py +16 -0
  5. test_load_messages.py +32 -0
.gitignore CHANGED
@@ -5,3 +5,5 @@ pwa-report.json
5
  server.pem
6
  flagged/log.csv
7
  .aider*
 
 
 
5
  server.pem
6
  flagged/log.csv
7
  .aider*
8
+ locales/fr/LC_MESSAGES/messages.mo
9
+ locales/en/LC_MESSAGES/messages.mo
app.py CHANGED
@@ -15,6 +15,7 @@ from pydantic import BaseModel, ValidationError
15
  from pypdf import PdfReader
16
  from tenacity import retry, retry_if_exception_type
17
 
 
18
  import re
19
  import requests
20
  from dotenv import load_dotenv
@@ -25,15 +26,22 @@ import gettext
25
 
26
  from gradio.themes.utils.theme_dropdown import create_theme_dropdown
27
 
28
- #dropdown, js = create_theme_dropdown()
29
 
30
  # Setup gettext
31
  def setup_translation(lang_code):
32
- # The translation domain (like an app-specific identifier)
 
33
  locale_path = os.path.join(os.path.dirname(__file__), 'locales')
34
- translation = gettext.translation('messages', localedir=locale_path, languages=[lang_code])
35
- translation.install()
36
- return translation.gettext # Return the translation function '_'
 
 
 
 
 
 
 
37
 
38
 
39
  def read_readme():
@@ -130,14 +138,7 @@ def update_instructions_language(lang):
130
  }
131
 
132
  print(INSTRUCTION_TEMPLATES["podcast"]["intro"])
133
-
134
- return (
135
- INSTRUCTION_TEMPLATES["podcast"]["intro"],
136
- INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
137
- INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
138
- INSTRUCTION_TEMPLATES["podcast"]["prelude"],
139
- INSTRUCTION_TEMPLATES["podcast"]["dialog"]
140
- )
141
 
142
 
143
  # Function to update instruction fields based on template selection
@@ -250,140 +251,157 @@ def get_text_from_url(url: str) -> str:
250
  def generate_audio(
251
  url: str,
252
  openai_api_key: str = None,
253
- text_model: str = "o1-preview-2024-09-12",
254
  audio_model: str = "tts-1",
255
  speaker_1_voice: str = "alloy",
256
  speaker_2_voice: str = "echo",
257
  api_base: str = None,
258
- intro_instructions: str = '',
259
- text_instructions: str = '',
260
- scratch_pad_instructions: str = '',
261
- prelude_dialog: str = '',
262
- podcast_dialog_instructions: str = '',
263
  edited_transcript: str = None,
264
  user_feedback: str = None,
265
  original_text: str = None,
266
  debug = False,
267
  ) -> tuple:
268
- # Validate API Key
269
- if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
270
- raise gr.Error("OpenAI API key is required")
271
-
272
- combined_text = original_text or ""
273
-
274
- # If there's no original text, fetch it from the provided URL
275
- if not combined_text:
276
- combined_text = get_text_from_url(url)
277
-
278
- # Configure the LLM based on selected model and api_base
279
- @retry(retry=retry_if_exception_type(ValidationError))
280
- @conditional_llm(model=text_model, api_base=api_base, api_key=openai_api_key)
281
- def generate_dialogue(text: str, intro_instructions: str, text_instructions: str, scratch_pad_instructions: str,
282
- prelude_dialog: str, podcast_dialog_instructions: str,
283
- edited_transcript: str = None, user_feedback: str = None, ) -> Dialogue:
284
- """
285
- {intro_instructions}
286
-
287
- Here is the original input text:
288
-
289
- <input_text>
290
- {text}
291
- </input_text>
292
 
293
- {text_instructions}
294
-
295
- <scratchpad>
296
- {scratch_pad_instructions}
297
- </scratchpad>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
- {prelude_dialog}
 
 
300
 
301
- <podcast_dialogue>
302
- {podcast_dialog_instructions}
303
- </podcast_dialogue>
304
- {edited_transcript}{user_feedback}
305
- """
306
-
307
- instruction_improve='Based on the original text, please generate an improved version of the dialogue by incorporating the edits, comments and feedback.'
308
- edited_transcript_processed="\nPreviously generated edited transcript, with specific edits and comments that I want you to carefully address:\n"+"<edited_transcript>\n"+edited_transcript+"</edited_transcript>" if edited_transcript !="" else ""
309
- user_feedback_processed="\nOverall user feedback:\n\n"+user_feedback if user_feedback !="" else ""
310
-
311
- if edited_transcript_processed.strip()!='' or user_feedback_processed.strip()!='':
312
- user_feedback_processed="<requested_improvements>"+user_feedback_processed+"\n\n"+instruction_improve+"</requested_improvements>"
313
-
314
- if debug:
315
- logger.info (edited_transcript_processed)
316
- logger.info (user_feedback_processed)
317
-
318
- # Generate the dialogue using the LLM
319
- llm_output = generate_dialogue(
320
- combined_text,
321
- intro_instructions=intro_instructions,
322
- text_instructions=text_instructions,
323
- scratch_pad_instructions=scratch_pad_instructions,
324
- prelude_dialog=prelude_dialog,
325
- podcast_dialog_instructions=podcast_dialog_instructions,
326
- edited_transcript=edited_transcript_processed,
327
- user_feedback=user_feedback_processed
328
- )
329
 
330
- # Generate audio from the transcript
331
- audio = b""
332
- transcript = ""
333
- characters = 0
334
-
335
- with cf.ThreadPoolExecutor() as executor:
336
- futures = []
337
- for line in llm_output.dialogue:
338
- transcript_line = f"{line.speaker}: {line.text}"
339
- voice = speaker_1_voice if line.speaker == "speaker-1" else speaker_2_voice
340
- future = executor.submit(get_mp3, line.text, voice, audio_model, openai_api_key)
341
- futures.append((future, transcript_line))
342
- characters += len(line.text)
343
-
344
- for future, transcript_line in futures:
345
- audio_chunk = future.result()
346
- audio += audio_chunk
347
- transcript += transcript_line + "\n\n"
348
-
349
- logger.info(f"Generated {characters} characters of audio")
350
-
351
- temporary_directory = "./gradio_cached_examples/tmp/"
352
- os.makedirs(temporary_directory, exist_ok=True)
353
-
354
- # Use a temporary file -- Gradio's audio component doesn't work with raw bytes in Safari
355
- temporary_file = NamedTemporaryFile(
356
- dir=temporary_directory,
357
- delete=False,
358
- suffix=".mp3",
359
- )
360
- temporary_file.write(audio)
361
- temporary_file.close()
362
 
363
- # Delete any files in the temp directory that end with .mp3 and are over a day old
364
- for file in glob.glob(f"{temporary_directory}*.mp3"):
365
- if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
366
- os.remove(file)
367
 
368
- return temporary_file.name, transcript, combined_text
369
 
370
- def validate_and_generate_audio(*args):
371
- url = args[0]
372
- if not url:
373
- return None, None, None, "Please provide a valid URL before generating audio."
374
- try:
375
- audio_file, transcript, original_text = generate_audio(*args)
376
- return audio_file, transcript, original_text, None # Return None as the error when successful
377
  except Exception as e:
378
  # If an error occurs during generation, return None for the outputs and the error message
379
  return None, None, None, str(e)
380
 
381
- def edit_and_regenerate(edited_transcript, user_feedback, *args):
382
- # Replace the original transcript and feedback in the args with the new ones
383
- #new_args = list(args)
384
- #new_args[-2] = edited_transcript # Update edited transcript
385
- #new_args[-1] = user_feedback # Update user feedback
386
- return validate_and_generate_audio(*new_args)
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
 
389
 
@@ -393,7 +411,7 @@ def process_feedback_and_regenerate(feedback, *args):
393
  # Add user feedback to the args
394
  new_args = list(args)
395
  new_args.append(feedback) # Add user feedback as a new argument
396
- return validate_and_generate_audio(*new_args)
397
 
398
  with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
399
  with gr.Row(equal_height=True):
@@ -447,7 +465,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
447
  text_model = gr.Dropdown(
448
  label="Text Generation Model",
449
  choices=STANDARD_TEXT_MODELS,
450
- value="o1-preview-2024-09-12", #"gpt-4o-mini",
451
  info="Select the model to generate the dialogue text.",
452
  )
453
  audio_model = gr.Dropdown(
@@ -549,7 +567,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
549
  )
550
 
551
  submit_btn.click(
552
- fn=validate_and_generate_audio,
553
  inputs=[
554
  url_input, openai_api_key, text_model, audio_model,
555
  speaker_1_voice, speaker_2_voice, api_base,
@@ -573,7 +591,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
573
  )
574
 
575
  regenerate_btn.click(
576
- fn=lambda use_edit, edit, *args: validate_and_generate_audio(
577
  *args[:12], # All inputs up to podcast_dialog_instructions
578
  edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
579
  *args[12:] # user_feedback and original_text_output
 
15
  from pypdf import PdfReader
16
  from tenacity import retry, retry_if_exception_type
17
 
18
+ import locale
19
  import re
20
  import requests
21
  from dotenv import load_dotenv
 
26
 
27
  from gradio.themes.utils.theme_dropdown import create_theme_dropdown
28
 
 
29
 
30
  # Setup gettext
31
  def setup_translation(lang_code):
32
+
33
+ locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
34
  locale_path = os.path.join(os.path.dirname(__file__), 'locales')
35
+ try:
36
+ translation = gettext.translation('messages', localedir=locale_path, languages=[lang_code])
37
+ translation.install()
38
+ return translation.gettext # Return the translation function '_'
39
+ except FileNotFoundError:
40
+ logger.error(f"Translation file for language '{lang_code}' not found.")
41
+ return lambda s: s # Fallback to no translation
42
+ except UnicodeDecodeError as e:
43
+ logger.error(f"UnicodeDecodeError: {e}")
44
+ return lambda s: s # Fallback to no translation
45
 
46
 
47
  def read_readme():
 
138
  }
139
 
140
  print(INSTRUCTION_TEMPLATES["podcast"]["intro"])
141
+ return update_instructions("podcast")
 
 
 
 
 
 
 
142
 
143
 
144
  # Function to update instruction fields based on template selection
 
251
  def generate_audio(
252
  url: str,
253
  openai_api_key: str = None,
254
+ text_model: str = "gpt-4o-mini-2024-07-18",
255
  audio_model: str = "tts-1",
256
  speaker_1_voice: str = "alloy",
257
  speaker_2_voice: str = "echo",
258
  api_base: str = None,
259
+ intro_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["intro"],
260
+ text_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
261
+ scratch_pad_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
262
+ prelude_dialog: str = INSTRUCTION_TEMPLATES["podcast"]["prelude"],
263
+ podcast_dialog_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["dialog"],
264
  edited_transcript: str = None,
265
  user_feedback: str = None,
266
  original_text: str = None,
267
  debug = False,
268
  ) -> tuple:
269
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
+ if not url:
272
+ return None, None, None, "Please provide a valid URL before generating audio."
273
+
274
+ try:
275
+
276
+ # Validate API Key
277
+ if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
278
+ raise gr.Error("OpenAI API key is required")
279
+
280
+ combined_text = original_text or ""
281
+
282
+ # If there's no original text, fetch it from the provided URL
283
+ if not combined_text:
284
+ combined_text = get_text_from_url(url)
285
+
286
+ # Configure the LLM based on selected model and api_base
287
+ @retry(retry=retry_if_exception_type(ValidationError))
288
+ @conditional_llm(model=text_model, api_base=api_base, api_key=openai_api_key)
289
+ def generate_dialogue(text: str, intro_instructions: str, text_instructions: str, scratch_pad_instructions: str,
290
+ prelude_dialog: str, podcast_dialog_instructions: str,
291
+ edited_transcript: str = None, user_feedback: str = None, ) -> Dialogue:
292
+ """
293
+ {intro_instructions}
294
+
295
+ Here is the original input text:
296
+
297
+ <input_text>
298
+ {text}
299
+ </input_text>
300
+
301
+ {text_instructions}
302
+
303
+ <scratchpad>
304
+ {scratch_pad_instructions}
305
+ </scratchpad>
306
+
307
+ {prelude_dialog}
308
+
309
+ <podcast_dialogue>
310
+ {podcast_dialog_instructions}
311
+ </podcast_dialogue>
312
+ {edited_transcript}{user_feedback}
313
+ """
314
+
315
+ instruction_improve='Based on the original text, please generate an improved version of the dialogue by incorporating the edits, comments and feedback.'
316
+ edited_transcript_processed="\nPreviously generated edited transcript, with specific edits and comments that I want you to carefully address:\n"+"<edited_transcript>\n"+edited_transcript+"</edited_transcript>" if edited_transcript !="" else ""
317
+ user_feedback_processed="\nOverall user feedback:\n\n"+user_feedback if user_feedback !="" else ""
318
+
319
+ if edited_transcript_processed.strip()!='' or user_feedback_processed.strip()!='':
320
+ user_feedback_processed="<requested_improvements>"+user_feedback_processed+"\n\n"+instruction_improve+"</requested_improvements>"
321
 
322
+ if debug:
323
+ logger.info (edited_transcript_processed)
324
+ logger.info (user_feedback_processed)
325
 
326
+ # Generate the dialogue using the LLM
327
+ llm_output = generate_dialogue(
328
+ combined_text,
329
+ intro_instructions=intro_instructions,
330
+ text_instructions=text_instructions,
331
+ scratch_pad_instructions=scratch_pad_instructions,
332
+ prelude_dialog=prelude_dialog,
333
+ podcast_dialog_instructions=podcast_dialog_instructions,
334
+ edited_transcript=edited_transcript_processed,
335
+ user_feedback=user_feedback_processed
336
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
+ # Generate audio from the transcript
339
+ audio = b""
340
+ transcript = ""
341
+ characters = 0
342
+
343
+ with cf.ThreadPoolExecutor() as executor:
344
+ futures = []
345
+ for line in llm_output.dialogue:
346
+ transcript_line = f"{line.speaker}: {line.text}"
347
+ voice = speaker_1_voice if line.speaker == "speaker-1" else speaker_2_voice
348
+ future = executor.submit(get_mp3, line.text, voice, audio_model, openai_api_key)
349
+ futures.append((future, transcript_line))
350
+ characters += len(line.text)
351
+
352
+ for future, transcript_line in futures:
353
+ audio_chunk = future.result()
354
+ audio += audio_chunk
355
+ transcript += transcript_line + "\n\n"
356
+
357
+ logger.info(f"Generated {characters} characters of audio")
358
+
359
+ temporary_directory = "./gradio_cached_examples/tmp/"
360
+ os.makedirs(temporary_directory, exist_ok=True)
361
+
362
+ # Use a temporary file -- Gradio's audio component doesn't work with raw bytes in Safari
363
+ temporary_file = NamedTemporaryFile(
364
+ dir=temporary_directory,
365
+ delete=False,
366
+ suffix=".mp3",
367
+ )
368
+ temporary_file.write(audio)
369
+ temporary_file.close()
370
 
371
+ # Delete any files in the temp directory that end with .mp3 and are over a day old
372
+ for file in glob.glob(f"{temporary_directory}*.mp3"):
373
+ if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
374
+ os.remove(file)
375
 
 
376
 
377
+
378
+ # audio_file, transcript, original_text = generate_audio(*args)
379
+ # return audio_file, transcript, original_text, None # Return None as the error when successful
380
+
381
+ return temporary_file.name, transcript, combined_text, None
382
+
 
383
  except Exception as e:
384
  # If an error occurs during generation, return None for the outputs and the error message
385
  return None, None, None, str(e)
386
 
387
+
388
+ # def validate_and_generate_audio(*args):
389
+ # url = args[0]
390
+ # if not url:
391
+ # return None, None, None, "Please provide a valid URL before generating audio."
392
+ # try:
393
+ # audio_file, transcript, original_text = generate_audio(*args)
394
+ # return audio_file, transcript, original_text, None # Return None as the error when successful
395
+ # except Exception as e:
396
+ # # If an error occurs during generation, return None for the outputs and the error message
397
+ # return None, None, None, str(e)
398
+
399
+ # def edit_and_regenerate(edited_transcript, user_feedback, *args):
400
+ # # Replace the original transcript and feedback in the args with the new ones
401
+ # #new_args = list(args)
402
+ # #new_args[-2] = edited_transcript # Update edited transcript
403
+ # #new_args[-1] = user_feedback # Update user feedback
404
+ # return validate_and_generate_audio(*new_args)
405
 
406
 
407
 
 
411
  # Add user feedback to the args
412
  new_args = list(args)
413
  new_args.append(feedback) # Add user feedback as a new argument
414
+ return generate_audio(*new_args)
415
 
416
  with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
417
  with gr.Row(equal_height=True):
 
465
  text_model = gr.Dropdown(
466
  label="Text Generation Model",
467
  choices=STANDARD_TEXT_MODELS,
468
+ value="gpt-4o-mini", #"gpt-4o-mini",
469
  info="Select the model to generate the dialogue text.",
470
  )
471
  audio_model = gr.Dropdown(
 
567
  )
568
 
569
  submit_btn.click(
570
+ fn=generate_audio,
571
  inputs=[
572
  url_input, openai_api_key, text_model, audio_model,
573
  speaker_1_voice, speaker_2_voice, api_base,
 
591
  )
592
 
593
  regenerate_btn.click(
594
+ fn=lambda use_edit, edit, *args: generate_audio(
595
  *args[:12], # All inputs up to podcast_dialog_instructions
596
  edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
597
  *args[12:] # user_feedback and original_text_output
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gettext
testGradioAPI.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio_client import Client
2
+
3
+ client = Client("http://127.0.0.1:7860/")
4
+ result = client.predict(
5
+ param_0="Hello!!",
6
+ param_1="Hello!!",
7
+ param_2="o1-preview-2024-09-12",
8
+ param_3="tts-1",
9
+ param_4="alloy",
10
+ param_5="echo",
11
+ param_6="Hello!!",
12
+ param_12="Hello!!",
13
+ param_13="Hello!!",
14
+ api_name="/validate_and_generate_audio"
15
+ )
16
+ print(result)
test_load_messages.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gettext
2
+ import os
3
+ import locale
4
+
5
+ locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
6
+
7
+ def load_translation(lang_code):
8
+ locale_path = os.path.join(os.path.dirname(__file__), 'locales')
9
+ try:
10
+ translation = gettext().translation('messages', localedir=locale_path, languages=[lang_code])
11
+ translation.install()
12
+ return translation.gettext # Return the translation function '_'
13
+ except FileNotFoundError:
14
+ print(f"Translation file for language '{lang_code}' not found.")
15
+ return lambda s: s # Fallback to no translation
16
+ except UnicodeDecodeError as e:
17
+ print(f"UnicodeDecodeError: {e}")
18
+ return lambda s: s # Fallback to no translation
19
+
20
+ def test_load_messages():
21
+ print("Testing English Translations:")
22
+ _ = load_translation('en')
23
+ print(_("podcast.intro"))
24
+ print(_("podcast.text_instructions"))
25
+
26
+ print("\nTesting French Translations:")
27
+ _ = load_translation('fr')
28
+ print(_("podcast.intro"))
29
+ print(_("podcast.text_instructions"))
30
+ print(_("podcast.scratch_pad"))
31
+ if __name__ == "__main__":
32
+ test_load_messages()