Spaces:
Sleeping
Sleeping
Brunwo
committed on
Commit
·
276796f
1
Parent(s):
001156c
WIP
Browse files- .gitignore +2 -0
- app.py +149 -131
- packages.txt +1 -0
- testGradioAPI.py +16 -0
- test_load_messages.py +32 -0
.gitignore
CHANGED
@@ -5,3 +5,5 @@ pwa-report.json
|
|
5 |
server.pem
|
6 |
flagged/log.csv
|
7 |
.aider*
|
|
|
|
|
|
5 |
server.pem
|
6 |
flagged/log.csv
|
7 |
.aider*
|
8 |
+
locales/fr/LC_MESSAGES/messages.mo
|
9 |
+
locales/en/LC_MESSAGES/messages.mo
|
app.py
CHANGED
@@ -15,6 +15,7 @@ from pydantic import BaseModel, ValidationError
|
|
15 |
from pypdf import PdfReader
|
16 |
from tenacity import retry, retry_if_exception_type
|
17 |
|
|
|
18 |
import re
|
19 |
import requests
|
20 |
from dotenv import load_dotenv
|
@@ -25,15 +26,22 @@ import gettext
|
|
25 |
|
26 |
from gradio.themes.utils.theme_dropdown import create_theme_dropdown
|
27 |
|
28 |
-
#dropdown, js = create_theme_dropdown()
|
29 |
|
30 |
# Setup gettext
|
31 |
def setup_translation(lang_code):
|
32 |
-
|
|
|
33 |
locale_path = os.path.join(os.path.dirname(__file__), 'locales')
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
|
39 |
def read_readme():
|
@@ -130,14 +138,7 @@ def update_instructions_language(lang):
|
|
130 |
}
|
131 |
|
132 |
print(INSTRUCTION_TEMPLATES["podcast"]["intro"])
|
133 |
-
|
134 |
-
return (
|
135 |
-
INSTRUCTION_TEMPLATES["podcast"]["intro"],
|
136 |
-
INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
|
137 |
-
INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
|
138 |
-
INSTRUCTION_TEMPLATES["podcast"]["prelude"],
|
139 |
-
INSTRUCTION_TEMPLATES["podcast"]["dialog"]
|
140 |
-
)
|
141 |
|
142 |
|
143 |
# Function to update instruction fields based on template selection
|
@@ -250,140 +251,157 @@ def get_text_from_url(url: str) -> str:
|
|
250 |
def generate_audio(
|
251 |
url: str,
|
252 |
openai_api_key: str = None,
|
253 |
-
text_model: str = "
|
254 |
audio_model: str = "tts-1",
|
255 |
speaker_1_voice: str = "alloy",
|
256 |
speaker_2_voice: str = "echo",
|
257 |
api_base: str = None,
|
258 |
-
intro_instructions: str =
|
259 |
-
text_instructions: str =
|
260 |
-
scratch_pad_instructions: str =
|
261 |
-
prelude_dialog: str =
|
262 |
-
podcast_dialog_instructions: str =
|
263 |
edited_transcript: str = None,
|
264 |
user_feedback: str = None,
|
265 |
original_text: str = None,
|
266 |
debug = False,
|
267 |
) -> tuple:
|
268 |
-
|
269 |
-
if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
|
270 |
-
raise gr.Error("OpenAI API key is required")
|
271 |
-
|
272 |
-
combined_text = original_text or ""
|
273 |
-
|
274 |
-
# If there's no original text, fetch it from the provided URL
|
275 |
-
if not combined_text:
|
276 |
-
combined_text = get_text_from_url(url)
|
277 |
-
|
278 |
-
# Configure the LLM based on selected model and api_base
|
279 |
-
@retry(retry=retry_if_exception_type(ValidationError))
|
280 |
-
@conditional_llm(model=text_model, api_base=api_base, api_key=openai_api_key)
|
281 |
-
def generate_dialogue(text: str, intro_instructions: str, text_instructions: str, scratch_pad_instructions: str,
|
282 |
-
prelude_dialog: str, podcast_dialog_instructions: str,
|
283 |
-
edited_transcript: str = None, user_feedback: str = None, ) -> Dialogue:
|
284 |
-
"""
|
285 |
-
{intro_instructions}
|
286 |
-
|
287 |
-
Here is the original input text:
|
288 |
-
|
289 |
-
<input_text>
|
290 |
-
{text}
|
291 |
-
</input_text>
|
292 |
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
-
|
|
|
|
|
300 |
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
user_feedback_processed="<requested_improvements>"+user_feedback_processed+"\n\n"+instruction_improve+"</requested_improvements>"
|
313 |
-
|
314 |
-
if debug:
|
315 |
-
logger.info (edited_transcript_processed)
|
316 |
-
logger.info (user_feedback_processed)
|
317 |
-
|
318 |
-
# Generate the dialogue using the LLM
|
319 |
-
llm_output = generate_dialogue(
|
320 |
-
combined_text,
|
321 |
-
intro_instructions=intro_instructions,
|
322 |
-
text_instructions=text_instructions,
|
323 |
-
scratch_pad_instructions=scratch_pad_instructions,
|
324 |
-
prelude_dialog=prelude_dialog,
|
325 |
-
podcast_dialog_instructions=podcast_dialog_instructions,
|
326 |
-
edited_transcript=edited_transcript_processed,
|
327 |
-
user_feedback=user_feedback_processed
|
328 |
-
)
|
329 |
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
|
368 |
-
return temporary_file.name, transcript, combined_text
|
369 |
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
return audio_file, transcript, original_text, None # Return None as the error when successful
|
377 |
except Exception as e:
|
378 |
# If an error occurs during generation, return None for the outputs and the error message
|
379 |
return None, None, None, str(e)
|
380 |
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
387 |
|
388 |
|
389 |
|
@@ -393,7 +411,7 @@ def process_feedback_and_regenerate(feedback, *args):
|
|
393 |
# Add user feedback to the args
|
394 |
new_args = list(args)
|
395 |
new_args.append(feedback) # Add user feedback as a new argument
|
396 |
-
return
|
397 |
|
398 |
with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
|
399 |
with gr.Row(equal_height=True):
|
@@ -447,7 +465,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
|
|
447 |
text_model = gr.Dropdown(
|
448 |
label="Text Generation Model",
|
449 |
choices=STANDARD_TEXT_MODELS,
|
450 |
-
value="
|
451 |
info="Select the model to generate the dialogue text.",
|
452 |
)
|
453 |
audio_model = gr.Dropdown(
|
@@ -549,7 +567,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
|
|
549 |
)
|
550 |
|
551 |
submit_btn.click(
|
552 |
-
fn=
|
553 |
inputs=[
|
554 |
url_input, openai_api_key, text_model, audio_model,
|
555 |
speaker_1_voice, speaker_2_voice, api_base,
|
@@ -573,7 +591,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
|
|
573 |
)
|
574 |
|
575 |
regenerate_btn.click(
|
576 |
-
fn=lambda use_edit, edit, *args:
|
577 |
*args[:12], # All inputs up to podcast_dialog_instructions
|
578 |
edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
|
579 |
*args[12:] # user_feedback and original_text_output
|
|
|
15 |
from pypdf import PdfReader
|
16 |
from tenacity import retry, retry_if_exception_type
|
17 |
|
18 |
+
import locale
|
19 |
import re
|
20 |
import requests
|
21 |
from dotenv import load_dotenv
|
|
|
26 |
|
27 |
from gradio.themes.utils.theme_dropdown import create_theme_dropdown
|
28 |
|
|
|
29 |
|
30 |
# Setup gettext
|
31 |
def setup_translation(lang_code):
    """Install the gettext catalog for ``lang_code`` and return its lookup function.

    The catalog is looked up as domain ``messages`` under the ``locales``
    directory that sits next to this file.

    Args:
        lang_code: Language code passed to ``gettext.translation`` (the
            repository ships ``fr`` and ``en`` catalogs per its .gitignore).

    Returns:
        The catalog's ``gettext`` callable, or an identity function when the
        catalog is missing or cannot be decoded.
    """
    # NOTE: setlocale changes process-wide state on every call. The requested
    # locale is not guaranteed to exist on the host, in which case setlocale
    # raises locale.Error; translations can still be loaded, so log and go on
    # instead of crashing before the fallbacks below get a chance to run.
    try:
        locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    except locale.Error as e:
        logger.error(f"Could not set locale 'en_US.UTF-8': {e}")

    locale_path = os.path.join(os.path.dirname(__file__), 'locales')
    try:
        translation = gettext.translation('messages', localedir=locale_path, languages=[lang_code])
    except FileNotFoundError:
        logger.error(f"Translation file for language '{lang_code}' not found.")
        return lambda s: s  # Fallback to no translation
    except UnicodeDecodeError as e:
        logger.error(f"UnicodeDecodeError: {e}")
        return lambda s: s  # Fallback to no translation

    # install() also binds '_' in builtins for the rest of the process.
    translation.install()
    return translation.gettext  # Return the translation function '_'
|
45 |
|
46 |
|
47 |
def read_readme():
|
|
|
138 |
}
|
139 |
|
140 |
print(INSTRUCTION_TEMPLATES["podcast"]["intro"])
|
141 |
+
return update_instructions("podcast")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
|
144 |
# Function to update instruction fields based on template selection
|
|
|
251 |
def generate_audio(
    url: str,
    openai_api_key: str = None,
    text_model: str = "gpt-4o-mini-2024-07-18",
    audio_model: str = "tts-1",
    speaker_1_voice: str = "alloy",
    speaker_2_voice: str = "echo",
    api_base: str = None,
    intro_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["intro"],
    text_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
    scratch_pad_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
    prelude_dialog: str = INSTRUCTION_TEMPLATES["podcast"]["prelude"],
    podcast_dialog_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["dialog"],
    edited_transcript: str = None,
    user_feedback: str = None,
    original_text: str = None,
    debug = False,
) -> tuple:
    """Turn the text behind ``url`` into a two-speaker audio dialogue.

    The source text is ``original_text`` when given, otherwise it is fetched
    with ``get_text_from_url(url)``. An LLM produces a ``Dialogue``; each
    dialogue line is synthesised with ``get_mp3`` and the chunks are
    concatenated, in dialogue order, into one ``.mp3`` file.

    Args:
        url: Page to read. An empty value short-circuits with an error message.
        openai_api_key: Key used when ``OPENAI_API_KEY`` is not set in the
            environment.
        text_model: Model passed to ``conditional_llm`` for the dialogue.
        audio_model: Model passed to ``get_mp3``.
        speaker_1_voice: Voice for lines whose speaker is ``"speaker-1"``.
        speaker_2_voice: Voice for every other speaker.
        api_base: Base URL passed to ``conditional_llm``.
        intro_instructions, text_instructions, scratch_pad_instructions,
        prelude_dialog, podcast_dialog_instructions: Prompt sections
            substituted into the dialogue prompt template.
        edited_transcript: Previously generated transcript with user edits;
            ``None`` or ``""`` means "no edits".
        user_feedback: Free-form feedback; ``None`` or ``""`` means "none".
        original_text: Already-extracted source text, reused on regeneration.
        debug: When truthy, log the processed edit/feedback sections.

    Returns:
        ``(audio_path, transcript, source_text, None)`` on success, or
        ``(None, None, None, error_message)`` on failure. Every exception
        raised inside the ``try`` (including the ``gr.Error`` for a missing
        API key) is converted to that error tuple rather than propagated.
    """
    if not url:
        return None, None, None, "Please provide a valid URL before generating audio."

    try:
        # Validate API Key
        if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
            raise gr.Error("OpenAI API key is required")

        # If there's no original text, fetch it from the provided URL
        combined_text = original_text or ""
        if not combined_text:
            combined_text = get_text_from_url(url)

        # Configure the LLM based on selected model and api_base.
        # NOTE(review): the docstring below is presumably consumed by
        # conditional_llm as the prompt template (placeholders match the
        # parameter names) -- confirm against conditional_llm's definition.
        # It is runtime text, not documentation; do not reword it.
        @retry(retry=retry_if_exception_type(ValidationError))
        @conditional_llm(model=text_model, api_base=api_base, api_key=openai_api_key)
        def generate_dialogue(text: str, intro_instructions: str, text_instructions: str, scratch_pad_instructions: str,
                              prelude_dialog: str, podcast_dialog_instructions: str,
                              edited_transcript: str = None, user_feedback: str = None, ) -> Dialogue:
            """
            {intro_instructions}

            Here is the original input text:

            <input_text>
            {text}
            </input_text>

            {text_instructions}

            <scratchpad>
            {scratch_pad_instructions}
            </scratchpad>

            {prelude_dialog}

            <podcast_dialogue>
            {podcast_dialog_instructions}
            </podcast_dialogue>
            {edited_transcript}{user_feedback}
            """

        instruction_improve = 'Based on the original text, please generate an improved version of the dialogue by incorporating the edits, comments and feedback.'

        # Truthiness (not `!= ""`) so the None defaults are treated as
        # "nothing supplied" instead of failing on str + None.
        if edited_transcript:
            edited_transcript_processed = (
                "\nPreviously generated edited transcript, with specific edits and comments that I want you to carefully address:\n"
                + "<edited_transcript>\n" + edited_transcript + "</edited_transcript>"
            )
        else:
            edited_transcript_processed = ""

        if user_feedback:
            user_feedback_processed = "\nOverall user feedback:\n\n" + user_feedback
        else:
            user_feedback_processed = ""

        # Only wrap in <requested_improvements> when there is something to improve on.
        if edited_transcript_processed.strip() != '' or user_feedback_processed.strip() != '':
            user_feedback_processed = "<requested_improvements>" + user_feedback_processed + "\n\n" + instruction_improve + "</requested_improvements>"

        if debug:
            logger.info(edited_transcript_processed)
            logger.info(user_feedback_processed)

        # Generate the dialogue using the LLM
        llm_output = generate_dialogue(
            combined_text,
            intro_instructions=intro_instructions,
            text_instructions=text_instructions,
            scratch_pad_instructions=scratch_pad_instructions,
            prelude_dialog=prelude_dialog,
            podcast_dialog_instructions=podcast_dialog_instructions,
            edited_transcript=edited_transcript_processed,
            user_feedback=user_feedback_processed
        )

        # Generate audio from the transcript: submit every line first so the
        # syntheses overlap, then collect results in submission order so
        # the audio and transcript stay in dialogue order.
        audio_chunks = []
        transcript_lines = []
        characters = 0

        with cf.ThreadPoolExecutor() as executor:
            futures = []
            for line in llm_output.dialogue:
                transcript_line = f"{line.speaker}: {line.text}"
                voice = speaker_1_voice if line.speaker == "speaker-1" else speaker_2_voice
                future = executor.submit(get_mp3, line.text, voice, audio_model, openai_api_key)
                futures.append((future, transcript_line))
                characters += len(line.text)

            for future, transcript_line in futures:
                audio_chunks.append(future.result())
                transcript_lines.append(transcript_line + "\n\n")

        audio = b"".join(audio_chunks)
        transcript = "".join(transcript_lines)

        logger.info(f"Generated {characters} characters of audio")

        temporary_directory = "./gradio_cached_examples/tmp/"
        os.makedirs(temporary_directory, exist_ok=True)

        # Use a temporary file -- Gradio's audio component doesn't work with raw bytes in Safari.
        # delete=False keeps the file on disk after the handle is closed.
        with NamedTemporaryFile(
            dir=temporary_directory,
            delete=False,
            suffix=".mp3",
        ) as temporary_file:
            temporary_file.write(audio)

        # Delete any files in the temp directory that end with .mp3 and are over a day old
        for file in glob.glob(f"{temporary_directory}*.mp3"):
            if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
                os.remove(file)

        return temporary_file.name, transcript, combined_text, None

    except Exception as e:
        # If an error occurs during generation, return None for the outputs and the error message
        return None, None, None, str(e)
|
386 |
|
387 |
+
|
388 |
+
# def validate_and_generate_audio(*args):
|
389 |
+
# url = args[0]
|
390 |
+
# if not url:
|
391 |
+
# return None, None, None, "Please provide a valid URL before generating audio."
|
392 |
+
# try:
|
393 |
+
# audio_file, transcript, original_text = generate_audio(*args)
|
394 |
+
# return audio_file, transcript, original_text, None # Return None as the error when successful
|
395 |
+
# except Exception as e:
|
396 |
+
# # If an error occurs during generation, return None for the outputs and the error message
|
397 |
+
# return None, None, None, str(e)
|
398 |
+
|
399 |
+
# def edit_and_regenerate(edited_transcript, user_feedback, *args):
|
400 |
+
# # Replace the original transcript and feedback in the args with the new ones
|
401 |
+
# #new_args = list(args)
|
402 |
+
# #new_args[-2] = edited_transcript # Update edited transcript
|
403 |
+
# #new_args[-1] = user_feedback # Update user feedback
|
404 |
+
# return validate_and_generate_audio(*new_args)
|
405 |
|
406 |
|
407 |
|
|
|
411 |
# Add user feedback to the args
|
412 |
new_args = list(args)
|
413 |
new_args.append(feedback) # Add user feedback as a new argument
|
414 |
+
return generate_audio(*new_args)
|
415 |
|
416 |
with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
|
417 |
with gr.Row(equal_height=True):
|
|
|
465 |
text_model = gr.Dropdown(
|
466 |
label="Text Generation Model",
|
467 |
choices=STANDARD_TEXT_MODELS,
|
468 |
+
value="gpt-4o-mini", #"gpt-4o-mini",
|
469 |
info="Select the model to generate the dialogue text.",
|
470 |
)
|
471 |
audio_model = gr.Dropdown(
|
|
|
567 |
)
|
568 |
|
569 |
submit_btn.click(
|
570 |
+
fn=generate_audio,
|
571 |
inputs=[
|
572 |
url_input, openai_api_key, text_model, audio_model,
|
573 |
speaker_1_voice, speaker_2_voice, api_base,
|
|
|
591 |
)
|
592 |
|
593 |
regenerate_btn.click(
|
594 |
+
fn=lambda use_edit, edit, *args: generate_audio(
|
595 |
*args[:12], # All inputs up to podcast_dialog_instructions
|
596 |
edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
|
597 |
*args[12:] # user_feedback and original_text_output
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
gettext
|
testGradioAPI.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from gradio_client import Client

# Manual smoke test: calls a locally running Gradio app over its HTTP API and
# prints whatever the endpoint returns. Requires the app to be listening on
# 127.0.0.1:7860 before this script is run.
client = Client("http://127.0.0.1:7860/")
result = client.predict(
    # NOTE(review): param_0..param_6 presumably line up with the submit inputs
    # (url, API key, text model, audio model, two voices, api_base) -- confirm
    # against the app's click() wiring. "Hello!!" is a placeholder value.
    param_0="Hello!!",
    param_1="Hello!!",
    param_2="o1-preview-2024-09-12",
    param_3="tts-1",
    param_4="alloy",
    param_5="echo",
    param_6="Hello!!",
    param_12="Hello!!",
    param_13="Hello!!",
    # NOTE(review): app.py in this same commit comments out
    # validate_and_generate_audio and binds the submit button to
    # generate_audio, so this endpoint name may no longer exist -- verify
    # with client.view_api().
    api_name="/validate_and_generate_audio"
)
print(result)
|
test_load_messages.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gettext
|
2 |
+
import os
|
3 |
+
import locale
|
4 |
+
|
5 |
+
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
|
6 |
+
|
7 |
+
def load_translation(lang_code):
    """Load the ``messages`` gettext catalog for ``lang_code``.

    Catalogs are searched for under the ``locales`` directory next to this
    file.

    Args:
        lang_code: Language code handed to ``gettext.translation``.

    Returns:
        The catalog's ``gettext`` callable, or an identity function when the
        catalog file is missing or cannot be decoded.
    """
    locale_path = os.path.join(os.path.dirname(__file__), 'locales')
    try:
        # `gettext` is the module; translation() is a function on it. Calling
        # the module itself raises TypeError, which neither handler catches.
        translation = gettext.translation('messages', localedir=locale_path, languages=[lang_code])
        translation.install()
        return translation.gettext  # Return the translation function '_'
    except FileNotFoundError:
        print(f"Translation file for language '{lang_code}' not found.")
        return lambda s: s  # Fallback to no translation
    except UnicodeDecodeError as e:
        print(f"UnicodeDecodeError: {e}")
        return lambda s: s  # Fallback to no translation
|
19 |
+
|
20 |
+
def test_load_messages():
    """Print a handful of catalog entries for English, then French.

    This is a manual smoke check: it asserts nothing and only prints what
    each loaded translation function returns for the listed message ids.
    """
    suites = (
        ("Testing English Translations:", 'en',
         ("podcast.intro", "podcast.text_instructions")),
        ("\nTesting French Translations:", 'fr',
         ("podcast.intro", "podcast.text_instructions", "podcast.scratch_pad")),
    )
    for banner, language, message_ids in suites:
        print(banner)
        translate = load_translation(language)
        for message_id in message_ids:
            print(translate(message_id))
|
31 |
+
if __name__ == "__main__":
|
32 |
+
test_load_messages()
|