Spaces:

Mightypeacock
/

webtoaudio

Sleeping

App Files Files Community

Brunwo committed on Oct 2, 2024

Commit

276796f

1 Parent(s): 001156c

WIP

Browse files

Files changed (5) hide show

.gitignore +2 -0
app.py +149 -131
packages.txt +1 -0
testGradioAPI.py +16 -0
test_load_messages.py +32 -0

.gitignore CHANGED Viewed

@@ -5,3 +5,5 @@ pwa-report.json
 server.pem
 flagged/log.csv
 .aider*

 server.pem
 flagged/log.csv
 .aider*
+locales/fr/LC_MESSAGES/messages.mo
+locales/en/LC_MESSAGES/messages.mo

app.py CHANGED Viewed

@@ -15,6 +15,7 @@ from pydantic import BaseModel, ValidationError
 from pypdf import PdfReader
 from tenacity import retry, retry_if_exception_type
 import re
 import requests
 from dotenv import load_dotenv
@@ -25,15 +26,22 @@ import gettext
 from gradio.themes.utils.theme_dropdown import create_theme_dropdown
-#dropdown, js = create_theme_dropdown()
 # Setup gettext
 def setup_translation(lang_code):
-    # The translation domain (like an app-specific identifier)
     locale_path = os.path.join(os.path.dirname(__file__), 'locales')
-    translation = gettext.translation('messages', localedir=locale_path, languages=[lang_code])
-    translation.install()
-    return translation.gettext  # Return the translation function '_'
 def read_readme():
@@ -130,14 +138,7 @@ def update_instructions_language(lang):
     }
     print(INSTRUCTION_TEMPLATES["podcast"]["intro"])
-    return (
-        INSTRUCTION_TEMPLATES["podcast"]["intro"],
-        INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
-        INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
-        INSTRUCTION_TEMPLATES["podcast"]["prelude"],
-        INSTRUCTION_TEMPLATES["podcast"]["dialog"]
-           )
 # Function to update instruction fields based on template selection
@@ -250,140 +251,157 @@ def get_text_from_url(url: str) -> str:
 def generate_audio(
     url: str,
     openai_api_key: str = None,
-    text_model: str = "o1-preview-2024-09-12",
     audio_model: str = "tts-1",
     speaker_1_voice: str = "alloy",
     speaker_2_voice: str = "echo",
     api_base: str = None,
-    intro_instructions: str = '',
-    text_instructions: str = '',
-    scratch_pad_instructions: str = '',
-    prelude_dialog: str = '',
-    podcast_dialog_instructions: str = '',
     edited_transcript: str = None,
     user_feedback: str = None,
     original_text: str = None,
     debug = False,
 ) -> tuple:
-    # Validate API Key
-    if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
-        raise gr.Error("OpenAI API key is required")
-    combined_text = original_text or ""
-    # If there's no original text, fetch it from the provided URL
-    if not combined_text:
-        combined_text = get_text_from_url(url)
-    # Configure the LLM based on selected model and api_base
-    @retry(retry=retry_if_exception_type(ValidationError))
-    @conditional_llm(model=text_model, api_base=api_base, api_key=openai_api_key)
-    def generate_dialogue(text: str, intro_instructions: str, text_instructions: str, scratch_pad_instructions: str,
-                          prelude_dialog: str, podcast_dialog_instructions: str,
-                          edited_transcript: str = None, user_feedback: str = None, ) -> Dialogue:
-        """
-        {intro_instructions}
-        Here is the original input text:
-        <input_text>
-        {text}
-        </input_text>
-        {text_instructions}
-        <scratchpad>
-        {scratch_pad_instructions}
-        </scratchpad>
-        {prelude_dialog}
-        <podcast_dialogue>
-        {podcast_dialog_instructions}
-        </podcast_dialogue>
-        {edited_transcript}{user_feedback}
-        """
-    instruction_improve='Based on the original text, please generate an improved version of the dialogue by incorporating the edits, comments and feedback.'
-    edited_transcript_processed="\nPreviously generated edited transcript, with specific edits and comments that I want you to carefully address:\n"+"<edited_transcript>\n"+edited_transcript+"</edited_transcript>" if edited_transcript !="" else ""
-    user_feedback_processed="\nOverall user feedback:\n\n"+user_feedback if user_feedback !="" else ""
-    if edited_transcript_processed.strip()!='' or user_feedback_processed.strip()!='':
-        user_feedback_processed="<requested_improvements>"+user_feedback_processed+"\n\n"+instruction_improve+"</requested_improvements>"
-    if debug:
-        logger.info (edited_transcript_processed)
-        logger.info (user_feedback_processed)
-    # Generate the dialogue using the LLM
-    llm_output = generate_dialogue(
-        combined_text,
-        intro_instructions=intro_instructions,
-        text_instructions=text_instructions,
-        scratch_pad_instructions=scratch_pad_instructions,
-        prelude_dialog=prelude_dialog,
-        podcast_dialog_instructions=podcast_dialog_instructions,
-        edited_transcript=edited_transcript_processed,
-        user_feedback=user_feedback_processed
-    )
-    # Generate audio from the transcript
-    audio = b""
-    transcript = ""
-    characters = 0
-    with cf.ThreadPoolExecutor() as executor:
-        futures = []
-        for line in llm_output.dialogue:
-            transcript_line = f"{line.speaker}: {line.text}"
-            voice = speaker_1_voice if line.speaker == "speaker-1" else speaker_2_voice
-            future = executor.submit(get_mp3, line.text, voice, audio_model, openai_api_key)
-            futures.append((future, transcript_line))
-            characters += len(line.text)
-        for future, transcript_line in futures:
-            audio_chunk = future.result()
-            audio += audio_chunk
-            transcript += transcript_line + "\n\n"
-    logger.info(f"Generated {characters} characters of audio")
-    temporary_directory = "./gradio_cached_examples/tmp/"
-    os.makedirs(temporary_directory, exist_ok=True)
-    # Use a temporary file -- Gradio's audio component doesn't work with raw bytes in Safari
-    temporary_file = NamedTemporaryFile(
-        dir=temporary_directory,
-        delete=False,
-        suffix=".mp3",
-    )
-    temporary_file.write(audio)
-    temporary_file.close()
-    # Delete any files in the temp directory that end with .mp3 and are over a day old
-    for file in glob.glob(f"{temporary_directory}*.mp3"):
-        if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
-            os.remove(file)
-    return temporary_file.name, transcript, combined_text
-def validate_and_generate_audio(*args):
-    url = args[0]
-    if not url:
-        return None, None, None, "Please provide a valid URL before generating audio."
-    try:
-        audio_file, transcript, original_text = generate_audio(*args)
-        return audio_file, transcript, original_text, None  # Return None as the error when successful
     except Exception as e:
         # If an error occurs during generation, return None for the outputs and the error message
         return None, None, None, str(e)
-def edit_and_regenerate(edited_transcript, user_feedback, *args):
-    # Replace the original transcript and feedback in the args with the new ones
-    #new_args = list(args)
-    #new_args[-2] = edited_transcript  # Update edited transcript
-    #new_args[-1] = user_feedback  # Update user feedback
-    return validate_and_generate_audio(*new_args)
@@ -393,7 +411,7 @@ def process_feedback_and_regenerate(feedback, *args):
     # Add user feedback to the args
     new_args = list(args)
     new_args.append(feedback)  # Add user feedback as a new argument
-    return validate_and_generate_audio(*new_args)
 with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
     with gr.Row(equal_height=True):
@@ -447,7 +465,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
             text_model = gr.Dropdown(
                 label="Text Generation Model",
                 choices=STANDARD_TEXT_MODELS,
-                value="o1-preview-2024-09-12", #"gpt-4o-mini",
                 info="Select the model to generate the dialogue text.",
             )
             audio_model = gr.Dropdown(
@@ -549,7 +567,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
         )
     submit_btn.click(
-        fn=validate_and_generate_audio,
         inputs=[
             url_input, openai_api_key, text_model, audio_model,
             speaker_1_voice, speaker_2_voice, api_base,
@@ -573,7 +591,7 @@ with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
     )
     regenerate_btn.click(
-        fn=lambda use_edit, edit, *args: validate_and_generate_audio(
             *args[:12],  # All inputs up to podcast_dialog_instructions
             edit if use_edit else "",  # Use edited transcript if checkbox is checked, otherwise empty string
             *args[12:]  # user_feedback and original_text_output

 from pypdf import PdfReader
 from tenacity import retry, retry_if_exception_type
+import locale
 import re
 import requests
 from dotenv import load_dotenv
 from gradio.themes.utils.theme_dropdown import create_theme_dropdown
 # Setup gettext
 def setup_translation(lang_code):
+    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
     locale_path = os.path.join(os.path.dirname(__file__), 'locales')
+    try:
+        translation = gettext.translation('messages', localedir=locale_path, languages=[lang_code])
+        translation.install()
+        return translation.gettext  # Return the translation function '_'
+    except FileNotFoundError:
+        logger.error(f"Translation file for language '{lang_code}' not found.")
+        return lambda s: s  # Fallback to no translation
+    except UnicodeDecodeError as e:
+        logger.error(f"UnicodeDecodeError: {e}")
+        return lambda s: s  # Fallback to no translation
 def read_readme():
     }
     print(INSTRUCTION_TEMPLATES["podcast"]["intro"])
+    return update_instructions("podcast")
 # Function to update instruction fields based on template selection
 def generate_audio(
     url: str,
     openai_api_key: str = None,
+    text_model: str = "gpt-4o-mini-2024-07-18",
     audio_model: str = "tts-1",
     speaker_1_voice: str = "alloy",
     speaker_2_voice: str = "echo",
     api_base: str = None,
+    intro_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["intro"],
+    text_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
+    scratch_pad_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
+    prelude_dialog: str = INSTRUCTION_TEMPLATES["podcast"]["prelude"],
+    podcast_dialog_instructions: str = INSTRUCTION_TEMPLATES["podcast"]["dialog"],
     edited_transcript: str = None,
     user_feedback: str = None,
     original_text: str = None,
     debug = False,
 ) -> tuple:
+    if not url:
+        return None, None, None, "Please provide a valid URL before generating audio."
+    try:
+        # Validate API Key
+        if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
+            raise gr.Error("OpenAI API key is required")
+        combined_text = original_text or ""
+        # If there's no original text, fetch it from the provided URL
+        if not combined_text:
+            combined_text = get_text_from_url(url)
+        # Configure the LLM based on selected model and api_base
+        @retry(retry=retry_if_exception_type(ValidationError))
+        @conditional_llm(model=text_model, api_base=api_base, api_key=openai_api_key)
+        def generate_dialogue(text: str, intro_instructions: str, text_instructions: str, scratch_pad_instructions: str,
+                            prelude_dialog: str, podcast_dialog_instructions: str,
+                            edited_transcript: str = None, user_feedback: str = None, ) -> Dialogue:
+            """
+            {intro_instructions}
+            Here is the original input text:
+            <input_text>
+            {text}
+            </input_text>
+            {text_instructions}
+            <scratchpad>
+            {scratch_pad_instructions}
+            </scratchpad>
+            {prelude_dialog}
+            <podcast_dialogue>
+            {podcast_dialog_instructions}
+            </podcast_dialogue>
+            {edited_transcript}{user_feedback}
+            """
+        instruction_improve='Based on the original text, please generate an improved version of the dialogue by incorporating the edits, comments and feedback.'
+        edited_transcript_processed="\nPreviously generated edited transcript, with specific edits and comments that I want you to carefully address:\n"+"<edited_transcript>\n"+edited_transcript+"</edited_transcript>" if edited_transcript !="" else ""
+        user_feedback_processed="\nOverall user feedback:\n\n"+user_feedback if user_feedback !="" else ""
+        if edited_transcript_processed.strip()!='' or user_feedback_processed.strip()!='':
+            user_feedback_processed="<requested_improvements>"+user_feedback_processed+"\n\n"+instruction_improve+"</requested_improvements>"
+        if debug:
+            logger.info (edited_transcript_processed)
+            logger.info (user_feedback_processed)
+        # Generate the dialogue using the LLM
+        llm_output = generate_dialogue(
+            combined_text,
+            intro_instructions=intro_instructions,
+            text_instructions=text_instructions,
+            scratch_pad_instructions=scratch_pad_instructions,
+            prelude_dialog=prelude_dialog,
+            podcast_dialog_instructions=podcast_dialog_instructions,
+            edited_transcript=edited_transcript_processed,
+            user_feedback=user_feedback_processed
+        )
+        # Generate audio from the transcript
+        audio = b""
+        transcript = ""
+        characters = 0
+        with cf.ThreadPoolExecutor() as executor:
+            futures = []
+            for line in llm_output.dialogue:
+                transcript_line = f"{line.speaker}: {line.text}"
+                voice = speaker_1_voice if line.speaker == "speaker-1" else speaker_2_voice
+                future = executor.submit(get_mp3, line.text, voice, audio_model, openai_api_key)
+                futures.append((future, transcript_line))
+                characters += len(line.text)
+            for future, transcript_line in futures:
+                audio_chunk = future.result()
+                audio += audio_chunk
+                transcript += transcript_line + "\n\n"
+        logger.info(f"Generated {characters} characters of audio")
+        temporary_directory = "./gradio_cached_examples/tmp/"
+        os.makedirs(temporary_directory, exist_ok=True)
+        # Use a temporary file -- Gradio's audio component doesn't work with raw bytes in Safari
+        temporary_file = NamedTemporaryFile(
+            dir=temporary_directory,
+            delete=False,
+            suffix=".mp3",
+        )
+        temporary_file.write(audio)
+        temporary_file.close()
+        # Delete any files in the temp directory that end with .mp3 and are over a day old
+        for file in glob.glob(f"{temporary_directory}*.mp3"):
+            if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
+                os.remove(file)
+        # audio_file, transcript, original_text = generate_audio(*args)
+        # return audio_file, transcript, original_text, None  # Return None as the error when successful
+        return temporary_file.name, transcript, combined_text, None
     except Exception as e:
         # If an error occurs during generation, return None for the outputs and the error message
         return None, None, None, str(e)
+# def validate_and_generate_audio(*args):
+#     url = args[0]
+#     if not url:
+#         return None, None, None, "Please provide a valid URL before generating audio."
+#     try:
+#         audio_file, transcript, original_text = generate_audio(*args)
+#         return audio_file, transcript, original_text, None  # Return None as the error when successful
+#     except Exception as e:
+#         # If an error occurs during generation, return None for the outputs and the error message
+#         return None, None, None, str(e)
+# def edit_and_regenerate(edited_transcript, user_feedback, *args):
+#     # Replace the original transcript and feedback in the args with the new ones
+#     #new_args = list(args)
+#     #new_args[-2] = edited_transcript  # Update edited transcript
+#     #new_args[-1] = user_feedback  # Update user feedback
+#     return validate_and_generate_audio(*new_args)
     # Add user feedback to the args
     new_args = list(args)
     new_args.append(feedback)  # Add user feedback as a new argument
+    return generate_audio(*new_args)
 with gr.Blocks(theme='lone17/kotaemon', title="Text to Audio") as demo:
     with gr.Row(equal_height=True):
             text_model = gr.Dropdown(
                 label="Text Generation Model",
                 choices=STANDARD_TEXT_MODELS,
+                value="gpt-4o-mini", #"gpt-4o-mini",
                 info="Select the model to generate the dialogue text.",
             )
             audio_model = gr.Dropdown(
         )
     submit_btn.click(
+        fn=generate_audio,
         inputs=[
             url_input, openai_api_key, text_model, audio_model,
             speaker_1_voice, speaker_2_voice, api_base,
     )
     regenerate_btn.click(
+        fn=lambda use_edit, edit, *args: generate_audio(
             *args[:12],  # All inputs up to podcast_dialog_instructions
             edit if use_edit else "",  # Use edited transcript if checkbox is checked, otherwise empty string
             *args[12:]  # user_feedback and original_text_output

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ gettext

testGradioAPI.py ADDED Viewed

	@@ -0,0 +1,16 @@

+# Manual smoke test: send one prediction request to a Gradio app through gradio_client.
+from gradio_client import Client
+# Assumes the app is already being served at this local address.
+client = Client("http://127.0.0.1:7860/")
+# NOTE(review): app.py in this same commit comments out validate_and_generate_audio
+# and passes generate_audio to the button handlers -- confirm that the
+# "/validate_and_generate_audio" endpoint name below still exists.
+result = client.predict(
+		param_0="Hello!!",
+		param_1="Hello!!",
+		param_2="o1-preview-2024-09-12",
+		param_3="tts-1",
+		param_4="alloy",
+		param_5="echo",
+		param_6="Hello!!",
+		param_12="Hello!!",
+		param_13="Hello!!",
+		api_name="/validate_and_generate_audio"
+)
+# Print whatever the endpoint returned for manual inspection.
+print(result)

test_load_messages.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import gettext
+import os
+import locale
+locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
+def load_translation(lang_code):
+    locale_path = os.path.join(os.path.dirname(__file__), 'locales')
+    try:
+        translation = gettext().translation('messages', localedir=locale_path, languages=[lang_code])
+        translation.install()
+        return translation.gettext  # Return the translation function '_'
+    except FileNotFoundError:
+        print(f"Translation file for language '{lang_code}' not found.")
+        return lambda s: s  # Fallback to no translation
+    except UnicodeDecodeError as e:
+        print(f"UnicodeDecodeError: {e}")
+        return lambda s: s  # Fallback to no translation
+def test_load_messages():
+    print("Testing English Translations:")
+    _ = load_translation('en')
+    print(_("podcast.intro"))
+    print(_("podcast.text_instructions"))
+    print("\nTesting French Translations:")
+    _ = load_translation('fr')
+    print(_("podcast.intro"))
+    print(_("podcast.text_instructions"))
+    print(_("podcast.scratch_pad"))
+# Run the manual translation check only when this file is executed as a script.
+if __name__ == "__main__":
+    test_load_messages()