Spaces:

BadriNarayanan
/

gradio-text-to-speech-app

Sleeping

App Files Files Community

BadriNarayanan committed on Oct 22, 2024

Commit

abb986e

2 Parent(s): 517b5fd 3c3b34b

Modified Logo

Browse files

Files changed (3) hide show

app.py +165 -1
logo.jpg +0 -0
logo/logo.jpg +0 -0

app.py CHANGED Viewed

@@ -303,7 +303,7 @@ with gr.Blocks(theme=custom_theme, css=custom_css) as app:
             )
         with gr.Column(scale=1, elem_id="logo-column"):
-            gr.Image("logo/logo-removebg-preview.png", label="", show_label=False)
     with gr.Row():
         with gr.Column(scale=1):
@@ -382,5 +382,169 @@ with gr.Blocks(theme=custom_theme, css=custom_css) as app:
             """
         )
 if __name__ == "__main__":
     app.launch(share=True)

             )
         with gr.Column(scale=1, elem_id="logo-column"):
+            gr.Image("logo/logo.jpg", label="", show_label=False)
     with gr.Row():
         with gr.Column(scale=1):
             """
         )
+<<<<<<< HEAD
+=======
+    # Multi-line text box for the text to synthesize; its value is passed to
+    # generate_emotional_speech as ``gen_text`` and split into segments there.
+    gen_text_input_emotional = gr.Textbox(label="Text to Generate", lines=10)
+    # Radio selector for the TTS model; "F5-TTS" is preselected.
+    model_choice_emotional = gr.Radio(
+        choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS"
+    )
+    # Accordion that starts closed and holds the optional settings.
+    with gr.Accordion("Advanced Settings", open=False):
+        # Checkbox (ticked by default) whose value is forwarded to infer() as
+        # the remove-silence argument.
+        remove_silence_emotional = gr.Checkbox(
+            label="Remove Silences",
+            value=True,
+        )
+    # Button that triggers generation; validate_speech_types toggles whether
+    # it is interactive.
+    generate_emotional_btn = gr.Button("Generate Emotional Speech", variant="primary")
+    # Audio component that receives the return value of
+    # generate_emotional_speech.
+    audio_output_emotional = gr.Audio(label="Synthesized Audio")
+    @gpu_decorator
+    def generate_emotional_speech(
+        regular_audio,
+        regular_ref_text,
+        gen_text,
+        *args,
+    ):
+        """Synthesize ``gen_text`` segment by segment and join the results.
+
+        ``*args`` is unpacked positionally. With ``n = max_speech_types - 1``
+        it holds ``n`` speech-type names, ``n`` reference audios, ``n``
+        reference texts, then the model choice and the remove-silence flag.
+
+        Every segment produced by ``parse_speechtypes_text(gen_text)`` is
+        rendered with the reference audio/text registered under its
+        ``'emotion'`` key; segments naming an unregistered speech type use the
+        ``'Regular'`` entry instead.
+
+        Returns:
+            ``(sr, audio)`` where ``audio`` is the concatenation of all segment
+            outputs and ``sr`` is the value returned by the last ``infer``
+            call, or ``None`` (after a ``gr.Warning``) when the text yields no
+            segments.
+        """
+        extra = max_speech_types - 1
+        names = args[:extra]
+        audios = args[extra:2 * extra]
+        ref_texts = args[2 * extra:3 * extra]
+        model_choice, remove_silence = args[3 * extra], args[3 * extra + 1]
+
+        # Registry of usable voices; 'Regular' is always present, additional
+        # ones only when both a name and an audio were supplied.
+        speech_types = {'Regular': {'audio': regular_audio, 'ref_text': regular_ref_text}}
+        speech_types.update({
+            name: {'audio': clip, 'ref_text': ref}
+            for name, clip, ref in zip(names, audios, ref_texts)
+            if name and clip
+        })
+
+        generated_audio_segments = []
+        for segment in parse_speechtypes_text(gen_text):
+            emotion = segment['emotion']
+            text = segment['text']
+            # Unknown speech types fall back to the regular voice.
+            voice = speech_types[emotion if emotion in speech_types else 'Regular']
+            ref_audio = voice['audio']
+            ref_text = voice.get('ref_text', '')
+            # NOTE(review): the meaning of the trailing 0 is defined by
+            # infer(), which is not visible here - confirm against its
+            # signature.
+            audio, _ = infer(ref_audio, ref_text, text, model_choice, remove_silence, 0)
+            sr, audio_data = audio
+            generated_audio_segments.append(audio_data)
+
+        if not generated_audio_segments:
+            gr.Warning("No audio generated.")
+            return None
+        # Join the per-segment outputs in text order.
+        return (sr, np.concatenate(generated_audio_segments))
+    # Run generate_emotional_speech when the button is clicked. The order of
+    # ``inputs`` is a positional contract with that function: three fixed
+    # arguments, then all speech-type names, all reference audios, all
+    # reference texts, and finally the model choice and remove-silence flag,
+    # which the function slices back out of ``*args`` by index.
+    # NOTE(review): the slicing assumes each of the three lists has
+    # max_speech_types - 1 entries - confirm where they are built.
+    generate_emotional_btn.click(
+        generate_emotional_speech,
+        inputs=[
+            regular_audio,
+            regular_ref_text,
+            gen_text_input_emotional,
+        ] + speech_type_names + speech_type_audios + speech_type_ref_texts + [
+            model_choice_emotional,
+            remove_silence_emotional,
+        ],
+        outputs=audio_output_emotional,
+    )
+    # Validation function to disable Generate button if speech types are missing
+    def validate_speech_types(
+        gen_text,
+        regular_name,
+        *args
+    ):
+        """Enable the Generate button only if every speech type used is named.
+
+        Args:
+            gen_text: The text to synthesize, containing speech-type markers.
+            regular_name: Name entered for the regular speech type.
+            *args: Names of the additional speech types; only the first
+                ``max_speech_types - 1`` values are read.
+
+        Returns:
+            A ``gr.update`` whose ``interactive`` flag is ``False`` when the
+            text refers to a speech type that has no matching name, and
+            ``True`` otherwise.
+        """
+        num_additional_speech_types = max_speech_types - 1
+        speech_type_names_list = args[:num_additional_speech_types]
+        # Names that were actually filled in; empty fields are ignored.
+        speech_types_available = {
+            name for name in (regular_name, *speech_type_names_list) if name
+        }
+        # Parse with the same helper generate_emotional_speech uses, so that
+        # validation and generation agree on how the text is segmented.
+        segments = parse_speechtypes_text(gen_text)
+        speech_types_in_text = {segment['emotion'] for segment in segments}
+        # Any speech type referenced in the text but not named disables the
+        # button.
+        missing_speech_types = speech_types_in_text - speech_types_available
+        return gr.update(interactive=not missing_speech_types)
+    # Re-run validate_speech_types whenever the text box content changes. The
+    # function receives the text, the regular speech-type name and the
+    # additional speech-type names, and its returned gr.update is applied to
+    # the Generate button.
+    gen_text_input_emotional.change(
+        validate_speech_types,
+        inputs=[gen_text_input_emotional, regular_name] + speech_type_names,
+        outputs=generate_emotional_btn
+    )
+with gr.Blocks() as app:
+    # Page heading, rendered as Markdown.
+    gr.Markdown(value="\n# Antriksh AI\n")
+    # Logo image added after the heading; its label is hidden.
+    gr.Image(
+        value="logo.jpg",
+        show_label=False,
+        label="AI System Logo",
+        height=150,
+        width=300,
+    )
+    # One tab per sub-application, with titles listed in the same order.
+    gr.TabbedInterface(
+        interface_list=[app_tts, app_podcast, app_emotional, app_credits],
+        tab_names=["TTS", "Podcast", "Multi-Style", "Credits"],
+    )
+@click.command()
+@click.option("--port", "-p", default=None, type=int, help="Port to run the app on")
+@click.option("--host", "-H", default=None, help="Host to run the app on")
+@click.option(
+    "--share",
+    "-s",
+    default=False,
+    is_flag=True,
+    help="Share the app via Gradio share link",
+)
+# Declared as an on/off pair: a plain flag that defaults to True could never
+# be switched off from the command line. ``--api`` and ``-a`` still work.
+@click.option(
+    "--api/--no-api",
+    "-a",
+    default=True,
+    help="Allow API access",
+)
+def main(port, host, share, api):
+    """Launch the Gradio app with settings taken from the command line.
+
+    Args:
+        port: Passed to ``launch`` as ``server_port``; ``None`` when omitted.
+        host: Passed to ``launch`` as ``server_name``; ``None`` when omitted.
+        share: Passed to ``launch`` as ``share``.
+        api: Passed to ``queue`` as ``api_open`` and to ``launch`` as
+            ``show_api``.
+    """
+    # ``app`` is only read here, so no ``global`` declaration is needed.
+    print("Starting app...")
+    app.queue(api_open=api).launch(
+        server_name=host, server_port=port, share=share, show_api=api
+    )
+>>>>>>> 3c3b34b0ce3a85c2e202414d6764288cad249a97
 # Script entry point: launch the app with a Gradio share link enabled.
 # NOTE(review): this calls app.launch() directly, so the click-based main()
 # defined in this file is not invoked from here - confirm that is intended.
 if __name__ == "__main__":
     app.launch(share=True)

logo.jpg ADDED Viewed

logo/logo.jpg ADDED Viewed