Spaces:

Steveeeeeeen
/

Zonos

Running on Zero

Steveeeeeeen HF staff committed 14 days ago

Commit

97132bd

verified ·

1 Parent(s): cce1550

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,6 +9,10 @@ from zonos.conditioning import make_cond_dict, supported_language_codes
 MODELS_CACHE = {}
 device = "cuda"
 def load_model(model_name: str):
     """
     Loads or retrieves a cached Zonos model, sets it to eval and bfloat16.
@@ -74,9 +78,18 @@ def tts(text, speaker_audio, selected_language, model_choice):
     return (sr_out, wav_out.numpy())
 def build_demo():
-    with gr.Blocks() as demo:
-        gr.Markdown("# Simple Zonos TTS Demo")
         with gr.Row():
             text_input = gr.Textbox(
                 label="Text Prompt",

 MODELS_CACHE = {}
 device = "cuda"
+banner_url = "https://huggingface.co/datasets/Steveeeeeeen/random_images/resolve/main/ZonosHeader.png"
+BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 150px; max-width: 300px;"> </div>'
 def load_model(model_name: str):
     """
     Loads or retrieves a cached Zonos model, sets it to eval and bfloat16.
     return (sr_out, wav_out.numpy())
 def build_demo():
+    with gr.Blocks(theme='davehornik/Tealy') as demo:
+        gr.HTML(BANNER, elem_id="banner")
+        gr.Markdown("## Zonos-v0.1 TTS Demo")
+        gr.Markdown(
+            """
+> **Zero-shot TTS with Voice Cloning**: Input text and a 10–30 second speaker sample to generate high-quality text-to-speech output.
+> **Audio Prefix Inputs**: Enhance speaker matching by adding an audio prefix to the text, enabling behaviors like whispering that are hard to achieve with voice cloning alone.
+> **Multilingual Support**: Supports English, Japanese, Chinese, French, and German.
+            """
+        )
         with gr.Row():
             text_input = gr.Textbox(
                 label="Text Prompt",