Spaces:

TArtx
/

parler_tts_British

Sleeping

App Files Files Community

TArtx committed on Dec 7, 2024

Commit

f38c401

verified ·

1 Parent(s): 5fdd9cc

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -75

app.py CHANGED Viewed

@@ -7,12 +7,12 @@ import re
 from parler_tts import ParlerTTSForConditionalGeneration
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
-# Set device
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-# Load Mini model and associated components
 repo_id = "TArtx/parler-tts-mini-v1-finetuned-12"
-model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
 tokenizer = AutoTokenizer.from_pretrained(repo_id)
 feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
@@ -33,7 +33,6 @@ def preprocess(text):
     text = text.replace("-", " ")
     if text[-1] not in punctuation:
         text = f"{text}."
     abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
     def separate_abb(chunk):
@@ -48,78 +47,31 @@ def preprocess(text):
 # TTS generation function
 def gen_tts(text, description):
-    inputs = tokenizer(description.strip(), return_tensors="pt").to(device)
-    prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
-    set_seed(SEED)
-    generation = model.generate(
-        input_ids=inputs.input_ids,
-        prompt_input_ids=prompt.input_ids,
-        attention_mask=inputs.attention_mask,
-        prompt_attention_mask=prompt.attention_mask,
-        do_sample=True,
-        temperature=1.0,
-    )
-    audio_arr = generation.cpu().numpy().squeeze()
-    return SAMPLE_RATE, audio_arr
-# CSS for styling
-css = """
-        #share-btn-container {
-            display: flex;
-            padding-left: 0.5rem !important;
-            padding-right: 0.5rem !important;
-            background-color: #000000;
-            justify-content: center;
-            align-items: center;
-            border-radius: 9999px !important;
-            width: 13rem;
-            margin-top: 10px;
-            margin-left: auto;
-            flex: unset !important;
-        }
-        #share-btn {
-            all: initial;
-            color: #ffffff;
-            font-weight: 600;
-            cursor: pointer;
-            font-family: 'IBM Plex Sans', sans-serif;
-            margin-left: 0.5rem !important;
-            padding-top: 0.25rem !important;
-            padding-bottom: 0.25rem !important;
-            right:0;
-        }
-        #share-btn * {
-            all: unset !important;
-        }
-        #share-btn-container div:nth-child(-n+2){
-            width: auto !important;
-            min-height: 0px !important;
-        }
-        #share-btn-container .wrap {
-            display: none !important;
-        }
-"""
 # Gradio interface
-with gr.Blocks(css=css) as block:
-    gr.HTML(
-        """
-            <div style="text-align: center; max-width: 700px; margin: 0 auto;">
-              <div
-                style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;"
-              >
-                <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
-                  Parler-TTS 🗣️
-                </h1>
-              </div>
-            </div>
         """
-    )
-    gr.HTML(
-        f"""
-        <p><a href="https://github.com/huggingface/parler-tts"> Parler-TTS</a> is a training and inference library for
-        high-fidelity text-to-speech (TTS) models. The demo uses the Mini v1 model by default.</p>
         """
     )
     with gr.Row():
@@ -136,4 +88,4 @@ with gr.Blocks(css=css) as block:
 # Launch the interface
 block.queue()
-block.launch(share=True)

 from parler_tts import ParlerTTSForConditionalGeneration
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
+# Set device to CPU only
+device = "cpu"
+# Load Mini model and associated components with low memory usage
 repo_id = "TArtx/parler-tts-mini-v1-finetuned-12"
+model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id, low_cpu_mem_usage=True).to(device)
 tokenizer = AutoTokenizer.from_pretrained(repo_id)
 feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
     text = text.replace("-", " ")
     if text[-1] not in punctuation:
         text = f"{text}."
     abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
     def separate_abb(chunk):
 # TTS generation function
 def gen_tts(text, description):
+    """Synthesize speech for `text` in the voice described by `description`.
+
+    Both strings are tokenized (truncated to 128 tokens) and passed to
+    `model.generate` with sampling enabled under a fixed seed.
+
+    Returns a `(SAMPLE_RATE, audio_arr)` tuple, where `audio_arr` is the
+    squeezed NumPy array built from the generation output.
+
+    Raises `gr.Error` if preprocessing, tokenization or generation fails.
+    """
+    try:
+        # Tokenize inputs and prompts with truncation to avoid memory issues
+        inputs = tokenizer(description.strip(), return_tensors="pt", truncation=True, max_length=128).to(device)
+        prompt = tokenizer(preprocess(text), return_tensors="pt", truncation=True, max_length=128).to(device)
+        set_seed(SEED)
+        generation = model.generate(
+            input_ids=inputs.input_ids,
+            prompt_input_ids=prompt.input_ids,
+            attention_mask=inputs.attention_mask,
+            # The tokenizer output exposes `attention_mask`; only the
+            # generate() keyword argument is named `prompt_attention_mask`.
+            prompt_attention_mask=prompt.attention_mask,
+            do_sample=True,
+            temperature=1.0,
+        )
+        audio_arr = generation.cpu().numpy().squeeze()
+        return SAMPLE_RATE, audio_arr
+    except Exception as e:
+        # A string is not valid audio data, so report the failure through
+        # Gradio's error mechanism instead of returning it as the waveform.
+        raise gr.Error(f"Error: {str(e)}") from e
 # Gradio interface
+with gr.Blocks() as block:
+    gr.Markdown(
         """
+        ## Parler-TTS 🗣️
+        Parler-TTS is a training and inference library for high-fidelity text-to-speech (TTS) models. This demo uses the Mini v1 model.
         """
     )
     with gr.Row():
 # Launch the interface
 block.queue()
+block.launch()