multi_parler_tts

Running on Zero

PHBJT committed 16 days ago

Commit

44bb8b9

•

1 Parent(s): 03d612a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -50,34 +50,40 @@ def format_description(raw_description, do_format=True):
         return raw_description
     messages = [{
         "role": "user",
-        "content": f"""Format this voice description to match exactly:
 "a [gender] with a [pitch] voice speaks [speed] in a [environment], [delivery style]"
-Where:
 - gender: man/woman
 - pitch: slightly low-pitched/moderate pitch/high-pitched
-- speed: slowly/moderately/quickly
 - environment: close-sounding and clear/distant-sounding and noisy
 - delivery style: with monotone delivery/with animated delivery
-Description to format: {raw_description}"""
     }]
     input_text = smol_tokenizer.apply_chat_template(messages, tokenize=False)
     inputs = smol_tokenizer.encode(input_text, return_tensors="pt").to(device)
     outputs = smol_model.generate(
         inputs,
-        max_new_tokens=200,
-        temperature=0.7,
         top_p=0.9,
         do_sample=True
     )
     formatted = smol_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Extract the formatted description from the response
-    try:
-        return formatted.split("a ")[-1].strip()
-    except:
-        return raw_description
 def preprocess(text):
     text = number_normalizer(text).strip()

         return raw_description
     messages = [{
+        "role": "system",
+        "content": "You are a helpful assistant that formats voice descriptions precisely according to the template provided."
+    }, {
         "role": "user",
+        "content": f"""Format this voice description exactly as:
 "a [gender] with a [pitch] voice speaks [speed] in a [environment], [delivery style]"
+Required format:
 - gender: man/woman
 - pitch: slightly low-pitched/moderate pitch/high-pitched
+- speed: slowly/moderately/quickly
 - environment: close-sounding and clear/distant-sounding and noisy
 - delivery style: with monotone delivery/with animated delivery
+Input description: {raw_description}
+Return only the formatted description, nothing else."""
     }]
     input_text = smol_tokenizer.apply_chat_template(messages, tokenize=False)
     inputs = smol_tokenizer.encode(input_text, return_tensors="pt").to(device)
     outputs = smol_model.generate(
         inputs,
+        max_new_tokens=100,
+        temperature=0.2,
         top_p=0.9,
         do_sample=True
     )
     formatted = smol_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Extract just the formatted description
+    if "a woman" in formatted.lower() or "a man" in formatted.lower():
+        return formatted.strip()
+    return raw_description
 def preprocess(text):
     text = number_normalizer(text).strip()