Spaces:
Sleeping
Sleeping
Update src/text_to_speech.py
Browse files- src/text_to_speech.py +24 -61
src/text_to_speech.py
CHANGED
@@ -14,74 +14,37 @@ def generate_audio(text, filename="output.mp3", gender="female", speed="normal")
|
|
14 |
gender (str): "male" (use MMS-TTS) or "female" (use gTTS).
|
15 |
speed (str): "slow", "normal", or "fast" (only for gTTS).
|
16 |
"""
|
17 |
-
print("\n[DEBUG] Function: generate_audio")
|
18 |
-
print(f"Received parameters -> text: {text[:50]}..., filename: {filename}, gender: {gender}, speed: {speed}")
|
19 |
-
|
20 |
lang = "vi"
|
21 |
|
22 |
if gender.lower() == "female":
|
23 |
-
|
24 |
speed_mapping = {"slow": True, "normal": False, "fast": False}
|
25 |
slow = speed_mapping.get(speed.lower(), False)
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
39 |
else:
|
40 |
-
print(
|
41 |
-
import os
|
42 |
-
|
43 |
def text_to_speech(gender, speed):
|
44 |
-
"""
|
45 |
-
Convert text files in the folder to speech and save as audio files.
|
46 |
-
|
47 |
-
Parameters:
|
48 |
-
gender (str): "male" (use MMS-TTS) or "female" (use gTTS).
|
49 |
-
speed (str): "slow", "normal", or "fast" (only for gTTS).
|
50 |
-
"""
|
51 |
-
print("\n[DEBUG] Function: text_to_speech")
|
52 |
-
print(f"Received parameters -> gender: {gender}, speed: {speed}")
|
53 |
-
|
54 |
text_folder = "./"
|
55 |
-
|
56 |
-
|
57 |
-
# Get all valid text files (excluding specific ones)
|
58 |
-
text_files = sorted([
|
59 |
-
f for f in os.listdir(text_folder)
|
60 |
-
if f.endswith('.txt') and f not in ["text.txt", "requirements.txt"]
|
61 |
-
])
|
62 |
-
|
63 |
-
print(f"[DEBUG] Found text files: {text_files}")
|
64 |
-
|
65 |
-
if not text_files:
|
66 |
-
print("[WARNING] No text files found for conversion.")
|
67 |
-
|
68 |
for text_file in text_files:
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
content = file.read()
|
74 |
-
|
75 |
-
audio_file = text_file.replace("txt", "mp3")
|
76 |
-
print(f"[DEBUG] Generating audio for: {text_file} -> {audio_file}")
|
77 |
-
|
78 |
-
generate_audio(content, audio_file, gender=gender, speed=speed)
|
79 |
-
|
80 |
-
# Verify if audio was created
|
81 |
-
if os.path.exists(audio_file):
|
82 |
-
print(f"✅ Audio file created: {audio_file}")
|
83 |
-
else:
|
84 |
-
print(f"[ERROR] Audio file {audio_file} was not created!")
|
85 |
-
|
86 |
-
except Exception as e:
|
87 |
-
print(f"[ERROR] Failed to process {text_file}: {e}")
|
|
|
14 |
gender (str): "male" (use MMS-TTS) or "female" (use gTTS).
|
15 |
speed (str): "slow", "normal", or "fast" (only for gTTS).
|
16 |
"""
|
|
|
|
|
|
|
17 |
lang = "vi"
|
18 |
|
19 |
if gender.lower() == "female":
|
20 |
+
# gTTS chỉ có giọng nữ
|
21 |
speed_mapping = {"slow": True, "normal": False, "fast": False}
|
22 |
slow = speed_mapping.get(speed.lower(), False)
|
23 |
|
24 |
+
tts = gTTS(text=text, lang=lang, slow=slow)
|
25 |
+
tts.save(filename)
|
26 |
+
print(f"✅ Audio saved as {filename}")
|
27 |
+
|
28 |
+
elif gender.lower() == "male":
|
29 |
+
# MMS-TTS cho giọng nam
|
30 |
+
model = VitsModel.from_pretrained("facebook/mms-tts-vie")
|
31 |
+
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-vie")
|
32 |
+
|
33 |
+
inputs = tokenizer(text, return_tensors="pt")
|
34 |
+
with torch.no_grad():
|
35 |
+
output = model(**inputs).waveform
|
36 |
+
|
37 |
+
# Lưu file âm thanh
|
38 |
+
torchaudio.save(filename, output, 24000, backend="sox_io")
|
39 |
+
print(f"✅ Audio saved as {filename}")
|
40 |
+
|
41 |
else:
|
42 |
+
print("⚠️ Giọng không hợp lệ! Chỉ hỗ trợ 'male' hoặc 'female'.")
|
|
|
|
|
43 |
def text_to_speech(gender, speed):
    """
    Convert the text files in the working folder to speech, one audio file each.

    Every file in ``./`` whose name ends with ``.txt`` is processed in sorted
    order, except ``text.txt`` and ``requirements.txt``. Each file is read as
    UTF-8 and its content is handed to ``generate_audio``; the audio is written
    next to the script under the same base name with an ``.mp3`` extension.

    Parameters:
        gender (str): "male" (use MMS-TTS) or "female" (use gTTS).
        speed (str): "slow", "normal", or "fast" (only for gTTS).
    """
    text_folder = "./"
    # Files that live in the same folder but are not meant to be narrated.
    excluded = {"text.txt", "requirements.txt"}
    text_files = sorted(
        name
        for name in os.listdir(text_folder)
        if name.endswith(".txt") and name not in excluded
    )

    for text_file in text_files:
        # Open relative to the folder that was listed, so the listing and the
        # read stay consistent if text_folder is ever changed.
        text_path = os.path.join(text_folder, text_file)
        with open(text_path, "r", encoding="utf-8") as file:
            content = file.read()

        # Swap only the extension. str.replace("txt", "mp3") would also rewrite
        # any "txt" inside the base name (e.g. "txt_notes.txt").
        audio_file = os.path.splitext(text_file)[0] + ".mp3"
        generate_audio(content, audio_file, gender=gender, speed=speed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|