Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,43 +6,27 @@ import numpy as np
|
|
6 |
import wave
|
7 |
|
8 |
#tts
|
9 |
-
from balacoon_tts import TTS
|
10 |
-
from threading import Lock
|
11 |
-
from huggingface_hub import hf_hub_download, list_repo_files
|
12 |
-
import io
|
13 |
import tempfile
|
|
|
|
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
for name in list_repo_files(repo_id="balacoon/tts"):
|
19 |
-
if name == tts_model_str:
|
20 |
-
if not os.path.isfile(os.path.join(os.getcwd(), name)):
|
21 |
-
hf_hub_download(
|
22 |
-
repo_id="balacoon/tts",
|
23 |
-
filename=name,
|
24 |
-
local_dir=os.getcwd(),
|
25 |
-
)
|
26 |
-
|
27 |
-
|
28 |
-
tts = TTS(os.path.join(os.getcwd(), tts_model_str))
|
29 |
|
30 |
def text_to_speech(text):
|
31 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
samples = np.ascontiguousarray(samples)
|
44 |
-
fp.writeframes(samples)
|
45 |
-
return output_file
|
46 |
|
47 |
def combine_audio_files(audio_files):
|
48 |
data= []
|
|
|
6 |
import wave
|
7 |
|
8 |
#tts
|
|
|
|
|
|
|
|
|
9 |
import tempfile
|
10 |
+
import torchaudio
|
11 |
+
from speechbrain.inference.TTS import FastSpeech2
|
12 |
+
from speechbrain.inference.vocoders import HIFIGAN
|
13 |
|
14 |
+
fastspeech2 = FastSpeech2.from_hparams(source="speechbrain/tts-fastspeech2-ljspeech", savedir="pretrained_models/tts-fastspeech2-ljspeech")
|
15 |
+
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="pretrained_models/tts-hifigan-ljspeech")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
def text_to_speech(text):
|
18 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
|
19 |
+
mel_output, durations, pitch, energy = fastspeech2.encode_text(
|
20 |
+
[text],
|
21 |
+
pace=1.0, # scale up/down the speed
|
22 |
+
pitch_rate=1.0, # scale up/down the pitch
|
23 |
+
energy_rate=1.0, # scale up/down the energy
|
24 |
+
)
|
25 |
+
# Running Vocoder (spectrogram-to-waveform)
|
26 |
+
waveforms = hifi_gan.decode_batch(mel_output)
|
27 |
+
# Save the waveform
|
28 |
+
torchaudio.save(temp_file.name, waveforms.squeeze(1), 22050)
|
29 |
+
return temp_file.name
|
|
|
|
|
|
|
30 |
|
31 |
def combine_audio_files(audio_files):
|
32 |
data= []
|