amir22010 committed
Commit 8cb91c0
1 Parent(s): 8ede9d9

Update app.py

Files changed (1): app.py +16 -32
app.py CHANGED
@@ -6,43 +6,27 @@ import numpy as np
 import wave
 
 #tts
-from balacoon_tts import TTS
-from threading import Lock
-from huggingface_hub import hf_hub_download, list_repo_files
-import io
 import tempfile
+import torchaudio
+from speechbrain.inference.TTS import FastSpeech2
+from speechbrain.inference.vocoders import HIFIGAN
 
-#tts cpu model
-tts_model_str = "en_us_hifi_jets_cpu.addon"
-
-for name in list_repo_files(repo_id="balacoon/tts"):
-    if name == tts_model_str:
-        if not os.path.isfile(os.path.join(os.getcwd(), name)):
-            hf_hub_download(
-                repo_id="balacoon/tts",
-                filename=name,
-                local_dir=os.getcwd(),
-            )
-
-
-tts = TTS(os.path.join(os.getcwd(), tts_model_str))
+fastspeech2 = FastSpeech2.from_hparams(source="speechbrain/tts-fastspeech2-ljspeech", savedir="pretrained_models/tts-fastspeech2-ljspeech")
+hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="pretrained_models/tts-hifigan-ljspeech")
 
 def text_to_speech(text):
     with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
-        if len(text) > 1024:
-            # truncate the text
-            text_str = text[:1024]
-        else:
-            text_str = text
-        with locker:
-            global tts
-            samples = tts.synthesize(text_str, "92")
-            output_file = temp_file.name
-            with wave.open(f"{output_file}", "w") as fp:
-                fp.setparams((1, 2, tts.get_sampling_rate(), len(samples), "NONE", "NONE"))
-                samples = np.ascontiguousarray(samples)
-                fp.writeframes(samples)
-            return output_file
+        mel_output, durations, pitch, energy = fastspeech2.encode_text(
+            [text],
+            pace=1.0,        # scale up/down the speed
+            pitch_rate=1.0,  # scale up/down the pitch
+            energy_rate=1.0, # scale up/down the energy
+        )
+        # Running Vocoder (spectrogram-to-waveform)
+        waveforms = hifi_gan.decode_batch(mel_output)
+        # Save the waveform
+        torchaudio.save(temp_file.name, waveforms.squeeze(1), 22050)
+        return temp_file.name
 
 def combine_audio_files(audio_files):
     data= []
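
For context, a minimal standalone sketch of the SpeechBrain FastSpeech2 + HiFi-GAN pipeline that the updated text_to_speech wraps. The model sources, the encode_text/decode_batch calls, and the 22050 Hz sample rate are taken from the diff above; the example sentence and the output path example.wav are illustrative only, not part of the commit.

    import torchaudio
    from speechbrain.inference.TTS import FastSpeech2
    from speechbrain.inference.vocoders import HIFIGAN

    # Load the pretrained LJSpeech models referenced in the updated app.py
    fastspeech2 = FastSpeech2.from_hparams(
        source="speechbrain/tts-fastspeech2-ljspeech",
        savedir="pretrained_models/tts-fastspeech2-ljspeech",
    )
    hifi_gan = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech",
        savedir="pretrained_models/tts-hifigan-ljspeech",
    )

    # Text -> mel spectrogram (FastSpeech2), then mel -> waveform (HiFi-GAN)
    mel_output, durations, pitch, energy = fastspeech2.encode_text(
        ["Hello from the updated text to speech pipeline."],  # illustrative input
        pace=1.0,
        pitch_rate=1.0,
        energy_rate=1.0,
    )
    waveforms = hifi_gan.decode_batch(mel_output)

    # LJSpeech models produce audio at 22050 Hz, matching the rate used in app.py
    torchaudio.save("example.wav", waveforms.squeeze(1), 22050)

Note that, unlike the removed balacoon path, the new text_to_speech no longer truncates input to 1024 characters or serializes synthesis behind a lock, so very long strings may need to be chunked by the caller before being passed to combine_audio_files.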