anyantudre committed on
Commit
1ffb58d
·
verified ·
1 Parent(s): e41ca58

Update text_to_speech.py

Browse files
Files changed (1) hide show
  1. text_to_speech.py +29 -32
text_to_speech.py CHANGED
@@ -1,40 +1,37 @@
1
  import time
2
  import torch
3
- from transformers import set_seed
4
- from transformers import VitsTokenizer, VitsModel
5
 
6
def synthesize_facebook(s: str, iso3: str):
    """
    Synthesize speech for a text with a Facebook MMS-TTS checkpoint.

    Parameters
    ----------
    s : str
        The written text.
    iso3 : str
        The ISO-3 code of the text's language; it selects the
        ``facebook/mms-tts-{iso3}`` checkpoint.

    Returns
    -------
    numpy.ndarray
        The first waveform of the model output, converted with ``.numpy()``
        (not a ``str``, despite what the previous annotation claimed).
    """
    # Ensure replicability
    set_seed(555)
    start_time = time.time()

    # Load synthesizer: tokenizer and model come from the same checkpoint.
    checkpoint = f"facebook/mms-tts-{iso3}"
    tokenizer = VitsTokenizer.from_pretrained(checkpoint)
    model = VitsModel.from_pretrained(checkpoint)

    inputs = tokenizer(text=s, return_tensors="pt")

    # Inference (no gradients are needed for synthesis)
    with torch.no_grad():
        outputs = model(**inputs)

    # Keep only the first waveform of the batch.
    synth = outputs.waveform[0]

    print("Time elapsed: ", int(time.time() - start_time), " seconds")
    return synth.numpy()
 
1
  import time
2
  import torch
3
+ import scipy
4
+ from transformers import set_seed, pipeline
5
 
 
 
 
 
 
 
 
 
 
 
6
 
7
def goai_tts(texte, device, output_path="finetuned_output.wav"):
    """
    Synthesize Mooré speech for a given text and save it as a WAV file.

    Parameters
    ----------
    texte : str
        The written text.
    device : str
        Device handed to the pipeline (GPU or CPU).
    output_path : str, optional
        Where the WAV file is written. Defaults to "finetuned_output.wav",
        the filename that was previously hard-coded.

    Returns
    -------
    str
        Path of the WAV file containing the synthesized audio.
    """
    # Import the submodule explicitly: on older SciPy releases a bare
    # `import scipy` does not make `scipy.io.wavfile` available.
    from scipy.io import wavfile as wav_io

    ### ensure reproducibility
    set_seed(2024)

    start_time = time.time()

    ### load the TTS model
    model_id = "anyantudre/mms-tts-mos-V1"
    synthesiser = pipeline("text-to-speech", model_id, device=device)  # add device=0 if you want to use a GPU

    ### inference
    speech = synthesiser(texte)

    # `wavfile.write` returns None, so its result must not be returned;
    # hand back the path of the file that was just written instead.
    wav_io.write(output_path, rate=speech["sampling_rate"], data=speech["audio"][0])

    print("Temps écoulé: ", int(time.time() - start_time), " seconds")
    return output_path