Spaces:
Running
Running
Guenter Bartsch
committed on
Commit
·
882ecb2
1
Parent(s):
5947be5
upgrade multi-lang handling
Browse files
app.py
CHANGED
@@ -5,7 +5,6 @@ import librosa
|
|
5 |
import streamlit as st
|
6 |
|
7 |
from zerovox.tts.synthesize import ZeroVoxTTS
|
8 |
-
from zerovox.g2p.g2p import DEFAULT_G2P_MODEL_NAME_DE, DEFAULT_G2P_MODEL_NAME_EN
|
9 |
|
10 |
SAMPLE_RATE=24000 # FIXME
|
11 |
|
@@ -15,11 +14,13 @@ SAMPLE_SENTENCE_EN = "A rainbow is an optical phenomenon caused by refraction, i
|
|
15 |
#SAMPLE_SENTENCE_EN = "Welcome to the world of speech synthesis!"
|
16 |
SAMPLE_SENTENCE_DE = "Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einem von der Sonne beschienenen Regenschauer erscheint."
|
17 |
|
|
|
|
|
18 |
if "lang" not in st.session_state:
|
19 |
-
st.session_state.lang =
|
20 |
|
21 |
if "text" not in st.session_state:
|
22 |
-
st.session_state.text = SAMPLE_SENTENCE_EN
|
23 |
|
24 |
if "message" not in st.session_state:
|
25 |
st.session_state.message = "READY."
|
@@ -62,15 +63,10 @@ def do_synth():
|
|
62 |
|
63 |
status.update(label="loading the model...", state="running")
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
st.session_state.modelcfg, st.session_state.synth = ZeroVoxTTS.load_model(ZeroVoxTTS.get_default_model(),
|
68 |
-
g2p=g2p_model,
|
69 |
-
lang=lang,
|
70 |
meldec_model=st.session_state['meldec'],
|
71 |
infer_device='cpu',
|
72 |
num_threads=-1,
|
73 |
-
do_compile=False,
|
74 |
verbose=True)
|
75 |
|
76 |
synth = st.session_state.synth
|
@@ -114,7 +110,9 @@ with tab1:
|
|
114 |
|
115 |
st.checkbox("Custom voice", key='custom_voice')
|
116 |
|
117 |
-
|
|
|
|
|
118 |
|
119 |
if st.session_state.custom_voice:
|
120 |
|
@@ -136,7 +134,8 @@ with tab1:
|
|
136 |
speakers = [s for s in ZeroVoxTTS.available_speakerrefs()]
|
137 |
speakerref.selectbox("Voice", speakers, key='speakerref')
|
138 |
|
139 |
-
|
|
|
140 |
|
141 |
|
142 |
with tab2:
|
@@ -149,7 +148,7 @@ status = st.status(st.session_state.message, state="complete")
|
|
149 |
|
150 |
col1, col2 = st.columns([0.8, 0.2])
|
151 |
with col1:
|
152 |
-
text = st.
|
153 |
|
154 |
with col2:
|
155 |
lang = st.selectbox("Language",
|
|
|
5 |
import streamlit as st
|
6 |
|
7 |
from zerovox.tts.synthesize import ZeroVoxTTS
|
|
|
8 |
|
9 |
SAMPLE_RATE=24000 # FIXME
|
10 |
|
|
|
14 |
#SAMPLE_SENTENCE_EN = "Welcome to the world of speech synthesis!"
|
15 |
SAMPLE_SENTENCE_DE = "Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einem von der Sonne beschienenen Regenschauer erscheint."
|
16 |
|
17 |
+
DEFAULT_LANGUAGE = 'en'
|
18 |
+
|
19 |
if "lang" not in st.session_state:
|
20 |
+
st.session_state.lang = DEFAULT_LANGUAGE
|
21 |
|
22 |
if "text" not in st.session_state:
|
23 |
+
st.session_state.text = SAMPLE_SENTENCE_EN if st.session_state.lang == 'en' else SAMPLE_SENTENCE_DE
|
24 |
|
25 |
if "message" not in st.session_state:
|
26 |
st.session_state.message = "READY."
|
|
|
63 |
|
64 |
status.update(label="loading the model...", state="running")
|
65 |
|
66 |
+
st.session_state.modelcfg, st.session_state.synth = ZeroVoxTTS.load_model(ZeroVoxTTS.get_default_model(st.session_state.lang),
|
|
|
|
|
|
|
|
|
67 |
meldec_model=st.session_state['meldec'],
|
68 |
infer_device='cpu',
|
69 |
num_threads=-1,
|
|
|
70 |
verbose=True)
|
71 |
|
72 |
synth = st.session_state.synth
|
|
|
110 |
|
111 |
st.checkbox("Custom voice", key='custom_voice')
|
112 |
|
113 |
+
col1, col2 = st.columns([0.6, 0.4], vertical_alignment="bottom")
|
114 |
+
with col1:
|
115 |
+
speakerref = st.empty()
|
116 |
|
117 |
if st.session_state.custom_voice:
|
118 |
|
|
|
134 |
speakers = [s for s in ZeroVoxTTS.available_speakerrefs()]
|
135 |
speakerref.selectbox("Voice", speakers, key='speakerref')
|
136 |
|
137 |
+
with col2:
|
138 |
+
st.audio(ZeroVoxTTS.get_speakerref(st.session_state.speakerref, SAMPLE_RATE), sample_rate=SAMPLE_RATE)
|
139 |
|
140 |
|
141 |
with tab2:
|
|
|
148 |
|
149 |
col1, col2 = st.columns([0.8, 0.2])
|
150 |
with col1:
|
151 |
+
text = st.text_area("Text to synthesize", key='text', on_change=do_synth, height=128)
|
152 |
|
153 |
with col2:
|
154 |
lang = st.selectbox("Language",
|