Spaces:

goooofy
/

zerovox-demo

Running

App Files Files Community

Guenter Bartsch committed on Feb 24

Commit

882ecb2

1 Parent(s): 5947be5

upgrade multi-lang handling

Browse files

Files changed (1) hide show

app.py +11 -12

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ import librosa
 import streamlit as st
 from zerovox.tts.synthesize import ZeroVoxTTS
-from zerovox.g2p.g2p import DEFAULT_G2P_MODEL_NAME_DE, DEFAULT_G2P_MODEL_NAME_EN
 SAMPLE_RATE=24000 # FIXME
@@ -15,11 +14,13 @@ SAMPLE_SENTENCE_EN = "A rainbow is an optical phenomenon caused by refraction, i
 #SAMPLE_SENTENCE_EN = "Welcome to the world of speech synthesis!"
 SAMPLE_SENTENCE_DE = "Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einem von der Sonne beschienenen Regenschauer erscheint."
 if "lang" not in st.session_state:
-    st.session_state.lang = "en"
 if "text" not in st.session_state:
-    st.session_state.text = SAMPLE_SENTENCE_EN
 if "message" not in st.session_state:
     st.session_state.message = "READY."
@@ -62,15 +63,10 @@ def do_synth():
         status.update(label="loading the model...", state="running")
-        g2p_model = DEFAULT_G2P_MODEL_NAME_DE if lang=='de' else DEFAULT_G2P_MODEL_NAME_EN
-        st.session_state.modelcfg, st.session_state.synth = ZeroVoxTTS.load_model(ZeroVoxTTS.get_default_model(),
-                                                                                  g2p=g2p_model,
-                                                                                  lang=lang,
                                                                                   meldec_model=st.session_state['meldec'],
                                                                                   infer_device='cpu',
                                                                                   num_threads=-1,
-                                                                                  do_compile=False,
                                                                                   verbose=True)
     synth = st.session_state.synth
@@ -114,7 +110,9 @@ with tab1:
     st.checkbox("Custom voice", key='custom_voice')
-    speakerref = st.empty()
     if st.session_state.custom_voice:
@@ -136,7 +134,8 @@ with tab1:
         speakers = [s for s in ZeroVoxTTS.available_speakerrefs()]
         speakerref.selectbox("Voice", speakers, key='speakerref')
-        st.audio(ZeroVoxTTS.get_speakerref(st.session_state.speakerref, SAMPLE_RATE), sample_rate=SAMPLE_RATE)
 with tab2:
@@ -149,7 +148,7 @@ status = st.status(st.session_state.message, state="complete")
 col1, col2 = st.columns([0.8, 0.2])
 with col1:
-    text = st.text_input("Text to synthesize", key='text', on_change=do_synth)
 with col2:
     lang = st.selectbox("Language",

 import streamlit as st
 from zerovox.tts.synthesize import ZeroVoxTTS
 SAMPLE_RATE=24000 # FIXME
 #SAMPLE_SENTENCE_EN = "Welcome to the world of speech synthesis!"
 SAMPLE_SENTENCE_DE = "Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einem von der Sonne beschienenen Regenschauer erscheint."
+DEFAULT_LANGUAGE = 'en'
 if "lang" not in st.session_state:
+    st.session_state.lang = DEFAULT_LANGUAGE
 if "text" not in st.session_state:
+    st.session_state.text = SAMPLE_SENTENCE_EN if st.session_state.lang == 'en' else SAMPLE_SENTENCE_DE
 if "message" not in st.session_state:
     st.session_state.message = "READY."
         status.update(label="loading the model...", state="running")
+        st.session_state.modelcfg, st.session_state.synth = ZeroVoxTTS.load_model(ZeroVoxTTS.get_default_model(st.session_state.lang),
                                                                                   meldec_model=st.session_state['meldec'],
                                                                                   infer_device='cpu',
                                                                                   num_threads=-1,
                                                                                   verbose=True)
     synth = st.session_state.synth
     st.checkbox("Custom voice", key='custom_voice')
+    col1, col2 = st.columns([0.6, 0.4], vertical_alignment="bottom")
+    with col1:
+        speakerref = st.empty()
     if st.session_state.custom_voice:
         speakers = [s for s in ZeroVoxTTS.available_speakerrefs()]
         speakerref.selectbox("Voice", speakers, key='speakerref')
+        with col2:
+            st.audio(ZeroVoxTTS.get_speakerref(st.session_state.speakerref, SAMPLE_RATE), sample_rate=SAMPLE_RATE)
 with tab2:
 col1, col2 = st.columns([0.8, 0.2])
 with col1:
+    text = st.text_area("Text to synthesize", key='text', on_change=do_synth, height=128)
 with col2:
     lang = st.selectbox("Language",