Spaces:
Running
on
T4
Running
on
T4
github-actions[bot]
committed on
Commit
·
4657892
1
Parent(s):
623b3b9
Sync with https://github.com/mozilla-ai/document-to-podcast
Browse files
app.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
"""Streamlit app for converting documents to podcasts."""
|
2 |
|
3 |
import io
|
4 |
-
import os
|
5 |
import re
|
6 |
from pathlib import Path
|
7 |
|
@@ -23,16 +22,13 @@ from document_to_podcast.utils import stack_audio_segments
|
|
23 |
@st.cache_resource
|
24 |
def load_text_to_text_model():
|
25 |
return load_llama_cpp_model(
|
26 |
-
model_id="bartowski/Qwen2.5-
|
27 |
)
|
28 |
|
29 |
|
30 |
@st.cache_resource
|
31 |
-
def load_text_to_speech_model():
|
32 |
-
|
33 |
-
return load_tts_model("hexgrad/kLegacy/v0.19/kokoro-v0_19.pth")
|
34 |
-
else:
|
35 |
-
return load_tts_model("OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf")
|
36 |
|
37 |
|
38 |
def numpy_to_wav(audio_array: np.ndarray, sample_rate: int) -> io.BytesIO:
|
@@ -115,29 +111,11 @@ if "clean_text" in st.session_state:
|
|
115 |
"[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-2-podcast-script-generation)"
|
116 |
)
|
117 |
st.divider()
|
|
|
|
|
|
|
118 |
|
119 |
text_model = load_text_to_text_model()
|
120 |
-
speech_model = load_text_to_speech_model()
|
121 |
-
|
122 |
-
if os.environ.get("HF_SPACE") == "TRUE":
|
123 |
-
tts_link = "- [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
|
124 |
-
SPEAKERS = [
|
125 |
-
{
|
126 |
-
"id": 1,
|
127 |
-
"name": "Sarah",
|
128 |
-
"description": "The main host. She explains topics clearly using anecdotes and analogies, teaching in an engaging and captivating way.",
|
129 |
-
"voice_profile": "af_sarah",
|
130 |
-
},
|
131 |
-
{
|
132 |
-
"id": 2,
|
133 |
-
"name": "Michael",
|
134 |
-
"description": "The co-host. He keeps the conversation on track, asks curious follow-up questions, and reacts with excitement or confusion, often using interjections like hmm or umm.",
|
135 |
-
"voice_profile": "am_michael",
|
136 |
-
},
|
137 |
-
]
|
138 |
-
else:
|
139 |
-
tts_link = "- [OuteAI/OuteTTS-0.2-500M](https://huggingface.co/OuteAI/OuteTTS-0.2-500M-GGUF)"
|
140 |
-
SPEAKERS = DEFAULT_SPEAKERS
|
141 |
|
142 |
st.markdown(
|
143 |
"For this demo, we are using the following models: \n"
|
@@ -180,6 +158,15 @@ if "clean_text" in st.session_state:
|
|
180 |
speaker.get(x, None) for x in ["name", "description", "voice_profile"]
|
181 |
)
|
182 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
system_prompt = DEFAULT_PROMPT.replace("{SPEAKERS}", speakers_str)
|
184 |
with st.spinner("Generating Podcast..."):
|
185 |
text = ""
|
|
|
1 |
"""Streamlit app for converting documents to podcasts."""
|
2 |
|
3 |
import io
|
|
|
4 |
import re
|
5 |
from pathlib import Path
|
6 |
|
|
|
22 |
@st.cache_resource
|
23 |
def load_text_to_text_model():
|
24 |
return load_llama_cpp_model(
|
25 |
+
model_id="bartowski/Qwen2.5-7B-Instruct-GGUF/Qwen2.5-7B-Instruct-Q8_0.gguf"
|
26 |
)
|
27 |
|
28 |
|
29 |
@st.cache_resource
|
30 |
+
def load_text_to_speech_model(lang_code: str):
|
31 |
+
return load_tts_model("hexgrad/Kokoro-82M", **{"lang_code": lang_code})
|
|
|
|
|
|
|
32 |
|
33 |
|
34 |
def numpy_to_wav(audio_array: np.ndarray, sample_rate: int) -> io.BytesIO:
|
|
|
111 |
"[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-2-podcast-script-generation)"
|
112 |
)
|
113 |
st.divider()
|
114 |
+
tts_link = "- [hexgrad/Kokoro-82M](https://github.com/hexgrad/kokoro)"
|
115 |
+
|
116 |
+
SPEAKERS = DEFAULT_SPEAKERS
|
117 |
|
118 |
text_model = load_text_to_text_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
st.markdown(
|
121 |
"For this demo, we are using the following models: \n"
|
|
|
158 |
speaker.get(x, None) for x in ["name", "description", "voice_profile"]
|
159 |
)
|
160 |
)
|
161 |
+
if speakers[0]["voice_profile"][0] != speakers[1]["voice_profile"][0]:
|
162 |
+
raise ValueError(
|
163 |
+
"Both Kokoro speakers need to have the same language code. "
|
164 |
+
"More info here https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md"
|
165 |
+
)
|
166 |
+
# Get which language is used for generation from the first character of the Kokoro voice profile
|
167 |
+
language_code = speakers[0]["voice_profile"][0]
|
168 |
+
speech_model = load_text_to_speech_model(lang_code=language_code)
|
169 |
+
|
170 |
system_prompt = DEFAULT_PROMPT.replace("{SPEAKERS}", speakers_str)
|
171 |
with st.spinner("Generating Podcast..."):
|
172 |
text = ""
|