File size: 1,357 Bytes
3e423f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import gc
import os
from contextlib import contextmanager
from time import time
from typing import Optional
import streamlit as st
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voices
@contextmanager
def timeit(desc=""):
    """Context manager that prints how long the enclosed ``with`` body ran.

    Args:
        desc: Label placed at the start of the printed message.

    The elapsed wall-clock time (from ``time.time``) is printed as
    ``"<desc> took <seconds> seconds"`` with two decimal places.
    """
    start = time()
    try:
        yield
    finally:
        # An exception raised in the with-body is re-raised at the yield;
        # the finally clause makes sure the timing is still reported before
        # that exception continues to propagate.
        print(f"{desc} took {time() - start:.2f} seconds")
@st.cache_resource(max_entries=1)
def load_model(model_dir, high_vram, kv_cache, ar_checkpoint, diff_checkpoint):
    """Build a ``TextToSpeech`` instance from the given settings.

    The result is memoized by ``st.cache_resource`` with ``max_entries=1``.

    Args:
        model_dir: Forwarded to ``TextToSpeech`` as ``models_dir``.
        high_vram: Forwarded to ``TextToSpeech`` unchanged.
        kv_cache: Forwarded to ``TextToSpeech`` unchanged.
        ar_checkpoint: Forwarded to ``TextToSpeech`` unchanged.
        diff_checkpoint: Forwarded to ``TextToSpeech`` unchanged.

    Returns:
        The newly constructed ``TextToSpeech`` object.
    """
    # Run a collection pass before constructing the model.
    # NOTE(review): presumably this frees a previously cached model that the
    # single-entry cache has just evicted -- confirm.
    gc.collect()
    tts_options = {
        "models_dir": model_dir,
        "high_vram": high_vram,
        "kv_cache": kv_cache,
        "ar_checkpoint": ar_checkpoint,
        "diff_checkpoint": diff_checkpoint,
    }
    return TextToSpeech(**tts_options)
@st.cache_data
def list_voices(extra_voices_dir: Optional[str]):
    """List the selectable voice names and the extra voice directories.

    Args:
        extra_voices_dir: Optional path to an additional voices directory.
            It is used only when it is non-empty and an existing directory.

    Returns:
        A ``(voices, extra_voices_ls)`` tuple. ``voices`` starts with
        ``"random"``, followed by the entries of ``extra_voices_dir`` (when
        used) and then the entries of ``"tortoise/voices"`` except
        ``"cond_latent_example"``. ``extra_voices_ls`` is
        ``[extra_voices_dir]`` when that directory is used, else ``[]``.
    """
    use_extra = bool(extra_voices_dir) and os.path.isdir(extra_voices_dir)
    extra_voices_ls = [extra_voices_dir] if use_extra else []

    names = ["random"]
    if use_extra:
        names += os.listdir(extra_voices_dir)

    # The bundled voices are looked up relative to the current working
    # directory.
    for entry in os.listdir("tortoise/voices"):
        if entry != "cond_latent_example":
            names.append(entry)

    return names, extra_voices_ls
@st.cache_resource(max_entries=1)
def load_voice_conditionings(voice, extra_voices_ls):
    """Load the samples and conditioning latents for ``voice``.

    The result is memoized by ``st.cache_resource`` with ``max_entries=1``.

    Args:
        voice: Passed to ``load_voices`` as its first argument.
        extra_voices_ls: Passed to ``load_voices`` as its second argument.

    Returns:
        A ``(voice_samples, conditioning_latents)`` tuple, as produced by
        ``load_voices``.
    """
    # Run a collection pass before loading the new conditionings.
    gc.collect()
    samples, latents = load_voices(voice, extra_voices_ls)
    return samples, latents
|