"""Streamlit front-end for Kokoro text-to-speech synthesis.

Loads the Kokoro model and voicepacks, locates espeak for phonemization,
counts phoneme tokens via ``phonemizer``, and splits long input text into
token-bounded segments suitable for generation.
"""

import os
import re
import shutil
import subprocess

import numpy as np
import streamlit as st
import torch
from scipy.io.wavfile import write as write_wav

from phonemizer import phonemize
from phonemizer.backend.espeak.espeak import EspeakBackend

from kokoro import generate
from models import build_model


@st.cache_resource
def load_model():
    """Build the Kokoro model once per process.

    Returns:
        tuple: ``(model, device)`` where ``device`` is ``'cuda'`` when a GPU
        is available, otherwise ``'cpu'``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = build_model('kokoro-v0_19.pth', device)
    return model, device


# cache_resource (not cache_data): a torch tensor is an "unserializable"
# resource — cache_data would pickle it and hand back a copy on every rerun.
@st.cache_resource
def load_voicepack(voice_name, device):
    """Load the voicepack tensor for *voice_name* onto *device*.

    Returns:
        torch.Tensor | None: the voicepack, or ``None`` (after showing a UI
        error) when the voice file does not exist.
    """
    try:
        voicepack = torch.load(f'voices/{voice_name}.pt', weights_only=True).to(device)
        return voicepack
    except FileNotFoundError:
        st.error(f"Errore: La voce '{voice_name}' non è stata trovata.")
        return None


def get_espeak_path():
    """Locate the ``espeak`` executable.

    Returns:
        str | None: absolute path to espeak, or ``None`` (after showing a UI
        error) when it is not on PATH.
    """
    # shutil.which is portable (also works on Windows, where `which` does not
    # exist) and avoids spawning a subprocess.
    path = shutil.which('espeak')
    if path is None:
        st.error("Errore: espeak non trovato. Assicurati che espeak sia installato e nel PATH.")
    return path


def count_tokens(text, lang='en-us'):
    """Count the phoneme tokens phonemizer produces for *text*.

    Args:
        text: input text (plain string).
        lang: espeak language code, e.g. ``'en-us'``.

    Returns:
        int: number of whitespace-separated phoneme tokens; 0 when espeak
        cannot be located.
    """
    espeak_path = get_espeak_path()
    if not espeak_path:
        return 0
    os.environ['PHONEMIZER_ESPEAK_PATH'] = espeak_path
    out_ps = phonemize(
        text,
        backend='espeak',
        language=lang,
        preserve_punctuation=True,
        with_stress=True,
    )
    # phonemize() returns a str for str input and a list for list input.
    # The original unconditionally did ' '.join(out_ps), which for a str
    # inserts a space between every single character and inflates the count.
    if not isinstance(out_ps, str):
        out_ps = ' '.join(out_ps)
    ps = out_ps.replace('  ', ' ').strip()
    tokens = [tok for tok in ps.split(' ') if tok != '']
    return len(tokens)


def split_text(text, max_tokens=500, lang='en-us'):
    """Split *text* into segments of at most *max_tokens* phoneme tokens.

    NOTE(review): the body of this function was truncated in the reviewed
    source (it ended mid-regex at ``re.split(r'(?``); the implementation
    below reconstructs the apparent intent — sentence-wise splitting with
    greedy accumulation up to the token budget. Confirm against the
    original before relying on exact segment boundaries.

    Args:
        text: input text to segment.
        max_tokens: per-segment phoneme-token budget.
        lang: espeak language code forwarded to :func:`count_tokens`.

    Returns:
        list[str]: non-empty text segments in original order.
    """
    # Split after sentence-ending punctuation, keeping it attached.
    sentences = re.split(r'(?<=[.!?])\s+', text)
    segments = []
    current = ''
    for sentence in sentences:
        candidate = f'{current} {sentence}'.strip() if current else sentence
        if current and count_tokens(candidate, lang) > max_tokens:
            segments.append(current)
            current = sentence
        else:
            current = candidate
    if current:
        segments.append(current)
    return segments