# Hugging Face Space status: Running on CPU Upgrade
import os | |
import sys | |
import requests | |
import json | |
from huggingface_hub import HfApi | |
# start xVASynth service (no HTTP) | |
import resources.app.no_server as xvaserver | |
from gr_client import BlocksDemo | |
# Voice model repositories on the Hugging Face Hub
hf_model_name = "Pendrokar/xvapitch_nvidia"
model_repo = HfApi()


def _hub_snapshot_dir(repo_id, commit_sha):
    """Build the local hub-cache snapshot directory for a model repo commit.

    The cache root is hard-coded to the home directory of the ``user``
    account; the returned path always ends with a slash so a model file
    name can be appended directly.
    """
    cache_folder = 'models--' + repo_id.replace('/', '--')
    return f'/home/user/.cache/huggingface/hub/{cache_folder}/snapshots/{commit_sha}/'


# xVAPitch voices: use the first commit returned by the Hub, which this
# script treats as the latest one.
commits = model_repo.list_repo_commits(repo_id=hf_model_name)
latest_commit_sha = commits[0].commit_id
hf_cache_models_path = _hub_snapshot_dir(hf_model_name, latest_commit_sha)
print(hf_cache_models_path)

# Lojban voice: same lookup against its own repository.
commits = model_repo.list_repo_commits(repo_id='Pendrokar/xvasynth_lojban')
latest_commit_sha = commits[0].commit_id
hf_cache_lojban_models_path = _hub_snapshot_dir('Pendrokar/xvasynth_lojban', latest_commit_sha)
print(hf_cache_lojban_models_path)

# Default directory that voice models are loaded from.
models_path = hf_cache_models_path

# Module-level state shared by load_model() and LocalBlocksDemo.predict():
# the name of the voice currently loaded and its base speaker embedding.
current_voice_model = None
base_speaker_emb = ''
def load_model(voice_model_name):
    """Load a voice model into the xVASynth backend and return its embedding.

    The Lojban voice (``'x_selpahi'``) is loaded as a ``'FastPitch1.1'`` model
    from ``hf_cache_lojban_models_path``; every other voice is loaded as an
    ``'xVAPitch'`` model from ``models_path``.

    Args:
        voice_model_name: File stem of the voice model; it is appended to the
            snapshot directory, and ``.json`` is appended again to locate the
            model's metadata file.

    Returns:
        ``games[0].base_speaker_emb`` from the model's ``.json`` file, or the
        current module-level ``base_speaker_emb`` when a request error was
        caught while loading.

    Side effects:
        On success, records ``voice_model_name`` in the module-level
        ``current_voice_model`` so callers can skip reloading the same voice.
    """
    # Without this declaration the assignment below would only create a
    # function local and the module-level marker would stay None forever.
    global current_voice_model

    if voice_model_name == 'x_selpahi':
        # Lojban
        model_path = hf_cache_lojban_models_path + voice_model_name
        model_type = 'FastPitch1.1'
    else:
        model_path = models_path + voice_model_name
        model_type = 'xVAPitch'

    language = 'en'  # seems to have no effect if generated text is from a different language

    data = {
        'outputs': None,
        'version': '3.0',
        'model': model_path,
        'modelType': model_type,
        'base_lang': language,
        'pluginsContext': '{}',
    }

    embs = base_speaker_emb
    print('Loading voice model...')
    try:
        xvaserver.loadModel(data)
        with open(model_path + '.json', 'r', encoding='utf-8') as f:
            voice_model_json = json.load(f)
        embs = voice_model_json['games'][0]['base_speaker_emb']
        # Mark the voice as loaded only once both the backend call and the
        # embedding lookup succeeded, so a failed attempt is retried.
        current_voice_model = voice_model_name
    except requests.exceptions.RequestException as err:
        # Only request errors are treated as recoverable here; anything else
        # propagates to the caller instead of silently keeping the old voice.
        print(f'FAILED to load voice model: {err}')

    return embs


class LocalBlocksDemo(BlocksDemo):
    """Demo UI whose ``predict`` synthesizes speech via the in-process backend."""

    def predict(
        self,
        input_text,
        voice,
        lang,
        pacing,
        pitch,
        energy,
        anger,
        happy,
        sad,
        surprise,
        use_deepmoji
    ):
        """Synthesize ``input_text`` with the chosen voice and describe the result.

        Args:
            input_text: Text to speak; only the first 1000 characters are used.
            voice: Voice model name; the model is (re)loaded when it differs
                from the currently loaded one.
            lang: Language code sent to the backend as ``base_lang``.
            pacing: Speaking pace; a falsy value falls back to ``1.0``.
            pitch: Accepted for interface compatibility; not forwarded here.
            energy: Accepted for interface compatibility; not forwarded here.
            anger, happy, sad, surprise: Emotion values; values that are not
                greater than zero are sent as ``0``.
            use_deepmoji: Passed to the plugin settings as ``run_model``.

        Returns:
            A list of ``[audio_path, arpabet_html, em_angry, em_happy, em_sad,
            em_surprise, json_data]``. The emotion values are rounded to two
            decimals. When synthesis fails, ``audio_path`` is ``''`` and the
            phoneme display contains the single entry ``Failed``.
        """
        # The embedding is module-level state shared with load_model(); the
        # declaration keeps the conditional assignment below from turning it
        # into an unbound local when the voice is already loaded.
        global base_speaker_emb

        # grab only the first 1000 characters
        input_text = input_text[:1000]

        # load voice model if not the current model
        if current_voice_model != voice:
            base_speaker_emb = load_model(voice)

        # NOTE(review): the Lojban voice is loaded as 'FastPitch1.1' in
        # load_model() but synthesized as 'xVAPitch' here - confirm intended.
        model_type = 'xVAPitch'
        pace = pacing if pacing else 1.0
        save_path = '/tmp/xvapitch_audio_sample.wav'
        language = lang
        use_sr = 0
        use_cleanup = 0

        pluginsContext = {}
        pluginsContext["mantella_settings"] = {
            "emAngry": (anger if anger > 0 else 0),
            "emHappy": (happy if happy > 0 else 0),
            "emSad": (sad if sad > 0 else 0),
            "emSurprise": (surprise if surprise > 0 else 0),
            "run_model": use_deepmoji
        }

        data = {
            'pluginsContext': json.dumps(pluginsContext),
            'modelType': model_type,
            # pad with whitespaces as a workaround to avoid cutoffs
            'sequence': input_text.center(len(input_text) + 2, ' '),
            'pace': pace,
            'outfile': save_path,
            'vocoder': 'n/a',
            'base_lang': language,
            'base_emb': base_speaker_emb,
            'useSR': use_sr,
            'useCleanup': use_cleanup,
        }

        print('Synthesizing...')
        try:
            json_data = xvaserver.synthesize(data)
        except Exception as err:
            # The backend is called in-process, so failures are not limited to
            # request errors; this handler is the UI boundary, so log the
            # error and return a visible "Failed" result instead.
            print(f'FAILED to synthesize: {err}')
            save_path = ''
            # Mirror the shape read below: a '|'-separated symbol string,
            # one duration per symbol, and one-element emotion lists.
            json_data = {
                'arpabet': 'Failed',
                'durations': [0],
                'em_angry': [anger],
                'em_happy': [happy],
                'em_sad': [sad],
                'em_surprise': [surprise],
            }

        html_parts = ['<h6>ARPAbet & Phoneme lengths</h6>']
        arpabet_symbols = json_data['arpabet'].split('|')
        utter_time = 0
        # zip() stops at the shorter sequence, so a symbol/duration count
        # mismatch cannot raise IndexError.
        for symbol, duration in zip(arpabet_symbols, json_data['durations']):
            # skip PAD symbol
            if symbol == '<PAD>':
                continue

            length = float(duration)
            # Horizontal padding is half the phoneme length.
            arpa_length = str(round(length / 2, 1))
            html_parts.append(
                '<strong class="arpabet"'
                f' style="padding: 0 {arpa_length}em"'
                f' title="{utter_time} + {length}">'
                f'{symbol}</strong> '
            )
            utter_time += round(length, 1)

        arpabet_html = ''.join(html_parts)

        return [
            save_path,
            arpabet_html,
            round(json_data['em_angry'][0], 2),
            round(json_data['em_happy'][0], 2),
            round(json_data['em_sad'][0], 2),
            round(json_data['em_surprise'][0], 2),
            json_data
        ]
if __name__ == "__main__":
    # Script entry point: build the demo UI and start serving it.
    print('running custom Gradio interface')
    # The Lojban snapshot directory is stored in hf_cache_lojban_models_path;
    # no module-level name `lojban_models_path` exists, so passing that name
    # would raise NameError at startup.
    demo = LocalBlocksDemo(models_path, hf_cache_lojban_models_path)
    demo.block.launch()