"""Gradio demo for VQMIVC one-shot voice conversion.

At import time this script installs ParallelWaveGAN, downloads the
pretrained VQMIVC checkpoints, then launches a Gradio interface that
converts the voice in a source clip to the speaker of a reference clip.
"""
import os
import shutil
import zipfile

import gradio as gr

# --- One-time environment setup (side effects at import time) --------------
# NOTE(review): `chown -R ./.eggs` is missing the owner argument, so this
# command always fails (the return code of os.system is ignored) — confirm
# the intended owner/permission change.
os.system('chown -R ./.eggs')

# Install the ParallelWaveGAN vocoder package in editable mode.
os.chdir('ParallelWaveGAN/')
os.system('pip install --user -e .')
os.chdir('..')

# Fetch the pretrained-models archive from Google Drive.
os.system('gdown https://drive.google.com/uc?id=1Flw6Z0K2QdRrTn5F-gVt6HdR9TRPiaKy')

# NOTE(review): these moves run *before* the zip is extracted, and the
# extraction targets a Colab-style absolute path (/content/VQMIVC/) —
# verify the intended order and paths for this deployment.
shutil.move('VQMIVC-pretrained models/checkpoints/', '.')
shutil.move('VQMIVC-pretrained models/vocoder/', '.')
with zipfile.ZipFile('/content/VQMIVC/VQMIVC-pretrained models.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/VQMIVC/')


def inference(audio1, audio2):
    """Convert the voice in *audio1* to the speaker of *audio2*.

    Args:
        audio1: Gradio file object for the source audio (linguistic content).
        audio2: Gradio file object for the reference audio (target speaker).

    Returns:
        Filename of the converted wav produced by ``convert_example.py``.
    """
    os.system(
        "python convert_example.py -s " + audio1.name
        + " -r " + audio2.name
        + " -c converted"
        + " -m 'checkpoints/useCSMITrue_useCPMITrue_usePSMITrue_useAmpTrue/VQMIVC-model.ckpt-500.pt'"
    )
    # Output-name convention of convert_example.py: <source stem>_converted_gen.wav
    out = os.path.basename(str(audio1)).split(".")[0] + "_converted_gen.wav"
    return out


# --- Gradio interface -------------------------------------------------------
# BUG FIX: the original passed `type=file`, referencing the undefined name
# `file` (NameError at import). Gradio expects the string "file".
inputs = [
    gr.inputs.Audio(label="Source Audio", type="file"),
    gr.inputs.Audio(label="Reference Audio", type="file"),
]
outputs = gr.outputs.Audio(label="Output Audio", type="file")

title = "VITS"
description = "demo for VITS: Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
# BUG FIX: the article string literal spanned several physical lines inside
# plain double quotes (a SyntaxError); a triple-quoted string keeps the same
# text, including the surrounding blank lines.
article = """

Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech | Github Repo

"""

# NOTE(review): title/description/examples describe VITS text-to-speech, but
# this app performs VQMIVC voice conversion with two Audio inputs — text
# examples cannot populate Audio components; confirm the intended copy.
examples = [
    ["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech."],
    ["Our method adopts variational inference augmented with normalizing flows and an adversarial training process, which improves the expressive power of generative modeling."],
]

gr.Interface(
    inference,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch()