Spaces:
Runtime error
Runtime error
File size: 8,419 Bytes
20d05ae 925d97e 20d05ae 925d97e 106b914 0cb3834 925d97e 106b914 925d97e 106b914 925d97e eb3ba2e 925d97e 42cf67e 925d97e f98d769 925d97e 106b914 925d97e 106b914 925d97e eb3ba2e 925d97e 106b914 925d97e 20d05ae 0cb3834 106b914 f65d2ed 106b914 a87192b 106b914 a87192b 106b914 a87192b 106b914 a87192b 106b914 a87192b 106b914 a87192b 106b914 a87192b 106b914 a87192b 106b914 0cb3834 106b914 0cb3834 106b914 a87192b 0cb3834 eb3ba2e 106b914 eb3ba2e 106b914 eb3ba2e 6f1ebe2 106b914 f98d769 6f1ebe2 106b914 6f1ebe2 eb3ba2e 106b914 eb3ba2e f98d769 eb3ba2e 106b914 6f1ebe2 f98d769 6f1ebe2 f98d769 925d97e 106b914 925d97e 106b914 925d97e 106b914 925d97e 106b914 925d97e 106b914 0cb3834 106b914 0cb3834 106b914 0cb3834 106b914 0cb3834 106b914 0cb3834 106b914 0cb3834 106b914 0cb3834 eb3ba2e 9db2503 106b914 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
import gradio as gr
import os
from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
import platform
from models.model import *
from tts.conversion import COQUI_LANGUAGES
import pytube
import os
import traceback
from pydub import AudioSegment
# from audio_enhance.functions import audio_enhance
def convert_yt_to_wav(url):
    """Download the audio track of a YouTube video and convert it to WAV.

    Args:
        url: Link to the YouTube video. Empty, ``None`` or whitespace-only
            values are rejected before any download is attempted.

    Returns:
        A ``(message, audio_file_path)`` tuple. On success the message is
        ``"Success"`` and the path points to the WAV file written to the
        ``audios`` folder. On failure the message describes the problem and
        the path is ``None``. Errors are reported through the return value
        rather than raised.
    """
    if not url or not url.strip():
        return "Please enter the video link", None

    video_output_folder = "yt_videos"  # Destination folder for the raw download
    audio_output_folder = "audios"  # Destination folder for the converted WAV
    video_file_path = None

    try:
        print(f"Converting video {url}...")
        # Download the audio-only stream using pytube
        video = pytube.YouTube(url.strip())
        stream = video.streams.filter(only_audio=True).first()
        if stream is None:
            return "No audio stream is available for this video.", None

        print("Downloading video")
        video_file_path = stream.download(output_path=video_output_folder)
        print(video_file_path)

        # The output folder is not guaranteed to exist on a fresh checkout.
        os.makedirs(audio_output_folder, exist_ok=True)

        # Swap only the real extension: the downloaded container is not always
        # ".mp4", and ".mp4" may also appear inside the video title.
        base_name = os.path.splitext(os.path.basename(video_file_path))[0]
        audio_file_path = os.path.join(audio_output_folder, base_name + ".wav")

        # Convert the downloaded container to wav. The input format is left
        # unspecified so it is detected from the file instead of assumed.
        print("Converting to wav")
        sound = AudioSegment.from_file(video_file_path)
        sound.export(audio_file_path, format="wav")

        return "Success", audio_file_path
    except ConnectionResetError:
        return "Connection lost, please refresh or try again later.", None
    except Exception as e:
        # Keep the full traceback in the server log; the UI only gets the message.
        traceback.print_exc()
        return str(e), None
    finally:
        # Remove the intermediate download whether or not the conversion worked.
        if video_file_path and os.path.exists(video_file_path):
            os.remove(video_file_path)
# Gradio UI: four tabs (Inference, TTS, Youtube, Models) wired to the callbacks
# imported at the top of the file plus convert_yt_to_wav defined above.
# NOTE(review): the Row/Column nesting below was reconstructed from a copy of
# this file whose indentation was lost -- confirm it against the rendered UI.
with gr.Blocks() as app:
    gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
    gr.HTML("<h4> The current space uses only CPU, so it's only for inference. It is recommended to duplicate the space to avoid issues with processing queues. </h4>")
    # Markdown links need a non-empty label, otherwise they render as an
    # invisible anchor.
    gr.Markdown("Simple RVC GPU Inference on Colab: [Open in Colab](https://colab.research.google.com/drive/1NKqqTR04HujeBxzwe7jbYEvNi8LbxD_N?usp=sharing)")
    gr.Markdown(
        "[Duplicate this Space](https://huggingface.co/spaces/juuxn/SimpleRVC?duplicate=true)\n\n"
    )
    gr.Markdown("Collection of models you can use: RVC + AI Kits. **[RVC Community Models](https://docs.google.com/spreadsheets/d/1owfUtQuLW9ReiIwg6U9UkkDmPOTkuNHf0OKQtWu1iaI)**")

    # --- Voice conversion of an uploaded audio file ---
    with gr.Tab("Inference"):
        model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Model URL", show_label=True)
        with gr.Row():
            with gr.Column():
                audio_path = gr.Audio(label="Audio File", show_label=True, type="filepath")
                index_rate = gr.Slider(minimum=0, maximum=1, label="Search feature ratio:", value=0.75, interactive=True)
                filter_radius1 = gr.Slider(minimum=0, maximum=7, label="Filter (breathing roughness reduction)", value=3, step=1, interactive=True)
            with gr.Column():
                f0_method = gr.Dropdown(
                    choices=["harvest", "pm", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"],
                    value="rmvpe",
                    label="Algorithm",
                    show_label=True,
                )
                vc_transform0 = gr.Slider(minimum=-12, label="Number of semitones, up an octave: 12, down an octave: -12", value=0, maximum=12, step=1)
                protect0 = gr.Slider(
                    minimum=0,
                    maximum=0.5,
                    label="Protect voiceless consonants and breathing sounds. 0.5 to disable.",
                    value=0.33,
                    step=0.01,
                    interactive=True,
                )
                resample_sr1 = gr.Slider(
                    minimum=0,
                    maximum=48000,
                    label="Resample the output audio to the final sampling rate. 0 for no resampling.",
                    value=0,
                    step=1,
                    interactive=True,
                )

        # Output
        with gr.Row():
            vc_output1 = gr.Textbox(label="Output")
            vc_output2 = gr.Audio(label="Output Audio")

        btn = gr.Button(value="Convert")
        btn.click(
            infer,
            inputs=[model_url, f0_method, audio_path, index_rate, vc_transform0, protect0, resample_sr1, filter_radius1],
            outputs=[vc_output1, vc_output2],
        )

    # --- Text-to-speech followed by voice conversion ---
    with gr.Tab("TTS"):
        with gr.Row():
            tts_text = gr.Textbox(
                label="Text:",
                placeholder="Text you want to convert to speech...",
                lines=6,
            )

        with gr.Column():
            with gr.Row():
                tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="RVC Model URL", show_label=True)

            with gr.Row():
                tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="TTS Method:", visible=True)
                tts_model = gr.Dropdown(choices=EDGE_VOICES, label="TTS Model:", visible=True, interactive=True)
                # The API key is a secret, so mask it instead of echoing it.
                tts_api_key = gr.Textbox(
                    label="ElevenLabs API Key",
                    show_label=True,
                    placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",
                    interactive=True,
                    visible=False,
                    type="password",
                )

            tts_coqui_languages = gr.Radio(
                label="Language",
                choices=COQUI_LANGUAGES,
                value="en",
                visible=False,
            )

        tts_btn = gr.Button(value="Convert")

        with gr.Row():
            tts_vc_output1 = gr.Textbox(label="Output")
            tts_vc_output2 = gr.Audio(label="Output Audio")

        tts_btn.click(
            fn=tts_infer,
            inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages],
            outputs=[tts_vc_output1, tts_vc_output2],
        )

        # Created hidden; it is one of the outputs of the tts_method.change
        # handler below.
        tts_msg = gr.Markdown(
            "**I recommend creating an Eleven Labs account and entering your API key; "
            "it's free and you have a limit of 10k characters per month.** <br/>\n\n",
            visible=False,
        )

        # Changing the TTS method updates the voice list, the hint, the API key
        # field and the language selector.
        tts_method.change(
            fn=update_tts_methods_voice,
            inputs=[tts_method],
            outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages],
        )

    # --- YouTube link to WAV download ---
    with gr.Tab("Youtube"):
        gr.Markdown("## Convert YouTube video to audio")
        with gr.Row():
            yt_url = gr.Textbox(
                label="Video URL:",
                placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q"
            )
        yt_btn = gr.Button(value="Convert")

        with gr.Row():
            yt_output1 = gr.Textbox(label="Output")
            yt_output2 = gr.Audio(label="Output Audio")

        yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2])

    # --- Community model search and submission ---
    with gr.Tab("Models"):
        gr.HTML("<h4>Search models</h4>")
        search_name = gr.Textbox(placeholder="Billie Eilish (RVC v2 - 100 epoch)", label="Name", show_label=True)
        # Output
        with gr.Row():
            search_output = gr.Markdown(label="Output")

        btn_search_model = gr.Button(value="Search")
        btn_search_model.click(fn=search_model, inputs=[search_name], outputs=[search_output])

        gr.HTML("<h4>Submit your model</h4>")
        post_name = gr.Textbox(placeholder="Billie Eilish (RVC v2 - 100 epoch)", label="Name", show_label=True)
        post_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Model URL", show_label=True)
        post_creator = gr.Textbox(placeholder="Discord ID or link to creator's profile", label="Creator", show_label=True)
        post_version = gr.Dropdown(choices=["RVC v1", "RVC v2"], value="RVC v1", label="Version", show_label=True)
        # Output
        with gr.Row():
            post_output = gr.Markdown(label="Output")

        btn_post_model = gr.Button(value="Post")
        btn_post_model.click(
            fn=post_model,
            inputs=[post_name, post_model_url, post_version, post_creator],
            outputs=[post_output],
        )

# Enable request queuing, then start the server.
app.queue(concurrency_count=200, max_size=1022).launch()
#share=True
|