# Gradio demo app: SoVITS 5.0 singing-voice conversion.
import os
import traceback

import gradio as gr
import pandas as pd
import plotly.express as px
import requests
from tqdm import tqdm

from svc_inference import main
from whisper.inference import check_and_download_model
# Hand-placed 2-D coordinates for the 56 available singer voices,
# rendered below as a clickable timbre-map scatter plot.
_X_COORDS = (
    28, 25, 5, 12, 8, 2, 0, -20, -15, -12, -20, 8, -30, 25, 0, 0, 2, -25,
    -25, 20, 15, -2, 0, 15, -30, 15, 8, 28, -10, -22, 20, 20, 8, 20, 0,
    0, -8, -10, -32, 0, 0, -8, 2, -25, -32, -20, -18, -5, 15, -22, -25,
    -28, -30, 10, 25, 28,
)
_Y_COORDS = (
    0, -5, -15, -20, -18, -3, 8, 8, 12, 10, 10, -20, 6, -3, 12, -15, 12,
    17, 10, -8, -15, -22, 8, 15, 10, -15, -18, -10, 8, 5, -10, -8, -25,
    -5, -12, 12, 15, 6, 17, -12, -8, -8, 15, 17, 25, 4, 4, 0, 0, -20,
    12, 12, 15, -19, 0, 0,
)
# Speaker ids run 1..56 and index the pretrained singer embeddings.
data = {
    'id': list(range(1, len(_X_COORDS) + 1)),
    'x': list(_X_COORDS),
    'y': list(_Y_COORDS),
}
df = pd.DataFrame(data)
def create_plot():
    """Build the scatter plot mapping the 56 voice timbres in 2-D space."""
    fig = px.scatter(
        df, x='x', y='y', text='id',
        title='Voice Timbre Feature Mapping',
    )

    # Purple markers with a darker indigo outline; the id label sits above
    # each point.
    marker_style = dict(
        size=10,
        color='#663399',
        line=dict(color='#4B0082', width=1),
    )
    fig.update_traces(marker=marker_style, textposition='top center')

    # Fixed axis ranges keep the layout stable regardless of point spread;
    # clickmode enables point selection events on the plot.
    x_axis = dict(showgrid=True, zeroline=True, range=[-35, 35])
    y_axis = dict(showgrid=True, zeroline=True, range=[-30, 30])
    fig.update_layout(
        height=600,
        width=800,
        clickmode='event+select',
        plot_bgcolor='#eeeeee',
        paper_bgcolor='white',
        xaxis=x_axis,
        yaxis=y_axis,
    )
    return fig
def run_main(audio_file, shift, speaker_id):
    """Convert an uploaded vocal track to the selected singer's voice.

    Parameters
    ----------
    audio_file : str | None
        Path of the uploaded audio (Gradio ``type="filepath"``); None when
        the user has not uploaded anything yet.
    shift : int
        Pitch shift in semitones (-12 .. +12).
    speaker_id : int | float
        Target singer id, 1-56. Gradio's Number component may deliver a
        float, so it is truncated to int before formatting.

    Returns
    -------
    str | None
        Path of the converted audio ("svc_out.wav"), or None on failure
        or missing input.
    """
    if audio_file is None:
        # Nothing uploaded yet: leave the output component empty.
        return None

    # svc_inference.main expects an argparse-like namespace; build one.
    class Args:
        pass

    args = Args()
    args.config = "configs/base.yaml"
    args.model = "./vits_pretrain/sovits5.0.pretrain.pth"
    # int() cast: a float speaker_id would make the {:04d} format raise.
    args.spk = f"./configs/singers/singer{int(speaker_id):04d}.npy"
    args.wave = audio_file
    args.shift = shift
    # Optional inference parameters left at their defaults.
    args.ppg = None
    args.vec = None
    args.pit = None
    args.enable_retrieval = False
    args.retrieval_index_prefix = ""
    args.retrieval_ratio = 0.5
    args.n_retrieval_vectors = 3
    args.hubert_index_path = None
    args.whisper_index_path = None
    args.debug = False
    try:
        main(args)
        return "svc_out.wav"  # fixed output path written by svc_inference
    except Exception:
        # Best-effort conversion: log the error server-side instead of
        # silently discarding it, but keep the UI from crashing.
        traceback.print_exc()
        return None
# --- Gradio UI -----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# SVC (Singing Voice Conversion) System")

    with gr.Row():
        # Left column: the timbre map; right column: conversion controls.
        with gr.Column(scale=1.15):
            plot = gr.Plot(value=create_plot())

        with gr.Column(scale=1):
            # Source audio, passed to run_main as a file path.
            input_audio = gr.Audio(
                type="filepath",
                label="Upload the audio you want to convert.",
            )
            # Target singer (ids match the scatter-plot labels).
            speaker_id = gr.Number(
                value=1,
                minimum=1,
                maximum=56,
                step=1,
                label="Speaker ID (1-56)",
            )
            # Pitch shift in semitones.
            shift = gr.Slider(
                value=0,
                minimum=-12,
                maximum=12,
                step=1,
                label="Pitch Shift (from -12 to +12) ",
            )
            run_btn = gr.Button(value="Convert Singing Voice", variant="primary", size="lg")
            output_audio = gr.Audio(label="Audio After Conversion")

    run_btn.click(
        fn=run_main,
        inputs=[input_audio, shift, speaker_id],
        outputs=[output_audio],
    )

# Launch the app only when executed as a script.
if __name__ == "__main__":
    demo.launch()