Spaces:

atsushieee
/

sovits-test

Running

App Files Files Community

sovits-test / main.py

atsushieee

Update main.py

440ce22 3 months ago

raw

history blame

4.11 kB

	import gradio as gr
	import os
	import requests
	from tqdm import tqdm
	import plotly.express as px
	import pandas as pd


	from svc_inference import main
	from whisper.inference import check_and_download_model

	# Coordinate table behind the timbre scatter plot.
	# Entry i of 'x' and 'y' belongs to id i + 1; ids run from 1 to 56,
	# the same range the UI offers for "Speaker ID".
	data = {
	    'id': [*range(1, 57)],  # 1 through 56 inclusive
	    'x': [
	        28, 25, 5, 12, 8, 2, 0, -20,
	        -15, -12, -20, 8, -30, 25, 0, 0,
	        2, -25, -25, 20, 15, -2, 0, 15,
	        -30, 15, 8, 28, -10, -22, 20, 20,
	        8, 20, 0, 0, -8, -10, -32, 0,
	        0, -8, 2, -25, -32, -20, -18, -5,
	        15, -22, -25, -28, -30, 10, 25, 28,
	    ],
	    'y': [
	        0, -5, -15, -20, -18, -3, 8, 8,
	        12, 10, 10, -20, 6, -3, 12, -15,
	        12, 17, 10, -8, -15, -22, 8, 15,
	        10, -15, -18, -10, 8, 5, -10, -8,
	        -25, -5, -12, 12, 15, 6, 17, -12,
	        -8, -8, 15, 17, 25, 4, 4, 0,
	        0, -20, 12, 12, 15, -19, 0, 0,
	    ],
	}

	# One row per id, columns in the order id, x, y.
	df = pd.DataFrame.from_dict(data)

	def create_plot():
	    """Build the labelled scatter figure of the module-level ``df``.

	    Each row of ``df`` becomes one marker at (``x``, ``y``) with its ``id``
	    drawn as text above it.

	    Returns:
	        The figure produced by ``px.scatter`` after the trace and layout
	        updates below have been applied.
	    """
	    # Purple markers with a darker purple outline.
	    marker_style = dict(
	        size=10,
	        color='#663399',
	        line=dict(color='#4B0082', width=1),
	    )
	    # Fixed axis ranges with grid and zero line enabled.
	    x_axis = dict(showgrid=True, zeroline=True, range=[-35, 35])
	    y_axis = dict(showgrid=True, zeroline=True, range=[-30, 30])

	    scatter = px.scatter(
	        df,
	        x='x',
	        y='y',
	        text='id',
	        title='Voice Timbre Feature Mapping',
	    )
	    scatter.update_traces(marker=marker_style, textposition='top center')
	    scatter.update_layout(
	        height=600,
	        width=800,
	        clickmode='event+select',
	        plot_bgcolor='#eeeeee',
	        paper_bgcolor='white',
	        xaxis=x_axis,
	        yaxis=y_axis,
	    )
	    return scatter

	def run_main(audio_file, shift, speaker_id):
	    """Run singing-voice conversion on one uploaded recording.

	    Builds the argument object expected by ``main`` (imported from
	    ``svc_inference``) using fixed config/model paths, then calls it.

	    Args:
	        audio_file: Path of the source audio, or ``None`` when nothing was
	            uploaded.
	        shift: Pitch shift value, forwarded unchanged as ``args.shift``.
	        speaker_id: Target speaker number. Converted with ``int()`` so that
	            float values such as ``1.0`` (which a numeric UI field can
	            deliver) are accepted; selects
	            ``./configs/singers/singerNNNN.npy``.

	    Returns:
	        ``"svc_out.wav"`` when ``main`` finishes without raising, otherwise
	        ``None`` (missing input, unusable speaker id, or a failed
	        conversion). Failures are logged instead of being dropped silently.
	    """
	    import logging
	    from types import SimpleNamespace

	    log = logging.getLogger(__name__)

	    if audio_file is None:
	        # Nothing to convert; skip the inference call entirely.
	        log.warning("run_main called without an input audio file")
	        return None

	    try:
	        # ':04d' rejects floats, so normalise to int before formatting.
	        speaker_number = int(speaker_id)
	    except (TypeError, ValueError, OverflowError):
	        log.warning("Unusable speaker id: %r", speaker_id)
	        return None

	    print(audio_file)

	    args = SimpleNamespace(
	        # Fixed arguments.
	        config="configs/base.yaml",
	        model="./vits_pretrain/sovits5.0.pretrain.pth",
	        spk=f"./configs/singers/singer{speaker_number:04d}.npy",
	        wave=audio_file,
	        shift=shift,
	        # Defaults for the optional parameters.
	        ppg=None,
	        vec=None,
	        pit=None,
	        enable_retrieval=False,
	        retrieval_index_prefix="",
	        retrieval_ratio=0.5,
	        n_retrieval_vectors=3,
	        hubert_index_path=None,
	        whisper_index_path=None,
	        debug=False,
	    )

	    try:
	        main(args)
	    except Exception:
	        # Best-effort: the caller still receives None, but the traceback is
	        # recorded so the failure can be diagnosed.
	        log.exception("Singing voice conversion failed")
	        return None

	    # Path handed back to the UI; presumably the file main() writes --
	    # TODO confirm against svc_inference.
	    return "svc_out.wav"

	# Build the Gradio interface.
	# NOTE(review): the nesting below is reconstructed from the statement order;
	# in particular the output player is assumed to sit in the right-hand
	# column under the button -- confirm against the deployed layout.
	with gr.Blocks() as demo:
	    gr.Markdown("# SVC (Singing Voice Conversion) System")

	    with gr.Row():
	        # Column widths keep the original 1.15 : 1 ratio, expressed as
	        # integers (23 : 20) because Gradio expects an integer `scale`.
	        with gr.Column(scale=23):
	            plot = gr.Plot(value=create_plot())

	        with gr.Column(scale=20):
	            # Upload of the audio to convert; handed to the callback as a
	            # file path.
	            input_audio = gr.Audio(
	                label="Upload the audio you want to convert.",
	                type="filepath"
	            )
	            # Speaker ID selection. precision=0 makes the component deliver
	            # an int, which the ':04d' formatting in run_main requires.
	            speaker_id = gr.Number(
	                label="Speaker ID (1-56)",
	                value=1,
	                minimum=1,
	                maximum=56,
	                step=1,
	                precision=0
	            )
	            # Pitch-shift slider.
	            shift = gr.Slider(
	                minimum=-12,
	                maximum=12,
	                value=0,
	                step=1,
	                label="Pitch Shift (from -12 to +12) "
	            )
	            # Trigger button.
	            run_btn = gr.Button(value="Convert Singing Voice", variant="primary", size="lg")

	            # Player for the converted result.
	            output_audio = gr.Audio(label="Audio After Conversion")

	    # Event listeners must be registered inside the Blocks context.
	    run_btn.click(
	        fn=run_main,
	        inputs=[input_audio, shift, speaker_id],
	        outputs=[output_audio]
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()