File size: 4,105 Bytes
9791162
71dda67
 
 
ed37553
 
 
71dda67
16f1b93
ad51a72
16f1b93
ed37553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11646d7
 
440ce22
11646d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae3756f
11646d7
 
 
 
 
 
 
 
ae3756f
11646d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae3756f
11646d7
 
ae3756f
11646d7
 
ae3756f
11646d7
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import gradio as gr
import os
import requests
from tqdm import tqdm
import plotly.express as px
import pandas as pd


from svc_inference import main
from whisper.inference import check_and_download_model

# Hand-placed 2-D coordinates, one point per selectable speaker id (1-56).
# Presumably a manual 2-D layout of each singer's voice-timbre features
# for the selection scatter plot — TODO confirm where these values come from.
data = {
    'id': list(range(1, 57)),  # speaker ids 1 through 56
    'x': [
        28, 25, 5, 12, 8, 2, 0, -20, -15, -12, -20, 8, -30, 25, 0, 0, 2, -25,
        -25, 20, 15, -2, 0, 15, -30, 15, 8, 28, -10, -22, 20, 20, 8, 20, 0,
        0, -8, -10, -32, 0, 0, -8, 2, -25, -32, -20, -18, -5, 15, -22, -25,
        -28, -30, 10, 25, 28
    ],
    'y': [
        0, -5, -15, -20, -18, -3, 8, 8, 12, 10, 10, -20, 6, -3, 12, -15, 12,
        17, 10, -8, -15, -22, 8, 15, 10, -15, -18, -10, 8, 5, -10, -8, -25,
        -5, -12, 12, 15, 6, 17, -12, -8, -8, 15, 17, 25, 4, 4, 0, 0, -20,
        12, 12, 15, -19, 0, 0
    ]
}

# Columns: id, x, y — consumed by create_plot() below.
df = pd.DataFrame(data)

def create_plot():
    """Build the speaker-timbre scatter plot.

    Every row of the module-level ``df`` (one per speaker id 1-56) is
    rendered as a purple marker labelled with its id, on a fixed-range
    600x800 canvas with click-selection enabled.

    Returns:
        plotly.graph_objects.Figure: the configured scatter figure.
    """
    figure = px.scatter(
        df, x='x', y='y', text='id',
        title='Voice Timbre Feature Mapping',
    )

    # Fixed axis ranges and a click-selectable canvas so a point can be
    # picked from the UI.
    figure.update_layout(
        height=600,
        width=800,
        clickmode='event+select',
        plot_bgcolor='#eeeeee',
        paper_bgcolor='white',
        xaxis=dict(showgrid=True, zeroline=True, range=[-35, 35]),
        yaxis=dict(showgrid=True, zeroline=True, range=[-30, 30]),
    )

    # Purple markers (#663399) with a darker indigo outline (#4B0082);
    # the speaker id is printed above each point.
    figure.update_traces(
        marker=dict(
            size=10,
            color='#663399',
            line=dict(color='#4B0082', width=1),
        ),
        textposition='top center',
    )

    return figure

def run_main(audio_file, shift, speaker_id):
    # 固定の引数を設定
    class Args:
        pass
    
    args = Args()
    args.config = "configs/base.yaml"
    args.model = "./vits_pretrain/sovits5.0.pretrain.pth"
    speaker_str = f"{speaker_id:04d}"
    args.spk = f"./configs/singers/singer{speaker_str}.npy"
    args.wave = audio_file
    print(audio_file)
    args.shift = shift
    
    # オプショナルパラメータのデフォルト値設定
    args.ppg = None
    args.vec = None
    args.pit = None
    args.enable_retrieval = False
    args.retrieval_index_prefix = ""
    args.retrieval_ratio = 0.5
    args.n_retrieval_vectors = 3
    args.hubert_index_path = None
    args.whisper_index_path = None
    args.debug = False

    try:
        main(args)
        return "svc_out.wav"  # 音声ファイルのパスを返す
    except Exception as e:
        return None

# Assemble the Gradio interface.
# NOTE: the top-level name `demo` is kept as-is — Gradio tooling
# (e.g. `gradio app.py` hot-reload) looks it up by that name.
with gr.Blocks() as demo:
    gr.Markdown("# SVC (Singing Voice Conversion) System")

    with gr.Row():
        with gr.Column(scale=1.15):
            # Scatter map of the 56 selectable voice timbres.
            timbre_plot = gr.Plot(value=create_plot())

        with gr.Column(scale=1):
            # Source audio, handed to run_main as a file path.
            source_audio = gr.Audio(
                label="Upload the audio you want to convert.",
                type="filepath"
            )
            # Target speaker selection.
            speaker_selector = gr.Number(
                label="Speaker ID (1-56)",
                value=1,
                minimum=1,
                maximum=56,
                step=1
            )
            # Pitch transposition in semitones.
            pitch_shift = gr.Slider(
                minimum=-12,
                maximum=12,
                value=0,
                step=1,
                label="Pitch Shift (from -12 to +12) "
            )
            # Trigger for the conversion.
            convert_button = gr.Button(
                value="Convert Singing Voice", variant="primary", size="lg"
            )

    # Playback widget for the converted result.
    converted_audio = gr.Audio(label="Audio After Conversion")

    convert_button.click(
        fn=run_main,
        inputs=[source_audio, pitch_shift, speaker_selector],
        outputs=[converted_audio]
    )

# Launch the app when executed as a script.
if __name__ == "__main__":
    demo.launch()