File size: 66,153 Bytes
51a61da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
import multiprocessing
import os
import re
import torch
import glob
import gradio as gr
import librosa
import numpy as np
import soundfile as sf
from inference.infer_tool import Svc
import logging
import json
import yaml
import time
import subprocess
import shutil
import utils
import datetime
import traceback
from utils import mix_model
from onnxexport.model_onnx import SynthesizerTrn
from itertools import chain
from compress_model import removeOptimizer
from auto_slicer import AutoSlicer

# Raise the level of chatty third-party loggers so only WARNING and above is shown.
logging.getLogger('numba').setLevel(logging.WARNING)
logging.getLogger('markdown_it').setLevel(logging.WARNING)
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('matplotlib').setLevel(logging.WARNING)

# Directory scanned for model checkpoints (.pth) and cluster / feature-index files (.pt / .pkl).
workdir = "logs/44k"
# Directory scanned for diffusion checkpoints (.pt).
diff_workdir = "logs/44k/diffusion"
# Directory holding the model (.json) and diffusion (.yaml) config files.
config_dir = "configs/"
# Root folder walked by load_raw_dirs(); each sub-folder is reported as one speaker.
raw_path = "dataset_raw"
# NOTE(review): not referenced in the visible part of this file.
raw_wavs_path = "raw"
# Existing checkpoints are moved under this folder before a fresh training run starts.
models_backup_path = 'models_backup'
# Folder with one sub-folder per model, used by pth_identify() and onnx_export().
root_dir = "checkpoints"
# When True, handlers print the full traceback before raising gr.Error.
debug = False
# Populated from settings.yaml by get_default_settings().
sovits_params = {}
diff_params = {}

# Cluster file name given to the most recent Newload_model_func() call; None until then.
loaded = None

def debug_change():
    """Copy the current value of ``debug_button`` into the module-level ``debug`` flag.

    ``debug_button`` is not defined in the visible part of this file; it is
    presumably a UI component created later - confirm.
    """
    global debug
    debug = debug_button.value

def get_default_settings():
    """Load the default training parameters from ``settings.yaml``.

    The ``sovits_params`` and ``diff_params`` sections of the file replace the
    module-level dictionaries of the same names.

    Returns:
        A ``(sovits_params, diff_params)`` tuple.

    Raises:
        KeyError: if either section is missing from the file.
    """
    global sovits_params, diff_params
    with open("settings.yaml", 'r') as settings_file:
        stored = yaml.safe_load(settings_file)
    # Assigned one after the other so a missing second section still leaves
    # the first one updated, exactly as before.
    sovits_params = stored['sovits_params']
    diff_params = stored['diff_params']
    return sovits_params, diff_params

def save_default_settings(log_interval,eval_interval,keep_ckpts,batch_size,learning_rate,fp16_run,all_in_mem,num_workers,cache_all_data,cache_device,amp_dtype,diff_batch_size,diff_lr,diff_interval_log,diff_interval_val,diff_force_save):
    """Persist the given training defaults into ``settings.yaml``.

    The file is read, its ``sovits_params`` and ``diff_params`` sections are
    updated with the (type-coerced) arguments, and the whole document is
    written back without re-sorting keys.

    Returns:
        A confirmation message for the UI.

    Raises:
        ValueError: if a numeric argument cannot be converted.
        KeyError: if a section is missing from the existing file.
    """
    yaml_path = "settings.yaml"
    with open(yaml_path, 'r') as source:
        settings = yaml.safe_load(source)

    # Values for the So-VITS section, coerced to the types stored on disk.
    sovits_values = {
        'log_interval': int(log_interval),
        'eval_interval': int(eval_interval),
        'keep_ckpts': int(keep_ckpts),
        'batch_size': int(batch_size),
        'learning_rate': float(learning_rate),
        'fp16_run': fp16_run,
        'all_in_mem': all_in_mem,
    }
    settings['sovits_params'].update(sovits_values)

    # Values for the diffusion section.
    diff_values = {
        'num_workers': int(num_workers),
        'cache_all_data': cache_all_data,
        'cache_device': str(cache_device),
        'amp_dtype': str(amp_dtype),
        'diff_batch_size': int(diff_batch_size),
        'diff_lr': float(diff_lr),
        'diff_interval_log': int(diff_interval_log),
        'diff_interval_val': int(diff_interval_val),
        'diff_force_save': int(diff_force_save),
    }
    settings['diff_params'].update(diff_values)

    with open(yaml_path, 'w') as target:
        yaml.safe_dump(settings, target, default_flow_style=False, sort_keys=False)
    return "成功保存默认配置"

def get_model_info(choice_ckpt):
    """Describe which encoder family a checkpoint in ``workdir`` belongs to.

    The checkpoint is loaded on the CPU and the second dimension of its
    ``emb_g.weight`` tensor is mapped to a label.

    Args:
        choice_ckpt: checkpoint file name relative to ``workdir``.

    Returns:
        A label string, or a message when the tensor is missing or its width
        is not one of the known values.
    """
    checkpoint = torch.load(os.path.join(workdir, choice_ckpt), map_location=torch.device('cpu')) # load on CPU
    speaker_embedding = checkpoint["model"].get("emb_g.weight")
    if speaker_embedding is None:
        return "所选模型缺少emb_g.weight,你可能选择了一个底模"
    _rows, width = speaker_embedding.size()
    if width == 768:
        return "Vec768-Layer12"
    if width == 256:
        return "Vec256-Layer9 / HubertSoft"
    if width == 1024:
        return "Whisper-PPG"
    return "不受支持的模型"
    
def load_json_encoder(config_choice):
    """Return the speech encoder declared in a model config file.

    Args:
        config_choice: config file name relative to ``config_dir``.

    Returns:
        The value of ``config["model"]["speech_encoder"]`` as a string, or a
        user-facing message when the lookup fails.

    Raises:
        OSError / json.JSONDecodeError: if the file cannot be opened or parsed
            (these happen before the guarded lookup and are not caught).
    """
    # Join the directory and file name as separate components, like the other
    # config paths in this file, instead of relying on config_dir ending in a
    # path separator.
    config_file = os.path.join(config_dir, config_choice)
    with open(config_file, 'r') as f:
        config = json.load(f)
    try:
        return str(config["model"]["speech_encoder"])
    except Exception as e:
        # A KeyError for the missing key carries the key name in its message.
        if "speech_encoder" in str(e):
            return "你的配置文件似乎是未作兼容的旧版,请根据文档指示对你的配置文件进行修改"
        return f"出错了: {e}"
        
def load_model_func(ckpt_name,cluster_name,config_name,enhance,diff_model_name,diff_config_name,only_diffusion,encoder,using_device):
    """Build the global ``model`` (an ``Svc`` instance) from the UI selections.

    Args:
        ckpt_name: checkpoint file name inside ``workdir``.
        cluster_name: cluster / feature-index file name inside ``workdir``;
            a ``.pkl`` suffix switches on the feature-retrieval flag and
            ``"no_clu"`` is reported as "not loaded".
        config_name: JSON config file name inside ``config_dir``.
        enhance: forwarded unchanged to ``Svc``.
        diff_model_name: diffusion checkpoint name inside ``diff_workdir``;
            ``"no_diff"`` disables shallow diffusion.
        diff_config_name: diffusion YAML name inside ``config_dir``;
            ``"no_diff_config"`` falls back to ``configs/diffusion.yaml``.
        only_diffusion: forwarded unchanged to ``Svc``.
        encoder: encoder label; ``"Whisper-PPG"`` forces a 25 second clip.
        using_device: device string, or ``"Auto"`` to pass ``None`` to ``Svc``.

    Returns:
        ``(status message, speaker dropdown update, clip seconds)``.

    Raises:
        KeyError: if the config has no ``"spk"`` entry.
    """
    global model
    config_path = os.path.join(config_dir, config_name)
    diff_config_path = os.path.join(config_dir, diff_config_name) if diff_config_name != "no_diff_config" else "configs/diffusion.yaml"
    with open(config_path, 'r') as f:
        config = json.load(f)
    # Single lookup: the speaker table feeds both the default choice and the list.
    spk_dict = config["spk"]
    spk_choice = next(iter(spk_dict)) if spk_dict else "未检测到音色"
    ckpt_path = os.path.join(workdir, ckpt_name)
    _, _suffix = os.path.splitext(cluster_name)
    fr = _suffix == ".pkl"  # a .pkl file means feature retrieval instead of k-means
    cluster_path = os.path.join(workdir, cluster_name)
    diff_model_path = os.path.join(diff_workdir, diff_model_name)
    shallow_diffusion = diff_model_name != "no_diff"
    use_spk_mix = False
    device = None if using_device == "Auto" else using_device
    model = Svc(ckpt_path,
                    config_path,
                    device,
                    cluster_path,
                    enhance,
                    diff_model_path,
                    diff_config_path,
                    shallow_diffusion,
                    only_diffusion,
                    use_spk_mix,
                    fr)
    spk_list = list(spk_dict.keys())
    clip = 25 if encoder == "Whisper-PPG" else 0  # Whisper input must be sliced to 25 seconds
    device_name = torch.cuda.get_device_properties(model.dev).name if "cuda" in str(model.dev) else str(model.dev)
    index_or_kmeans = "特征索引" if fr else "聚类模型"
    clu_load = "未加载" if cluster_name == "no_clu" else cluster_name
    diff_load = "未加载" if diff_model_name == "no_diff" else diff_model_name
    # Fix: the label and its value were fused together; separate them with ":"
    # just like the diffusion line that follows.
    output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
    return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip

def Newload_model_func(ckpt_name,cluster_name,config_name2,enhance2,diff_model_name2,diff_config_name2,only_diffusion2,encoder2,using_device2):
    """Build the global ``model`` from objects that expose their setting via ``.value``.

    ``ckpt_name`` and ``cluster_name`` are used directly; every other argument
    is read through its ``.value`` attribute. Besides ``model``, the global
    ``loaded`` is set to ``cluster_name``.

    Returns:
        None. The status message / dropdown update return is commented out at
        the end of the function.

    NOTE(review): ``spk_choice``, ``spk_list``, ``clip``, ``device_name``,
    ``index_or_kmeans``, ``clu_load`` and ``diff_load`` are computed but never
    used while that return stays commented out.
    """
    global model, loaded
    # Unwrap the plain values from the passed-in objects.
    config_name = config_name2.value
    enhance = enhance2.value
    diff_model_name = diff_model_name2.value
    diff_config_name = (diff_config_name2).value
    only_diffusion = (only_diffusion2).value
    encoder = (encoder2).value
    using_device = (using_device2).value
    config_path = os.path.join(config_dir, config_name)
    # "no_diff_config" falls back to the stock diffusion config.
    diff_config_path = os.path.join(config_dir, diff_config_name) if diff_config_name != "no_diff_config" else "configs/diffusion.yaml"
    with open(config_path, 'r') as f:
        config = json.load(f)
    spk_dict = config["spk"]
    spk_name = config.get('spk', None)
    spk_choice = next(iter(spk_name)) if spk_name else "未检测到音色"
    ckpt_path = os.path.join(workdir, ckpt_name)
    _, _suffix = os.path.splitext(cluster_name)
    fr = True if _suffix == ".pkl" else False # a .pkl suffix enables feature retrieval
    cluster_path = os.path.join(workdir, cluster_name)
    diff_model_path = os.path.join(diff_workdir, diff_model_name)
    # Shallow diffusion is on whenever a diffusion model was selected.
    shallow_diffusion = True if diff_model_name != "no_diff" else False
    use_spk_mix = False
    # "Auto" is passed to Svc as None.
    device = None if using_device == "Auto" else using_device
    model = Svc(ckpt_path,
                    config_path,
                    device,
                    cluster_path,
                    enhance,
                    diff_model_path,
                    diff_config_path,
                    shallow_diffusion,
                    only_diffusion,
                    use_spk_mix,
                    fr)
    spk_list = list(spk_dict.keys())
    clip = 25 if encoder == "Whisper-PPG" else 0 # Whisper input must be sliced to 25 seconds
    device_name = torch.cuda.get_device_properties(model.dev).name if "cuda" in str(model.dev) else str(model.dev)
    index_or_kmeans = "特征索引" if fr is True else "聚类模型"
    clu_load = "未加载" if cluster_name == "no_clu" else cluster_name
    diff_load = "未加载" if diff_model_name == "no_diff" else diff_model_name
    # Remember which cluster file this model was built with.
    loaded = cluster_name
    #output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
    #return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip

def get_file_options(directory, extension):
    """List the entries of *directory* whose names end with *extension*.

    Args:
        directory: folder to list (not recursive).
        extension: required name suffix, e.g. ``".pth"``.

    Returns:
        Matching entry names in the order ``os.listdir`` reports them.
    """
    matches = []
    for entry in os.listdir(directory):
        if entry.endswith(extension):
            matches.append(entry)
    return matches

def load_options():
    """Collect the file choices offered by the model-loading dropdowns.

    Returns:
        ``(ckpt_list, config_list, cluster_list, diff_list, diff_config_list)``.
        Checkpoints whose name starts with ``D_`` are left out; the cluster and
        diffusion lists start with the ``"no_clu"`` / ``"no_diff"`` placeholders.
    """
    ckpt_list = []
    for name in get_file_options(workdir, ".pth"):
        if not name.startswith("D_"):
            ckpt_list.append(name)
    config_list = get_file_options(config_dir, ".json")
    # Cluster (.pt) and feature-retrieval (.pkl) models share one dropdown.
    cluster_list = ["no_clu", *get_file_options(workdir, ".pt"), *get_file_options(workdir, ".pkl")]
    diff_list = ["no_diff", *get_file_options(diff_workdir, ".pt")]
    diff_config_list = get_file_options(config_dir, ".yaml")
    return ckpt_list, config_list, cluster_list, diff_list, diff_config_list

def refresh_options():
    """Re-scan the folders and return ``update(choices=...)`` results for the five selectors.

    Returns:
        A 5-tuple of update results, in the order checkpoint, config, cluster,
        diffusion model, diffusion config.
    """
    option_lists = load_options()
    selectors = (choice_ckpt, config_choice, cluster_choice, diff_choice, diff_config_choice)
    return tuple(
        selector.update(choices=options)
        for selector, options in zip(selectors, option_lists)
    )

def vc_infer(sid, input_audio, input_audio_path, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment):
    """Run ``model.slice_inference`` on one audio file and save the result under ``results/``.

    Args:
        sid: speaker name; also embedded in the output file name.
        input_audio: audio samples as an array.
        input_audio_path: path of the audio file handed to ``slice_inference``.
        Remaining arguments are forwarded positionally to ``slice_inference``.

    Returns:
        Path of the written wav file,
        ``results/<input stem>_<sid>_<unix timestamp>.wav``.

    NOTE(review): the normalised / mono ``input_audio`` is not passed on;
    ``slice_inference`` receives ``input_audio_path`` instead.
    """
    # Scale integer PCM by the dtype's maximum value and cast to float32.
    if np.issubdtype(input_audio.dtype, np.integer):
        full_scale = np.iinfo(input_audio.dtype).max
        input_audio = (input_audio / full_scale).astype(np.float32)
    # Down-mix multi-channel audio.
    if input_audio.ndim > 1:
        input_audio = librosa.to_mono(input_audio.transpose(1, 0))

    inference_args = (
        input_audio_path,
        sid,
        vc_transform,
        slice_db,
        cluster_ratio,
        auto_f0,
        noise_scale,
        pad_seconds,
        cl_num,
        lg_num,
        lgr_num,
        f0_predictor,
        enhancer_adaptive_key,
        cr_threshold,
        k_step,
        use_spk_mix,
        second_encoding,
        loudness_envelope_adjustment,
    )
    _audio = model.slice_inference(*inference_args)
    model.clear_empty()

    timestamp = str(int(time.time()))
    if not os.path.exists("results"):
        os.makedirs("results")
    source_stem = os.path.splitext(os.path.basename(input_audio_path))[0]
    output_file_path = os.path.join("results", source_stem + "_" + sid + "_" + timestamp + ".wav")
    sf.write(output_file_path, _audio, model.target_sample, format="wav")
    return output_file_path

def vc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment):
    """Convert a single uploaded clip and return ``(status, output path)``.

    Args:
        sid: target speaker name.
        input_audio: ``(sampling_rate, samples)`` pair, or ``None``.
        Remaining arguments are forwarded to ``vc_infer``.

    Returns:
        ``("Success", path)`` on success, or ``(message, None)`` when the audio
        or the model is missing.

    Raises:
        gr.Error: wrapping any exception raised while converting.
    """
    global model
    try:
        if input_audio is None:
            return "You need to upload an audio", None
        if model is None:
            return "You need to upload an model", None
        sampling_rate, audio = input_audio
        temp_path = "temp.wav"
        try:
            sf.write(temp_path, audio, sampling_rate, format="wav")
            output_file_path = vc_infer(sid, audio, temp_path, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment)
        finally:
            # Always clean up the scratch file, including when inference fails.
            if os.path.exists(temp_path):
                os.remove(temp_path)
        return "Success", output_file_path
    except Exception as e:
        if debug: traceback.print_exc()
        raise gr.Error(e)

def vc_batch_fn(sid, input_audio_files, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment):
    """Convert every uploaded file in turn; the outputs are written by ``vc_infer``.

    Args:
        sid: target speaker name.
        input_audio_files: uploaded file objects exposing a ``.name`` path.
        Remaining arguments are forwarded to ``vc_infer`` for each file.

    Returns:
        A status message.

    Raises:
        gr.Error: wrapping any exception raised while converting.
    """
    global model
    try:
        if input_audio_files is None or not len(input_audio_files):
            return "You need to upload at least one audio file"
        if model is None:
            return "You need to upload a model"
        # The same conversion settings apply to every file in the batch.
        shared_settings = (vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment)
        for uploaded in input_audio_files:
            source_path = uploaded.name
            samples, _rate = sf.read(source_path)
            vc_infer(sid, samples, source_path, *shared_settings)
        return "批量推理完成,音频已经被保存到results文件夹"
    except Exception as e:
        if debug: traceback.print_exc()
        raise gr.Error(e)
    
def tts_fn(_text, _speaker, sid, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold, k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment):
    """Run ``tts.py`` on the text, then convert the resulting ``tts.wav``.

    ``tts.py`` is invoked with the text and speaker as arguments; it is
    expected to write ``tts.wav`` in the working directory (its code is not
    visible here - confirm). That file is resampled to 44.1 kHz, rewritten as
    16-bit PCM and passed to ``vc_infer``.

    Args:
        _text: text to synthesise.
        _speaker: voice name passed to ``tts.py``.
        sid: target speaker name for the conversion.
        Remaining arguments are forwarded to ``vc_infer``.

    Returns:
        ``("Success", path)``, or ``(message, None)`` when no model is loaded.

    Raises:
        gr.Error: wrapping any exception raised along the way.
    """
    global model
    try:
        subprocess.run([r"python", "tts.py", _text, _speaker])
        # Fix: keep the target rate separate from the rate reported by
        # librosa.load. Reusing one name made orig_sr == target_sr, so the
        # resample did nothing and the file never reached 44.1 kHz.
        target_sr = 44100
        y, source_sr = librosa.load("tts.wav")
        resampled_y = librosa.resample(y, orig_sr=source_sr, target_sr=target_sr)
        sf.write("tts.wav", resampled_y, target_sr, subtype = "PCM_16")
        input_audio = "tts.wav"
        audio, sampling_rate = sf.read(input_audio)
        if model is None:
            return "You need to upload a model", None
        output_file_path = vc_infer(sid, audio, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment)
        return "Success", output_file_path
    except Exception as e:
        if debug: traceback.print_exc()
        raise gr.Error(e)

def load_raw_dirs():
    """Validate the raw dataset folder and report the speaker sub-folders found.

    Files directly inside ``raw_path`` are ignored; only files in sub-folders
    are checked, first for disallowed characters in the name, then for a
    non-``.wav`` extension.

    Returns:
        An error string when a check fails, otherwise the result of
        ``raw_dirs_list.update(...)`` carrying either the list of speaker
        folder names or a "dataset not found" message.
    """
    def _files_in_subfolders():
        # Yield every file name found below (but not directly in) raw_path.
        for folder, _subdirs, names in os.walk(raw_path):
            if folder != raw_path:
                yield from names

    # Check the file names (extension excluded).
    allowed_pattern = re.compile(r'^[a-zA-Z0-9_@#$%^&()_+\-=\s\.]*$')
    badly_named = [
        name for name in _files_in_subfolders()
        if not allowed_pattern.match(os.path.splitext(name)[0])
    ]
    if badly_named:
        return f"数据集文件名只能包含数字、字母、下划线,以下文件不符合要求,请改名后再试:{badly_named}"

    # Check that every dataset file is a wav file.
    not_wav = [name for name in _files_in_subfolders() if not name.lower().endswith('.wav')]
    if not_wav:
        return f"以下文件为非wav格式文件,请删除后再试:{not_wav}"

    with os.scandir(raw_path) as entries:
        spk_dirs = [entry.name for entry in entries if entry.is_dir()]
    if not spk_dirs:
        return raw_dirs_list.update(value="未找到数据集,请检查dataset_raw文件夹")
    return raw_dirs_list.update(value=spk_dirs)

def dataset_preprocess(encoder, f0_predictor, use_diff, vol_aug, skip_loudnorm, num_processes):
    """Run the three preprocessing scripts in sequence, streaming their output.

    This is a generator. Every item is an ``(output text, second value)`` pair:
    the second value is ``None`` while the commands run, and the final item
    carries ``gr.Textbox.update(value=<"spk" entry of configs/config.json>)``.

    Args:
        encoder: value for ``--speech_encoder``.
        f0_predictor: value for ``--f0_predictor``.
        use_diff: adds ``--use_diff`` to the last command when truthy.
        vol_aug: adds ``--vol_aug`` to the config command when truthy.
        skip_loudnorm: adds ``--skip_loudnorm`` to the resample command when truthy.
        num_processes: value for ``--num_processes``.

    NOTE(review): the commands are built by string formatting and run with
    ``shell=True``; the arguments are assumed to come from trusted UI
    controls - confirm.
    """
    diff_arg = "--use_diff" if use_diff else ""
    vol_aug_arg = "--vol_aug" if vol_aug else ""
    skip_loudnorm_arg = "--skip_loudnorm" if skip_loudnorm else ""
    preprocess_commands = [
        r"python resample.py %s" % (skip_loudnorm_arg),
        r"python preprocess_flist_config.py --speech_encoder %s %s" % (encoder, vol_aug_arg),
        r"python preprocess_hubert_f0.py --num_processes %s --f0_predictor %s %s" % (num_processes ,f0_predictor, diff_arg)
        ]
    accumulated_output = ""
    # Clear the previous dataset: remove every sub-directory of dataset/44k.
    dataset = os.listdir("dataset/44k")
    if len(dataset) != 0:
        for dir in dataset:
            dataset_dir = "dataset/44k/" + str(dir)
            if os.path.isdir(dataset_dir):
                shutil.rmtree(dataset_dir)
                accumulated_output += f"Deleting previous dataset: {dir}\n"
    for command in preprocess_commands:
        try:
            result = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, text=True)
            accumulated_output += f"Command: {command}, Using Encoder: {encoder}, Using f0 Predictor: {f0_predictor}\n"
            yield accumulated_output, None
            progress_line = None
            for line in result.stdout:
                if r"it/s" in line or r"s/it" in line: # keep progress-bar refreshes from flooding the log
                    # Only the latest progress line is kept; it is shown but not accumulated yet.
                    progress_line = line
                else:
                    accumulated_output += line
                if progress_line is None:
                    yield accumulated_output, None
                else:
                    yield accumulated_output + progress_line, None
            result.communicate()
        # NOTE(review): subprocess.Popen is not documented to raise
        # CalledProcessError, so this handler looks unreachable - confirm.
        except subprocess.CalledProcessError as e:
            result = e.output
            accumulated_output += f"Error: {result}\n"
            yield accumulated_output, None
        # Append the last progress line once the command has finished.
        if progress_line is not None:
            accumulated_output += progress_line
        accumulated_output += '-' * 50 + '\n'
        yield accumulated_output, None
        # Assigned on every iteration; the value is only read after the loop.
        config_path = "configs/config.json"
    with open(config_path, 'r') as f:
        config = json.load(f)
    spk_name = config.get('spk', None)
    yield accumulated_output, gr.Textbox.update(value=spk_name)

def regenerate_config(encoder, vol_aug):
    """Re-run ``preprocess_flist_config.py`` and return its combined output.

    Args:
        encoder: value for ``--speech_encoder``.
        vol_aug: adds ``--vol_aug`` to the command when truthy.

    Returns:
        The command's stdout/stderr text followed by either the success
        message (exit code 0) or an ``Error:`` line.
    """
    vol_aug_arg = "--vol_aug" if vol_aug else ""
    cmd = r"python preprocess_flist_config.py --speech_encoder %s %s" % (encoder, vol_aug_arg)
    output = ""
    try:
        # Leaving the with-block closes the pipe and waits for the process,
        # so returncode is set afterwards.
        with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, text=True) as process:
            for line in process.stdout:
                output += line
        # Fix: success used to be reported unconditionally; Popen does not
        # raise on a non-zero exit status, so check it explicitly.
        if process.returncode == 0:
            output += "Regenerate config file successfully."
        else:
            output += f"Error: command exited with code {process.returncode}\n"
    except OSError as e:
        # The process could not be started at all.
        output += f"Error: {e}\n"
    return output

def clear_output():
    """Return a textbox update whose value is the fixed "Cleared!" placeholder."""
    return gr.Textbox.update(value="Cleared!>_<")

def read_config(config_path):
    """Parse the JSON file at *config_path* and return the decoded object.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(config_path, 'r') as handle:
        return json.load(handle)

def config_fn(log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem, diff_num_workers, diff_cache_all_data, diff_batch_size, diff_lr, diff_interval_log, diff_interval_val, diff_cache_device, diff_amp_dtype, diff_force_save):
    """Write the training settings into ``configs/config.json`` and ``configs/diffusion.yaml``.

    Only the ``train`` section of each file is touched. The JSON file is
    rewritten first, then the YAML file.

    Returns:
        A confirmation message for the UI.

    Raises:
        ValueError: if a numeric argument cannot be converted.
        KeyError: if a file has no ``train`` section.
    """
    config_origin = "configs/config.json"
    diff_config = "configs/diffusion.yaml"

    # --- main model config (JSON) ---
    config_data = read_config(config_origin)
    main_train_values = {
        'log_interval': int(log_interval),
        'eval_interval': int(eval_interval),
        'keep_ckpts': int(keep_ckpts),
        'batch_size': int(batch_size),
        'learning_rate': float(lr),
        'fp16_run': fp16_run,
        'all_in_mem': all_in_mem,
    }
    config_data['train'].update(main_train_values)
    with open(config_origin, 'w') as json_out:
        json.dump(config_data, json_out, indent=4)

    # --- diffusion config (YAML) ---
    with open(diff_config, 'r') as yaml_in:
        diff_config_data = yaml.safe_load(yaml_in)
    diff_train_values = {
        'num_workers': int(diff_num_workers),
        'cache_all_data': diff_cache_all_data,
        'batch_size': int(diff_batch_size),
        'lr': float(diff_lr),
        'interval_log': int(diff_interval_log),
        'interval_val': int(diff_interval_val),
        'cache_device': str(diff_cache_device),
        'amp_dtype': str(diff_amp_dtype),
        'interval_force_save': int(diff_force_save),
    }
    diff_config_data['train'].update(diff_train_values)
    with open(diff_config, 'w') as yaml_out:
        yaml.safe_dump(diff_config_data, yaml_out, default_flow_style=False, sort_keys=False)
    return "配置文件写入完成"

def check_dataset(dataset_path):
    """Check that *dataset_path* looks like a preprocessed dataset.

    Returns:
        A warning string when the folder is empty or when no ``.npy`` / ``.pt``
        file exists anywhere below it; ``None`` when the dataset looks usable.

    Raises:
        OSError: if *dataset_path* cannot be listed.
    """
    if len(os.listdir(dataset_path)) == 0:
        return "数据集不存在,请检查dataset文件夹"
    # One feature file anywhere in the tree is enough.
    has_feature_file = any(
        name.endswith(('.npy', '.pt'))
        for _folder, _subdirs, names in os.walk(dataset_path)
        for name in names
    )
    if not has_feature_file:
        return "数据集中未检测到f0和hubert文件,可能是预处理未完成"
    return None

def training(gpu_selection, encoder):
    """Start a fresh training run in a new terminal window.

    Existing contents of ``workdir`` (except ``diffusion``) are moved to a
    timestamped folder under ``models_backup_path``; the pretrained ``D_0.pth``
    and ``G_0.pth`` for the chosen encoder are copied into ``workdir``; then
    ``train.py`` is launched through ``cmd``.

    Args:
        gpu_selection: value assigned to ``CUDA_VISIBLE_DEVICES``.
        encoder: encoder key selecting the pretrained model folder.

    Returns:
        A status or warning message for the UI.
    """
    vol_emb = read_config("configs/config.json")["model"]["vol_embedding"]
    dataset_warn = check_dataset("dataset/44k")
    if dataset_warn is not None:
        return dataset_warn

    # Folder holding the pretrained D_0.pth / G_0.pth pair for each encoder.
    pretrained_dirs = {
        "vec256l9": "pre_trained_model",
        "vec768l12": "pre_trained_model/768l12/vol_emb" if vol_emb else "pre_trained_model/768l12",
        "hubertsoft": "pre_trained_model/hubertsoft",
        "whisper-ppg": "pre_trained_model/whisper-ppg",
        "cnhubertlarge": "pre_trained_model/cnhubertlarge",
        "dphubert": "pre_trained_model/dphubert",
        "whisper-ppg-large": "pre_trained_model/whisper-ppg-large",
    }
    if encoder not in pretrained_dirs:
        return "未知编码器"
    encoder_model_path = pretrained_dirs[encoder]
    d_0_path = os.path.join(encoder_model_path, "D_0.pth")
    g_0_path = os.path.join(encoder_model_path, "G_0.pth")

    timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M')
    new_backup_folder = os.path.join(models_backup_path, str(timestamp))
    # Back up whatever is in the work dir unless it only holds "diffusion".
    if os.listdir(workdir) != ['diffusion']:
        os.makedirs(new_backup_folder, exist_ok=True)
        for entry in os.listdir(workdir):
            if entry == "diffusion":
                continue
            shutil.move(os.path.join(workdir, entry), os.path.join(new_backup_folder, entry))

    shutil.copy(d_0_path, os.path.join(workdir, "D_0.pth"))
    shutil.copy(g_0_path, os.path.join(workdir, "G_0.pth"))
    cmd = r"set CUDA_VISIBLE_DEVICES=%s && python train.py -c configs/config.json -m 44k" % (gpu_selection)
    subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", cmd])
    return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"

def continue_training(gpu_selection, encoder):
    """Resume training in a new terminal window from the checkpoints in ``workdir``.

    Args:
        gpu_selection: value assigned to ``CUDA_VISIBLE_DEVICES``.
        encoder: must be non-empty; otherwise only a reminder is returned.

    Returns:
        A status or warning message for the UI.
    """
    dataset_warn = check_dataset("dataset/44k")
    if dataset_warn is not None:
        return dataset_warn
    if encoder == "":
        return "请先选择预处理对应的编码器"
    # Resuming needs at least one generator checkpoint (G_*.pth).
    has_generator_ckpt = any(
        name.startswith('G_') and name.endswith('.pth')
        for name in os.listdir(workdir)
    )
    if not has_generator_ckpt:
        return "你还没有已开始的训练"
    cmd = r"set CUDA_VISIBLE_DEVICES=%s && python train.py -c configs/config.json -m 44k" % (gpu_selection)
    subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", cmd])
    return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"

def kmeans_training(kmeans_gpu):
    """Launch ``cluster/train_cluster.py`` in a new terminal window.

    Args:
        kmeans_gpu: when truthy, ``--gpu`` is appended to the command.

    Returns:
        A status or warning message for the UI.
    """
    if len(os.listdir(r"dataset/44k")) == 0:
        return "数据集不存在,请检查dataset文件夹"
    cmd = r"python cluster/train_cluster.py"
    if kmeans_gpu:
        cmd += r" --gpu"
    subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", cmd])
    return "已经在新的终端窗口开始训练,训练聚类模型不会输出日志,CPU训练一般需要5-10分钟左右"

def index_training():
    """Launch ``train_index.py`` in a new terminal window.

    Returns:
        A status message, or a warning when ``dataset/44k`` is empty.
    """
    if len(os.listdir(r"dataset/44k")) == 0:
        return "数据集不存在,请检查dataset文件夹"
    launch_args = ["cmd", "/c", "start", "cmd", "/k", r"python train_index.py -c configs/config.json"]
    subprocess.Popen(launch_args)
    return "已经在新的终端窗口开始训练"

def diff_training(encoder):
    """Start a fresh diffusion training run in a new terminal window.

    Existing files in ``diff_workdir`` are moved to a timestamped folder under
    ``models_backup_path/diffusion``; the pretrained ``model_0.pt`` for the
    encoder is copied in; then ``train_diff.py`` is launched through ``cmd``.

    Args:
        encoder: encoder key; only ``vec768l12`` and ``hubertsoft`` have a
            pretrained diffusion model here.

    Returns:
        A status or warning message for the UI.
    """
    if not os.listdir(r"dataset/44k"):
        return "数据集不存在,请检查dataset文件夹"
    # Fix: validate the encoder BEFORE touching the work dir. The backup used
    # to run first, so an unsupported encoder moved the existing diffusion
    # checkpoints away and then aborted without starting anything.
    if encoder == "vec256l9" or encoder == "whisper-ppg":
        return "你所选的编码器暂时不支持训练扩散模型"
    pretrained_models = {
        "vec768l12": "pre_trained_model/diffusion/768l12/model_0.pt",
        "hubertsoft": "pre_trained_model/diffusion/hubertsoft/model_0.pt",
    }
    if encoder not in pretrained_models:
        return "请先选择编码器"
    timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M')
    new_backup_folder = os.path.join(models_backup_path, "diffusion", str(timestamp))
    if len(os.listdir(diff_workdir)) != 0:
        os.makedirs(new_backup_folder, exist_ok=True)
        for file in os.listdir(diff_workdir):
            shutil.move(os.path.join(diff_workdir, file), os.path.join(new_backup_folder, file))
    shutil.copy(pretrained_models[encoder], os.path.join(diff_workdir, "model_0.pt"))
    subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", r"python train_diff.py -c configs/diffusion.yaml"])
    return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"

def diff_continue_training(encoder):
    """Resume diffusion training in a new terminal window.

    Args:
        encoder: must be non-empty; otherwise only a reminder is returned.

    Returns:
        A status or warning message for the UI.
    """
    if len(os.listdir(r"dataset/44k")) == 0:
        return "数据集不存在,请检查dataset文件夹"
    if encoder == "":
        return "请先选择预处理对应的编码器"
    # Resuming needs at least one .pt checkpoint in the diffusion work dir.
    has_checkpoint = any(name.endswith('.pt') for name in os.listdir(diff_workdir))
    if not has_checkpoint:
        return "你还没有已开始的训练"
    subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", r"python train_diff.py -c configs/diffusion.yaml"])
    return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"

def upload_mix_append_file(files,sfiles):
    """Combine newly uploaded model files with the ones already selected.

    Args:
        files: newly uploaded file objects exposing a ``.name`` path.
        sfiles: previously selected file objects, or ``None``.

    Returns:
        ``(list of file paths, update for mix_model_output1)``; the update
        holds a JSON object mapping every path to 100.

    Raises:
        gr.Error: wrapping any exception raised while building the list.
    """
    try:
        # Identity test for None instead of ``== None``.
        all_files = files if sfiles is None else chain(files, sfiles)
        file_paths = [file.name for file in all_files]
        p = {file: 100 for file in file_paths}
        return file_paths,mix_model_output1.update(value=json.dumps(p,indent=2))
    except Exception as e:
        if debug: traceback.print_exc()
        raise gr.Error(e)

def mix_submit_click(js,mode):
    """Mix the models listed in *js* and report where the result was saved.

    Args:
        js: JSON object text mapping model paths to mix rates.
        mode: display name of the mixing mode ("凸组合" or "线性组合"), mapped
            to 0 or 1 before being passed to ``mix_model``.

    Returns:
        A success message containing the path returned by ``mix_model``.

    Raises:
        gr.Error: wrapping any failure (empty input, unknown mode, invalid
            JSON, or an error raised by ``mix_model``).
    """
    try:
        # Explicit check instead of ``assert``: assertions are stripped under
        # ``python -O`` and produced an empty error message.
        if js.lstrip() == "":
            raise ValueError("混合配置不能为空")
        modes = {"凸组合":0, "线性组合":1}
        mode = modes[mode]
        data = json.loads(js)
        # Split {path: rate} into two parallel tuples.
        model_path,mix_rate = zip(*data.items())
        path = mix_model(model_path,mix_rate,mode)
        return f"成功,文件被保存在了{path}"
    except Exception as e:
        if debug: traceback.print_exc()
        raise gr.Error(e)

def updata_mix_info(files):
    """Rebuild the mix-ratio textbox content for the given model files.

    Args:
        files: File objects exposing ``name``, or ``None`` when the list was
            cleared.

    Returns:
        An update for the mix-ratio textbox: empty text for ``None``,
        otherwise a JSON object mapping each file name to 100.

    Raises:
        gr.Error: Wraps any exception raised while building the update.
    """
    try:
        if files is None:
            return mix_model_output1.update(value="")
        ratios = {}
        for item in files:
            ratios[item.name] = 100
        return mix_model_output1.update(value=json.dumps(ratios, indent=2))
    except Exception as e:
        if debug:
            traceback.print_exc()
        raise gr.Error(e)

def pth_identify():
    """Check that every folder under ``root_dir`` holds one .pth and one .json.

    Returns:
        A status message: an error text for a missing ``root_dir``, for a
        ``root_dir`` without sub-folders, or for the first sub-folder that
        does not contain exactly one ``.pth`` and one ``.json`` file;
        otherwise a summary listing the recognised folders.
    """
    if not os.path.exists(root_dir):
        return f"未找到{root_dir}文件夹,请先创建一个{root_dir}文件夹并按第一步流程操作"

    model_dirs = []
    for entry in os.listdir(root_dir):
        if os.path.isdir(os.path.join(root_dir, entry)):
            model_dirs.append(entry)
    if len(model_dirs) == 0:
        return f"未在{root_dir}文件夹中找到模型文件夹,请确保每个模型和配置文件都被放置在单独的文件夹中"

    valid_model_dirs = []
    for path in model_dirs:
        pth_count = len(glob.glob(f"{root_dir}/{path}/*.pth"))
        json_count = len(glob.glob(f"{root_dir}/{path}/*.json"))
        # Stop at the first folder that breaks the one-model-one-config rule.
        if not (pth_count == 1 and json_count == 1):
            return f"错误: 在{root_dir}/{path}中找到了{pth_count}个.pth文件和{json_count}个.json文件。应当确保每个文件夹内有且只有一个.pth文件和.json文件"
        valid_model_dirs.append(path)

    return f"成功识别了{len(valid_model_dirs)}个模型:{valid_model_dirs}"

def onnx_export():
    """Export the model in every sub-folder of ``root_dir`` to ONNX.

    For each sub-folder the first ``*.pth`` and the first ``*.json`` found are
    used as checkpoint and config. The exported file is written next to the
    checkpoint, with the extension replaced by ``.onnx``.

    Returns:
        A status message for the UI. Failures are reported as text (prefixed
        with "转换错误:") rather than raised.
    """
    # First dimension of the dummy hidden-unit tensor, keyed by gin_channels.
    first_dim_by_channels = {"256": 1, "768": 192}
    try:
        # Checked inside the try so a missing folder yields a message instead
        # of an unhandled FileNotFoundError from os.listdir.
        if not os.path.isdir(root_dir):
            return f"未找到{root_dir}文件夹,请先创建一个{root_dir}文件夹并按第一步流程操作"
        model_dirs = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
        for path in model_dirs:
            pth_files = glob.glob(f"{root_dir}/{path}/*.pth")
            json_files = glob.glob(f"{root_dir}/{path}/*.json")
            if not pth_files or not json_files:
                raise FileNotFoundError(f"在{root_dir}/{path}中未找到.pth文件或.json文件")
            model_file = pth_files[0]
            json_file = json_files[0]
            with open(json_file, 'r') as config_file:
                config_data = json.load(config_file)
            channels = config_data["model"]["gin_channels"]
            # Fail explicitly on an unknown size; previously para1 was either
            # unbound or silently reused from the previous model in the loop.
            if str(channels) not in first_dim_by_channels:
                raise ValueError(f"不支持的gin_channels: {channels}")
            para1 = first_dim_by_channels[str(channels)]
            device = torch.device("cpu")
            hps = utils.get_hparams_from_file(json_file)
            SVCVITS = SynthesizerTrn(
                hps.data.filter_length // 2 + 1,
                hps.train.segment_size // hps.data.hop_length,
                **hps.model)
            _ = utils.load_checkpoint(model_file, SVCVITS, None)
            _ = SVCVITS.eval().to(device)
            # Export is inference-only, so gradients are switched off.
            for i in SVCVITS.parameters():
                i.requires_grad = False
            # Dummy inputs used only to trace the model.
            n_frame = 10
            test_hidden_unit = torch.rand(para1, n_frame, channels)
            test_pitch = torch.rand(1, n_frame)
            test_mel2ph = torch.arange(0, n_frame, dtype=torch.int64)[None] # torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).unsqueeze(0)
            test_uv = torch.ones(1, n_frame, dtype=torch.float32)
            test_noise = torch.randn(1, 192, n_frame)
            test_sid = torch.LongTensor([0])
            input_names = ["c", "f0", "mel2ph", "uv", "noise", "sid"]
            output_names = ["audio", ]
            onnx_file = os.path.splitext(model_file)[0] + ".onnx"
            torch.onnx.export(SVCVITS,
                              (
                                  test_hidden_unit.to(device),
                                  test_pitch.to(device),
                                  test_mel2ph.to(device),
                                  test_uv.to(device),
                                  test_noise.to(device),
                                  test_sid.to(device)
                              ),
                              onnx_file,
                              dynamic_axes={
                                  "c": [0, 1],
                                  "f0": [1],
                                  "mel2ph": [1],
                                  "uv": [1],
                                  "noise": [2],
                              },
                              do_constant_folding=False,
                              opset_version=16,
                              verbose=False,
                              input_names=input_names,
                              output_names=output_names)
        return "转换成功,模型被保存在了checkpoints下的对应目录"
    except Exception as e:
        if debug:
            traceback.print_exc()
        return "转换错误:"+str(e)

def load_raw_audio(audio_path):
    """List the ``.wav`` files found directly inside ``audio_path``.

    Args:
        audio_path: Directory to scan. The extension match ignores case.

    Returns:
        A ``(message, files)`` tuple. ``files`` is the list of matching file
        names, or ``None`` when the path is not a directory or holds no
        ``.wav`` file.
    """
    if os.path.isdir(audio_path):
        wav_files = []
        for entry in os.listdir(audio_path):
            if entry.lower().endswith('.wav'):
                wav_files.append(entry)
        if wav_files:
            return "成功加载", wav_files
        return "未在目录中找到.wav音频文件", None
    return "请输入正确的目录", None

def slicer_fn(input_dir, output_dir, process_method, max_sec, min_sec):
    """Slice every ``.wav`` in ``input_dir`` into ``output_dir`` and report totals.

    Args:
        input_dir: Folder holding the source audio.
        output_dir: Folder receiving the slices; created when missing.
        process_method: How to treat slices shorter than ``min_sec``:
            "丢弃" deletes them, "将过短音频整合为长音频" hands them to
            ``slicer.merge_short``.
        max_sec: Maximum slice length in seconds, passed to the slicer.
        min_sec: Minimum slice length in seconds.

    Returns:
        A status message for the UI with slice count and durations, or an
        error text when a directory argument is unusable.
    """
    if not output_dir:
        return "请先选择输出的文件夹"
    # Validate the source folder before any work, so a bad path produces a
    # message instead of an exception from os.listdir.
    if not input_dir or not os.path.isdir(input_dir):
        return "请输入正确的目录"
    slicer = AutoSlicer()
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        if filename.lower().endswith(".wav"):
            slicer.auto_slice(filename, input_dir, output_dir, max_sec)
    if process_method == "丢弃":
        for filename in os.listdir(output_dir):
            if not filename.endswith(".wav"):
                continue
            filepath = os.path.join(output_dir, filename)
            audio, sr = librosa.load(filepath, sr=None, mono=False)
            if librosa.get_duration(y=audio, sr=sr) < min_sec:
                os.remove(filepath)
    elif process_method == "将过短音频整合为长音频":
        slicer.merge_short(output_dir, max_sec, min_sec)
    file_count, max_duration, min_duration, orig_duration, final_duration = slicer.slice_count(input_dir, output_dir)
    hrs = int(final_duration / 3600)
    mins = int((final_duration % 3600) / 60)
    sec = format(float(final_duration % 60), '.2f')
    # Guard against a zero original duration (e.g. no audio was found).
    ratio = 100 * (final_duration / orig_duration) if orig_duration else 0.0
    rate = format(ratio, '.2f')
    # The minutes unit "分" was missing, which glued minutes and seconds together.
    return f"成功将音频切分为{file_count}条片段,其中最长{max_duration}秒,最短{min_duration}秒,切片后的音频总时长{hrs:02d}小时{mins:02d}分{sec}秒,为原始音频时长的{rate}%"

def model_compression(_model):
    """Write a compressed copy of a checkpoint from ``workdir``.

    Args:
        _model: File name of the checkpoint inside ``workdir``.

    Returns:
        A status message for the UI. On success it names the output path,
        which is the input name with ``_compressed`` inserted before the
        extension.
    """
    # Treat None like "" so an untouched dropdown yields a message instead of
    # a TypeError from os.path.join.
    if not _model:
        return "请先选择要压缩的模型"
    model_path = os.path.join(workdir, _model)
    stem, extension = os.path.splitext(_model)
    output_path = os.path.join(workdir, f"{stem}_compressed{extension}")
    removeOptimizer(model_path, output_path)
    return f"模型已成功被保存在了{output_path}"

# Read the selectable checkpoint / config / cluster / diffusion file lists.
ckpt_list, config_list, cluster_list, diff_list, diff_config_list = load_options()

# Probe the CUDA devices and keep those whose name matches a known marker.
ngpu = torch.cuda.device_count()
gpu_infos = []
if_gpu_ok = False
if torch.cuda.is_available() and ngpu != 0:
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        if "MX" in gpu_name:
            continue
        gpu_name_upper = gpu_name.upper()
        # Markers are matched as written; only "A50" and "TITAN" ignore case.
        # Other names the original author listed as covered:
        # A10, A100, V100, A40, P40, M40, K80.
        name_matches = any(
            marker in gpu_name
            for marker in ("10", "16", "20", "30", "40", "70", "80", "90", "M4", "P4", "T4")
        ) or any(marker in gpu_name_upper for marker in ("A50", "TITAN"))
        if name_matches:
            if_gpu_ok = True  # at least one usable NVIDIA card
            gpu_infos.append(f"{i}\t{gpu_name}")
gpu_info = "\n".join(gpu_infos) if if_gpu_ok and gpu_infos else "很遗憾您这没有能用的显卡来支持您训练"
# Take the full device index before the tab; using only the first character
# truncated indices of 10 and above.
gpus = "-".join(info.split("\t", 1)[0] for info in gpu_infos)

# Read the default training parameters.
sovits_params, diff_params = get_default_settings()

app = gr.Blocks()

def Newget_model_info(choice_ckpt2):
    """Identify a checkpoint's encoder from its speaker-embedding size.

    Args:
        choice_ckpt2: Checkpoint file name inside ``workdir``; converted with
            ``str`` before use.

    Returns:
        A plain message string when the checkpoint has no ``emb_g.weight``
        entry, otherwise a hidden ``gr.Textbox`` whose value names the
        encoder matching the second dimension of that weight.
    """
    checkpoint_path = os.path.join(workdir, str(choice_ckpt2))
    # Loaded onto the CPU regardless of where the checkpoint was saved.
    state = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    speaker_embedding = state["model"].get("emb_g.weight")
    if speaker_embedding is None:
        return "所选模型缺少emb_g.weight,你可能选择了一个底模"
    _rows, width = speaker_embedding.size()
    encoder_by_width = {
        768: "Vec768-Layer12",
        256: "Vec256-Layer9 / HubertSoft",
        1024: "Whisper-PPG",
    }
    encoder_name = encoder_by_width.get(width, "不受支持的模型")
    return gr.Textbox(visible=False, value=encoder_name)

with app:
    gr.Markdown(value="""
        ### So-VITS-SVC 4.1-Stable
                
        修改自原项目及bilibili@麦哲云

        仅供个人娱乐和非商业用途,禁止用于血腥、暴力、性相关、政治相关内容

        weiui来自:bilibili@羽毛布団,交流③群:416656175
        
        镜像作者:bilibili@kiss丿冷鸟鸟,交流群:829974025

        """)
    with gr.Tabs():
        with gr.TabItem("目白阿尔丹/目白雅丹 (Mejiro Ardan)"):
            #with gr.Row():
            #    choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
            #    model_branch = gr.Textbox(label="模型编码器", placeholder="请先选择模型", interactive=False)
            #choice_ckpt = gr.Dropdown(value="G_106400.pth", visible=False)
            #with gr.Row():
            #    config_choice = gr.Dropdown(label="配置文件", choices=config_list, value="no_config")
            #    config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
            config_choice = gr.Dropdown(value="config.json", visible=False)
            #gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
            #with gr.Row():
            #    diff_choice = gr.Dropdown(label="(可选)选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
            #    diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
            diff_choice = gr.Dropdown(value="no_diff", visible=False)
            diff_config_choice = gr.Dropdown(value="no_diff_config", visible=False)
            with gr.Row():
                cluster_choice = gr.Dropdown(label="(可选)选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
            with gr.Row():
                enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
                #only_diffusion = gr.Checkbox(label="是否使用全扩散推理,开启后将不使用So-VITS模型,仅使用扩散模型进行完整扩散推理,默认关闭", value=False)
                only_diffusion = gr.Checkbox(value=False, visible=False)
            #using_device = gr.Dropdown(label="推理设备,默认为自动选择", choices=["Auto","cuda","cpu"], value="Auto")
            using_device = gr.Dropdown(value='Auto', visible=False)
            #refresh = gr.Button("刷新选项")
            #loadckpt = gr.Button("加载模型", variant="primary")
            #with gr.Row():
            #    model_message = gr.Textbox(label="Output Message")
            #    sid = gr.Dropdown(label="So-VITS说话人", value="speaker0")
            sid = gr.Dropdown(value="1071", visible=False)
            
            #choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
            model_branch = Newget_model_info("G_106400.pth")
            #config_choice.change(load_json_encoder, [config_choice], [config_info])
            #refresh.click(refresh_options,[],[choice_ckpt,config_choice,cluster_choice,diff_choice,diff_config_choice])

            gr.Markdown(value="""
                请稍等片刻,模型加载大约需要10秒。后续操作不需要重新加载模型
                """)
            with gr.Tabs():
                with gr.TabItem("单个音频上传"):
                    vc_input3 = gr.Audio(label="单个音频上传")
                with gr.TabItem("批量音频上传"):
                    vc_batch_files = gr.Files(label="批量音频上传", file_types=["audio"], file_count="multiple")
                with gr.TabItem("文字转语音(实验性)"):
                    gr.Markdown("""
                        文字转语音(TTS)说明:使用edge_tts服务生成音频,并转换为So-VITS模型音色。可以在输入文字中使用标点符号简单控制情绪
                        zh-CN-XiaoyiNeural:中文女声
                        zh-CN-YunxiNeural: 中文男声
                        ja-JP-NanamiNeural:日文女声
                        ja-JP-KeitaNeural:日文男声
                        zh-CN-liaoning-XiaobeiNeural:东北话女声
                        zh-CN-shaanxi-XiaoniNeural: 陕西话女声
                        zh-HK-HiuMaanNeural: 粤语女声
                        zh-HK-WanLungNeural: 粤语男声
                    """)
                    with gr.Row():
                        text_input = gr.Textbox(label = "在此输入需要转译的文字(建议打开自动f0预测)",)
                        tts_spk = gr.Dropdown(label = "选择原始音频音色(来自微软TTS)", choices=["zh-CN-XiaoyiNeural", "zh-CN-YunxiNeural", "zh-CN-liaoning-XiaobeiNeural", "zh-CN-shaanxi-XiaoniNeural", "zh-HK-HiuMaanNeural", "zh-HK-WanLungNeural", "ja-JP-NanamiNeural", "ja-JP-KeitaNeural"], value = "zh-CN-XiaoyiNeural")
                    #with gr.Row():
                    #    tts_rate = gr.Slider(label = "TTS语音变速(倍速)", minimum = 0, maximum = 3, value = 1)
                    #    tts_volume = gr.Slider(label = "TTS语音音量(相对值)", minimum = 0, maximum = 1.5, value = 1)

            with gr.Row():
                auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会跑调)", value=False)
                f0_predictor = gr.Radio(label="f0预测器选择(如遇哑音可以更换f0预测器解决,crepe为原F0使用均值滤波器)", choices=["pm","crepe","harvest","dio"], value="pm")
                cr_threshold = gr.Number(label="F0过滤阈值,只有使用crepe时有效. 数值范围从0-1. 降低该值可减少跑调概率,但会增加哑音", value=0.05)
            with gr.Row():
                vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0)
                cluster_ratio = gr.Number(label="聚类模型/特征检索混合比例,0-1之间,默认为0不启用聚类或特征检索,能提升音色相似度,但会导致咬字下降", value=0)
                k_step = gr.Slider(label="浅扩散步数,只有使用了扩散模型才有效,步数越大越接近扩散模型的结果", value=100, minimum = 1, maximum = 1000)
            with gr.Row():
                enhancer_adaptive_key = gr.Number(label="使NSF-HIFIGAN增强器适应更高的音域(单位为半音数)|默认为0", value=0,interactive=True)
                slice_db = gr.Number(label="切片阈值", value=-50)
                cl_num = gr.Number(label="音频自动切片,0为按默认方式切片,单位为秒/s,爆显存可以设置此处强制切片", value=0)
            with gr.Accordion("高级设置(一般不需要动)", open=False):
                noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4)
                pad_seconds = gr.Number(label="推理音频pad秒数,由于未知原因开头结尾会有异响,pad一小段静音段后就不会出现", value=0.5)
                lg_num = gr.Number(label="两端音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,注意,该设置会影响推理速度,单位为秒/s", value=1)
                lgr_num = gr.Number(label="自动音频切片后,需要舍弃每段切片的头尾。该参数设置交叉长度保留的比例,范围0-1,左开右闭", value=0.75,interactive=True)
                second_encoding = gr.Checkbox(label = "二次编码,浅扩散前会对原始音频进行二次编码,玄学选项,效果时好时差,默认关闭", value=False)
                loudness_envelope_adjustment = gr.Number(label="输入源响度包络替换输出响度包络融合比例,越靠近1越使用输出响度包络", value = 0)
                use_spk_mix = gr.Checkbox(label="动态声线融合,暂时没做完", value=False, interactive=False)
            with gr.Row():
                vc_submit = gr.Button("音频转换", variant="primary")
                vc_batch_submit = gr.Button("批量转换", variant="primary")
                vc_tts_submit = gr.Button("文本转语音", variant="primary")
            vc_output1 = gr.Textbox(label="Output Message")
            vc_output2 = gr.Audio(label="Output Audio")

        def Newvc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment, clus2):
            """Convert one uploaded clip, (re)loading the model when needed.

            Args:
                input_audio: ``(sampling_rate, samples)`` pair, or ``None``
                    when nothing was uploaded.
                clus2: Selected cluster model; the model is reloaded whenever
                    it differs from the module-level ``loaded`` value.
                Remaining arguments are forwarded unchanged to ``vc_infer``.

            Returns:
                A ``(message, output_path)`` tuple; ``output_path`` is
                ``None`` when there is no input audio or no model.

            Raises:
                gr.Error: Wraps any failure during loading or inference.
            """
            global model, loaded
            # NOTE(review): the fixed file name is shared by concurrent
            # requests — confirm vc_infer does not depend on it before
            # switching to a unique temporary file.
            temp_path = "temp.wav"
            try:
                # Loading happens inside the try so that a failure is shown
                # as a gr.Error like every other problem in this callback.
                if loaded != clus2:
                    Newload_model_func("G_106400.pth",clus2,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device)
                    loaded = clus2
                if input_audio is None:
                    return "You need to upload an audio", None
                if model is None:
                    return "You need to upload an model", None
                sampling_rate, audio = input_audio
                try:
                    sf.write(temp_path, audio, sampling_rate, format="wav")
                    output_file_path = vc_infer(sid, audio, temp_path, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment)
                finally:
                    # Remove the scratch file even when inference fails.
                    if os.path.exists(temp_path):
                        os.remove(temp_path)
                return "Success", output_file_path
            except Exception as e:
                if debug:
                    traceback.print_exc()
                raise gr.Error(e)
        
        #loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
        # Wire the three conversion buttons to their callbacks.
        # Single-clip conversion additionally passes cluster_choice, which
        # Newvc_fn uses to decide whether the model must be (re)loaded.
        vc_submit.click(Newvc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment,cluster_choice], [vc_output1, vc_output2])
        # NOTE(review): the batch and TTS callbacks do not receive
        # cluster_choice; presumably they rely on a model that is already
        # loaded — confirm against vc_batch_fn / tts_fn.
        # Batch conversion only reports a message (no audio output).
        vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
        # Text-to-speech takes the text and the TTS voice ahead of the shared options.
        vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
        '''
        with gr.TabItem("训练"):
            gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下,确认放置正确后点击下方获取数据集名称""")
            raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
            get_raw_dirs=gr.Button("识别数据集", variant="primary")
            gr.Markdown(value="""确认数据集正确识别后请选择训练使用的特征编码器和f0预测器,**如果要训练扩散模型,请选择Vec768l12或hubertsoft,并确保So-VITS和扩散模型使用同一个编码器**""")
            with gr.Row():
                gr.Markdown(value="""**vec256l9**: ContentVec(256Layer9),旧版本叫v1,So-VITS-SVC 4.0的基础版本,**暂不支持扩散模型**
                                **vec768l12**: 特征输入更换为ContentVec的第12层Transformer输出,模型理论上会更加还原训练集音色
                                **hubertsoft**: So-VITS-SVC 3.0使用的编码器,咬字更为准确,但可能存在多说话人音色泄露问题
                                **whisper-ppg**: 来自OpenAI,咬字最为准确,但和Hubertsoft一样存在多说话人音色泄露,且显存占用和训练时间有明显增加。**暂不支持扩散模型**
                """)
                gr.Markdown(value="""**crepe**: 抗噪能力最强,但预处理速度慢(不过如果你的显卡很强的话速度会很快)
                                **pm**: 预处理速度快,但抗噪能力较弱
                                **dio**: 先前版本预处理默认使用的f0预测器
                                **harvest**: 有一定抗噪能力,预处理显存占用友好,速度比较慢
                """)
            with gr.Row():
                branch_selection = gr.Radio(label="选择训练使用的编码器", choices=["vec256l9","vec768l12","hubertsoft","whisper-ppg"], value="vec768l12", interactive=True)
                f0_predictor_selection = gr.Radio(label="选择训练使用的f0预测器", choices=["crepe","pm","dio","harvest"], value="crepe", interactive=True)
                use_diff = gr.Checkbox(label="是否使用浅扩散模型,如要训练浅扩散模型请勾选此项", value=True)
                vol_aug=gr.Checkbox(label="是否启用响度嵌入和音量增强,启用后可以根据输入源控制输出响度,但对数据集质量的要求更高。**仅支持vec768l12编码器**", value=False)
            with gr.Row():
                skip_loudnorm = gr.Checkbox(label="是否跳过响度匹配,如果你已经用音频处理软件做过响度匹配,请勾选此处")
                num_processes = gr.Slider(label="预处理使用的CPU线程数,可以大幅加快预处理速度,但线程数过大容易爆显存,建议12G显存设置为2", minimum=1, maximum=multiprocessing.cpu_count(), value=1, step=1)
            with gr.Row():
                raw_preprocess=gr.Button("数据预处理", variant="primary")
                regenerate_config_btn=gr.Button("重新生成配置文件", variant="primary")
            preprocess_output=gr.Textbox(label="预处理输出信息,完成后请检查一下是否有报错信息,如无则可以进行下一步", max_lines=999)
            clear_preprocess_output=gr.Button("清空输出信息")
            with gr.Group():
                gr.Markdown(value="""填写训练设置和超参数""")
                with gr.Row():
                    gr.Textbox(label="当前使用显卡信息", value=gpu_info)
                    gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID(0,1,2...)", value=gpus, interactive=True)
                with gr.Row():
                    log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value=sovits_params['log_interval'])
                    eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value=sovits_params['eval_interval'])
                    keep_ckpts=gr.Textbox(label="仅保留最新的X个模型,超出该数字的旧模型会被删除。设置为0则永不删除", value=sovits_params['keep_ckpts'])
                with gr.Row():
                    batch_size=gr.Textbox(label="批量大小,每步取多少条数据进行训练,大batch有助于训练但显著增加显存占用。6G显存建议设定为4", value=sovits_params['batch_size'])
                    lr=gr.Textbox(label="学习率,一般不用动,批量大小较大时可以适当增大学习率,但强烈不建议超过0.0002,有炸炉风险", value=sovits_params['learning_rate'])
                    fp16_run=gr.Checkbox(label="是否使用fp16混合精度训练,fp16训练可能降低显存占用和训练时间,但对模型质量的影响尚未查证", value=sovits_params['fp16_run'])
                    all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中,硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用,能显著加快训练速度", value=sovits_params['all_in_mem'])
                with gr.Row():
                    gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致,确认无误后点击写入配置文件,然后就可以开始训练了")
                    speakers=gr.Textbox(label="说话人列表")
            with gr.Accordion(label = "扩散模型配置(训练扩散模型需要写入此处)", open=True):
                with gr.Row():
                    diff_num_workers = gr.Number(label="num_workers, 如果你的电脑配置较高,可以将这里设置为0加快训练速度", value=diff_params['num_workers'])
                    diff_cache_all_data = gr.Checkbox(label="是否缓存数据,启用后可以加快训练速度,关闭后可以节省显存或内存,但会减慢训练速度", value=diff_params['cache_all_data'])
                    diff_cache_device = gr.Radio(label="若启用缓存数据,使用显存(cuda)还是内存(cpu)缓存,如果显卡显存充足,选择cuda以加快训练速度", choices=["cuda","cpu"], value=diff_params['cache_device'])
                    diff_amp_dtype = gr.Radio(label="训练数据类型,fp16可能会有更快的训练速度,前提是你的显卡支持", choices=["fp32","fp16"], value=diff_params['amp_dtype'])
                with gr.Row():
                    diff_batch_size = gr.Number(label="批量大小(batch_size),根据显卡显存设置,小显存适当降低该项,6G显存可以设定为48,但该数值不要超过数据集总数量的1/4", value=diff_params['diff_batch_size'])
                    diff_lr = gr.Number(label="学习率(一般不需要动)", value=diff_params['diff_lr'])
                    diff_interval_log = gr.Number(label="每隔多少步(steps)生成一次评估日志", value = diff_params['diff_interval_log'])
                    diff_interval_val = gr.Number(label="每隔多少步(steps)验证并保存一次模型,如果你的批量大小较大,可以适当减少这里的数字,但不建议设置为1000以下", value=diff_params['diff_interval_val'])
                    diff_force_save = gr.Number(label="每隔多少步强制保留模型,只有该步数的倍数保存的模型会被保留,其余会被删除。设置为与验证步数相同的值则每个模型都会被保留", value=diff_params['diff_force_save'])
            with gr.Row():
                save_params=gr.Button("将当前设置保存为默认设置", variant="primary")
                write_config=gr.Button("写入配置文件", variant="primary")
            write_config_output=gr.Textbox(label="输出信息")

            gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
                **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
                关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
                """)
            with gr.Row():
                with gr.Column():
                    start_training=gr.Button("从头开始训练", variant="primary")
                    training_output=gr.Textbox(label="训练输出信息")
                with gr.Column():
                    continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
                    continue_training_output=gr.Textbox(label="训练输出信息")
            with gr.Row():
                with gr.Column():
                    diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
                    diff_training_output=gr.Textbox(label="训练输出信息")
                with gr.Column():
                    diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
                    diff_continue_training_output=gr.Textbox(label="训练输出信息") 
            with gr.Accordion(label = "聚类、特征检索训练", open=False):
                with gr.Row():               
                    with gr.Column():
                        kmeans_button=gr.Button("训练聚类模型", variant="primary")
                        kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
                        kmeans_output=gr.Textbox(label="训练输出信息")
                    with gr.Column():
                        index_button=gr.Button("训练特征检索模型", variant="primary")
                        index_output=gr.Textbox(label="训练输出信息")
            '''
        with gr.TabItem("小工具/实验室特性"):
            gr.Markdown(value="""
                        ### So-vits-svc 4.1 小工具/实验室特性
                        提供了一些有趣或实用的小工具,可以自行探索
                        """)
            with gr.Tabs():
                with gr.TabItem("静态声线融合"):
                    gr.Markdown(value="""
                        <font size=2> 介绍:该功能可以将多个声音模型合成为一个声音模型(多个模型参数的凸组合或线性组合),从而制造出现实中不存在的声线 
                                          注意:
                                          1.该功能仅支持单说话人的模型
                                          2.如果强行使用多说话人模型,需要保证多个模型的说话人数量相同,这样可以混合同一个SpaekerID下的声音
                                          3.保证所有待混合模型的config.json中的model字段是相同的
                                          4.输出的混合模型可以使用待合成模型的任意一个config.json,但聚类模型将不能使用
                                          5.批量上传模型的时候最好把模型放到一个文件夹选中后一起上传
                                          6.混合比例调整建议大小在0-100之间,也可以调为其他数字,但在线性组合模式下会出现未知的效果
                                          7.混合完毕后,文件将会保存在项目根目录中,文件名为output.pth
                                          8.凸组合模式会将混合比例执行Softmax使混合比例相加为1,而线性组合模式不会
                        </font>
                        """)
                    mix_model_path = gr.Files(label="选择需要混合模型文件")
                    mix_model_upload_button = gr.UploadButton("选择/追加需要混合模型文件", file_count="multiple")
                    mix_model_output1 = gr.Textbox(
                                            label="混合比例调整,单位/%",
                                            interactive = True
                                         )
                    mix_mode = gr.Radio(choices=["凸组合", "线性组合"], label="融合模式",value="凸组合",interactive = True)
                    mix_submit = gr.Button("声线融合启动", variant="primary")
                    mix_model_output2 = gr.Textbox(
                                            label="Output Message"
                                         )
                with gr.TabItem("onnx转换"):
                    gr.Markdown(value="""
                        提供了将.pth模型(批量)转换为.onnx模型的功能
                        源项目本身自带转换的功能,但不支持批量,操作也不够简单,这个工具可以支持在WebUI中以可视化的操作方式批量转换.onnx模型
                        有人可能会问,转.onnx模型有什么作用呢?相信我,如果你问出了这个问题,说明这个工具你应该用不上

                        ### Step 1: 
                        在整合包根目录下新建一个"checkpoints"文件夹,将pth模型和对应的json配置文件按目录分别放置到checkpoints文件夹下
                        看起来应该像这样:
                        checkpoints
                        ├───xxxx
                        │   ├───xxxx.pth
                        │   └───xxxx.json
                        ├───xxxx
                        │   ├───xxxx.pth
                        │   └───xxxx.json
                        └───……
                        """)
                    pth_dir_msg = gr.Textbox(label="识别待转换模型", placeholder="请将模型和配置文件按上述说明放置在正确位置")
                    pth_dir_identify_btn = gr.Button("识别", variant="primary")
                    gr.Markdown(value="""
                        ### Step 2:
                        识别正确后点击下方开始转换,转换一个模型可能需要一分钟甚至更久
                        """)
                    pth2onnx_btn = gr.Button("开始转换", variant="primary")
                    pth2onnx_msg = gr.Textbox(label="输出信息")

                with gr.TabItem("智能音频切片"):
                    gr.Markdown(value="""
                        该工具可以实现对音频的切片,无需调整参数即可完成符合要求的数据集制作。
                        数据集要求的音频切片约在2-15秒内,用传统的Slicer-GUI切片工具需要精准调参和二次切片才能符合要求,该工具省去了上述繁琐的操作,只要上传原始音频即可一键制作数据集。
                    """)
                    with gr.Row():
                        # Raw string: the example Windows path contains backslashes
                        # that are not valid escape sequences in a normal literal.
                        raw_audio_path = gr.Textbox(label="原始音频文件夹", placeholder=r"包含所有待切片音频的文件夹,示例: D:\干声\speakers")
                        load_raw_audio_btn = gr.Button("加载原始音频", variant = "primary")
                    load_raw_audio_output = gr.Textbox(label = "输出信息")
                    raw_audio_dataset = gr.Textbox(label = "音频列表", value = "")
                    slicer_output_dir = gr.Textbox(label = "输出目录", placeholder = "选择输出目录")
                    with gr.Row():
                        process_method = gr.Radio(label = "对过短音频的处理方式", choices = ["丢弃","将过短音频整合为长音频"], value = "丢弃")
                        max_sec = gr.Number(label = "切片的最长秒数", value = 15)
                        min_sec = gr.Number(label = "切片的最短秒数", value = 2)
                    slicer_btn = gr.Button("开始切片", variant = "primary")
                    slicer_output_msg = gr.Textbox(label = "输出信息")

                    mix_model_path.change(updata_mix_info,[mix_model_path],[mix_model_output1])
                    mix_model_upload_button.upload(upload_mix_append_file, [mix_model_upload_button,mix_model_path], [mix_model_path,mix_model_output1])
                    mix_submit.click(mix_submit_click, [mix_model_output1,mix_mode], [mix_model_output2])
                    pth_dir_identify_btn.click(pth_identify, [], [pth_dir_msg])
                    pth2onnx_btn.click(onnx_export, [], [pth2onnx_msg])
                    load_raw_audio_btn.click(load_raw_audio, [raw_audio_path], [load_raw_audio_output, raw_audio_dataset])
                    slicer_btn.click(slicer_fn, [raw_audio_path, slicer_output_dir, process_method, max_sec, min_sec], [slicer_output_msg])
                
                with gr.TabItem("模型压缩工具"):
                    gr.Markdown(value="""
                        该工具可以实现对模型的体积压缩,在**不影响模型推理功能**的情况下,将原本约600M的So-VITS模型压缩至约200M, 大大减少了硬盘的压力。
                        **注意:压缩后的模型将无法继续训练,请在确认封炉后再压缩。**
                        将模型文件放置在logs/44k下,然后选择需要压缩的模型
                    """)
                    model_to_compress = gr.Dropdown(label="模型选择", choices=ckpt_list, value="")
                    compress_model_btn = gr.Button("压缩模型", variant="primary")
                    compress_model_output = gr.Textbox(label="输出信息", value="")

                    compress_model_btn.click(model_compression, [model_to_compress], [compress_model_output])
        """
        get_raw_dirs.click(load_raw_dirs,[],[raw_dirs_list])
        raw_preprocess.click(dataset_preprocess,[branch_selection, f0_predictor_selection, use_diff, vol_aug, skip_loudnorm, num_processes],[preprocess_output, speakers])
        regenerate_config_btn.click(regenerate_config,[branch_selection, vol_aug],[preprocess_output])
        clear_preprocess_output.click(clear_output,[],[preprocess_output])
        save_params.click(save_default_settings, [log_interval,eval_interval,keep_ckpts,batch_size,lr,fp16_run,all_in_mem,diff_num_workers,diff_cache_all_data,diff_cache_device,diff_amp_dtype,diff_batch_size,diff_lr,diff_interval_log,diff_interval_val,diff_force_save], [write_config_output])
        write_config.click(config_fn,[log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem, diff_num_workers, diff_cache_all_data, diff_batch_size, diff_lr, diff_interval_log, diff_interval_val, diff_cache_device, diff_amp_dtype, diff_force_save],[write_config_output])
        start_training.click(training,[gpu_selection, branch_selection],[training_output])
        diff_training_btn.click(diff_training,[branch_selection],[diff_training_output])
        continue_training_btn.click(continue_training,[gpu_selection, branch_selection],[continue_training_output])
        diff_continue_training_btn.click(diff_continue_training,[branch_selection],[diff_continue_training_output])
        kmeans_button.click(kmeans_training,[kmeans_gpu],[kmeans_output])
        index_button.click(index_training, [], [index_output])
        """
    # WebUI settings panel: a single checkbox initialised from the
    # module-level ``debug`` flag.
    with gr.Tabs():
        with gr.Row(variant="panel"):
            with gr.Column():
                gr.Markdown(value="""
                    <font size=2> WebUI设置</font>
                    """)
                debug_button = gr.Checkbox(label="Debug模式,反馈BUG需要打开,打开后控制台可以显示具体错误提示", value=debug)

        # debug_change is called without inputs or outputs on every toggle;
        # presumably it flips the module-level debug flag — confirm against
        # its definition.
        debug_button.change(debug_change,[],[])

        # Enable the request queue with the given limits and start the
        # server; this call runs while still inside the ``with app:`` block.
        app.queue(concurrency_count=1022, max_size=2044).launch()