Mahiruoshi committed on
Commit
50ea4f2
·
1 Parent(s): 00d287a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -11
app.py CHANGED
@@ -16,9 +16,10 @@ logging.basicConfig(
16
  logger = logging.getLogger(__name__)
17
 
18
  import warnings
19
-
20
  warnings.filterwarnings("ignore", category=UserWarning, module="gradio.blocks")
21
 
 
 
22
  from datetime import datetime
23
  import re
24
  import torch
@@ -26,12 +27,16 @@ import utils
26
  from infer import infer, latest_version, get_net_g
27
  import gradio as gr
28
  import numpy as np
29
- from tools.sentence import extrac, is_japanese, is_chinese
30
- import sys, os
31
  import math
32
 
 
 
33
  from tools.translate import translate
34
 
 
 
35
  net_g = None
36
 
37
  cara_list = ["ひまり","たえ","彩","日菜","美咲","ましろ","燐子","香子","珠緒","たえ"]
@@ -52,11 +57,15 @@ BandList = {
52
  "西克菲尔特音乐学院":["晶","未知留","八千代","栞","美帆"]
53
  }
54
 
55
- if sys.platform == "darwin" and torch.backends.mps.is_available():
56
- device = "mps"
57
- os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
58
- else:
59
- device = "cpu"
 
 
 
 
60
 
61
  def generate_audio(
62
  text,
@@ -67,7 +76,8 @@ def generate_audio(
67
  speaker,
68
  language,
69
  ):
70
- audio_list = []
 
71
  with torch.no_grad():
72
  if language == 'Auto':
73
  language = "EN"
@@ -76,7 +86,7 @@ def generate_audio(
76
  elif is_chinese(text):
77
  language = "ZH"
78
  current_time = datetime.now()
79
- print(str(current_time)+':'+str(speaker)+ text+":"+language)
80
  audio = infer(
81
  text,
82
  sdp_ratio=sdp_ratio,
@@ -136,6 +146,77 @@ def tts_fn(
136
  audio_fin.append(silence_data)
137
  return (hps.data.sampling_rate, np.concatenate(audio_fin))
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  def loadmodel(model):
140
  _ = net_g.eval()
141
  _ = utils.load_checkpoint(model, net_g, None, skip_optimizer=True)
@@ -231,5 +312,61 @@ if __name__ == "__main__":
231
  outputs=[audio_output],
232
  )
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  print("推理页面已开启!")
235
- app.launch()
 
16
  logger = logging.getLogger(__name__)
17
 
18
  import warnings
 
19
  warnings.filterwarnings("ignore", category=UserWarning, module="gradio.blocks")
20
 
21
+ import shutil
22
+
23
  from datetime import datetime
24
  import re
25
  import torch
 
27
  from infer import infer, latest_version, get_net_g
28
  import gradio as gr
29
  import numpy as np
30
+ from tools.sentence import extrac, is_japanese, is_chinese, seconds_to_ass_time, extract_text_from_file, remove_annotations
31
+ import sys
32
  import math
33
 
34
+ from scipy.io.wavfile import write
35
+
36
  from tools.translate import translate
37
 
38
+ import random
39
+
40
  net_g = None
41
 
42
  cara_list = ["ひまり","たえ","彩","日菜","美咲","ましろ","燐子","香子","珠緒","たえ"]
 
57
  "西克菲尔特音乐学院":["晶","未知留","八千代","栞","美帆"]
58
  }
59
 
60
# Choose the torch device used for inference.
# Priority: CUDA GPU, then Apple's MPS backend on macOS, then CPU.
# The hasattr() guard keeps start-up from raising AttributeError on torch
# builds that do not ship the `torch.backends.mps` module.
if torch.cuda.is_available():
    device = "cuda:0"
elif (
    sys.platform == "darwin"
    and hasattr(torch.backends, "mps")
    and torch.backends.mps.is_available()
):
    device = "mps"
else:
    device = "cpu"
69
 
70
  def generate_audio(
71
  text,
 
76
  speaker,
77
  language,
78
  ):
79
+ if len(text) < 2:
80
+ return
81
  with torch.no_grad():
82
  if language == 'Auto':
83
  language = "EN"
 
86
  elif is_chinese(text):
87
  language = "ZH"
88
  current_time = datetime.now()
89
+ print(str(current_time)+':'+str(speaker)+":"+ text+":"+language)
90
  audio = infer(
91
  text,
92
  sdp_ratio=sdp_ratio,
 
146
  audio_fin.append(silence_data)
147
  return (hps.data.sampling_rate, np.concatenate(audio_fin))
148
 
149
def generate_audio_and_srt_for_group(group, outputPath, group_index, sampling_rate, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale,spealerList,silenceTime):
    """Synthesise one group of sentences into a WAV file plus an ASS subtitle file.

    Each sentence is expected in the form ``"<name>|<text>"``; the part before
    the first ``|`` selects the voice and the part after the last ``|`` is
    spoken. Files are written to ``outputPath`` as
    ``audiobook_part_<group_index>.wav`` and ``audiobook_part_<group_index>.ass``.

    Args:
        group: Iterable of sentence strings to synthesise, in order.
        outputPath: Existing directory that receives the two output files.
        group_index: Number used in the output file names.
        sampling_rate: Sample rate used for the WAV file, the silence padding
            and the subtitle timing.
        speaker: Default speaker from the caller.
            NOTE(review): this value is immediately replaced by a random entry
            of ``cara_list`` (behaviour kept from the original) - confirm that
            this is intended.
        sdp_ratio, noise_scale, noise_scale_w, length_scale: Forwarded
            unchanged to ``generate_audio``.
        spealerList: Newline-separated ``"<model speaker>|<name in text>"``
            table mapping names found in the text to model speakers. Lines
            without a ``|`` are ignored.
        silenceTime: Pause, in seconds, appended after every sentence.

    Returns:
        ``(hps.data.sampling_rate, samples)`` with all sentences and pauses
        concatenated, or ``None`` when no sentence could be synthesised (in
        which case no files are written).
    """
    # Local import: the module-level ``import sys, os`` was reduced to
    # ``import sys``, so ``os`` is not guaranteed to be in scope here.
    import os

    audio_fin = []
    ass_entries = []
    start_time = 0
    speaker = random.choice(cara_list)
    ass_header = """[Script Info]
; 我没意见
Title: Audiobook
ScriptType: v4.00+
WrapStyle: 0
PlayResX: 640
PlayResY: 360
ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,1,2,10,10,10,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    # Parse the speaker table once instead of once per sentence. Entries
    # without a "|" (for example the "无" placeholder) are skipped rather than
    # raising IndexError and causing every sentence to be dropped.
    alias_to_speaker = {}
    for entry in spealerList.split("\n"):
        parts = entry.split("|")
        if len(parts) >= 2:
            alias_to_speaker[parts[1]] = parts[0]

    for sentence in group:
        FakeSpeaker = sentence.split("|")[0]
        print(FakeSpeaker)
        # A name that is itself a model speaker is used directly; an alias
        # from the table takes precedence over that. The chosen speaker
        # deliberately carries over to following sentences.
        if FakeSpeaker in hps.data.spk2id:
            speaker = FakeSpeaker
        if FakeSpeaker in alias_to_speaker:
            speaker = alias_to_speaker[FakeSpeaker]
        if sentence == '\n':
            continue

        text = remove_annotations(sentence.split("|")[-1]).replace(" ", "")
        try:
            audio = generate_audio(text, speaker=speaker, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, language='Auto')
        except Exception as exc:
            # Best effort: one failing sentence must not abort the whole
            # group, but the failure is reported instead of hidden.
            print(f"Skipping sentence, synthesis failed: {exc!r}")
            continue
        if audio is None:
            # generate_audio returns nothing for text shorter than 2 chars.
            continue

        # Pad with exactly `silenceTime` seconds at the output sample rate so
        # the audio stays aligned with the subtitle timestamps below.
        silence_frames = int(silenceTime * sampling_rate)
        silence_data = np.zeros((silence_frames,), dtype=audio.dtype)
        audio_fin.append(audio)
        audio_fin.append(silence_data)

        duration = len(audio) / sampling_rate
        end_time = start_time + duration + silenceTime
        ass_entries.append("Dialogue: 0,{},{},".format(seconds_to_ass_time(start_time), seconds_to_ass_time(end_time)) + "Default,,0,0,0,,{}".format(sentence.replace("|",":")))
        start_time = end_time

    if not audio_fin:
        # np.concatenate([]) raises ValueError; report and return nothing.
        print(f"No audio generated for group {group_index}; nothing written.")
        return None

    combined = np.concatenate(audio_fin)
    wav_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.wav')
    ass_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.ass')

    write(wav_filename, sampling_rate, combined)

    with open(ass_filename, 'w', encoding='utf-8') as f:
        f.write(ass_header + '\n'.join(ass_entries))
    return (hps.data.sampling_rate, combined)
200
+
201
def audiobook(inputFile, groupsize, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale,spealerList,silenceTime,filepath):
    """Turn an uploaded book/text file into grouped audio and subtitle files.

    The text is extracted from ``inputFile``, split into sentences, and handed
    to ``generate_audio_and_srt_for_group`` in slices of ``groupsize``
    sentences. With CUDA available every group is rendered into ``filepath``;
    without CUDA only the first group is rendered, into ``"books"``.

    Args:
        inputFile: Uploaded file object; only its ``.name`` path is used.
        groupsize: Number of sentences per output file.
        speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale,
        silenceTime: Forwarded unchanged to ``generate_audio_and_srt_for_group``.
        spealerList: Speaker mapping table; an empty string is replaced by the
            placeholder ``"无"``.
        filepath: Output directory used when CUDA is available.

    Returns:
        The result of the last rendered group (the first group when CUDA is
        unavailable), or ``None`` when the input contains no sentences.
    """
    # Local import: the module-level ``import sys, os`` was reduced to
    # ``import sys``, so ``os`` is not guaranteed to be in scope here.
    import os

    use_cuda = torch.cuda.is_available()
    # The path comes from a multi-line text box, so drop stray whitespace.
    directory_path = filepath.strip() if use_cuda else "books"

    # WARNING(review): this wipes the target directory, and on CUDA machines
    # the path is user-supplied. Consider restricting it to a dedicated root.
    if os.path.exists(directory_path):
        shutil.rmtree(directory_path)

    os.makedirs(directory_path)
    text = extract_text_from_file(inputFile.name)
    sentences = extrac(text)
    # Sliders may deliver floats; range() and slicing need a positive int.
    GROUP_SIZE = max(1, int(groupsize))
    if spealerList == "":
        spealerList = "无"

    # Pre-set so that an input without sentences returns None instead of
    # raising UnboundLocalError at the final return.
    result = None
    for group_index, start in enumerate(range(0, len(sentences), GROUP_SIZE), start=1):
        group = sentences[start:start + GROUP_SIZE]
        result = generate_audio_and_srt_for_group(group,directory_path, group_index, 44100, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale,spealerList,silenceTime)
        if not use_cuda:
            # Without CUDA only the first group is rendered.
            return result
    return result
219
+
220
  def loadmodel(model):
221
  _ = net_g.eval()
222
  _ = utils.load_checkpoint(model, net_g, None, skip_optimizer=True)
 
312
  outputs=[audio_output],
313
  )
314
 
315
+ with gr.Tab('拓展功能'):
316
+ with gr.Row():
317
+ with gr.Column():
318
+ gr.Markdown(
319
+ f"从 <a href='https://nijigaku.top/2023/10/03/BangDreamTTS/'>我的博客站点</a> 查看自制galgame使用说明\n</a>"
320
+ )
321
+ inputFile = gr.UploadButton(label="上传txt(可设置角色对应表)、epub或mobi文件")
322
+ groupSize = gr.Slider(
323
+ minimum=10, maximum=1000 if torch.cuda.is_available() else 50,value = 50, step=1, label="单个音频文件包含的最大字数"
324
+ )
325
+ silenceTime = gr.Slider(
326
+ minimum=0, maximum=1, value=0.5, step=0.1, label="句子的间隔"
327
+ )
328
+ filepath = gr.TextArea(
329
+ label="本地合成时的音频存储文件夹(会清空文件夹警告)",
330
+ value = "D:/audiobook/book1",
331
+ )
332
+ spealerList = gr.TextArea(
333
+ label="角色对应表(example)",
334
+ placeholder="左边是你想要在每一句话合成中用到的speaker(见角色清单)右边是你上传文本时分隔符左边设置的说话人:{ChoseSpeakerFromConfigList1}|{SeakerInUploadText1}\n{ChoseSpeakerFromConfigList2}|{SeakerInUploadText2}\n{ChoseSpeakerFromConfigList3}|{SeakerInUploadText3}\n",
335
+ value = "ましろ|真白\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子",
336
+ )
337
+ speaker = gr.Dropdown(
338
+ choices=speakers, value = "ましろ", label="选择默认说话人"
339
+ )
340
+ with gr.Column():
341
+ sdp_ratio = gr.Slider(
342
+ minimum=0, maximum=1, value=0.2, step=0.01, label="SDP/DP混合比"
343
+ )
344
+ noise_scale = gr.Slider(
345
+ minimum=0.1, maximum=2, value=0.6, step=0.01, label="感情调节"
346
+ )
347
+ noise_scale_w = gr.Slider(
348
+ minimum=0.1, maximum=2, value=0.8, step=0.01, label="音素长度"
349
+ )
350
+ length_scale = gr.Slider(
351
+ minimum=0.1, maximum=2, value=1, step=0.01, label="生成长度"
352
+ )
353
+ LastAudioOutput = gr.Audio(label="当使用cuda时才能在本地文件夹浏览全部文件")
354
+ btn2 = gr.Button("点击生成", variant="primary")
355
+ btn2.click(
356
+ audiobook,
357
+ inputs=[
358
+ inputFile,
359
+ groupSize,
360
+ speaker,
361
+ sdp_ratio,
362
+ noise_scale,
363
+ noise_scale_w,
364
+ length_scale,
365
+ spealerList,
366
+ silenceTime,
367
+ filepath
368
+ ],
369
+ outputs=[LastAudioOutput],
370
+ )
371
  print("推理页面已开启!")
372
+ app.launch()