Spaces:

united-avatars
/

linly

Sleeping

linly / GPT_SoVITS /inference_gui.py

David Victor

init

bc3753a about 2 months ago

12.8 kB

	import os
	import sys
	import traceback

	import soundfile as sf
	from PyQt5.QtCore import QEvent
	from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QLineEdit, QPushButton, QTextEdit
	from PyQt5.QtWidgets import QGridLayout, QVBoxLayout, QWidget, QFileDialog, QStatusBar, QComboBox

	from tools.i18n.i18n import I18nAuto
	i18n = I18nAuto()

	from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav


	class GPTSoVITSGUI(QMainWindow):
	    """Main window of the GPT-SoVITS desktop inference tool.

	    The window collects a GPT checkpoint, a SoVITS checkpoint, a reference
	    audio clip with its transcript and language, the text to synthesize with
	    its language, and an output folder.  Pressing the synthesize button loads
	    both checkpoints, runs ``get_tts_wav`` and writes the last audio chunk it
	    yields to ``output.wav`` inside the chosen folder.
	    """

	    def __init__(self):
	        """Create the window and build all of its widgets."""
	        super().__init__()

	        self.init_ui()

	    @staticmethod
	    def _make_path_input():
	        """Return a read-only line edit showing the drag-or-browse placeholder."""
	        line_edit = QLineEdit()
	        line_edit.setPlaceholderText("拖拽或选择文件")
	        line_edit.setReadOnly(True)
	        return line_edit

	    def init_ui(self):
	        """Build the widgets, wire their signals and lay them out."""
	        self.setWindowTitle('GPT-SoVITS GUI')
	        self.setGeometry(800, 450, 950, 850)

	        # Only properties supported by Qt Style Sheets are used here; the
	        # CSS-only ``box-shadow`` rule was dropped because Qt does not
	        # implement it.
	        self.setStyleSheet("""
	            QWidget {
	                background-color: #a3d3b1;
	            }

	            QTabWidget::pane {
	                background-color: #a3d3b1;
	            }

	            QTabWidget::tab-bar {
	                alignment: left;
	            }

	            QTabBar::tab {
	                background: #8da4bf;
	                color: #ffffff;
	                padding: 8px;
	            }

	            QTabBar::tab:selected {
	                background: #2a3f54;
	            }

	            QLabel {
	                color: #000000;
	            }

	            QPushButton {
	                background-color: #4CAF50;
	                color: white;
	                padding: 8px;
	                border: 1px solid #4CAF50;
	                border-radius: 4px;
	            }

	            QPushButton:hover {
	                background-color: #45a049;
	                border: 1px solid #45a049;
	            }
	        """)

	        license_text = (
	            "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. "
	            "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
	        license_label = QLabel(license_text)
	        license_label.setWordWrap(True)

	        # --- model checkpoints -------------------------------------------
	        self.GPT_model_label = QLabel("选择GPT模型:")
	        self.GPT_model_input = self._make_path_input()
	        self.GPT_model_button = QPushButton("选择GPT模型文件")
	        self.GPT_model_button.clicked.connect(self.select_GPT_model)

	        self.SoVITS_model_label = QLabel("选择SoVITS模型:")
	        self.SoVITS_model_input = self._make_path_input()
	        self.SoVITS_model_button = QPushButton("选择SoVITS模型文件")
	        self.SoVITS_model_button.clicked.connect(self.select_SoVITS_model)

	        # --- reference audio, its transcript and language ------------------
	        self.ref_audio_label = QLabel("上传参考音频:")
	        self.ref_audio_input = self._make_path_input()
	        self.ref_audio_button = QPushButton("选择音频文件")
	        self.ref_audio_button.clicked.connect(self.select_ref_audio)

	        self.ref_text_label = QLabel("参考音频文本:")
	        self.ref_text_input = self._make_path_input()
	        self.ref_text_button = QPushButton("上传文本")
	        self.ref_text_button.clicked.connect(self.upload_ref_text)

	        self.language_label = QLabel("参考音频语言:")
	        self.language_combobox = QComboBox()
	        self.language_combobox.addItems(["中文", "英文", "日文"])

	        # --- text to synthesize and its language ---------------------------
	        self.target_text_label = QLabel("合成目标文本:")
	        self.target_text_input = self._make_path_input()
	        self.target_text_button = QPushButton("上传文本")
	        self.target_text_button.clicked.connect(self.upload_target_text)

	        self.language_label_02 = QLabel("合成音频语言:")
	        self.language_combobox_02 = QComboBox()
	        self.language_combobox_02.addItems(["中文", "英文", "日文"])

	        # --- output folder and log -----------------------------------------
	        self.output_label = QLabel("输出音频路径:")
	        self.output_input = self._make_path_input()
	        self.output_button = QPushButton("选择文件夹")
	        self.output_button.clicked.connect(self.select_output_path)

	        self.output_text = QTextEdit()
	        self.output_text.setReadOnly(True)

	        self.add_drag_drop_events([
	            self.GPT_model_input,
	            self.SoVITS_model_input,
	            self.ref_audio_input,
	            self.ref_text_input,
	            self.target_text_input,
	            self.output_input,
	        ])

	        self.synthesize_button = QPushButton("合成")
	        self.synthesize_button.clicked.connect(self.synthesize)

	        self.status_bar = QStatusBar()

	        main_layout = QVBoxLayout()

	        # The grid is nested into ``main_layout`` below.  It must not also be
	        # installed on the QMainWindow itself: a main window already owns a
	        # layout and exposes its content through the central widget.
	        input_layout = QGridLayout()
	        input_layout.setSpacing(10)

	        input_layout.addWidget(license_label, 0, 0, 1, 3)

	        input_layout.addWidget(self.GPT_model_label, 1, 0)
	        input_layout.addWidget(self.GPT_model_input, 2, 0, 1, 2)
	        input_layout.addWidget(self.GPT_model_button, 2, 2)

	        input_layout.addWidget(self.SoVITS_model_label, 3, 0)
	        input_layout.addWidget(self.SoVITS_model_input, 4, 0, 1, 2)
	        input_layout.addWidget(self.SoVITS_model_button, 4, 2)

	        input_layout.addWidget(self.ref_audio_label, 5, 0)
	        input_layout.addWidget(self.ref_audio_input, 6, 0, 1, 2)
	        input_layout.addWidget(self.ref_audio_button, 6, 2)

	        input_layout.addWidget(self.language_label, 7, 0)
	        input_layout.addWidget(self.language_combobox, 8, 0, 1, 1)
	        input_layout.addWidget(self.ref_text_label, 9, 0)
	        input_layout.addWidget(self.ref_text_input, 10, 0, 1, 2)
	        input_layout.addWidget(self.ref_text_button, 10, 2)

	        input_layout.addWidget(self.language_label_02, 11, 0)
	        input_layout.addWidget(self.language_combobox_02, 12, 0, 1, 1)
	        input_layout.addWidget(self.target_text_label, 13, 0)
	        input_layout.addWidget(self.target_text_input, 14, 0, 1, 2)
	        input_layout.addWidget(self.target_text_button, 14, 2)

	        input_layout.addWidget(self.output_label, 15, 0)
	        input_layout.addWidget(self.output_input, 16, 0, 1, 2)
	        input_layout.addWidget(self.output_button, 16, 2)

	        main_layout.addLayout(input_layout)

	        output_layout = QVBoxLayout()
	        output_layout.addWidget(self.output_text)
	        main_layout.addLayout(output_layout)

	        main_layout.addWidget(self.synthesize_button)

	        main_layout.addWidget(self.status_bar)

	        self.central_widget = QWidget()
	        self.central_widget.setLayout(main_layout)
	        self.setCentralWidget(self.central_widget)

	    def dragEnterEvent(self, event):
	        """Accept a drag that enters the window itself when it carries URLs."""
	        if event.mimeData().hasUrls():
	            event.acceptProposedAction()

	    def dropEvent(self, event):
	        """Put file(s) dropped on the window itself into the reference-audio field.

	        Several files are shown as one comma-separated string.
	        """
	        if not event.mimeData().hasUrls():
	            return
	        file_paths = [url.toLocalFile() for url in event.mimeData().urls()]
	        # Joining a single-element list yields that element unchanged, so one
	        # call covers both the single-file and the multi-file case.
	        self.update_ref_audio(", ".join(file_paths))

	    def add_drag_drop_events(self, widgets):
	        """Enable drops on every widget in ``widgets`` and route them to ``eventFilter``."""
	        for widget in widgets:
	            widget.setAcceptDrops(True)
	            widget.installEventFilter(self)

	    def eventFilter(self, obj, event):
	        """Handle file drags and drops for the widgets registered by ``add_drag_drop_events``.

	        A single file dropped on one of the two text fields is read and its
	        content becomes the field text; in every other case the dropped
	        path(s) are written into the field.  Handled events return ``True``
	        so the line edit's own drag/drop handlers do not process them again.
	        """
	        event_type = event.type()

	        if event_type == QEvent.DragEnter and event.mimeData().hasUrls():
	            event.acceptProposedAction()
	            return True

	        if event_type == QEvent.Drop and event.mimeData().hasUrls():
	            file_paths = [url.toLocalFile() for url in event.mimeData().urls()]
	            is_text_field = obj is self.ref_text_input or obj is self.target_text_input
	            if is_text_field:
	                if len(file_paths) == 1:
	                    self._load_text_file(obj, file_paths[0])
	                else:
	                    self.status_bar.showMessage("请只拖入一个文本文件", 5000)
	            else:
	                self.update_input_paths(obj, ", ".join(file_paths))
	            event.acceptProposedAction()
	            return True

	        return super().eventFilter(obj, event)

	    def select_GPT_model(self):
	        """Ask for a ``.ckpt`` file and show its path in the GPT model field."""
	        file_path, _ = QFileDialog.getOpenFileName(self, "选择GPT模型文件", "", "GPT Files (*.ckpt)")
	        if file_path:
	            self.GPT_model_input.setText(file_path)

	    def select_SoVITS_model(self):
	        """Ask for a ``.pth`` file and show its path in the SoVITS model field."""
	        file_path, _ = QFileDialog.getOpenFileName(self, "选择SoVITS模型文件", "", "SoVITS Files (*.pth)")
	        if file_path:
	            self.SoVITS_model_input.setText(file_path)

	    def select_ref_audio(self):
	        """Ask for an existing ``.wav``/``.mp3`` file and show its path in the reference-audio field."""
	        # A plain open-file dialog: the previous directory-only option and
	        # the filter without ``*`` wildcards did not match a file picker.
	        file_path, _ = QFileDialog.getOpenFileName(self, "选择音频文件", "", "Audio Files (*.wav *.mp3)")
	        if file_path:
	            self.update_ref_audio(file_path)

	    def _load_text_file(self, input_box, file_path):
	        """Read ``file_path`` as UTF-8 and put its content into ``input_box``.

	        On a read or decode error the field is left unchanged and the error
	        is shown in the status bar.
	        """
	        try:
	            with open(file_path, 'r', encoding='utf-8') as file:
	                content = file.read()
	        except (OSError, UnicodeDecodeError) as err:
	            self.status_bar.showMessage("读取文本文件失败：" + str(err), 5000)
	            return
	        input_box.setText(content)

	    def upload_ref_text(self):
	        """Ask for a ``.txt`` file and load its content as the reference transcript."""
	        file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
	        if file_path:
	            # Keep the content in the field: ``synthesize`` reads this field
	            # as the prompt text, so it must not be replaced by the path.
	            self._load_text_file(self.ref_text_input, file_path)

	    def upload_target_text(self):
	        """Ask for a ``.txt`` file and load its content as the text to synthesize."""
	        file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
	        if file_path:
	            # Same reasoning as in upload_ref_text: the field holds the text
	            # itself, not the path of the file it came from.
	            self._load_text_file(self.target_text_input, file_path)

	    def select_output_path(self):
	        """Ask for a folder and show it in the output-path field."""
	        options = QFileDialog.ShowDirsOnly | QFileDialog.DontUseNativeDialog
	        folder_path = QFileDialog.getExistingDirectory(self, "选择文件夹", "", options)
	        if folder_path:
	            self.output_input.setText(folder_path)

	    def update_ref_audio(self, file_path):
	        """Show ``file_path`` in the reference-audio field."""
	        self.ref_audio_input.setText(file_path)

	    def update_input_paths(self, input_box, file_path):
	        """Show ``file_path`` in ``input_box``."""
	        input_box.setText(file_path)

	    def _report(self, message):
	        """Show ``message`` in the status bar for five seconds and append it to the log."""
	        self.status_bar.showMessage(message, 5000)
	        self.output_text.append(message)

	    def synthesize(self):
	        """Run synthesis with the current form values and save the result.

	        Loads both checkpoints, consumes everything ``get_tts_wav`` yields and
	        writes the last ``(sampling_rate, audio_data)`` pair to ``output.wav``
	        in the output folder (the current working directory when that field
	        is empty).  Missing inputs and any failure are reported in the status
	        bar and the log instead of propagating out of the Qt slot.
	        """
	        GPT_model_path = self.GPT_model_input.text()
	        SoVITS_model_path = self.SoVITS_model_input.text()
	        ref_audio_path = self.ref_audio_input.text()
	        language_combobox = i18n(self.language_combobox.currentText())
	        ref_text = self.ref_text_input.text()
	        language_combobox_02 = i18n(self.language_combobox_02.currentText())
	        target_text = self.target_text_input.text()
	        output_path = self.output_input.text()

	        # Refuse to start when an input that synthesis cannot do without is
	        # still empty; name the missing fields by their on-screen labels.
	        required = (
	            (self.GPT_model_label, GPT_model_path),
	            (self.SoVITS_model_label, SoVITS_model_path),
	            (self.ref_audio_label, ref_audio_path),
	            (self.target_text_label, target_text),
	        )
	        missing = [label.text().rstrip(":") for label, value in required if not value]
	        if missing:
	            self._report("请先选择或填写：" + "、".join(missing))
	            return

	        try:
	            change_gpt_weights(gpt_path=GPT_model_path)
	            change_sovits_weights(sovits_path=SoVITS_model_path)

	            synthesis_result = get_tts_wav(ref_wav_path=ref_audio_path,
	                                           prompt_text=ref_text,
	                                           prompt_language=language_combobox,
	                                           text=target_text,
	                                           text_language=language_combobox_02)

	            # Only the final yielded item is saved, so drain the iterator
	            # without keeping earlier items in memory.
	            last_chunk = None
	            for last_chunk in synthesis_result:
	                pass

	            if last_chunk is None:
	                self._report("合成失败：未生成任何音频。")
	                return

	            last_sampling_rate, last_audio_data = last_chunk
	            output_wav_path = os.path.join(output_path, "output.wav")
	            sf.write(output_wav_path, last_audio_data, last_sampling_rate)
	        except Exception as err:
	            # GUI boundary: this method is a Qt slot, so report the failure
	            # to the user with the traceback rather than letting it escape.
	            self.status_bar.showMessage("合成失败：" + str(err), 5000)
	            self.output_text.append("合成失败：\n" + traceback.format_exc())
	            return

	        result = "Audio saved to " + output_wav_path

	        self.status_bar.showMessage("合成完成！输出路径：" + output_wav_path, 5000)
	        self.output_text.append("处理结果：\n" + result)

	def main():
	    """Create the Qt application, show the GUI window and run the event loop.

	    The process exits with the status returned by the event loop.
	    """
	    application = QApplication(sys.argv)
	    window = GPTSoVITSGUI()
	    window.show()
	    exit_code = application.exec_()
	    sys.exit(exit_code)


	# Launch the GUI only when this file is executed as a script.
	if __name__ == '__main__':
	    main()