Spaces:

ZiyuG
/

SignLanguage

Running on A10G

App Files Files Community

ZiyuG committed on Sep 17

Commit

d650e96

•

1 Parent(s): b95957d

Create ms_tts.py

Browse files

Files changed (1) hide show

ms_tts.py +75 -0

ms_tts.py ADDED Viewed

	@@ -0,0 +1,75 @@

+# import cv2
+# import numpy as np
+# import os
+# # Add more common video codecs
+# codecs = {
+#     'X264': 'H.264 / AVC',
+#     'MP4V': 'MPEG-4',
+#     'AVC1': 'H.264 / AVC',
+#     'DIVX': 'MPEG-4',
+#     'XVID': 'MPEG-4',
+#     'MJPG': 'Motion JPEG',
+#     'VP80': 'VP8',
+#     'VP90': 'VP9',
+#     'HEVC': 'H.265 / HEVC',
+#     'PIM1': 'MPEG-1',
+#     'MPG1': 'MPEG-1',
+#     'MPG2': 'MPEG-2'
+# }
+# # Path of the output MP4 video file
+# output_file = 'test_video.mp4'
+# # Define a function that tests whether a codec is supported and writes an mp4 file
+# def test_codec(codec_fourcc, codec_name):
+#     # Create the video writer object with cv2.VideoWriter
+#     fourcc = cv2.VideoWriter_fourcc(*codec_fourcc)
+#     out = cv2.VideoWriter(output_file, fourcc, 25.0, (640, 480))
+#     # Check whether the writer was initialized successfully
+#     if out.isOpened():
+#         print(f"[SUPPORTED] Codec {codec_name} ({codec_fourcc}) is supported.")
+#         # Write a few frames to produce a simple test video
+#         for i in range(100):
+#             # Create a simple test frame: all three channels are equal, so frames alternate between black and white
+#             frame = (255 * (i % 2) * np.ones((480, 640, 3), dtype=np.uint8))
+#             out.write(frame)
+#         out.release()  # Close the video writer
+#         # Check whether the file was created
+#         if os.path.exists(output_file):
+#             print(f"Video file {output_file} successfully created with codec {codec_fourcc}.")
+#         else:
+#             print(f"Failed to create video file {output_file}.")
+#         # Delete the test video file
+#         os.remove(output_file)
+#     # else:
+#         # print(f"[NOT SUPPORTED] Codec {codec_name} ({codec_fourcc}) is not supported.")
+# # Test every codec
+# for fourcc, name in codecs.items():
+#     test_codec(fourcc, name)
+import azure.cognitiveservices.speech as speechsdk
+def ms_tts_gen(text, audio_path):
+    # 创建 Speech 配置
+    speech_config = speechsdk.SpeechConfig(subscription="b45d4ca1f5eb4c96950666eb97b9be60", region="eastasia")
+    speech_config.speech_synthesis_voice_name = 'zh-HK-HiuGaaiNeural'  # 粤语女声
+    # 配置音频输出为 MP3 格式
+    audio_config = speechsdk.audio.AudioOutputConfig(filename=audio_path)
+    # 指定输出格式为 MP3
+    speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
+    # 合成语音并输出到 MP3 文件
+    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
+    synthesizer.speak_text_async(text).get()