ZiyuG committed on
Commit
d650e96
1 Parent(s): b95957d

Create ms_tts.py

Browse files
Files changed (1) hide show
  1. ms_tts.py +75 -0
ms_tts.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import cv2
2
+ # import numpy as np
3
+ # import os
4
+
5
+ # # 添加更多常见的视频编码器
6
+ # codecs = {
7
+ # 'X264': 'H.264 / AVC',
8
+ # 'MP4V': 'MPEG-4',
9
+ # 'AVC1': 'H.264 / AVC',
10
+ # 'DIVX': 'MPEG-4',
11
+ # 'XVID': 'MPEG-4',
12
+ # 'MJPG': 'Motion JPEG',
13
+ # 'VP80': 'VP8',
14
+ # 'VP90': 'VP9',
15
+ # 'HEVC': 'H.265 / HEVC',
16
+ # 'PIM1': 'MPEG-1',
17
+ # 'MPG1': 'MPEG-1',
18
+ # 'MPG2': 'MPEG-2'
19
+ # }
20
+
21
+ # # 输出MP4视频文件路径
22
+ # output_file = 'test_video.mp4'
23
+
24
+ # # 定义一个函数来测试是否支持某种编码器并输出 mp4 文件
25
+ # def test_codec(codec_fourcc, codec_name):
26
+ # # 使用 cv2.VideoWriter 创建视频文件对象
27
+ # fourcc = cv2.VideoWriter_fourcc(*codec_fourcc)
28
+ # out = cv2.VideoWriter(output_file, fourcc, 25.0, (640, 480))
29
+
30
+ # # 检查是否成功初始化
31
+ # if out.isOpened():
32
+ # print(f"[SUPPORTED] Codec {codec_name} ({codec_fourcc}) is supported.")
33
+
34
+ # # 写入一些帧,生成简单的测试视频
35
+ # for i in range(100):
36
+ # # Create a solid test frame that alternates between black (even i) and white (odd i)
37
+ # frame = (255 * (i % 2) * np.ones((480, 640, 3), dtype=np.uint8))
38
+ # out.write(frame)
39
+
40
+ # out.release() # 关闭视频写入器
41
+
42
+ # # 检查文件是否生成
43
+ # if os.path.exists(output_file):
44
+ # print(f"Video file {output_file} successfully created with codec {codec_fourcc}.")
45
+ # else:
46
+ # print(f"Failed to create video file {output_file}.")
47
+
48
+ # # 删除测试视频文件
49
+ # os.remove(output_file)
50
+ # # else:
51
+ # # print(f"[NOT SUPPORTED] Codec {codec_name} ({codec_fourcc}) is not supported.")
52
+
53
+ # # 测试所有编码器
54
+ # for fourcc, name in codecs.items():
55
+ # test_codec(fourcc, name)
56
+
57
+
58
+
59
+ import azure.cognitiveservices.speech as speechsdk
60
+
61
def ms_tts_gen(text, audio_path, *, subscription=None, region=None,
               voice="zh-HK-HiuGaaiNeural"):
    """Synthesize ``text`` to an MP3 file at ``audio_path`` with Azure Speech.

    Args:
        text: Plain text to be spoken.
        audio_path: Destination file name handed to the SDK's audio output
            configuration.
        subscription: Azure Speech resource key. When omitted, the
            ``SPEECH_KEY`` environment variable is used. The key is
            intentionally NOT embedded in source code.
        region: Azure Speech resource region. When omitted, the
            ``SPEECH_REGION`` environment variable is used, falling back to
            ``"eastasia"``.
        voice: Neural voice name; the default is a Cantonese female voice.

    Raises:
        ValueError: If no subscription key is supplied or configured.
        RuntimeError: If the service cancels the synthesis request (for
            example because of an invalid key or a connectivity problem).
    """
    import os

    # Resolve credentials before touching the SDK so that a configuration
    # mistake fails fast with an actionable message.
    key = subscription or os.environ.get("SPEECH_KEY")
    if not key:
        raise ValueError(
            "Azure Speech subscription key is missing: pass `subscription=` "
            "or set the SPEECH_KEY environment variable."
        )
    service_region = region or os.environ.get("SPEECH_REGION", "eastasia")

    # Build the speech configuration and select the voice.
    speech_config = speechsdk.SpeechConfig(subscription=key, region=service_region)
    speech_config.speech_synthesis_voice_name = voice

    # Request MP3 output (16 kHz, 32 kbit/s, mono).
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
    )

    # Route the synthesized audio straight into the target file.
    audio_config = speechsdk.audio.AudioOutputConfig(filename=audio_path)

    synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config, audio_config=audio_config
    )
    result = synthesizer.speak_text_async(text).get()

    # A canceled result means no usable audio was produced; surface it
    # instead of silently leaving a missing or empty file behind.
    if result.reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        raise RuntimeError(
            f"Speech synthesis canceled: {details.reason}; {details.error_details}"
        )
75
+