File size: 1,330 Bytes
6fd8e87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bdf63e1
6fd8e87
 
 
509f98e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import random

from azure.cognitiveservices.speech import (
    AudioDataStream,
    ResultReason,
    SpeechConfig,
    SpeechSynthesisOutputFormat,
    SpeechSynthesizer,
)

# Azure Speech resource credentials.
# The region is the identifier of the resource's location, e.g. "westeurope".
subscription_key = "[AZURE_SPEECH_API_KEY]"
subscription_region = "[AZURE_SPEECH_API_REGION]"

# Path of the SSML document that gets synthesized.
# Edit that file to tune pitch, pronunciation, speaking rate, volume, voice,
# language and more. Available neural voices are listed at:
# https://github.com/MicrosoftDocs/azure-docs/blob/master/articles/cognitive-services/Speech-Service/language-support.md#neural-voices
input_file = "ssml.xml"

# Name of the SpeechSynthesisOutputFormat member to use; it is looked up by
# name when the speech config is built. Valid member names are listed at:
# https://docs.microsoft.com/nl-nl/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.speechsynthesisoutputformat?view=azure-python
audio_format = "Riff24Khz16BitMonoPcm"

# Output WAV file name (relative to the current working directory).
# A random five-digit number is embedded so repeated runs are unlikely to
# overwrite an earlier result.
output_file = f"file-{random.randint(10000, 99999)}.wav"

# Build the Speech service client configuration and select the audio encoding
# of the synthesized result by looking up the enum member named in audio_format.
speech_config = SpeechConfig(subscription=subscription_key, region=subscription_region)
speech_config.set_speech_synthesis_output_format(
    SpeechSynthesisOutputFormat[audio_format]
)
# audio_config=None: no audio output device is attached; the synthesized audio
# is taken from the result object and written to disk below.
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)

# Read the SSML document. The context manager closes the file handle, and the
# explicit encoding keeps non-ASCII text intact regardless of the OS locale.
with open(input_file, "r", encoding="utf-8") as ssml_file:
    ssml = ssml_file.read()

# speak_ssml_async() returns a future; .get() blocks until synthesis finishes.
result = synthesizer.speak_ssml_async(ssml).get()

# Fail loudly instead of writing an empty/invalid WAV file when the service
# rejected the request (bad key or region, malformed SSML, network error, ...).
if result.reason != ResultReason.SynthesizingAudioCompleted:
    details = result.cancellation_details
    error_text = details.error_details if details else "no details available"
    raise RuntimeError(f"Speech synthesis failed ({result.reason}): {error_text}")

# Wrap the in-memory audio of the result and save it as a WAV file.
stream = AudioDataStream(result)
stream.save_to_wav_file(output_file)