aigmixer committed on
Commit
f6a94c1
1 Parent(s): 4236dbe

back to file-based audio output

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -13,27 +13,38 @@ def synthesize_speech(text):
13
  # Check for NSFW content
14
  nsfw_result = nsfw_detector(text)
15
  if nsfw_result[0]['label'] == 'NSFW':
16
- yield "NSFW content detected. Cannot process."
17
- return
18
 
19
  model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
20
  config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
21
  voice = PiperVoice.load(model_path, config_path)
22
 
23
- # Synthesize speech and stream audio
24
- for audio_chunk in voice.synthesize(text, chunk_size=2048):
25
- yield audio_chunk.tobytes()
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # Using Gradio Blocks
28
  with gr.Blocks(theme=gr.themes.Base()) as blocks:
29
  gr.Markdown("# Text to Speech Synthesizer")
30
  gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
31
  input_text = gr.Textbox(label="Input Text")
32
- output_audio = gr.Audio(label="Synthesized Speech", type="numpy", streaming=True)
 
33
  submit_button = gr.Button("Synthesize")
34
 
35
- submit_button.click(synthesize_speech, inputs=input_text, outputs=output_audio)
36
 
37
  # Run the app
38
- blocks.launch()
39
-
 
13
  # Check for NSFW content
14
  nsfw_result = nsfw_detector(text)
15
  if nsfw_result[0]['label'] == 'NSFW':
16
+ return "NSFW content detected. Cannot process.", None
 
17
 
18
  model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
19
  config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
20
  voice = PiperVoice.load(model_path, config_path)
21
 
22
+ # Create an in-memory buffer for the WAV file
23
+ buffer = BytesIO()
24
+ with wave.open(buffer, 'wb') as wav_file:
25
+ wav_file.setframerate(voice.config.sample_rate)
26
+ wav_file.setsampwidth(2) # 16-bit
27
+ wav_file.setnchannels(1) # mono
28
+
29
+ # Synthesize speech
30
+ voice.synthesize(text, wav_file)
31
+
32
+ # Convert buffer to NumPy array for Gradio output
33
+ buffer.seek(0)
34
+ audio_data = np.frombuffer(buffer.read(), dtype=np.int16)
35
+
36
+ return audio_data.tobytes(), None
37
 
38
  # Using Gradio Blocks
39
  with gr.Blocks(theme=gr.themes.Base()) as blocks:
40
  gr.Markdown("# Text to Speech Synthesizer")
41
  gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
42
  input_text = gr.Textbox(label="Input Text")
43
+ output_audio = gr.Audio(label="Synthesized Speech", type="numpy")
44
+ output_text = gr.Textbox(label="Output Text", visible=False) # This is the new text output component
45
  submit_button = gr.Button("Synthesize")
46
 
47
+ submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio, output_text])
48
 
49
  # Run the app
50
+ blocks.launch()