DLI-SLQ committed on
Commit
4c3b8f4
·
1 Parent(s): 1dd2e00

adding code comments

Browse files
Files changed (1) hide show
  1. app.py +35 -16
app.py CHANGED
@@ -6,59 +6,78 @@ from huggingface_hub import hf_hub_download
6
  from piper import PiperVoice
7
  from transformers import pipeline
8
 
9
- # Load the NSFW classifier model
 
 
 
 
 
 
 
 
 
10
  nsfw_detector = pipeline("text-classification", model="michellejieli/NSFW_text_classifier")
11
 
12
  def synthesize_speech(text):
13
- # Check for NSFW content
 
 
14
  nsfw_result = nsfw_detector(text)
15
  if nsfw_result[0]['label'] == 'NSFW':
16
- # Path to your default error audio file
17
  error_audio_path = hf_hub_download(repo_id="DLI-SLQ/speaker_01234", filename="error_audio.wav")
18
  with open(error_audio_path, 'rb') as error_audio_file:
19
- error_audio = error_audio_file.read()
20
  return error_audio, "NSFW content detected. Cannot process."
21
 
22
-
 
23
  model_path = hf_hub_download(repo_id="DLI-SLQ/speaker_01234", filename="speaker__01234_model.onnx")
24
  config_path = hf_hub_download(repo_id="DLI-SLQ/speaker_01234", filename="speaker__1234_model.onnx.json")
25
 
26
-
27
  voice = PiperVoice.load(model_path, config_path)
28
 
29
- # Create an in-memory buffer for the WAV file
30
  buffer = BytesIO()
31
  with wave.open(buffer, 'wb') as wav_file:
 
32
  wav_file.setframerate(voice.config.sample_rate)
33
  wav_file.setsampwidth(2) # 16-bit
34
  wav_file.setnchannels(1) # mono
35
 
36
- # Synthesize speech
37
  voice.synthesize(text, wav_file)
38
 
39
- # Convert buffer to NumPy array for Gradio output
40
  buffer.seek(0)
41
  audio_data = np.frombuffer(buffer.read(), dtype=np.int16)
42
  return audio_data.tobytes(), None
43
-
44
 
45
- # Gradio Interface
46
  with gr.Blocks(theme=gr.themes.Base()) as blocks:
 
47
  gr.Markdown("# Text to Speech Synthesizer")
48
  gr.Markdown("Enter text to synthesize it into speech using models from the State Library of Queensland's collection using Piper.")
 
 
49
  input_text = gr.Textbox(label="Input Text")
50
  output_audio = gr.Audio(label="Synthesized Speech", type="numpy")
51
- output_text = gr.Textbox(label="Output Text", visible=True) # Make this visible for error messages
52
- submit_button = gr.Button("Synthesize")
53
 
 
54
  def process_and_output(text):
55
  audio, message = synthesize_speech(text)
56
  if message:
57
- return None, message # Return None for audio and the error message
 
58
  else:
59
- return audio, None # Return the audio data and None for the message
 
60
 
 
 
61
  submit_button.click(process_and_output, inputs=input_text, outputs=[output_audio, output_text])
62
 
63
- # Run the app
64
  blocks.launch()
 
6
  from piper import PiperVoice
7
  from transformers import pipeline
8
 
9
+ # Import necessary libraries:
10
+ # gradio for creating the web interface,
11
+ # wave for handling WAV audio format,
12
+ # numpy for numerical operations,
13
+ # BytesIO for in-memory byte handling,
14
+ # huggingface_hub for downloading models from the Hugging Face Hub,
15
+ # PiperVoice for the text-to-speech functionality,
16
+ # pipeline from transformers for the NSFW classifier.
17
+
18
+ # Load the NSFW classifier model using Hugging Face's pipeline
19
  nsfw_detector = pipeline("text-classification", model="michellejieli/NSFW_text_classifier")
20
 
21
# Cache for the loaded Piper voice so the model is downloaded and parsed
# only once per process instead of on every synthesis request.
_VOICE_CACHE = {}


def synthesize_speech(text):
    """Synthesize speech from *text* using a Piper voice model.

    Returns a tuple ``(audio_bytes, message)``:
      * on success, ``audio_bytes`` is raw 16-bit mono PCM and ``message``
        is ``None``;
      * if NSFW content is detected, the bytes of a pre-recorded error WAV
        file are returned together with an explanatory message.
    """
    # Reject NSFW input before doing any expensive synthesis work.
    nsfw_result = nsfw_detector(text)
    if nsfw_result[0]['label'] == 'NSFW':
        # Serve a pre-recorded error clip instead of synthesizing.
        error_audio_path = hf_hub_download(repo_id="DLI-SLQ/speaker_01234", filename="error_audio.wav")
        with open(error_audio_path, 'rb') as error_audio_file:
            error_audio = error_audio_file.read()
        return error_audio, "NSFW content detected. Cannot process."

    # Load (and cache) the Piper voice. Previously the model and its config
    # were re-downloaded from the Hub and re-parsed on every single call.
    voice = _VOICE_CACHE.get("voice")
    if voice is None:
        model_path = hf_hub_download(repo_id="DLI-SLQ/speaker_01234", filename="speaker__01234_model.onnx")
        config_path = hf_hub_download(repo_id="DLI-SLQ/speaker_01234", filename="speaker__1234_model.onnx.json")
        voice = PiperVoice.load(model_path, config_path)
        _VOICE_CACHE["voice"] = voice

    # Render the speech into an in-memory WAV file.
    buffer = BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)   # 16-bit samples
        wav_file.setnchannels(1)   # mono
        voice.synthesize(text, wav_file)

    # Re-open the buffer through the wave module so only the PCM frames are
    # extracted. BUG FIX: the previous code fed the entire buffer (including
    # the 44-byte WAV header) to np.frombuffer, which decoded the header
    # bytes as int16 audio and produced a click at the start of playback.
    buffer.seek(0)
    with wave.open(buffer, 'rb') as wav_in:
        frames = wav_in.readframes(wav_in.getnframes())
    audio_data = np.frombuffer(frames, dtype=np.int16)
    return audio_data.tobytes(), None
 
56
 
57
# Build and launch the Gradio web UI for the synthesizer.
with gr.Blocks(theme=gr.themes.Base()) as blocks:
    # Page title and a short usage description.
    gr.Markdown("# Text to Speech Synthesizer")
    gr.Markdown("Enter text to synthesize it into speech using models from the State Library of Queensland's collection using Piper.")

    # Interface widgets: text input, synthesized-audio output, and a textbox
    # that surfaces error messages (e.g. NSFW rejections).
    input_text = gr.Textbox(label="Input Text")
    output_audio = gr.Audio(label="Synthesized Speech", type="numpy")
    output_text = gr.Textbox(label="Output Text", visible=True)  # Visible for error messages

    def process_and_output(text):
        # Route the synthesis result to the correct output widget: an error
        # message suppresses the audio, and a successful synthesis clears
        # the message box.
        audio_bytes, error_message = synthesize_speech(text)
        if error_message:
            return None, error_message
        return audio_bytes, None

    # Wire the submit button to the processing function.
    submit_button = gr.Button("Synthesize")
    submit_button.click(process_and_output, inputs=input_text, outputs=[output_audio, output_text])

# Start the web application.
blocks.launch()