Tonic committed on
Commit
c4b4e50
•
1 Parent(s): f9aebc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -21,7 +21,8 @@ You can also use 🌬️💬📝WhisperSpeech by cloning this space. 🧬🔬
21
  We're **celebrating the release of the whisperspeech** at [the LAION community, if you love open source ai learn more here : https://laion.ai/](https://laion.ai/) big thanks to the folks at huggingface for the community grant 🤗
22
 
23
  ### How to Use
24
- Input text with the language identifiers provided to create a multilingual speech. Optionally you can add an audiosample to make a voice print. Scroll down and try the api <3 Gradio.
 
25
  """
26
 
27
  # text examples=["<en> Hello, how are you? <fr> Bonjour, comment ça va?", "<de> Guten Tag <it> Buongiorno <jp> こんにちは"]
@@ -46,9 +47,11 @@ def generate_segment_audio(text, lang, speaker_url, pipe):
46
  audio_np = audio_data_resampled.cpu().numpy()
47
  return audio_np
48
 
 
49
  def concatenate_audio_segments(segments):
50
- max_len = max([seg.shape[0] for seg in segments])
51
- padded_segments = [np.pad(seg, (0, max_len - seg.shape[0]), 'constant') for seg in segments]
 
52
  concatenated_audio = np.concatenate(padded_segments, axis=0)
53
  concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
54
  return np.asarray(concatenated_audio, dtype=np.float32)
 
21
  We're **celebrating the release of the whisperspeech** at [the LAION community, if you love open source ai learn more here : https://laion.ai/](https://laion.ai/) big thanks to the folks at huggingface for the community grant 🤗
22
 
23
  ### How to Use
24
+ Input text with the language identifiers provided to create a multilingual speech. Optionally you can add an audiosample to make a voice print.Scroll down and try the api <3 Gradio.
25
+ This space runs on ZeroGPU, so **you need to be patient** while you acquire the GPU and load the model the first time you make a request !
26
  """
27
 
28
  # text examples=["<en> Hello, how are you? <fr> Bonjour, comment ça va?", "<de> Guten Tag <it> Buongiorno <jp> こんにちは"]
 
47
  audio_np = audio_data_resampled.cpu().numpy()
48
  return audio_np
49
 
50
def concatenate_audio_segments(segments):
    """Join audio segments end to end into one peak-normalised float32 array.

    Any segment with more than one dimension is reduced to its first column
    (``seg[:, 0]`` -- presumably the first audio channel; confirm against the
    caller).  Every segment is then zero-padded at its end to the length of
    the longest segment before the segments are concatenated, so shorter
    segments are followed by silence.  This padding is not optimal, but it is
    kept because it is the existing behaviour.

    Args:
        segments: Iterable of NumPy arrays holding the samples of each
            segment along axis 0.

    Returns:
        A 1-D ``np.float32`` array scaled so that its largest absolute sample
        is 1.0.  All-zero (or zero-length) audio is returned unscaled rather
        than being divided by zero.

    Raises:
        ValueError: If ``segments`` contains no arrays.
    """
    mono_segments = [seg[:, 0] if seg.ndim > 1 else seg for seg in segments]
    if not mono_segments:
        raise ValueError("concatenate_audio_segments() requires at least one segment")

    max_len = max(seg.shape[0] for seg in mono_segments)
    padded_segments = [
        np.pad(seg, (0, max_len - seg.shape[0]), 'constant')
        for seg in mono_segments
    ]
    concatenated_audio = np.concatenate(padded_segments, axis=0)

    # Guard the normalisation: silent or zero-length audio has no peak to
    # scale by, and dividing by zero would fill the output with NaNs.
    peak = np.max(np.abs(concatenated_audio)) if concatenated_audio.size else 0
    if peak != 0:
        concatenated_audio = concatenated_audio / peak
    return np.asarray(concatenated_audio, dtype=np.float32)