TroglodyteDerivations
committed on
Updated lines 342-361
Browse files
app.py
CHANGED
@@ -334,10 +334,10 @@ def plot_alignments(trellis, segments, word_segments, waveform=np.random.randn(1
|
|
334 |
|
335 |
|
336 |
#plot_alignments(trellis, segments, word_segments, waveform, sample_rate)
|
337 |
-
st.pyplot(plot_alignments(trellis, segments, word_segments, waveform=np.random.randn(1024), sample_rate=
|
338 |
|
339 |
# Part N: Display Segment
|
340 |
-
sample_rate =
|
341 |
|
342 |
def display_segment(i):
|
343 |
word = word_segments[i]
|
@@ -345,22 +345,27 @@ def display_segment(i):
|
|
345 |
x1 = int(word.end * sample_rate)
|
346 |
print(f"{word.label} ({word.score:.2f}): {x0 / sample_rate:.3f} - {x1 / sample_rate:.3f} sec")
|
347 |
segment = waveform[x0:x1]
|
|
|
|
|
348 |
return segment.numpy() # Return the audio data as a numpy array
|
349 |
|
350 |
-
st.write(display_segment(3))
|
351 |
-
|
352 |
-
# Part O: Audio generation for each segment
|
353 |
-
st.write('Abby Cadabby Transcript:')
|
354 |
-
# Display the audio in the Streamlit app
|
355 |
-
st.audio(SPEECH_FILE, format="audio/wav")
|
356 |
-
|
357 |
# Display the full audio file
|
358 |
st.audio(waveform.numpy(), format="audio/wav", sample_rate=sample_rate)
|
359 |
|
360 |
# Display the audio for each segment
|
361 |
for i in range(len(word_segments)):
|
362 |
segment_audio = display_segment(i)
|
363 |
-
st.audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
364 |
|
365 |
|
366 |
st.image('Abby_and_Prince.jpg')
|
|
|
334 |
|
335 |
|
336 |
#plot_alignments(trellis, segments, word_segments, waveform, sample_rate)
|
337 |
+
st.pyplot(plot_alignments(trellis, segments, word_segments, waveform=np.random.randn(1024), sample_rate=16000))
|
338 |
|
339 |
# Part N: Display Segment
|
340 |
+
sample_rate = 16000
|
341 |
|
342 |
def display_segment(i):
|
343 |
word = word_segments[i]
|
|
|
345 |
x1 = int(word.end * sample_rate)
|
346 |
print(f"{word.label} ({word.score:.2f}): {x0 / sample_rate:.3f} - {x1 / sample_rate:.3f} sec")
|
347 |
segment = waveform[x0:x1]
|
348 |
+
# Ensure the segment is a 1D array
|
349 |
+
segment = segment.squeeze()
|
350 |
return segment.numpy() # Return the audio data as a numpy array
|
351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
352 |
# Display the full audio file
|
353 |
st.audio(waveform.numpy(), format="audio/wav", sample_rate=sample_rate)
|
354 |
|
355 |
# Display the audio for each segment
|
356 |
for i in range(len(word_segments)):
|
357 |
segment_audio = display_segment(i)
|
358 |
+
# Ensure the segment_audio is a 1D array before passing to st.audio
|
359 |
+
if segment_audio.ndim == 2:
|
360 |
+
segment_audio = segment_audio.squeeze()
|
361 |
+
st.audio(segment_audio.astype('float32'), format="audio/wav", sample_rate=sample_rate)
|
362 |
+
|
363 |
+
st.write(display_segment(3))
|
364 |
+
|
365 |
+
# Part O: Audio generation for each segment
|
366 |
+
st.write('Abby Cadabby Transcript:')
|
367 |
+
# Display the audio in the Streamlit app
|
368 |
+
st.audio(SPEECH_FILE, format="audio/wav")
|
369 |
|
370 |
|
371 |
st.image('Abby_and_Prince.jpg')
|