Spaces:

TroglodyteDerivations
/

Abby_Cadabby_WAV2VEC2_Alignment

Sleeping

TroglodyteDerivations committed on May 30, 2024

Commit

338ef81

verified ·

1 Parent(s): 4aa8752

Updated lines 342-361

Files changed (1) hide show

app.py CHANGED Viewed

@@ -334,10 +334,10 @@ def plot_alignments(trellis, segments, word_segments, waveform=np.random.randn(1
 #plot_alignments(trellis, segments, word_segments, waveform, sample_rate)
-st.pyplot(plot_alignments(trellis, segments, word_segments, waveform=np.random.randn(1024), sample_rate=44100))
 # Part N: Display Segment
-sample_rate = 44100
 def display_segment(i):
     word = word_segments[i]
@@ -345,22 +345,27 @@ def display_segment(i):
     x1 = int(word.end * sample_rate)
     print(f"{word.label} ({word.score:.2f}): {x0 / sample_rate:.3f} - {x1 / sample_rate:.3f} sec")
     segment = waveform[x0:x1]
     return segment.numpy()  # Return the audio data as a numpy array
-st.write(display_segment(3))
-# Part O: Audio generation for each segment
-st.write('Abby Cadabby Transcript:')
-# Display the audio in the Streamlit app
-st.audio(SPEECH_FILE, format="audio/wav")
 # Display the full audio file
 st.audio(waveform.numpy(), format="audio/wav", sample_rate=sample_rate)
 # Display the audio for each segment
 for i in range(len(word_segments)):
     segment_audio = display_segment(i)
-    st.audio(segment_audio, format="audio/wav", sample_rate=sample_rate)
 st.image('Abby_and_Prince.jpg')

 #plot_alignments(trellis, segments, word_segments, waveform, sample_rate)
+st.pyplot(plot_alignments(trellis, segments, word_segments, waveform=np.random.randn(1024), sample_rate=16000))
 # Part N: Display Segment
+sample_rate = 16000
 def display_segment(i):
     word = word_segments[i]
     x1 = int(word.end * sample_rate)
     print(f"{word.label} ({word.score:.2f}): {x0 / sample_rate:.3f} - {x1 / sample_rate:.3f} sec")
     segment = waveform[x0:x1]
+    # Ensure the segment is a 1D array
+    segment = segment.squeeze()
     return segment.numpy()  # Return the audio data as a numpy array
 # Display the full audio file
 st.audio(waveform.numpy(), format="audio/wav", sample_rate=sample_rate)
 # Display the audio for each segment
 for i in range(len(word_segments)):
     segment_audio = display_segment(i)
+    # Ensure the segment_audio is a 1D array before passing to st.audio
+    if segment_audio.ndim == 2:
+        segment_audio = segment_audio.squeeze()
+    st.audio(segment_audio.astype('float32'), format="audio/wav", sample_rate=sample_rate)
+st.write(display_segment(3))
+# Part O: Audio generation for each segment
+st.write('Abby Cadabby Transcript:')
+# Display the audio in the Streamlit app
+st.audio(SPEECH_FILE, format="audio/wav")
 st.image('Abby_and_Prince.jpg')