mohammedriza-rahman committed on
Commit
0848b21
·
verified ·
1 Parent(s): b0f03b3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import queue
import wave

import numpy as np
import streamlit as st
import torch
from streamlit_webrtc import WebRtcMode, webrtc_streamer
from transformers import Wav2Vec2Model, Wav2Vec2Processor
8
+
9
# Load the Wav2Vec 2.0 processor and model.
# Streamlit re-executes this script on every user interaction, so the loader
# is wrapped in st.cache_resource to instantiate the checkpoint only once per
# server process instead of on every rerun.
MODEL_NAME = "facebook/wav2vec2-base-960h"


@st.cache_resource
def _load_wav2vec2():
    """Return the ``(processor, model)`` pair loaded from ``MODEL_NAME``."""
    loaded_processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
    loaded_model = Wav2Vec2Model.from_pretrained(MODEL_NAME)
    return loaded_processor, loaded_model


processor, model = _load_wav2vec2()
12
+
13
+ # Function to generate embeddings
14
def generate_embedding(audio, samplerate):
    """Return a time-averaged Wav2Vec 2.0 embedding for ``audio``.

    Args:
        audio: Audio samples, passed unchanged to the module-level
            ``processor``.
        samplerate: Sampling rate of ``audio`` in Hz, forwarded to the
            processor as ``sampling_rate``.

    Returns:
        The model's ``last_hidden_state`` averaged over dim 1 (the time axis),
        computed with gradients disabled.

    Raises:
        ValueError: If ``audio`` contains no samples.
    """
    # Fail early with a clear message instead of letting an empty waveform
    # reach the model and surface as an obscure downstream error.
    if np.size(audio) == 0:
        raise ValueError("audio is empty; cannot compute an embedding")

    input_values = processor(
        audio,
        sampling_rate=samplerate,
        return_tensors="pt",
        padding=True,
    ).input_values
    with torch.no_grad():
        embeddings = model(input_values).last_hidden_state
    return embeddings.mean(dim=1)  # Mean across time
19
+
20
# Streamlit Interface
st.title("Live Audio Recording and Embedding with Wav2Vec 2.0")
st.write("Record your audio using the browser and generate embeddings.")

# WebRTC audio recording (browser microphone -> server, no media sent back).
# `mode` is passed as a WebRtcMode member rather than the string "SENDONLY":
# streamlit-webrtc declares the parameter as WebRtcMode, and a plain string
# does not compare equal to the enum member.
webrtc_ctx = webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDONLY,
    media_stream_constraints={"audio": True, "video": False},
    async_processing=False,
)
31
+
32
def _frame_to_mono_float(frame):
    """Convert one received audio frame to a mono float32 array.

    The frame's own format metadata decides how samples are laid out, instead
    of assuming a dtype: integer PCM is scaled by the integer type's range,
    and multi-channel audio is averaged down to one channel.
    NOTE(review): assumes signed integer or float sample formats; unsigned
    8-bit PCM would keep a DC offset — confirm it cannot occur here.
    """
    samples = frame.to_ndarray()
    channel_count = max(len(frame.layout.channels), 1)
    if frame.format.is_planar:
        # Planar: one row per channel -> transpose to (samples, channels).
        per_channel = samples.reshape(channel_count, -1).T
    else:
        # Packed: channels interleaved sample by sample.
        per_channel = samples.reshape(-1, channel_count)

    if np.issubdtype(per_channel.dtype, np.integer):
        full_scale = float(np.iinfo(per_channel.dtype).max) + 1.0
        per_channel = per_channel.astype(np.float32) / full_scale
    else:
        per_channel = per_channel.astype(np.float32)
    return per_channel.mean(axis=1)


def _resample_linear(waveform, source_rate, target_rate):
    """Resample a 1-D waveform to ``target_rate`` by linear interpolation.

    This is a dependency-free approximation with no anti-aliasing filter.
    """
    if source_rate == target_rate or waveform.size == 0:
        return waveform
    target_len = max(int(round(waveform.size * target_rate / source_rate)), 1)
    source_times = np.arange(waveform.size) / source_rate
    target_times = np.arange(target_len) / target_rate
    return np.interp(target_times, source_times, waveform).astype(np.float32)


if webrtc_ctx.audio_receiver:
    # get_frames raises queue.Empty when nothing arrives within the timeout;
    # treat that as "no audio yet" rather than crashing the script run.
    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except queue.Empty:
        audio_frames = []

    if not audio_frames:
        st.warning("No audio received yet.")
    else:
        # Process and save the audio
        samplerate = 16000  # Default sample rate for Wav2Vec2
        file_name = "recorded_audio.wav"

        # Decode using each frame's real format, then bring the signal to the
        # rate handed to the processor. NOTE(review): assumes all frames in
        # one batch share the first frame's sample rate.
        source_rate = audio_frames[0].sample_rate
        mono_audio = np.concatenate(
            [_frame_to_mono_float(frame) for frame in audio_frames]
        )
        audio_array = _resample_linear(mono_audio, source_rate, samplerate)

        # The WAV header below declares 16-bit mono, so write matching
        # little-endian int16 samples rather than raw float32 bytes.
        pcm16 = (np.clip(audio_array, -1.0, 1.0) * 32767.0).astype("<i2")
        with wave.open(file_name, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(samplerate)
            wf.writeframes(pcm16.tobytes())

        st.audio(file_name, format="audio/wav")

        # Generate embedding
        embedding = generate_embedding(audio_array, samplerate)
        st.success("Audio embedding generated!")
        st.write("Embedding Shape:", embedding.shape)
        st.write("Embedding Values:", embedding.numpy())