import streamlit as st
import speech_recognition as sr
import librosa
import numpy as np
import noisereduce as nr
import soundfile as sf
import tempfile
import opensmile
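# Runtime assumptions: sr.Microphone() requires the PyAudio package and a local
# audio input device, recognize_google() calls Google's free web speech API
# (internet access required), and opensmile is the Python wrapper around the
# openSMILE feature extractor.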
# Custom CSS for a modern UI
st.markdown("""
<style>
body {
    background-color: #f5f5f5;
}
.stApp {
    background-color: #ffffff;
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #FF5733;
    text-align: center;
}
.button {
    background-color: #ff6600;
    color: white;
    border-radius: 5px;
    padding: 10px;
    font-size: 16px;
    font-weight: bold;
}
.button:hover {
    background-color: #cc5200;
}
</style>
""", unsafe_allow_html=True)
# Initialize openSMILE for emotion feature extraction
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)
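# ComParE_2016 at the Functionals level yields one wide feature vector per clip
# (several thousand statistical descriptors), which is the kind of input an
# emotion classifier would be trained on.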
# Streamlit UI
st.markdown("<h1>🎙️ Speech Detection System</h1>", unsafe_allow_html=True)
st.write("🔹 Record speech and analyze it in real time.")

# Audio Recorder
st.markdown("## 🎤 Record Your Voice")
recognizer = sr.Recognizer()
with sr.Microphone() as source:
    st.write("Press **Start Recording** and speak...")
    if st.button("🎙️ Start Recording", key="record", help="Click to start recording"):
        with st.spinner("Listening..."):
            audio = recognizer.listen(source)
        st.success("🎧 Recording Complete!")

        # Persist the recording to a temporary WAV file for the analysis steps below
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio.get_wav_data())
            audio_path = temp_audio.name
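        # Note: delete=False leaves the temp WAV on disk so it can be reopened
        # below; calling os.remove(audio_path) at the end of the run would clean
        # it up between recordings.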
        # Speech Recognition
        st.markdown("## 📝 Speech-to-Text")
        try:
            text = recognizer.recognize_google(audio)
            st.success(f"🗣️ Recognized Speech: **{text}**")
        except sr.UnknownValueError:
            st.warning("⚠️ Speech not recognized. Please try again.")
        except sr.RequestError:
            st.error("⚠️ Speech service unreachable. Check your internet connection.")
        # Acoustic Analysis
        st.markdown("## 📊 Acoustic Analysis")
        y, sr_val = librosa.load(audio_path)

        # Pitch Calculation (mean of the voiced pitch-track bins; guard against
        # an all-unvoiced recording, which would otherwise yield NaN)
        pitch, _ = librosa.piptrack(y=y, sr=sr_val)
        voiced = pitch[pitch > 0]
        mean_pitch = np.mean(voiced) if voiced.size > 0 else 0.0
        st.write(f"🎵 **Mean Pitch:** {mean_pitch:.2f} Hz")
        # Volume Calculation (root-mean-square energy per frame)
        rms = librosa.feature.rms(y=y)
        mean_volume = np.mean(rms)
        st.write(f"🔊 **Mean Volume:** {mean_volume:.2f}")
        # Noise Filtering
        st.markdown("## 🔇 Noise Reduction")
        reduced_noise = nr.reduce_noise(y=y, sr=sr_val)
        noise_filtered_path = "cleaned_audio.wav"
        sf.write(noise_filtered_path, reduced_noise, sr_val)
        st.audio(noise_filtered_path, format="audio/wav")
        st.success("✅ Noise Reduced Successfully!")
        # Emotion Feature Extraction
        st.markdown("## 😊 Emotion Detection")
        features = smile.process_file(audio_path)
        st.write("🔍 **Emotion Features Extracted!**")
        st.dataframe(features)
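        # Note: openSMILE only extracts acoustic descriptors here; mapping them
        # to an actual emotion label would require a classifier (e.g. an SVM or a
        # small neural net) trained on these ComParE features, which this app
        # does not include.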
st.success("β Speech Analysis Completed!") | |