import os
import tempfile

import librosa
import noisereduce as nr
import numpy as np
import opensmile
import soundfile as sf
import speech_recognition as sr
import streamlit as st
# Custom CSS for a modern UI
# NOTE(review): the markdown body below is empty (a single newline) — no CSS is
# actually injected. The style rules were presumably lost; TODO: restore them.
st.markdown("""
""", unsafe_allow_html=True)
# Initialize OpenSMILE for emotion detection
# ComParE_2016 + Functionals extracts one row of utterance-level acoustic
# features per file (functionals over low-level descriptors). Only the raw
# feature table is produced here — no emotion classifier is applied.
smile = opensmile.Smile(
feature_set=opensmile.FeatureSet.ComParE_2016,
feature_level=opensmile.FeatureLevel.Functionals,
)
# Streamlit UI
# Fix: the original wrapped this text in a single-quoted string spanning three
# lines, which is a SyntaxError in Python. A triple-quoted string preserves the
# same text (including the surrounding newlines) legally.
st.markdown(
    """
🎙️ Speech Detection System
""",
    unsafe_allow_html=True,
)
st.write("🔹 Record speech and analyze it in real time.")
# Audio Recorder
st.markdown("## 🎤 Record Your Voice")
recognizer = sr.Recognizer()
with sr.Microphone() as source:
st.write("Press **Start Recording** and speak...")
if st.button("🎙️ Start Recording", key="record", help="Click to start recording"):
with st.spinner("Listening..."):
audio = recognizer.listen(source)
st.success("🎧 Recording Complete!")
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
temp_audio.write(audio.get_wav_data())
audio_path = temp_audio.name
# Speech Recognition
st.markdown("## 📜 Speech-to-Text")
try:
text = recognizer.recognize_google(audio)
st.success(f"🗣️ Recognized Speech: **{text}**")
except sr.UnknownValueError:
st.warning("⚠️ Speech not recognized. Please try again.")
# Acoustic Analysis
st.markdown("## 📊 Acoustic Analysis")
y, sr_val = librosa.load(audio_path)
# Pitch Calculation
pitch, _ = librosa.piptrack(y=y, sr=sr_val)
mean_pitch = np.mean(pitch[pitch > 0])
st.write(f"🎵 **Mean Pitch:** {mean_pitch:.2f} Hz")
# Volume Calculation
rms = librosa.feature.rms(y=y)
mean_volume = np.mean(rms)
st.write(f"🔊 **Mean Volume:** {mean_volume:.2f}")
# Noise Filtering
st.markdown("## 🔇 Noise Reduction")
reduced_noise = nr.reduce_noise(y=y, sr=sr_val)
noise_filtered_path = "cleaned_audio.wav"
sf.write(noise_filtered_path, reduced_noise, sr_val)
st.audio(noise_filtered_path, format="audio/wav")
st.success("✅ Noise Reduced Successfully!")
# Emotional Detection
st.markdown("## 😊 Emotion Detection")
features = smile.process_file(audio_path)
st.write("🔍 **Emotion Features Extracted!**")
st.dataframe(features)
st.success("✅ Speech Analysis Completed!")