import os
import tempfile

import librosa
import noisereduce as nr
import numpy as np
import opensmile
import soundfile as sf
import speech_recognition as sr
import streamlit as st
# Custom CSS for a modern UI
# NOTE(review): the markdown body below is empty (a single newline) — no CSS is
# actually injected. The style rules were presumably lost; TODO: restore them.
st.markdown("""
""", unsafe_allow_html=True)
# Initialize OpenSMILE for emotion detection
# ComParE_2016 + Functionals extracts one row of utterance-level acoustic
# features per file (functionals over low-level descriptors). Only the raw
# feature table is produced here — no emotion classifier is applied.
smile = opensmile.Smile(
feature_set=opensmile.FeatureSet.ComParE_2016,
feature_level=opensmile.FeatureLevel.Functionals,
)
# Streamlit UI
# Fix: the original wrapped this text in a single-quoted string spanning three
# lines, which is a SyntaxError in Python. A triple-quoted string preserves the
# same text (including the surrounding newlines) legally.
st.markdown(
    """
🎙️ Speech Detection System
""",
    unsafe_allow_html=True,
)
st.write("🔹 Record speech and analyze it in real time.")
# Audio Recorder
st.markdown("## 🎤 Record Your Voice")
recognizer = sr.Recognizer()
with sr.Microphone() as source:
st.write("Press **Start Recording** and speak...")
if st.button("🎙️ Start Recording", key="record", help="Click to start recording"):
with st.spinner("Listening..."):
audio = recognizer.listen(source)
st.success("🎧 Recording Complete!")
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
temp_audio.write(audio.get_wav_data())
audio_path = temp_audio.name
# Speech Recognition
st.markdown("## 📜 Speech-to-Text")
try:
text = recognizer.recognize_google(audio)
st.success(f"🗣️ Recognized Speech: **{text}**")
except sr.UnknownValueError:
st.warning("⚠️ Speech not recognized. Please try again.")
# Acoustic Analysis
st.markdown("## 📊 Acoustic Analysis")
y, sr_val = librosa.load(audio_path)
# Pitch Calculation
pitch, _ = librosa.piptrack(y=y, sr=sr_val)
mean_pitch = np.mean(pitch[pitch > 0])
st.write(f"🎵 **Mean Pitch:** {mean_pitch:.2f} Hz")
# Volume Calculation
rms = librosa.feature.rms(y=y)
mean_volume = np.mean(rms)
st.write(f"🔊 **Mean Volume:** {mean_volume:.2f}")
# Noise Filtering
st.markdown("## 🔇 Noise Reduction")
reduced_noise = nr.reduce_noise(y=y, sr=sr_val)
noise_filtered_path = "cleaned_audio.wav"
sf.write(noise_filtered_path, reduced_noise, sr_val)
st.audio(noise_filtered_path, format="audio/wav")
st.success("✅ Noise Reduced Successfully!")
# Emotional Detection
st.markdown("## 😊 Emotion Detection")
features = smile.process_file(audio_path)
st.write("🔍 **Emotion Features Extracted!**")
st.dataframe(features)
st.success("✅ Speech Analysis Completed!")