# Speech-detect1 / app.py
# Hugging Face Space file (page residue: uploaded by ojas121, "Update app.py",
# commit 3e3cbf6 verified) — kept here as a comment so the module parses.
# Standard library
import os
import tempfile

# Third-party
import librosa
import noisereduce as nr
import numpy as np
import opensmile
import soundfile as sf
import speech_recognition as sr
import streamlit as st
# Custom CSS injected once at startup for a modern, card-style UI.
_CUSTOM_CSS = """
<style>
body {
background-color: #f5f5f5;
}
.stApp {
background-color: #ffffff;
border-radius: 10px;
padding: 20px;
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
}
h1 {
color: #FF5733;
text-align: center;
}
.button {
background-color: #ff6600;
color: white;
border-radius: 5px;
padding: 10px;
font-size: 16px;
font-weight: bold;
}
.button:hover {
background-color: #cc5200;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# openSMILE feature extractor used later for emotion detection:
# ComParE 2016 feature set, summarized at the functionals level.
_FEATURE_SET = opensmile.FeatureSet.ComParE_2016
_FEATURE_LEVEL = opensmile.FeatureLevel.Functionals
smile = opensmile.Smile(feature_set=_FEATURE_SET, feature_level=_FEATURE_LEVEL)
# ---------------------------------------------------------------------------
# Main UI flow: record from the microphone, then (in order) run
# speech-to-text, acoustic analysis, noise reduction, and emotion-feature
# extraction. Everything downstream of the button is guarded by the button
# press — otherwise `audio` / `audio_path` would be undefined (NameError)
# on the initial page render.
# ---------------------------------------------------------------------------
st.markdown("<h1>πŸŽ™οΈ Speech Detection System</h1>", unsafe_allow_html=True)
st.write("πŸ”Ή Record speech and analyze it in real time.")

# Audio Recorder
st.markdown("## 🎀 Record Your Voice")
recognizer = sr.Recognizer()
with sr.Microphone() as source:
    st.write("Press **Start Recording** and speak...")
    if st.button("πŸŽ™οΈ Start Recording", key="record", help="Click to start recording"):
        with st.spinner("Listening..."):
            audio = recognizer.listen(source)
        st.success("🎧 Recording Complete!")

        # Persist the capture to disk so librosa / openSMILE can read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio.get_wav_data())
            audio_path = temp_audio.name

        try:
            # Speech Recognition
            st.markdown("## πŸ“œ Speech-to-Text")
            try:
                text = recognizer.recognize_google(audio)
                st.success(f"πŸ—£οΈ Recognized Speech: **{text}**")
            except sr.UnknownValueError:
                st.warning("⚠️ Speech not recognized. Please try again.")
            except sr.RequestError as e:
                # Google Web Speech API unreachable or quota exceeded —
                # report it instead of crashing; acoustic analysis can
                # still proceed on the recorded audio.
                st.error(f"⚠️ Speech service unavailable: {e}")

            # Acoustic Analysis
            st.markdown("## πŸ“Š Acoustic Analysis")
            y, sr_val = librosa.load(audio_path)

            # Pitch: piptrack returns a (freq_bins, frames) matrix; average
            # only the voiced (non-zero) entries. Guard the silent case,
            # where np.mean of an empty array would be NaN.
            pitch, _ = librosa.piptrack(y=y, sr=sr_val)
            voiced = pitch[pitch > 0]
            if voiced.size:
                mean_pitch = np.mean(voiced)
                st.write(f"🎡 **Mean Pitch:** {mean_pitch:.2f} Hz")
            else:
                st.write("🎡 **Mean Pitch:** not detected (no voiced frames)")

            # Volume: mean RMS energy of the signal.
            rms = librosa.feature.rms(y=y)
            mean_volume = np.mean(rms)
            st.write(f"πŸ”Š **Mean Volume:** {mean_volume:.2f}")

            # Noise Filtering
            st.markdown("## πŸ”‡ Noise Reduction")
            reduced_noise = nr.reduce_noise(y=y, sr=sr_val)
            noise_filtered_path = "cleaned_audio.wav"
            sf.write(noise_filtered_path, reduced_noise, sr_val)
            st.audio(noise_filtered_path, format="audio/wav")
            st.success("βœ… Noise Reduced Successfully!")

            # Emotional Detection
            st.markdown("## 😊 Emotion Detection")
            features = smile.process_file(audio_path)
            st.write("πŸ” **Emotion Features Extracted!**")
            st.dataframe(features)

            st.success("βœ… Speech Analysis Completed!")
        finally:
            # NamedTemporaryFile(delete=False) is never cleaned up by the
            # OS — remove it explicitly to avoid leaking one file per run.
            os.remove(audio_path)