ojas121 committed on
Commit
3e3cbf6
·
verified ·
1 Parent(s): ace00d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -64
app.py CHANGED
@@ -1,84 +1,100 @@
1
import streamlit as st

# Apply the page configuration before any other Streamlit call (dark layout).
st.set_page_config(page_title="Speech Detection System", page_icon="πŸŽ™", layout="wide")

# Dark-theme stylesheet, injected once at the top of the page.
_DARK_CSS = """
    <style>
    body {
        color: white;
        background-color: #0e1117;
    }
    .stApp {
        background-color: #0e1117;
    }
    .title {
        text-align: center;
        font-size: 2.5rem;
        font-weight: bold;
        color: #1db954;
    }
    .subheading {
        font-size: 1.5rem;
        font-weight: bold;
        color: #f4f4f4;
        text-align: center;
    }
    .description {
        font-size: 1.1rem;
        text-align: center;
        color: #d1d1d1;
        margin-bottom: 20px;
    }
    .feature-card {
        background-color: #22272e;
        padding: 15px;
        border-radius: 10px;
        box-shadow: 2px 2px 10px rgba(255, 255, 255, 0.1);
        margin: 10px;
    }
    </style>
    """
st.markdown(_DARK_CSS, unsafe_allow_html=True)

# Page title banner.
st.markdown("<div class='title'>πŸŽ™ Speech Detection System</div>", unsafe_allow_html=True)

# Introductory description of what a speech detection system does.
_DESCRIPTION_HTML = """
    <div class='description'>
    Speech detection systems utilize various datasets to analyze and interpret spoken language.
    These systems perform **acoustic analysis** to identify pitch, tone, and volume, while **speech recognition** converts audio into text.
    **Noise filtering** enhances clarity by removing background sounds, and **emotional detection** determines the speaker's mood based on vocal tone.
    **Real-time processing** ensures live detection with minimal delay. The use of **multilingual** and **diverse environmental datasets**
    improves adaptability and accuracy, making these systems ideal for applications like **virtual assistants, sentiment analysis, and voice-controlled systems**.
    </div>
    """
st.markdown(_DESCRIPTION_HTML, unsafe_allow_html=True)

# Section heading for the feature grid.
st.markdown("<div class='subheading'>πŸ” Key Features</div>", unsafe_allow_html=True)

# (title, description) pairs rendered as cards below.
_FEATURES = [
    ("🎡 Acoustic Analysis", "Identifies pitch, tone, and volume. Processes sound waveforms to extract unique speech characteristics."),
    ("😊 Emotional Detection", "Detects emotions such as happiness, anger, or neutrality from vocal tone."),
    ("πŸ—£ Speech Recognition", "Converts spoken words into text using advanced algorithms. Detects languages and keywords."),
    ("⚑ Real-Time Processing", "Enables live speech detection with minimal latency for fast, accurate responses."),
    ("πŸ”‡ Noise Filtering", "Removes background noise, ensuring clearer speech recognition and analysis."),
    ("🌍 Dataset Diversity", "Utilizes multilingual and environmental datasets for robust, adaptable speech detection."),
]

# Lay the feature cards out in a two-column grid, alternating left/right.
left_col, right_col = st.columns(2)
for idx, (heading, blurb) in enumerate(_FEATURES):
    target = left_col if idx % 2 == 0 else right_col
    with target:
        st.markdown(f"<div class='feature-card'><b>{heading}</b><br>{blurb}</div>", unsafe_allow_html=True)

# Footer separator and credit line.
st.markdown("---")
st.markdown("<div style='text-align: center; font-size: 0.9rem;'>Built with ❀️ using Streamlit</div>", unsafe_allow_html=True)
 
1
import streamlit as st
import speech_recognition as sr
import librosa
import numpy as np
import noisereduce as nr
import soundfile as sf
import tempfile
import os
import opensmile

# Custom CSS for a modern UI
st.markdown("""
    <style>
    body {
        background-color: #f5f5f5;
    }
    .stApp {
        background-color: #ffffff;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
    }
    h1 {
        color: #FF5733;
        text-align: center;
    }
    .button {
        background-color: #ff6600;
        color: white;
        border-radius: 5px;
        padding: 10px;
        font-size: 16px;
        font-weight: bold;
    }
    .button:hover {
        background-color: #cc5200;
    }
    </style>
""", unsafe_allow_html=True)

# Initialize OpenSMILE once at module load; each recording reuses this extractor.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Streamlit UI
st.markdown("<h1>πŸŽ™οΈ Speech Detection System</h1>", unsafe_allow_html=True)
st.write("πŸ”Ή Record speech and analyze it in real time.")

# Audio Recorder
st.markdown("## 🎀 Record Your Voice")
recognizer = sr.Recognizer()

# NOTE(review): sr.Microphone() opens the audio device of the machine running
# the Streamlit server, not the visitor's browser — fine for a local demo, but
# confirm this is the intended deployment model.
with sr.Microphone() as source:
    st.write("Press **Start Recording** and speak...")
    if st.button("πŸŽ™οΈ Start Recording", key="record", help="Click to start recording"):
        with st.spinner("Listening..."):
            audio = recognizer.listen(source)
        st.success("🎧 Recording Complete!")

        # Persist the capture to disk so librosa/openSMILE can read it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio.get_wav_data())
            audio_path = temp_audio.name

        try:
            # Speech Recognition: transcribe via Google's free web API.
            st.markdown("## πŸ“œ Speech-to-Text")
            try:
                text = recognizer.recognize_google(audio)
                st.success(f"πŸ—£οΈ Recognized Speech: **{text}**")
            except sr.UnknownValueError:
                st.warning("⚠️ Speech not recognized. Please try again.")
            except sr.RequestError as err:
                # Fix: recognize_google raises RequestError on network/quota
                # failures; previously this crashed the whole script run.
                st.error(f"❌ Speech service unavailable: {err}")

            # Acoustic Analysis: load the recording (librosa default sample rate).
            st.markdown("## πŸ“Š Acoustic Analysis")
            y, sr_val = librosa.load(audio_path)

            # Pitch Calculation. Fix: when no frame has positive pitch,
            # pitch[pitch > 0] is empty and np.mean would emit a
            # RuntimeWarning and display "nan Hz" — guard that case.
            pitch, _ = librosa.piptrack(y=y, sr=sr_val)
            voiced = pitch[pitch > 0]
            if voiced.size:
                mean_pitch = np.mean(voiced)
                st.write(f"🎡 **Mean Pitch:** {mean_pitch:.2f} Hz")
            else:
                st.write("🎡 **Mean Pitch:** no voiced frames detected")

            # Volume Calculation (RMS energy averaged over frames).
            rms = librosa.feature.rms(y=y)
            mean_volume = np.mean(rms)
            st.write(f"πŸ”Š **Mean Volume:** {mean_volume:.2f}")

            # Noise Filtering: spectral-gating noise reduction, then play back.
            st.markdown("## πŸ”‡ Noise Reduction")
            reduced_noise = nr.reduce_noise(y=y, sr=sr_val)
            noise_filtered_path = "cleaned_audio.wav"
            sf.write(noise_filtered_path, reduced_noise, sr_val)
            st.audio(noise_filtered_path, format="audio/wav")
            st.success("βœ… Noise Reduced Successfully!")

            # Emotional Detection: ComParE_2016 functionals from openSMILE.
            # (Raw feature table only — no classifier is applied here.)
            st.markdown("## 😊 Emotion Detection")
            features = smile.process_file(audio_path)
            st.write("πŸ” **Emotion Features Extracted!**")
            st.dataframe(features)

            st.success("βœ… Speech Analysis Completed!")
        finally:
            # Fix: delete=False temp files were never removed, leaking one
            # WAV per recording; clean up even if analysis raised.
            os.remove(audio_path)