ojas121 committed on
Commit
3e3cbf6
·
verified ·
1 Parent(s): ace00d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -64
app.py CHANGED
@@ -1,84 +1,100 @@
1
import streamlit as st

# Apply the page configuration before any other Streamlit call (dark layout).
st.set_page_config(page_title="Speech Detection System", page_icon="πŸŽ™", layout="wide")

# Dark-theme stylesheet, injected once at the top of the page.
_DARK_CSS = """
    <style>
    body {
        color: white;
        background-color: #0e1117;
    }
    .stApp {
        background-color: #0e1117;
    }
    .title {
        text-align: center;
        font-size: 2.5rem;
        font-weight: bold;
        color: #1db954;
    }
    .subheading {
        font-size: 1.5rem;
        font-weight: bold;
        color: #f4f4f4;
        text-align: center;
    }
    .description {
        font-size: 1.1rem;
        text-align: center;
        color: #d1d1d1;
        margin-bottom: 20px;
    }
    .feature-card {
        background-color: #22272e;
        padding: 15px;
        border-radius: 10px;
        box-shadow: 2px 2px 10px rgba(255, 255, 255, 0.1);
        margin: 10px;
    }
    </style>
    """
st.markdown(_DARK_CSS, unsafe_allow_html=True)

# Page title banner.
st.markdown("<div class='title'>πŸŽ™ Speech Detection System</div>", unsafe_allow_html=True)

# Introductory description of what a speech detection system does.
_DESCRIPTION_HTML = """
    <div class='description'>
    Speech detection systems utilize various datasets to analyze and interpret spoken language.
    These systems perform **acoustic analysis** to identify pitch, tone, and volume, while **speech recognition** converts audio into text.
    **Noise filtering** enhances clarity by removing background sounds, and **emotional detection** determines the speaker's mood based on vocal tone.
    **Real-time processing** ensures live detection with minimal delay. The use of **multilingual** and **diverse environmental datasets**
    improves adaptability and accuracy, making these systems ideal for applications like **virtual assistants, sentiment analysis, and voice-controlled systems**.
    </div>
    """
st.markdown(_DESCRIPTION_HTML, unsafe_allow_html=True)

# Section heading for the feature grid.
st.markdown("<div class='subheading'>πŸ” Key Features</div>", unsafe_allow_html=True)

# (title, description) pairs rendered as cards below.
_FEATURES = [
    ("🎡 Acoustic Analysis", "Identifies pitch, tone, and volume. Processes sound waveforms to extract unique speech characteristics."),
    ("😊 Emotional Detection", "Detects emotions such as happiness, anger, or neutrality from vocal tone."),
    ("πŸ—£ Speech Recognition", "Converts spoken words into text using advanced algorithms. Detects languages and keywords."),
    ("⚑ Real-Time Processing", "Enables live speech detection with minimal latency for fast, accurate responses."),
    ("πŸ”‡ Noise Filtering", "Removes background noise, ensuring clearer speech recognition and analysis."),
    ("🌍 Dataset Diversity", "Utilizes multilingual and environmental datasets for robust, adaptable speech detection."),
]

# Lay the feature cards out in a two-column grid, alternating left/right.
left_col, right_col = st.columns(2)
for idx, (heading, blurb) in enumerate(_FEATURES):
    target = left_col if idx % 2 == 0 else right_col
    with target:
        st.markdown(f"<div class='feature-card'><b>{heading}</b><br>{blurb}</div>", unsafe_allow_html=True)

# Footer separator and credit line.
st.markdown("---")
st.markdown("<div style='text-align: center; font-size: 0.9rem;'>Built with ❀️ using Streamlit</div>", unsafe_allow_html=True)
 
1
import streamlit as st
import speech_recognition as sr
import librosa
import numpy as np
import noisereduce as nr
import soundfile as sf
import tempfile
import os
import opensmile

# Custom CSS for a modern UI
st.markdown("""
    <style>
    body {
        background-color: #f5f5f5;
    }
    .stApp {
        background-color: #ffffff;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
    }
    h1 {
        color: #FF5733;
        text-align: center;
    }
    .button {
        background-color: #ff6600;
        color: white;
        border-radius: 5px;
        padding: 10px;
        font-size: 16px;
        font-weight: bold;
    }
    .button:hover {
        background-color: #cc5200;
    }
    </style>
""", unsafe_allow_html=True)

# Initialize OpenSMILE once at module load; each recording reuses this extractor.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Streamlit UI
st.markdown("<h1>πŸŽ™οΈ Speech Detection System</h1>", unsafe_allow_html=True)
st.write("πŸ”Ή Record speech and analyze it in real time.")

# Audio Recorder
st.markdown("## 🎀 Record Your Voice")
recognizer = sr.Recognizer()

# NOTE(review): sr.Microphone() opens the audio device of the machine running
# the Streamlit server, not the visitor's browser — fine for a local demo, but
# confirm this is the intended deployment model.
with sr.Microphone() as source:
    st.write("Press **Start Recording** and speak...")
    if st.button("πŸŽ™οΈ Start Recording", key="record", help="Click to start recording"):
        with st.spinner("Listening..."):
            audio = recognizer.listen(source)
        st.success("🎧 Recording Complete!")

        # Persist the capture to disk so librosa/openSMILE can read it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio.get_wav_data())
            audio_path = temp_audio.name

        try:
            # Speech Recognition: transcribe via Google's free web API.
            st.markdown("## πŸ“œ Speech-to-Text")
            try:
                text = recognizer.recognize_google(audio)
                st.success(f"πŸ—£οΈ Recognized Speech: **{text}**")
            except sr.UnknownValueError:
                st.warning("⚠️ Speech not recognized. Please try again.")
            except sr.RequestError as err:
                # Fix: recognize_google raises RequestError on network/quota
                # failures; previously this crashed the whole script run.
                st.error(f"❌ Speech service unavailable: {err}")

            # Acoustic Analysis: load the recording (librosa default sample rate).
            st.markdown("## πŸ“Š Acoustic Analysis")
            y, sr_val = librosa.load(audio_path)

            # Pitch Calculation. Fix: when no frame has positive pitch,
            # pitch[pitch > 0] is empty and np.mean would emit a
            # RuntimeWarning and display "nan Hz" — guard that case.
            pitch, _ = librosa.piptrack(y=y, sr=sr_val)
            voiced = pitch[pitch > 0]
            if voiced.size:
                mean_pitch = np.mean(voiced)
                st.write(f"🎡 **Mean Pitch:** {mean_pitch:.2f} Hz")
            else:
                st.write("🎡 **Mean Pitch:** no voiced frames detected")

            # Volume Calculation (RMS energy averaged over frames).
            rms = librosa.feature.rms(y=y)
            mean_volume = np.mean(rms)
            st.write(f"πŸ”Š **Mean Volume:** {mean_volume:.2f}")

            # Noise Filtering: spectral-gating noise reduction, then play back.
            st.markdown("## πŸ”‡ Noise Reduction")
            reduced_noise = nr.reduce_noise(y=y, sr=sr_val)
            noise_filtered_path = "cleaned_audio.wav"
            sf.write(noise_filtered_path, reduced_noise, sr_val)
            st.audio(noise_filtered_path, format="audio/wav")
            st.success("βœ… Noise Reduced Successfully!")

            # Emotional Detection: ComParE_2016 functionals from openSMILE.
            # (Raw feature table only — no classifier is applied here.)
            st.markdown("## 😊 Emotion Detection")
            features = smile.process_file(audio_path)
            st.write("πŸ” **Emotion Features Extracted!**")
            st.dataframe(features)

            st.success("βœ… Speech Analysis Completed!")
        finally:
            # Fix: delete=False temp files were never removed, leaking one
            # WAV per recording; clean up even if analysis raised.
            os.remove(audio_path)