# tabs/speech_stress_analysis.py
"""Gradio tab that derives a heuristic "stress" score from a voice recording.

The score combines mean pitch (f0), RMS energy and an onset-based tempo
estimate, each expressed as a z-score against hard-coded reference values,
and squashes the weighted sum through a sigmoid into a 0-100 percentage.
"""

import tempfile
import warnings

import gradio as gr
import librosa
import librosa.display  # explicit: older librosa does not expose the submodule lazily
import matplotlib.pyplot as plt
import numpy as np

warnings.filterwarnings("ignore", category=UserWarning, module='librosa')

# Reference values used for the z-scores.  The original author's comment
# describes them as "example values from medical literature"; no source is
# cited in this file.
_NORM_MEAN_F0_MALE = 110
_NORM_MEAN_F0_FEMALE = 220
_NORM_STD_F0 = 20
_NORM_MEAN_ENERGY = 0.02
_NORM_STD_ENERGY = 0.005
_NORM_SPEECH_RATE = 4.4
_NORM_STD_SPEECH_RATE = 0.5

# Mean f0 below this value selects the "male" reference pitch, otherwise "female".
_GENDER_F0_THRESHOLD = 165

# (exclusive upper bound in percent, category); anything >= 80 is "Very High Stress".
_STRESS_BANDS = (
    (20, "Very Low Stress"),
    (40, "Low Stress"),
    (60, "Moderate Stress"),
    (80, "High Stress"),
)

# Verbose interpretation shown to the user for each stress category.
_INTERPRETATIONS = {
    "Very Low Stress": (
        "Your vocal analysis indicates a very relaxed state. "
        "This suggests that you're currently experiencing minimal stress. "
        "Maintaining such low stress levels is beneficial for your health. "
        "Continue engaging in activities that promote relaxation and well-being. "
        "Regular self-care practices can help sustain this positive state."
    ),
    "Low Stress": (
        "Minor signs of stress are detected in your voice. "
        "This is common due to everyday challenges and is usually not concerning. "
        "Incorporating relaxation techniques, like deep breathing or meditation, may help. "
        "Regular breaks and leisure activities can also reduce stress. "
        "Staying mindful of stress levels supports overall health."
    ),
    "Moderate Stress": (
        "Your voice reflects moderate stress levels. "
        "This could be due to ongoing pressures or challenges you're facing. "
        "Consider practicing stress management strategies such as mindfulness exercises or physical activity. "
        "Identifying stressors and addressing them can be beneficial. "
        "Balancing work and rest is important for your well-being."
    ),
    "High Stress": (
        "Elevated stress levels are apparent in your vocal patterns. "
        "It's important to recognize and address these feelings. "
        "Identifying stressors and seeking support from friends, family, or professionals could be helpful. "
        "Engaging in stress reduction techniques is recommended. "
        "Taking proactive steps can improve your mental and physical health."
    ),
    "Very High Stress": (
        "Your voice suggests very high stress levels. "
        "This may indicate significant strain or anxiety. "
        "It may be helpful to consult a healthcare professional for support. "
        "Promptly addressing stress is important for your well-being. "
        "Consider reaching out to trusted individuals or resources."
    ),
}


def extract_audio_features(audio_file):
    """Load an audio file and compute the features used by the stress score.

    Args:
        audio_file: Path to the recording, passed straight to ``librosa.load``.

    Returns:
        A tuple ``(f0, energy, speech_rate, mfccs, y, sr)`` where ``f0`` holds
        the pYIN pitch estimates of voiced frames only (may be empty),
        ``energy`` is the frame-wise RMS, ``speech_rate`` is the beat-tracker
        tempo divided by 60 as a float, ``mfccs`` are 13 MFCCs, and ``y``/``sr``
        are the waveform and its native sample rate.
    """
    y, sr = librosa.load(audio_file, sr=None)

    # Fundamental frequency estimation.  The native sample rate must be
    # forwarded: pyin otherwise assumes its default rate and the resulting
    # frequencies are scaled incorrectly for recordings at any other rate.
    f0, _voiced_flag, _voiced_probs = librosa.pyin(y, fmin=75, fmax=600, sr=sr)
    f0 = f0[~np.isnan(f0)]  # Remove unvoiced frames

    # Energy (intensity)
    energy = librosa.feature.rms(y=y)[0]

    # MFCCs (Mel-frequency cepstral coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Onset envelope for speech rate estimation
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # Depending on the librosa version tempo is a scalar or a 1-element
    # array; normalise to a plain float so downstream math stays scalar.
    tempo = float(np.atleast_1d(tempo)[0])
    speech_rate = tempo / 60  # Speech rate estimation (syllables per second)

    return f0, energy, speech_rate, mfccs, y, sr


def _categorize_stress(stress_level):
    """Map a 0-100 stress percentage to its category label."""
    for upper_bound, label in _STRESS_BANDS:
        if stress_level < upper_bound:
            return label
    return "Very High Stress"


def _render_plots(f0, energy, mfccs, y, sr):
    """Draw the five diagnostic plots and return the path of the saved PNG."""
    fig, axs = plt.subplots(5, 1, figsize=(10, 15))
    try:
        # Plot Fundamental Frequency (Pitch)
        axs[0].plot(f0)
        axs[0].set_title('Fundamental Frequency (Pitch)')
        axs[0].set_ylabel('Frequency (Hz)')

        # Plot Energy (Loudness)
        axs[1].plot(energy)
        axs[1].set_title('Energy (Loudness)')
        axs[1].set_ylabel('Energy')

        # Plot MFCCs
        img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[2])
        axs[2].set_title('MFCCs (Mel-frequency cepstral coefficients)')
        fig.colorbar(img, ax=axs[2])

        # Plot Waveform
        librosa.display.waveshow(y, sr=sr, ax=axs[3])
        axs[3].set_title('Waveform')
        axs[3].set_xlabel('Time (s)')
        axs[3].set_ylabel('Amplitude')

        # Plot Pitch Contour (Histogram of f0)
        axs[4].hist(f0, bins=50, color='blue', alpha=0.7)
        axs[4].set_title('Pitch Contour (Histogram of f0)')
        axs[4].set_xlabel('Frequency (Hz)')
        axs[4].set_ylabel('Count')

        fig.tight_layout()

        # Reserve a file name, close the handle, then save: writing to a
        # still-open NamedTemporaryFile by name is not portable.  The file is
        # deliberately kept (delete=False) because its path is returned.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            plot_path = temp_file.name
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    return plot_path


def analyze_voice_stress(audio_file):
    """Compute the stress estimate for a recording.

    Args:
        audio_file: Path to the audio file, or a falsy value when none is set.

    Returns:
        A 3-tuple ``(summary, interpretation, plot_path)``.  On success
        ``summary`` is ``"<percent>% - <category>"``; when no file is given,
        no voiced speech is found, or any exception occurs, ``summary`` is a
        message and the other two items are ``None``.
    """
    if not audio_file:
        return "No audio file provided.", None, None

    try:
        f0, energy, speech_rate, mfccs, y, sr = extract_audio_features(audio_file)

        # Without voiced frames the pitch statistics are NaN, and NaN fails
        # every "<" comparison, which used to be reported as the highest
        # stress category.  Report the condition explicitly instead.
        if f0.size == 0:
            return "Error: no voiced speech detected in the audio.", None, None

        # Calculate statistical measures
        mean_f0 = np.mean(f0)
        mean_energy = np.mean(energy)

        # Choose the reference pitch from the speaker's mean f0
        # (the original comment labels this step "gender detection").
        if mean_f0 < _GENDER_F0_THRESHOLD:
            norm_mean_f0 = _NORM_MEAN_F0_MALE
        else:
            norm_mean_f0 = _NORM_MEAN_F0_FEMALE

        # Compute Z-scores
        z_f0 = (mean_f0 - norm_mean_f0) / _NORM_STD_F0
        z_energy = (mean_energy - _NORM_MEAN_ENERGY) / _NORM_STD_ENERGY
        z_speech_rate = (speech_rate - _NORM_SPEECH_RATE) / _NORM_STD_SPEECH_RATE

        # Combine Z-scores for stress level
        stress_score = (0.4 * z_f0) + (0.4 * z_speech_rate) + (0.2 * z_energy)
        stress_level = float(1 / (1 + np.exp(-stress_score)) * 100)  # Sigmoid function

        stress_category = _categorize_stress(stress_level)
        final_interpretation = _INTERPRETATIONS[stress_category]

        plot_path = _render_plots(f0, energy, mfccs, y, sr)

        # Return separate values for Gradio output components
        return f"{stress_level:.2f}% - {stress_category}", final_interpretation, plot_path
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the tab.
        return f"Error: {str(e)}", None, None


def create_voice_stress_tab():
    """Build and return the ``gr.Blocks`` UI for the speech stress analysis tab."""
    custom_css = """
    /* General container styling for mobile */
    .gradio-container {
        padding: 10px !important;
        font-size: 16px !important;
    }

    /* Headings */
    h3 {
        text-align: center;
        font-size: 1.5em !important;
        margin-bottom: 20px !important;
    }

    /* Full width for audio input and other components */
    .gradio-container .gradio-row, .gradio-container .gradio-column {
        flex-direction: column !important;
        align-items: center !important;
    }

    /* Make the components scale better on smaller screens */
    #input_audio, #stress_output, #interpretation_output, #plot_output {
        width: 100% !important;
        max-width: 100% !important;
    }

    #input_audio label, #stress_output label, #interpretation_output label, #plot_output label {
        font-size: 1.2em !important;
    }

    /* Textbox area adjustment */
    #interpretation_output textarea {
        font-size: 1em !important;
        line-height: 1.4 !important;
    }

    /* Responsive styling for images */
    #plot_output img {
        width: 100% !important;
        height: auto !important;
    }

    /* Adjust clear button */
    #clear_btn button {
        font-size: 1em !important;
        padding: 10px 20px !important;
    }

    /* Responsive adjustments */
    @media only screen and (max-width: 600px) {
        .gradio-container {
            padding: 5px !important;
            font-size: 14px !important;
        }

        h3 {
            font-size: 1.2em !important;
        }

        #clear_btn button {
            font-size: 0.9em !important;
        }

        #interpretation_output textarea {
            font-size: 0.9em !important;
        }
    }
    """

    with gr.Blocks(css=custom_css) as voice_stress_tab:
        # NOTE(review): the stylesheet above targets h3, so this heading was
        # presumably wrapped in <h3> (or "###") markup that is no longer
        # present in the source - confirm and restore if so.
        gr.Markdown("\n\nSpeech Stress Analysis\n\n")

        with gr.Column():
            input_audio = gr.Audio(label="Upload your voice recording", type="filepath", elem_id="input_audio")
            stress_output = gr.Label(label="Stress Interpretation", elem_id="stress_output")
            interpretation_output = gr.Textbox(label="Detailed Interpretation", lines=6, elem_id="interpretation_output")
            plot_output = gr.Image(label="Stress Analysis Plot", elem_id="plot_output")

        input_audio.change(
            analyze_voice_stress,
            inputs=[input_audio],
            outputs=[stress_output, interpretation_output, plot_output]
        )

        # One None per output component: four components are cleared, so the
        # callback must return four values.
        gr.Button("Clear", elem_id="clear_btn").click(
            lambda: (None, None, None, None),
            outputs=[input_audio, stress_output, interpretation_output, plot_output]
        )

    return voice_stress_tab