# NOTE(review): removed non-code scrape artifacts ("Spaces:" / "Build error"
# log lines) that were captured with this file; they are not valid Python.
# tabs/speech_stress_analysis.py
import tempfile
import warnings

import librosa
import librosa.display  # explicit import: needed for specshow/waveshow on older librosa
import matplotlib.pyplot as plt
import numpy as np

import gradio as gr

warnings.filterwarnings("ignore", category=UserWarning, module='librosa')
def extract_audio_features(audio_file):
    """Extract prosodic and spectral features from an audio file.

    Parameters
    ----------
    audio_file : str
        Path to the audio file to load and analyze.

    Returns
    -------
    tuple
        ``(f0, energy, speech_rate, mfccs, y, sr)`` where ``f0`` is the
        voiced fundamental-frequency track in Hz (NaN frames removed),
        ``energy`` is the per-frame RMS, ``speech_rate`` is a rough
        syllables-per-second estimate, ``mfccs`` is the (13, n_frames)
        MFCC matrix, ``y`` the signal and ``sr`` its sample rate.
    """
    # sr=None keeps the file's native sample rate.
    y, sr = librosa.load(audio_file, sr=None)
    # Fundamental frequency via probabilistic YIN.
    # BUG FIX: pass sr explicitly — pyin otherwise assumes its default of
    # 22050 Hz, which skews f0 whenever the native rate differs.
    f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=75, fmax=600, sr=sr)
    f0 = f0[~np.isnan(f0)]  # keep voiced frames only
    # Short-time energy (intensity).
    energy = librosa.feature.rms(y=y)[0]
    # MFCCs (Mel-frequency cepstral coefficients).
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Onset envelope drives a tempo estimate used as a speech-rate proxy.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # BUG FIX: librosa >= 0.10 may return tempo as a 1-element ndarray;
    # force a Python float so downstream arithmetic stays scalar.
    speech_rate = float(np.atleast_1d(tempo)[0]) / 60  # syllables per second
    return f0, energy, speech_rate, mfccs, y, sr
# Verbose interpretation text for each stress category.
_STRESS_INTERPRETATIONS = {
    "Very Low Stress": (
        "Your vocal analysis indicates a very relaxed state. "
        "This suggests that you're currently experiencing minimal stress. "
        "Maintaining such low stress levels is beneficial for your health. "
        "Continue engaging in activities that promote relaxation and well-being. "
        "Regular self-care practices can help sustain this positive state."
    ),
    "Low Stress": (
        "Minor signs of stress are detected in your voice. "
        "This is common due to everyday challenges and is usually not concerning. "
        "Incorporating relaxation techniques, like deep breathing or meditation, may help. "
        "Regular breaks and leisure activities can also reduce stress. "
        "Staying mindful of stress levels supports overall health."
    ),
    "Moderate Stress": (
        "Your voice reflects moderate stress levels. "
        "This could be due to ongoing pressures or challenges you're facing. "
        "Consider practicing stress management strategies such as mindfulness exercises or physical activity. "
        "Identifying stressors and addressing them can be beneficial. "
        "Balancing work and rest is important for your well-being."
    ),
    "High Stress": (
        "Elevated stress levels are apparent in your vocal patterns. "
        "It's important to recognize and address these feelings. "
        "Identifying stressors and seeking support from friends, family, or professionals could be helpful. "
        "Engaging in stress reduction techniques is recommended. "
        "Taking proactive steps can improve your mental and physical health."
    ),
    "Very High Stress": (
        "Your voice suggests very high stress levels. "
        "This may indicate significant strain or anxiety. "
        "It may be helpful to consult a healthcare professional for support. "
        "Promptly addressing stress is important for your well-being. "
        "Consider reaching out to trusted individuals or resources."
    )
}


def _compute_stress(f0, energy, speech_rate):
    """Map raw voice features to a (stress_level, category) pair.

    ``stress_level`` is a 0-100 score from a sigmoid over a weighted sum of
    z-scores; ``category`` is one of the keys of ``_STRESS_INTERPRETATIONS``.
    """
    mean_f0 = np.mean(f0)
    mean_energy = np.mean(energy)
    # Normative data (example values from medical literature).
    norm_std_f0 = 20
    norm_mean_energy = 0.02
    norm_std_energy = 0.005
    norm_speech_rate = 4.4
    norm_std_speech_rate = 0.5
    # Crude gender split on mean pitch picks the f0 baseline
    # (110 Hz male-typical, 220 Hz female-typical).
    norm_mean_f0 = 110 if mean_f0 < 165 else 220
    # Z-scores of each feature against the norms.
    z_f0 = (mean_f0 - norm_mean_f0) / norm_std_f0
    z_energy = (mean_energy - norm_mean_energy) / norm_std_energy
    z_speech_rate = (speech_rate - norm_speech_rate) / norm_std_speech_rate
    # Weighted combination squashed to 0-100 through a sigmoid.
    stress_score = (0.4 * z_f0) + (0.4 * z_speech_rate) + (0.2 * z_energy)
    stress_level = float(1 / (1 + np.exp(-stress_score)) * 100)
    if stress_level < 20:
        category = "Very Low Stress"
    elif stress_level < 40:
        category = "Low Stress"
    elif stress_level < 60:
        category = "Moderate Stress"
    elif stress_level < 80:
        category = "High Stress"
    else:
        category = "Very High Stress"
    return stress_level, category


def _render_feature_plots(f0, energy, mfccs, y, sr):
    """Render the five diagnostic plots to a temp PNG and return its path."""
    fig, axs = plt.subplots(5, 1, figsize=(10, 15))
    try:
        # Fundamental frequency (pitch) track.
        axs[0].plot(f0)
        axs[0].set_title('Fundamental Frequency (Pitch)')
        axs[0].set_ylabel('Frequency (Hz)')
        # RMS energy (loudness).
        axs[1].plot(energy)
        axs[1].set_title('Energy (Loudness)')
        axs[1].set_ylabel('Energy')
        # MFCC spectrogram.
        img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[2])
        axs[2].set_title('MFCCs (Mel-frequency cepstral coefficients)')
        fig.colorbar(img, ax=axs[2])
        # Raw waveform.
        librosa.display.waveshow(y, sr=sr, ax=axs[3])
        axs[3].set_title('Waveform')
        axs[3].set_xlabel('Time (s)')
        axs[3].set_ylabel('Amplitude')
        # Distribution of voiced pitch values.
        axs[4].hist(f0, bins=50, color='blue', alpha=0.7)
        axs[4].set_title('Pitch Contour (Histogram of f0)')
        axs[4].set_xlabel('Frequency (Hz)')
        axs[4].set_ylabel('Count')
        fig.tight_layout()
        # delete=False: Gradio reads the file after this function returns.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            fig.savefig(temp_file.name)
            return temp_file.name
    finally:
        # BUG FIX: always release the figure, even when plotting or saving
        # raises — the original only closed it on the success path.
        plt.close(fig)


def analyze_voice_stress(audio_file):
    """Estimate a vocal stress level from an audio recording.

    Parameters
    ----------
    audio_file : str or None
        Path to the uploaded recording (Gradio ``type="filepath"``).

    Returns
    -------
    tuple
        ``(summary, interpretation, plot_path)`` — a "NN.NN% - Category"
        label, the verbose interpretation text, and the path of a rendered
        PNG. On missing input or any processing error the first element is
        a message and the other two are ``None``.
    """
    if not audio_file:
        return "No audio file provided.", None, None
    try:
        f0, energy, speech_rate, mfccs, y, sr = extract_audio_features(audio_file)
        stress_level, stress_category = _compute_stress(f0, energy, speech_rate)
        plot_path = _render_feature_plots(f0, energy, mfccs, y, sr)
        # Separate values for the three Gradio output components.
        return (
            f"{stress_level:.2f}% - {stress_category}",
            _STRESS_INTERPRETATIONS[stress_category],
            plot_path,
        )
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return f"Error: {str(e)}", None, None
def create_voice_stress_tab():
    """Build and return the Gradio Blocks UI for the speech-stress tab.

    Wires an audio upload to ``analyze_voice_stress`` and provides a Clear
    button that resets all four components.
    """
    custom_css = """
    /* General container styling for mobile */
    .gradio-container {
        padding: 10px !important;
        font-size: 16px !important;
    }
    /* Headings */
    h3 {
        text-align: center;
        font-size: 1.5em !important;
        margin-bottom: 20px !important;
    }
    /* Full width for audio input and other components */
    .gradio-container .gradio-row, .gradio-container .gradio-column {
        flex-direction: column !important;
        align-items: center !important;
    }
    /* Make the components scale better on smaller screens */
    #input_audio, #stress_output, #interpretation_output, #plot_output {
        width: 100% !important;
        max-width: 100% !important;
    }
    #input_audio label, #stress_output label, #interpretation_output label, #plot_output label {
        font-size: 1.2em !important;
    }
    /* Textbox area adjustment */
    #interpretation_output textarea {
        font-size: 1em !important;
        line-height: 1.4 !important;
    }
    /* Responsive styling for images */
    #plot_output img {
        width: 100% !important;
        height: auto !important;
    }
    /* Adjust clear button */
    #clear_btn button {
        font-size: 1em !important;
        padding: 10px 20px !important;
    }
    /* Responsive adjustments */
    @media only screen and (max-width: 600px) {
        .gradio-container {
            padding: 5px !important;
            font-size: 14px !important;
        }
        h3 {
            font-size: 1.2em !important;
        }
        #clear_btn button {
            font-size: 0.9em !important;
        }
        #interpretation_output textarea {
            font-size: 0.9em !important;
        }
    }
    """
    with gr.Blocks(css=custom_css) as voice_stress_tab:
        gr.Markdown("<h3>Speech Stress Analysis</h3>")
        with gr.Column():
            input_audio = gr.Audio(label="Upload your voice recording", type="filepath", elem_id="input_audio")
            stress_output = gr.Label(label="Stress Interpretation", elem_id="stress_output")
            interpretation_output = gr.Textbox(label="Detailed Interpretation", lines=6, elem_id="interpretation_output")
            plot_output = gr.Image(label="Stress Analysis Plot", elem_id="plot_output")
            # Re-run the analysis whenever a new recording is supplied.
            input_audio.change(
                analyze_voice_stress,
                inputs=[input_audio],
                outputs=[stress_output, interpretation_output, plot_output]
            )
            # BUG FIX: the lambda must return one value per output component;
            # the original returned 3 Nones for 4 outputs, which makes Gradio
            # raise on every click of the Clear button.
            gr.Button("Clear", elem_id="clear_btn").click(
                lambda: (None, None, None, None),
                outputs=[input_audio, stress_output, interpretation_output, plot_output]
            )
    return voice_stress_tab