Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -398,15 +398,45 @@ def detect_word_boundary_overlap(audio_segment: torch.Tensor, sample_rate: int,
|
|
398 |
if len(energy_levels) < 3:
|
399 |
return 0.0
|
400 |
|
401 |
-
#
|
402 |
-
|
403 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
|
405 |
-
|
|
|
|
|
406 |
|
407 |
-
|
|
|
|
|
|
|
|
|
|
|
408 |
for i, energy in enumerate(energy_levels):
|
409 |
-
|
|
|
|
|
|
|
|
|
|
|
410 |
log(f" Window {i}: energy={energy:.6f} [{window_type}]")
|
411 |
|
412 |
# Find sustained silence (2+ consecutive low-energy windows)
|
|
|
398 |
if len(energy_levels) < 3:
|
399 |
return 0.0
|
400 |
|
401 |
+
# FIXED: Use relative thresholds derived from the observed energy range (in the log domain) so they adapt to real audio levels
|
402 |
+
# Convert to logarithmic scale to better handle wide dynamic range
|
403 |
+
log_energies = []
|
404 |
+
for energy in energy_levels:
|
405 |
+
# Convert to dB-like scale, with floor to prevent log(0)
|
406 |
+
log_energy = 10 * np.log10(max(energy, 1e-10))
|
407 |
+
log_energies.append(log_energy)
|
408 |
+
|
409 |
+
# Calculate thresholds in log domain
|
410 |
+
min_log = min(log_energies)
|
411 |
+
max_log = max(log_energies)
|
412 |
+
dynamic_range = max_log - min_log
|
413 |
+
|
414 |
+
# If dynamic range is too small, everything is similar energy - no clear pattern
|
415 |
+
if dynamic_range < 6: # Less than 6dB difference
|
416 |
+
log(f"π Boundary analysis for '{word}': insufficient dynamic range ({dynamic_range:.1f}dB)")
|
417 |
+
return 0.0
|
418 |
+
|
419 |
+
# Set thresholds within the log-domain (dB) range: below the bottom 25% counts as silence, above the midpoint (top 50%) counts as noise
|
420 |
+
silence_threshold_log = min_log + dynamic_range * 0.25
|
421 |
+
noise_threshold_log = min_log + dynamic_range * 0.5
|
422 |
|
423 |
+
# Convert back to linear for comparison
|
424 |
+
silence_threshold = 10 ** (silence_threshold_log / 10)
|
425 |
+
noise_threshold = 10 ** (noise_threshold_log / 10)
|
426 |
|
427 |
+
log(f"π Boundary analysis for '{word}': {len(energy_levels)} windows")
|
428 |
+
log(f" Energy range: {min(energy_levels):.6f} to {max(energy_levels):.6f}")
|
429 |
+
log(f" Log range: {min_log:.1f}dB to {max_log:.1f}dB (dynamic range: {dynamic_range:.1f}dB)")
|
430 |
+
log(f" Thresholds: silence={silence_threshold:.6f}, noise={noise_threshold:.6f}")
|
431 |
+
|
432 |
+
# Classify windows with new thresholds
|
433 |
for i, energy in enumerate(energy_levels):
|
434 |
+
if energy > noise_threshold:
|
435 |
+
window_type = "NOISE"
|
436 |
+
elif energy < silence_threshold:
|
437 |
+
window_type = "SILENCE"
|
438 |
+
else:
|
439 |
+
window_type = "MID"
|
440 |
log(f" Window {i}: energy={energy:.6f} [{window_type}]")
|
441 |
|
442 |
# Find sustained silence (2+ consecutive low-energy windows)
|