Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -702,14 +702,14 @@ def trim_audio_segment_by_phoneme_position(audio_segment: torch.Tensor,
|
|
702 |
start_trim_pct = best_start / total_phoneme_len
|
703 |
end_trim_pct = (total_phoneme_len - best_end) / total_phoneme_len
|
704 |
|
705 |
-
# Apply
|
706 |
-
start_trim_pct_adjusted = start_trim_pct * 0.
|
707 |
-
end_trim_pct_adjusted = end_trim_pct * 0.
|
708 |
|
709 |
log(f"🎵 Audio trimming for '{word}':")
|
710 |
log(f" Phoneme position: {best_start}-{best_end-1} of {total_phoneme_len} chars") # Fixed: best_end-1 for actual last index
|
711 |
log(f" Calculated trim: start={start_trim_pct:.1%}, end={end_trim_pct:.1%}")
|
712 |
-
log(f" Adjusted trim (
|
713 |
|
714 |
# Calculate samples to trim using adjusted percentages
|
715 |
total_samples = audio_segment.shape[-1]
|
@@ -1163,8 +1163,8 @@ async def transcribe(audio: UploadFile = File(...), similarity_threshold: float
|
|
1163 |
total_phoneme_len = len(detected_phoneme_norm)
|
1164 |
|
1165 |
if total_phoneme_len > 0:
|
1166 |
-
start_trim_pct = match_start / total_phoneme_len * 0.
|
1167 |
-
end_trim_pct = (total_phoneme_len - match_end) / total_phoneme_len * 0.
|
1168 |
|
1169 |
# Apply trim percentages to timing
|
1170 |
time_trimmed_from_start = expanded_duration * start_trim_pct
|
|
|
702 |
start_trim_pct = best_start / total_phoneme_len
|
703 |
end_trim_pct = (total_phoneme_len - best_end) / total_phoneme_len
|
704 |
|
705 |
+
# Apply 85% factor to be less aggressive
|
706 |
+
start_trim_pct_adjusted = start_trim_pct * 0.85
|
707 |
+
end_trim_pct_adjusted = end_trim_pct * 0.85
|
708 |
|
709 |
log(f"🎵 Audio trimming for '{word}':")
|
710 |
log(f" Phoneme position: {best_start}-{best_end-1} of {total_phoneme_len} chars") # Fixed: best_end-1 for actual last index
|
711 |
log(f" Calculated trim: start={start_trim_pct:.1%}, end={end_trim_pct:.1%}")
|
712 |
+
log(f" Adjusted trim (85%): start={start_trim_pct_adjusted:.1%}, end={end_trim_pct_adjusted:.1%}")
|
713 |
|
714 |
# Calculate samples to trim using adjusted percentages
|
715 |
total_samples = audio_segment.shape[-1]
|
|
|
1163 |
total_phoneme_len = len(detected_phoneme_norm)
|
1164 |
|
1165 |
if total_phoneme_len > 0:
|
1166 |
+
start_trim_pct = match_start / total_phoneme_len * 0.85 # Apply 85% factor
|
1167 |
+
end_trim_pct = (total_phoneme_len - match_end) / total_phoneme_len * 0.85
|
1168 |
|
1169 |
# Apply trim percentages to timing
|
1170 |
time_trimmed_from_start = expanded_duration * start_trim_pct
|