greg0rs committed on
Commit
0cd1bb4
·
verified ·
1 Parent(s): b9d73c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -702,14 +702,14 @@ def trim_audio_segment_by_phoneme_position(audio_segment: torch.Tensor,
702
  start_trim_pct = best_start / total_phoneme_len
703
  end_trim_pct = (total_phoneme_len - best_end) / total_phoneme_len
704
 
705
- # Apply 95% factor to be less aggressive
706
- start_trim_pct_adjusted = start_trim_pct * 0.95
707
- end_trim_pct_adjusted = end_trim_pct * 0.95
708
 
709
  log(f"🎵 Audio trimming for '{word}':")
710
  log(f" Phoneme position: {best_start}-{best_end-1} of {total_phoneme_len} chars") # Fixed: best_end-1 for actual last index
711
  log(f" Calculated trim: start={start_trim_pct:.1%}, end={end_trim_pct:.1%}")
712
- log(f" Adjusted trim (95%): start={start_trim_pct_adjusted:.1%}, end={end_trim_pct_adjusted:.1%}")
713
 
714
  # Calculate samples to trim using adjusted percentages
715
  total_samples = audio_segment.shape[-1]
@@ -1163,8 +1163,8 @@ async def transcribe(audio: UploadFile = File(...), similarity_threshold: float
1163
  total_phoneme_len = len(detected_phoneme_norm)
1164
 
1165
  if total_phoneme_len > 0:
1166
- start_trim_pct = match_start / total_phoneme_len * 0.95 # Apply 95% factor
1167
- end_trim_pct = (total_phoneme_len - match_end) / total_phoneme_len * 0.95
1168
 
1169
  # Apply trim percentages to timing
1170
  time_trimmed_from_start = expanded_duration * start_trim_pct
 
702
  start_trim_pct = best_start / total_phoneme_len
703
  end_trim_pct = (total_phoneme_len - best_end) / total_phoneme_len
704
 
705
+ # Apply 85% factor to be less aggressive
706
+ start_trim_pct_adjusted = start_trim_pct * 0.85
707
+ end_trim_pct_adjusted = end_trim_pct * 0.85
708
 
709
  log(f"🎵 Audio trimming for '{word}':")
710
  log(f" Phoneme position: {best_start}-{best_end-1} of {total_phoneme_len} chars") # Fixed: best_end-1 for actual last index
711
  log(f" Calculated trim: start={start_trim_pct:.1%}, end={end_trim_pct:.1%}")
712
+ log(f" Adjusted trim (85%): start={start_trim_pct_adjusted:.1%}, end={end_trim_pct_adjusted:.1%}")
713
 
714
  # Calculate samples to trim using adjusted percentages
715
  total_samples = audio_segment.shape[-1]
 
1163
  total_phoneme_len = len(detected_phoneme_norm)
1164
 
1165
  if total_phoneme_len > 0:
1166
+ start_trim_pct = match_start / total_phoneme_len * 0.85 # Apply 85% factor
1167
+ end_trim_pct = (total_phoneme_len - match_end) / total_phoneme_len * 0.85
1168
 
1169
  # Apply trim percentages to timing
1170
  time_trimmed_from_start = expanded_duration * start_trim_pct