Spaces:

greg0rs
/

fonetik-fast

Sleeping

greg0rs committed on Jul 26

Commit

0cd1bb4

verified ·

1 Parent(s): b9d73c6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -702,14 +702,14 @@ def trim_audio_segment_by_phoneme_position(audio_segment: torch.Tensor,
     start_trim_pct = best_start / total_phoneme_len
     end_trim_pct = (total_phoneme_len - best_end) / total_phoneme_len
-    # Apply 95% factor to be less aggressive
-    start_trim_pct_adjusted = start_trim_pct * 0.95
-    end_trim_pct_adjusted = end_trim_pct * 0.95
     log(f"🎵 Audio trimming for '{word}':")
     log(f"  Phoneme position: {best_start}-{best_end-1} of {total_phoneme_len} chars")  # Fixed: best_end-1 for actual last index
     log(f"  Calculated trim: start={start_trim_pct:.1%}, end={end_trim_pct:.1%}")
-    log(f"  Adjusted trim (95%): start={start_trim_pct_adjusted:.1%}, end={end_trim_pct_adjusted:.1%}")
     # Calculate samples to trim using adjusted percentages
     total_samples = audio_segment.shape[-1]
@@ -1163,8 +1163,8 @@ async def transcribe(audio: UploadFile = File(...), similarity_threshold: float
                 total_phoneme_len = len(detected_phoneme_norm)
                 if total_phoneme_len > 0:
-                    start_trim_pct = match_start / total_phoneme_len * 0.95  # Apply 95% factor
-                    end_trim_pct = (total_phoneme_len - match_end) / total_phoneme_len * 0.95
                     # Apply trim percentages to timing
                     time_trimmed_from_start = expanded_duration * start_trim_pct

     start_trim_pct = best_start / total_phoneme_len
     end_trim_pct = (total_phoneme_len - best_end) / total_phoneme_len
+    # Apply 85% factor to be less aggressive
+    start_trim_pct_adjusted = start_trim_pct * 0.85
+    end_trim_pct_adjusted = end_trim_pct * 0.85
     log(f"🎵 Audio trimming for '{word}':")
     log(f"  Phoneme position: {best_start}-{best_end-1} of {total_phoneme_len} chars")  # Fixed: best_end-1 for actual last index
     log(f"  Calculated trim: start={start_trim_pct:.1%}, end={end_trim_pct:.1%}")
+    log(f"  Adjusted trim (85%): start={start_trim_pct_adjusted:.1%}, end={end_trim_pct_adjusted:.1%}")
     # Calculate samples to trim using adjusted percentages
     total_samples = audio_segment.shape[-1]
                 total_phoneme_len = len(detected_phoneme_norm)
                 if total_phoneme_len > 0:
+                    start_trim_pct = match_start / total_phoneme_len * 0.85  # Apply 85% factor
+                    end_trim_pct = (total_phoneme_len - match_end) / total_phoneme_len * 0.85
                     # Apply trim percentages to timing
                     time_trimmed_from_start = expanded_duration * start_trim_pct