greg0rs committed on
Commit
5dae976
·
verified ·
1 Parent(s): fb55913

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -133,24 +133,24 @@ def clean_word_for_phonemes(word: str) -> str:
133
  return cleaned
134
 
135
  def load_whisperx_models():
136
- """Load WhisperX models lazily"""
137
  global whisperx_model, whisperx_align_model, whisperx_metadata
138
 
139
  if whisperx_model is None:
140
- log("Loading WhisperX models...")
141
  try:
142
  # Use float32 compute type for CPU compatibility
143
- whisperx_model = whisperx.load_model("base", device="cpu", compute_type="float32")
144
  whisperx_align_model, whisperx_metadata = whisperx.load_align_model(language_code="en", device="cpu")
145
- log("WhisperX models loaded successfully")
146
  except Exception as e:
147
  log(f"Error loading WhisperX models: {e}")
148
  # Fallback: try with smaller model
149
  try:
150
- log("Trying fallback with tiny model...")
151
- whisperx_model = whisperx.load_model("tiny", device="cpu", compute_type="float32")
152
  whisperx_align_model, whisperx_metadata = whisperx.load_align_model(language_code="en", device="cpu")
153
- log("WhisperX models loaded with fallback (tiny model)")
154
  except Exception as fallback_error:
155
  log(f"Fallback also failed: {fallback_error}")
156
  raise
@@ -700,11 +700,12 @@ async def transcribe(audio: UploadFile = File(...)):
700
 
701
  log(f"Audio loaded for WhisperX: {len(audio_data)} samples")
702
 
703
- # 2. Get transcription with WhisperX
704
- result = whisperx_model.transcribe(audio_data, batch_size=16)
 
705
 
706
- # 3. Get precise word alignments with WhisperX
707
- aligned_result = whisperx.align(result["segments"], whisperx_align_model, whisperx_metadata, audio_data, device="cpu")
708
 
709
  # Extract word-level data from WhisperX results
710
  words = []
@@ -860,7 +861,7 @@ async def transcribe(audio: UploadFile = File(...)):
860
  "debug_info": {
861
  "total_words": len(words),
862
  "similarity_threshold": similarity,
863
- "alignment_method": "WhisperX + Enhanced Sliding Window with Fallback",
864
  "results_summary": [
865
  {
866
  "word": r['word_text'],
@@ -888,7 +889,7 @@ async def transcribe(audio: UploadFile = File(...)):
888
 
889
  @app.get("/")
890
  def root():
891
- return "Clean Fonetik with WhisperX + Enhanced Character-Level Feedback running"
892
 
893
  @app.post("/api/clear-cache")
894
  def clear_cache():
 
133
  return cleaned
134
 
135
  def load_whisperx_models():
136
+ """Load WhisperX models lazily - ENGLISH ONLY"""
137
  global whisperx_model, whisperx_align_model, whisperx_metadata
138
 
139
  if whisperx_model is None:
140
+ log("Loading WhisperX models (English only)...")
141
  try:
142
  # Use float32 compute type for CPU compatibility
143
+ whisperx_model = whisperx.load_model("base", device="cpu", compute_type="float32", language="en")
144
  whisperx_align_model, whisperx_metadata = whisperx.load_align_model(language_code="en", device="cpu")
145
+ log("WhisperX models loaded successfully (English forced)")
146
  except Exception as e:
147
  log(f"Error loading WhisperX models: {e}")
148
  # Fallback: try with smaller model
149
  try:
150
+ log("Trying fallback with tiny model (English only)...")
151
+ whisperx_model = whisperx.load_model("tiny", device="cpu", compute_type="float32", language="en")
152
  whisperx_align_model, whisperx_metadata = whisperx.load_align_model(language_code="en", device="cpu")
153
+ log("WhisperX models loaded with fallback (tiny model, English forced)")
154
  except Exception as fallback_error:
155
  log(f"Fallback also failed: {fallback_error}")
156
  raise
 
700
 
701
  log(f"Audio loaded for WhisperX: {len(audio_data)} samples")
702
 
703
+ # 2. Get transcription with WhisperX - FORCE ENGLISH
704
+ log("🌍 Forcing WhisperX to use English language detection...")
705
+ result = whisperx_model.transcribe(audio_data, batch_size=16, language="en")
706
 
707
+ # 3. Get precise word alignments with WhisperX - ENGLISH ONLY
708
+ aligned_result = whisperx.align(result["segments"], whisperx_align_model, whisperx_metadata, audio_data, device="cpu", return_char_alignments=False)
709
 
710
  # Extract word-level data from WhisperX results
711
  words = []
 
861
  "debug_info": {
862
  "total_words": len(words),
863
  "similarity_threshold": similarity,
864
+ "alignment_method": "WhisperX + Enhanced Sliding Window with Fallback (English Only)",
865
  "results_summary": [
866
  {
867
  "word": r['word_text'],
 
889
 
890
  @app.get("/")
891
  def root():
892
+ return "Clean Fonetik with WhisperX + Enhanced Character-Level Feedback (English Only) running"
893
 
894
  @app.post("/api/clear-cache")
895
  def clear_cache():