Spaces:

greg0rs
/

fonetik-fast

Sleeping

App Files Community

greg0rs committed on Jul 21

Commit

5dae976

verified ·

1 Parent(s): fb55913

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -13

app.py CHANGED Viewed

@@ -133,24 +133,24 @@ def clean_word_for_phonemes(word: str) -> str:
     return cleaned
 def load_whisperx_models():
-    """Load WhisperX models lazily"""
     global whisperx_model, whisperx_align_model, whisperx_metadata
     if whisperx_model is None:
-        log("Loading WhisperX models...")
         try:
             # Use float32 compute type for CPU compatibility
-            whisperx_model = whisperx.load_model("base", device="cpu", compute_type="float32")
             whisperx_align_model, whisperx_metadata = whisperx.load_align_model(language_code="en", device="cpu")
-            log("WhisperX models loaded successfully")
         except Exception as e:
             log(f"Error loading WhisperX models: {e}")
             # Fallback: try with smaller model
             try:
-                log("Trying fallback with tiny model...")
-                whisperx_model = whisperx.load_model("tiny", device="cpu", compute_type="float32")
                 whisperx_align_model, whisperx_metadata = whisperx.load_align_model(language_code="en", device="cpu")
-                log("WhisperX models loaded with fallback (tiny model)")
             except Exception as fallback_error:
                 log(f"Fallback also failed: {fallback_error}")
                 raise
@@ -700,11 +700,12 @@ async def transcribe(audio: UploadFile = File(...)):
         log(f"Audio loaded for WhisperX: {len(audio_data)} samples")
-        # 2. Get transcription with WhisperX
-        result = whisperx_model.transcribe(audio_data, batch_size=16)
-        # 3. Get precise word alignments with WhisperX
-        aligned_result = whisperx.align(result["segments"], whisperx_align_model, whisperx_metadata, audio_data, device="cpu")
         # Extract word-level data from WhisperX results
         words = []
@@ -860,7 +861,7 @@ async def transcribe(audio: UploadFile = File(...)):
             "debug_info": {
                 "total_words": len(words),
                 "similarity_threshold": similarity,
-                "alignment_method": "WhisperX + Enhanced Sliding Window with Fallback",
                 "results_summary": [
                     {
                         "word": r['word_text'],
@@ -888,7 +889,7 @@ async def transcribe(audio: UploadFile = File(...)):
 @app.get("/")
 def root():
-    return "Clean Fonetik with WhisperX + Enhanced Character-Level Feedback running"
 @app.post("/api/clear-cache")
 def clear_cache():

     return cleaned
 def load_whisperx_models():
+    """Load WhisperX models lazily - ENGLISH ONLY"""
     global whisperx_model, whisperx_align_model, whisperx_metadata
     if whisperx_model is None:
+        log("Loading WhisperX models (English only)...")
         try:
             # Use float32 compute type for CPU compatibility
+            whisperx_model = whisperx.load_model("base", device="cpu", compute_type="float32", language="en")
             whisperx_align_model, whisperx_metadata = whisperx.load_align_model(language_code="en", device="cpu")
+            log("WhisperX models loaded successfully (English forced)")
         except Exception as e:
             log(f"Error loading WhisperX models: {e}")
             # Fallback: try with smaller model
             try:
+                log("Trying fallback with tiny model (English only)...")
+                whisperx_model = whisperx.load_model("tiny", device="cpu", compute_type="float32", language="en")
                 whisperx_align_model, whisperx_metadata = whisperx.load_align_model(language_code="en", device="cpu")
+                log("WhisperX models loaded with fallback (tiny model, English forced)")
             except Exception as fallback_error:
                 log(f"Fallback also failed: {fallback_error}")
                 raise
         log(f"Audio loaded for WhisperX: {len(audio_data)} samples")
+        # 2. Get transcription with WhisperX - FORCE ENGLISH
+        log("🌍 Forcing WhisperX to use English language detection...")
+        result = whisperx_model.transcribe(audio_data, batch_size=16, language="en")
+        # 3. Get precise word alignments with WhisperX - ENGLISH ONLY
+        aligned_result = whisperx.align(result["segments"], whisperx_align_model, whisperx_metadata, audio_data, device="cpu", return_char_alignments=False)
         # Extract word-level data from WhisperX results
         words = []
             "debug_info": {
                 "total_words": len(words),
                 "similarity_threshold": similarity,
+                "alignment_method": "WhisperX + Enhanced Sliding Window with Fallback (English Only)",
                 "results_summary": [
                     {
                         "word": r['word_text'],
 @app.get("/")
 def root():
+    return "Clean Fonetik with WhisperX + Enhanced Character-Level Feedback (English Only) running"
 @app.post("/api/clear-cache")
 def clear_cache():