Spaces:

greg0rs
/

fonetik-fast

Sleeping

App Files Files Community

greg0rs committed on Jul 23

Commit

00d10fe

verified ·

1 Parent(s): 127baac

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -47

app.py CHANGED Viewed

@@ -34,6 +34,54 @@ import whisperx  # New: WhisperX for precise alignment
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import edge_tts
 # Phoneme reverse lookup
 phoneme_to_words_cache = {}
@@ -406,53 +454,7 @@ def convert_webm_to_wav(bts):
         raise RuntimeError(p.stderr.decode())
     return io.BytesIO(p.stdout)
-def normalize_phoneme_string(s: str) -> str:
-    """Normalize phoneme string for comparison - remove spaces and extra chars"""
-    if not s:
-        return s
-    original = s
-    # Convert to lowercase and remove spaces, stress marks, and length markers
-    normalized = s.lower().strip()
-    normalized = normalized.replace(' ', '')  # Remove spaces between phonemes
-    normalized = normalized.replace('ː', '')  # Remove length markers
-    normalized = normalized.replace('ˈ', '')  # Remove primary stress
-    normalized = normalized.replace('ˌ', '')  # Remove secondary stress
-    normalized = normalized.replace('.', '')  # Remove syllable boundaries
-    # CRITICAL: Normalize ASCII symbols to proper IPA equivalents
-    # Convert all wav2vec2 ASCII characters to standard IPA
-    ascii_to_ipa = {
-        'g': 'ɡ',    # ASCII g → IPA script g (voiced velar stop)
-        'b': 'b',    # ASCII b → IPA b (already correct, but explicit)
-        'd': 'd',    # ASCII d → IPA d (already correct, but explicit)
-        'f': 'f',    # ASCII f → IPA f (already correct, but explicit)
-        'h': 'h',    # ASCII h → IPA h (already correct, but explicit)
-        'i': 'i',    # ASCII i → IPA i (already correct, but explicit)
-        # Note: Most ASCII phonetic chars are already valid IPA, except 'g'
-    }
-    # Normalize variant IPA symbols to consistent forms
-    # Handle different representations of the same sounds
-    ipa_variants = {
-        'ɜ': 'ɝ',    # Open-mid central → r-colored (American English "er")
-        'ɚ': 'ɝ',    # R-colored schwa → r-colored vowel (both "er" sounds)
-        'ʌ': 'ə',    # Open-mid back → schwa (both unstressed "uh" sounds)
-        'ð': 'θ',    # Voiced th → voiceless th (accent training - treat as equivalent)
-    }
-    for ascii_char, ipa_char in ascii_to_ipa.items():
-        normalized = normalized.replace(ascii_char, ipa_char)
-    for variant_char, standard_char in ipa_variants.items():
-        normalized = normalized.replace(variant_char, standard_char)
-    # Debug specific phoneme strings
-    if 'hello' in original.lower() or 'həloʊ' in original or 'hɛloʊ' in original:
-        log(f"🔍 Normalization debug: '{original}' → '{normalized}'")
-    return normalized
 # TEMPORARILY DISABLED: English letter sounds conversion
 # def phoneme_to_english_sounds(phoneme_string: str) -> str:

 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import edge_tts
+def normalize_phoneme_string(s: str) -> str:
+    """Normalize phoneme string for comparison - remove spaces and extra chars"""
+    if not s:
+        return s
+    original = s
+    # Convert to lowercase and remove spaces, stress marks, and length markers
+    normalized = s.lower().strip()
+    normalized = normalized.replace(' ', '')  # Remove spaces between phonemes
+    normalized = normalized.replace('ː', '')  # Remove length markers
+    normalized = normalized.replace('ˈ', '')  # Remove primary stress
+    normalized = normalized.replace('ˌ', '')  # Remove secondary stress
+    normalized = normalized.replace('.', '')  # Remove syllable boundaries
+    # CRITICAL: Normalize ASCII symbols to proper IPA equivalents
+    # Convert all wav2vec2 ASCII characters to standard IPA
+    ascii_to_ipa = {
+        'g': 'ɡ',    # ASCII g → IPA script g (voiced velar stop)
+        'b': 'b',    # ASCII b → IPA b (already correct, but explicit)
+        'd': 'd',    # ASCII d → IPA d (already correct, but explicit)
+        'f': 'f',    # ASCII f → IPA f (already correct, but explicit)
+        'h': 'h',    # ASCII h → IPA h (already correct, but explicit)
+        'i': 'i',    # ASCII i → IPA i (already correct, but explicit)
+        # Note: Most ASCII phonetic chars are already valid IPA, except 'g'
+    }
+    # Normalize variant IPA symbols to consistent forms
+    # Handle different representations of the same sounds
+    ipa_variants = {
+        'ɜ': 'ɝ',    # Open-mid central → r-colored (American English "er")
+        'ɚ': 'ɝ',    # R-colored schwa → r-colored vowel (both "er" sounds)
+        'ʌ': 'ə',    # Open-mid back → schwa (both unstressed "uh" sounds)
+        'ð': 'θ',    # Voiced th → voiceless th (accent training - treat as equivalent)
+    }
+    for ascii_char, ipa_char in ascii_to_ipa.items():
+        normalized = normalized.replace(ascii_char, ipa_char)
+    for variant_char, standard_char in ipa_variants.items():
+        normalized = normalized.replace(variant_char, standard_char)
+    # Debug specific phoneme strings
+    if 'hello' in original.lower() or 'həloʊ' in original or 'hɛloʊ' in original:
+        log(f"🔍 Normalization debug: '{original}' → '{normalized}'")
+    return normalized
 # Phoneme reverse lookup
 phoneme_to_words_cache = {}
         raise RuntimeError(p.stderr.decode())
     return io.BytesIO(p.stdout)
+def normalize_phoneme_string_DUPLICATE_REMOVED():
 # TEMPORARILY DISABLED: English letter sounds conversion
 # def phoneme_to_english_sounds(phoneme_string: str) -> str: