Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,6 +34,54 @@ import whisperx # New: WhisperX for precise alignment
|
|
34 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
35 |
import edge_tts
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
# Phoneme reverse lookup
|
38 |
phoneme_to_words_cache = {}
|
39 |
|
@@ -406,53 +454,7 @@ def convert_webm_to_wav(bts):
|
|
406 |
raise RuntimeError(p.stderr.decode())
|
407 |
return io.BytesIO(p.stdout)
|
408 |
|
409 |
-
def
|
410 |
-
"""Normalize phoneme string for comparison - remove spaces and extra chars"""
|
411 |
-
if not s:
|
412 |
-
return s
|
413 |
-
|
414 |
-
original = s
|
415 |
-
|
416 |
-
# Convert to lowercase and remove spaces, stress marks, and length markers
|
417 |
-
normalized = s.lower().strip()
|
418 |
-
normalized = normalized.replace(' ', '') # Remove spaces between phonemes
|
419 |
-
normalized = normalized.replace('Λ', '') # Remove length markers
|
420 |
-
normalized = normalized.replace('Λ', '') # Remove primary stress
|
421 |
-
normalized = normalized.replace('Λ', '') # Remove secondary stress
|
422 |
-
normalized = normalized.replace('.', '') # Remove syllable boundaries
|
423 |
-
|
424 |
-
# CRITICAL: Normalize ASCII symbols to proper IPA equivalents
|
425 |
-
# Convert all wav2vec2 ASCII characters to standard IPA
|
426 |
-
ascii_to_ipa = {
|
427 |
-
'g': 'Ι‘', # ASCII g β IPA script g (voiced velar stop)
|
428 |
-
'b': 'b', # ASCII b β IPA b (already correct, but explicit)
|
429 |
-
'd': 'd', # ASCII d β IPA d (already correct, but explicit)
|
430 |
-
'f': 'f', # ASCII f β IPA f (already correct, but explicit)
|
431 |
-
'h': 'h', # ASCII h β IPA h (already correct, but explicit)
|
432 |
-
'i': 'i', # ASCII i β IPA i (already correct, but explicit)
|
433 |
-
# Note: Most ASCII phonetic chars are already valid IPA, except 'g'
|
434 |
-
}
|
435 |
-
|
436 |
-
# Normalize variant IPA symbols to consistent forms
|
437 |
-
# Handle different representations of the same sounds
|
438 |
-
ipa_variants = {
|
439 |
-
'Ι': 'Ι', # Open-mid central β r-colored (American English "er")
|
440 |
-
'Ι': 'Ι', # R-colored schwa β r-colored vowel (both "er" sounds)
|
441 |
-
'Κ': 'Ι', # Open-mid back β schwa (both unstressed "uh" sounds)
|
442 |
-
'Γ°': 'ΞΈ', # Voiced th β voiceless th (accent training - treat as equivalent)
|
443 |
-
}
|
444 |
-
|
445 |
-
for ascii_char, ipa_char in ascii_to_ipa.items():
|
446 |
-
normalized = normalized.replace(ascii_char, ipa_char)
|
447 |
-
|
448 |
-
for variant_char, standard_char in ipa_variants.items():
|
449 |
-
normalized = normalized.replace(variant_char, standard_char)
|
450 |
-
|
451 |
-
# Debug specific phoneme strings
|
452 |
-
if 'hello' in original.lower() or 'hΙloΚ' in original or 'hΙloΚ' in original:
|
453 |
-
log(f"π Normalization debug: '{original}' β '{normalized}'")
|
454 |
-
|
455 |
-
return normalized
|
456 |
|
457 |
# TEMPORARILY DISABLED: English letter sounds conversion
|
458 |
# def phoneme_to_english_sounds(phoneme_string: str) -> str:
|
|
|
34 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
35 |
import edge_tts
|
36 |
|
37 |
+
# Single-pass translation table used by normalize_phoneme_string().
# Every non-ASCII symbol is written as a \u escape so the mapping survives
# encoding round-trips; the glyph is repeated in the comment for readability.
# A value of None deletes the character.
_PHONEME_NORMALIZATION_TABLE = str.maketrans({
    # Markers that carry no segmental information are removed.
    " ": None,            # spaces between phonemes
    "\u02d0": None,       # ː  length marker
    "\u02c8": None,       # ˈ  primary stress
    "\u02cc": None,       # ˌ  secondary stress
    ".": None,            # syllable boundary
    # ASCII 'g' is the only ASCII letter that is not already the IPA symbol.
    "g": "\u0261",        # g -> ɡ  (IPA script g, voiced velar stop)
    # Variant IPA symbols folded to one canonical form.
    "\u025c": "\u025d",   # ɜ -> ɝ  (open-mid central -> r-colored "er")
    "\u025a": "\u025d",   # ɚ -> ɝ  (r-colored schwa -> r-colored vowel)
    "\u028c": "\u0259",   # ʌ -> ə  (open-mid back -> schwa)
    "\u00f0": "\u03b8",   # ð -> θ  (voiced th treated as voiceless th)
})


def normalize_phoneme_string(s: str) -> str:
    """Normalize a phoneme string so two transcriptions can be compared.

    The input is lower-cased and stripped of surrounding whitespace, then a
    single translation pass removes spaces, length/stress markers and
    syllable dots, converts ASCII ``g`` to IPA ``\u0261``, and folds a few
    variant IPA symbols onto one canonical symbol each (see
    ``_PHONEME_NORMALIZATION_TABLE``).

    Args:
        s: Phoneme string to normalize. Falsy values (``None``, ``""``) are
            returned unchanged.

    Returns:
        The normalized phoneme string, or ``s`` itself when it is falsy.
    """
    if not s:
        return s

    original = s

    # No replacement target is itself a key of the table, so one translate()
    # pass yields the same result as applying the substitutions one by one.
    normalized = s.lower().strip().translate(_PHONEME_NORMALIZATION_TABLE)

    # Debug aid for "hello"-like inputs.
    # NOTE(review): the two IPA spellings below were reconstructed as
    # "h\u0259lo\u028a" and "h\u025blo\u028a" from mis-encoded text - confirm.
    if (
        "hello" in original.lower()
        or "h\u0259lo\u028a" in original
        or "h\u025blo\u028a" in original
    ):
        # NOTE(review): the leading glyph and the arrow in this message look
        # mis-encoded (probably an emoji and a right arrow). The literal is
        # left untouched because the original emoji cannot be recovered.
        log(f"π Normalization debug: '{original}' β '{normalized}'")

    return normalized
|
84 |
+
|
85 |
# Phoneme reverse lookup
|
86 |
phoneme_to_words_cache = {}
|
87 |
|
|
|
454 |
raise RuntimeError(p.stderr.decode())
|
455 |
return io.BytesIO(p.stdout)
|
456 |
|
457 |
+
def normalize_phoneme_string_DUPLICATE_REMOVED():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
458 |
|
459 |
# TEMPORARILY DISABLED: English letter sounds conversion
|
460 |
# def phoneme_to_english_sounds(phoneme_string: str) -> str:
|