Spaces:

greg0rs
/

fonetik-fast

Running

App Files Files Community

greg0rs committed on Jul 21

Commit

0e7406c

verified ·

1 Parent(s): d01d216

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -2

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from typing import List, Tuple, Dict
 import asyncio
 import base64
 import string
 # Set cache environment
 os.environ['HF_HOME'] = '/tmp/hf'
@@ -117,6 +118,61 @@ PHONEME_TO_ENGLISH = {
     'ˌ': '',       # secondary stress (remove)
 }
 def log(msg):
     """Print *msg* to stdout, prefixed with the current time as ``[HH:MM:SS]``."""
     stamp = datetime.now().strftime('%H:%M:%S')
     print(f"[{stamp}] {msg}")
@@ -730,12 +786,15 @@ async def transcribe(audio: UploadFile = File(...)):
                 for word_info in segment["words"]:
                     if "start" in word_info and "end" in word_info and word_info["word"]:
                         original_word = word_info["word"].strip()
-                        cleaned_word = clean_word_for_phonemes(original_word)
                         # Only process words that have alphabetical content after cleaning
                         if cleaned_word:
                             words.append(word_info)
-                            word_texts.append(original_word)  # Keep original for display
                             word_texts_clean.append(cleaned_word)  # Clean for processing
                             word_timings.append((word_info["start"], word_info["end"]))

 import asyncio
 import base64
 import string
+import re
 # Set cache environment
 os.environ['HF_HOME'] = '/tmp/hf'
     'ˌ': '',       # secondary stress (remove)
 }
+def convert_digits_to_words(text: str) -> str:
+    """Convert digits to word form for better phoneme analysis"""
+    # Dictionary for number conversion
+    number_words = {
+        '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four',
+        '5': 'five', '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine',
+        '10': 'ten', '11': 'eleven', '12': 'twelve', '13': 'thirteen', '14': 'fourteen',
+        '15': 'fifteen', '16': 'sixteen', '17': 'seventeen', '18': 'eighteen', '19': 'nineteen',
+        '20': 'twenty', '30': 'thirty', '40': 'forty', '50': 'fifty',
+        '60': 'sixty', '70': 'seventy', '80': 'eighty', '90': 'ninety',
+        '100': 'one hundred', '1000': 'one thousand'
+    }
+    def convert_number(match):
+        num_str = match.group()
+        num = int(num_str)
+        # Direct lookup for common numbers
+        if num_str in number_words:
+            return number_words[num_str]
+        # Handle numbers 21-99
+        if 21 <= num <= 99:
+            tens = (num // 10) * 10
+            ones = num % 10
+            if ones == 0:
+                return number_words[str(tens)]
+            else:
+                return number_words[str(tens)] + " " + number_words[str(ones)]
+        # Handle numbers 101-999 (basic implementation)
+        if 101 <= num <= 999:
+            hundreds = num // 100
+            remainder = num % 100
+            result = number_words[str(hundreds)] + " hundred"
+            if remainder > 0:
+                if remainder < 21:
+                    result += " " + number_words[str(remainder)]
+                else:
+                    tens = (remainder // 10) * 10
+                    ones = remainder % 10
+                    result += " " + number_words[str(tens)]
+                    if ones > 0:
+                        result += " " + number_words[str(ones)]
+            return result
+        # For larger numbers or edge cases, return original
+        return num_str
+    # Replace standalone digits/numbers with word equivalents
+    converted = re.sub(r'\b\d+\b', convert_number, text)
+    log(f"Number conversion: '{text}' → '{converted}'")
+    return converted
 def log(msg):
     """Print *msg* to stdout, prefixed with the current time as ``[HH:MM:SS]``."""
     stamp = datetime.now().strftime('%H:%M:%S')
     print(f"[{stamp}] {msg}")
                 for word_info in segment["words"]:
                     if "start" in word_info and "end" in word_info and word_info["word"]:
                         original_word = word_info["word"].strip()
+                        # Convert digits to words for better phoneme analysis
+                        word_converted = convert_digits_to_words(original_word)
+                        cleaned_word = clean_word_for_phonemes(word_converted)
                         # Only process words that have alphabetical content after cleaning
                         if cleaned_word:
                             words.append(word_info)
+                            word_texts.append(word_converted)  # Use converted form for display
                             word_texts_clean.append(cleaned_word)  # Clean for processing
                             word_timings.append((word_info["start"], word_info["end"]))