Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ from typing import List, Tuple, Dict
|
|
8 |
import asyncio
|
9 |
import base64
|
10 |
import string
|
|
|
11 |
|
12 |
# Set cache environment
|
13 |
os.environ['HF_HOME'] = '/tmp/hf'
|
@@ -117,6 +118,61 @@ PHONEME_TO_ENGLISH = {
|
|
117 |
'ˌ': '', # secondary stress (remove)
|
118 |
}
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
def log(msg):
    """Print ``msg`` to stdout, prefixed with the current time as ``[HH:MM:SS]``."""
    timestamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{timestamp}] {msg}")
|
122 |
|
@@ -730,12 +786,15 @@ async def transcribe(audio: UploadFile = File(...)):
|
|
730 |
for word_info in segment["words"]:
|
731 |
if "start" in word_info and "end" in word_info and word_info["word"]:
|
732 |
original_word = word_info["word"].strip()
|
733 |
-
|
|
|
|
|
|
|
734 |
|
735 |
# Only process words that have alphabetical content after cleaning
|
736 |
if cleaned_word:
|
737 |
words.append(word_info)
|
738 |
-
word_texts.append(
|
739 |
word_texts_clean.append(cleaned_word) # Clean for processing
|
740 |
word_timings.append((word_info["start"], word_info["end"]))
|
741 |
|
|
|
8 |
import asyncio
|
9 |
import base64
|
10 |
import string
|
11 |
+
import re
|
12 |
|
13 |
# Set cache environment
|
14 |
os.environ['HF_HOME'] = '/tmp/hf'
|
|
|
118 |
'ˌ': '', # secondary stress (remove)
|
119 |
}
|
120 |
|
121 |
+
def convert_digits_to_words(text: str) -> str:
    """Convert digits to word form for better phoneme analysis.

    Every standalone run of digits in ``text`` (a run delimited by word
    boundaries, so the "3" in "3rd" is not touched) is replaced as follows:

    * 0-999 and exactly 1000 are spelled as English cardinals, space
      separated, without "and" or hyphens
      (``"342"`` -> ``"three hundred forty two"``).
    * Zero-padded runs such as ``"05"`` or ``"007"`` are spelled digit by
      digit (``"zero five"``, ``"zero zero seven"``) instead of being left
      as digits.
    * Every other number (1001 and above) is returned unchanged.

    The before/after strings are reported through ``log`` on every call.

    Args:
        text: Text that may contain digit runs.

    Returns:
        ``text`` with the supported numbers replaced by words.
    """

    # Dictionary for number conversion
    number_words = {
        '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four',
        '5': 'five', '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine',
        '10': 'ten', '11': 'eleven', '12': 'twelve', '13': 'thirteen', '14': 'fourteen',
        '15': 'fifteen', '16': 'sixteen', '17': 'seventeen', '18': 'eighteen', '19': 'nineteen',
        '20': 'twenty', '30': 'thirty', '40': 'forty', '50': 'fifty',
        '60': 'sixty', '70': 'seventy', '80': 'eighty', '90': 'ninety',
        '100': 'one hundred', '1000': 'one thousand'
    }

    def spell_below_hundred(value):
        """Return the words for an integer in the range 1-99."""
        if value <= 20:
            return number_words[str(value)]
        tens, ones = divmod(value, 10)
        words = number_words[str(tens * 10)]
        if ones:
            words += " " + number_words[str(ones)]
        return words

    def convert_number(match):
        """Return the replacement text for one matched digit run."""
        num_str = match.group()

        # Direct lookup for common numbers
        if num_str in number_words:
            return number_words[num_str]

        # Zero-padded runs ("05", "007") miss the lookup above and, by value,
        # the ranges below; spell them digit by digit so they still yield
        # alphabetical text. int() maps each matched digit character to its
        # value, so the lookup key is always one of '0'..'9'.
        if num_str[0] == '0':
            return " ".join(number_words[str(int(digit))] for digit in num_str)

        try:
            num = int(num_str)
        except ValueError:
            # int() rejects digit strings longer than the interpreter's
            # conversion limit; such a run is far outside the supported
            # range anyway, so leave it untouched.
            return num_str

        # Handle numbers 21-99
        if 21 <= num <= 99:
            return spell_below_hundred(num)

        # Handle numbers 101-999 (basic implementation)
        if 101 <= num <= 999:
            hundreds, remainder = divmod(num, 100)
            result = number_words[str(hundreds)] + " hundred"
            if remainder:
                result += " " + spell_below_hundred(remainder)
            return result

        # For larger numbers or edge cases, return original
        return num_str

    # Replace standalone digits/numbers with word equivalents
    converted = re.sub(r'\b\d+\b', convert_number, text)
    log(f"Number conversion: '{text}' → '{converted}'")
    return converted
|
175 |
+
|
176 |
def log(msg):
    """Print ``msg`` to stdout, prefixed with the current time as ``[HH:MM:SS]``."""
    timestamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{timestamp}] {msg}")
|
178 |
|
|
|
786 |
for word_info in segment["words"]:
|
787 |
if "start" in word_info and "end" in word_info and word_info["word"]:
|
788 |
original_word = word_info["word"].strip()
|
789 |
+
|
790 |
+
# Convert digits to words for better phoneme analysis
|
791 |
+
word_converted = convert_digits_to_words(original_word)
|
792 |
+
cleaned_word = clean_word_for_phonemes(word_converted)
|
793 |
|
794 |
# Only process words that have alphabetical content after cleaning
|
795 |
if cleaned_word:
|
796 |
words.append(word_info)
|
797 |
+
word_texts.append(word_converted) # Use converted form for display
|
798 |
word_texts_clean.append(cleaned_word) # Clean for processing
|
799 |
word_timings.append((word_info["start"], word_info["end"]))
|
800 |
|