Spaces: Paused
Update src/main.py
Browse files
- src/main.py +34 -39
src/main.py
CHANGED
@@ -6,8 +6,7 @@ import io
|
|
6 |
import cv2
|
7 |
import numpy as np
|
8 |
import os
|
9 |
-
import aiohttp
|
10 |
-
import asyncio
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
from urllib.parse import quote, unquote
|
13 |
import tempfile
|
@@ -70,8 +69,8 @@ def spell_out_word(word: str) -> str:
|
|
70 |
"""단어를 개별 알파벳으로 분리하는 함수"""
|
71 |
return ' '.join(list(word.lower()))
|
72 |
|
73 |
-
|
74 |
-
"""
|
75 |
try:
|
76 |
url = "https://translate.googleapis.com/translate_a/single"
|
77 |
params = {
|
@@ -82,20 +81,20 @@ async def translate_text_chunk(session: aiohttp.ClientSession, text: str, source
|
|
82 |
"q": text
|
83 |
}
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
|
93 |
except Exception as e:
|
94 |
logger.error(f"Translation error: {e}")
|
95 |
return text
|
96 |
|
97 |
-
|
98 |
-
"""한글 전용 번역 함수
|
99 |
try:
|
100 |
quoted_match = re.search(r"'([^']*)'", text)
|
101 |
if not quoted_match:
|
@@ -103,29 +102,28 @@ async def translate_korean_text(text: str) -> str:
|
|
103 |
|
104 |
quoted_word = quoted_match.group(1)
|
105 |
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
proper_noun = (await translate_text_chunk(session, quoted_word, "ko", "en")).upper()
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
|
123 |
except Exception as e:
|
124 |
logger.error(f"Korean translation error: {e}")
|
125 |
return text
|
126 |
|
127 |
-
|
128 |
-
"""전체 텍스트 번역 함수
|
129 |
try:
|
130 |
text = normalize_quotes(text)
|
131 |
|
@@ -137,7 +135,7 @@ async def translate_korean_to_english(text: str) -> str:
|
|
137 |
return text
|
138 |
|
139 |
if is_korean(text):
|
140 |
-
return
|
141 |
|
142 |
return text
|
143 |
except Exception as e:
|
@@ -214,7 +212,7 @@ def index():
|
|
214 |
return render_template('index.html', title=app.config['TITLE'])
|
215 |
|
216 |
@app.route('/translate/', methods=['POST'])
|
217 |
-
|
218 |
if request.method == 'POST':
|
219 |
input_text = request.form['inputSentence'].strip()
|
220 |
if not input_text:
|
@@ -222,7 +220,7 @@ async def result():
|
|
222 |
|
223 |
try:
|
224 |
input_text = normalize_quotes(input_text)
|
225 |
-
english_text =
|
226 |
if not english_text:
|
227 |
raise Exception("Translation failed")
|
228 |
|
@@ -234,9 +232,7 @@ async def result():
|
|
234 |
eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=clean_english)
|
235 |
return eng_to_asl_translator.translate_to_gloss()
|
236 |
|
237 |
-
generated_gloss =
|
238 |
-
executor, process_nlp
|
239 |
-
)
|
240 |
|
241 |
# Gloss 처리 최적화
|
242 |
processed_gloss = []
|
@@ -269,14 +265,13 @@ async def result():
|
|
269 |
word = processed_gloss[i]
|
270 |
# 동의어 찾기를 스레드 풀에서 실행
|
271 |
final_gloss.append(
|
272 |
-
|
273 |
-
executor,
|
274 |
sp.find_synonyms,
|
275 |
word,
|
276 |
nlp,
|
277 |
dict_docs_spacy,
|
278 |
list_2000_tokens
|
279 |
-
)
|
280 |
)
|
281 |
i += 1
|
282 |
|
|
|
6 |
import cv2
|
7 |
import numpy as np
|
8 |
import os
|
9 |
+
import requests
|
|
|
10 |
from concurrent.futures import ThreadPoolExecutor
|
11 |
from urllib.parse import quote, unquote
|
12 |
import tempfile
|
|
|
69 |
"""단어를 개별 알파벳으로 분리하는 함수"""
|
70 |
return ' '.join(list(word.lower()))
|
71 |
|
72 |
+
def translate_text_chunk(text: str, source_lang: str, target_lang: str) -> str:
|
73 |
+
"""텍스트 번역 함수"""
|
74 |
try:
|
75 |
url = "https://translate.googleapis.com/translate_a/single"
|
76 |
params = {
|
|
|
81 |
"q": text
|
82 |
}
|
83 |
|
84 |
+
response = requests.get(url, params=params)
|
85 |
+
if response.status_code != 200:
|
86 |
+
logger.error(f"Translation API error: {response.status_code}")
|
87 |
+
return text
|
88 |
+
|
89 |
+
data = response.json()
|
90 |
+
return ' '.join(item[0] for item in data[0] if item[0])
|
91 |
|
92 |
except Exception as e:
|
93 |
logger.error(f"Translation error: {e}")
|
94 |
return text
|
95 |
|
96 |
+
def translate_korean_text(text: str) -> str:
|
97 |
+
"""한글 전용 번역 함수"""
|
98 |
try:
|
99 |
quoted_match = re.search(r"'([^']*)'", text)
|
100 |
if not quoted_match:
|
|
|
102 |
|
103 |
quoted_word = quoted_match.group(1)
|
104 |
|
105 |
+
# 본문 번역
|
106 |
+
main_text = text.replace(f"'{quoted_word}'", "XXXXX")
|
107 |
+
translated_main = translate_text_chunk(main_text, "ko", "en")
|
108 |
+
|
109 |
+
# 인용된 단어 처리
|
110 |
+
if re.match(r'^[A-Za-z]+$', quoted_word):
|
111 |
+
proper_noun = quoted_word.upper()
|
112 |
+
else:
|
113 |
+
proper_noun = translate_text_chunk(quoted_word, "ko", "en").upper()
|
|
|
114 |
|
115 |
+
final_text = translated_main.replace("XXXXX", f"'{proper_noun}'")
|
116 |
+
final_text = re.sub(r'\bNAME\b', 'name', final_text)
|
117 |
+
final_text = final_text.replace(" .", ".")
|
118 |
+
|
119 |
+
return final_text
|
120 |
|
121 |
except Exception as e:
|
122 |
logger.error(f"Korean translation error: {e}")
|
123 |
return text
|
124 |
|
125 |
+
def translate_korean_to_english(text: str) -> str:
|
126 |
+
"""전체 텍스트 번역 함수"""
|
127 |
try:
|
128 |
text = normalize_quotes(text)
|
129 |
|
|
|
135 |
return text
|
136 |
|
137 |
if is_korean(text):
|
138 |
+
return translate_korean_text(text)
|
139 |
|
140 |
return text
|
141 |
except Exception as e:
|
|
|
212 |
return render_template('index.html', title=app.config['TITLE'])
|
213 |
|
214 |
@app.route('/translate/', methods=['POST'])
|
215 |
+
def result():
|
216 |
if request.method == 'POST':
|
217 |
input_text = request.form['inputSentence'].strip()
|
218 |
if not input_text:
|
|
|
220 |
|
221 |
try:
|
222 |
input_text = normalize_quotes(input_text)
|
223 |
+
english_text = translate_korean_to_english(input_text)
|
224 |
if not english_text:
|
225 |
raise Exception("Translation failed")
|
226 |
|
|
|
232 |
eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=clean_english)
|
233 |
return eng_to_asl_translator.translate_to_gloss()
|
234 |
|
235 |
+
generated_gloss = executor.submit(process_nlp).result()
|
|
|
|
|
236 |
|
237 |
# Gloss 처리 최적화
|
238 |
processed_gloss = []
|
|
|
265 |
word = processed_gloss[i]
|
266 |
# 동의어 찾기를 스레드 풀에서 실행
|
267 |
final_gloss.append(
|
268 |
+
executor.submit(
|
|
|
269 |
sp.find_synonyms,
|
270 |
word,
|
271 |
nlp,
|
272 |
dict_docs_spacy,
|
273 |
list_2000_tokens
|
274 |
+
).result()
|
275 |
)
|
276 |
i += 1
|
277 |
|