Spaces:

ginigen
/

Sign-language

Paused

App Files Files Community

ginipick committed on Jan 26

Commit

aa40f07

verified ·

1 Parent(s): 0b28249

Update src/main.py

Browse files

Files changed (1) hide show

src/main.py +42 -49

src/main.py CHANGED Viewed

@@ -17,59 +17,52 @@ app.config['TITLE'] = 'Sign Language Translate'
 nlp, dict_docs_spacy = sp.load_spacy_values()
 dataset, list_2000_tokens = dg.load_data()
 def find_quoted_words(text):
     """작은따옴표로 묶인 단어들을 찾는 함수"""
-    # 따옴표가 없는 단어에 따옴표 추가 (예: 한국'을 '한국'으로)
-    text = re.sub(r"(\w+)'", r"'\1'", text)
     return re.findall(r"'([^']*)'", text)
 def spell_out_word(word):
     """Spell a word out character by character.

     The word is lower-cased and its characters are returned as one string
     with a single space between each of them.
     """
     lowered = word.lower()
     return ' '.join(lowered)
-def translate_quoted_word(word):
-    """따옴표 안의 단어를 개별적으로 번역"""
-    try:
-        url = "https://translate.googleapis.com/translate_a/single"
-        params = {
-            "client": "gtx",
-            "sl": "ko",
-            "tl": "en",
-            "dt": "t",
-            "q": word
-        }
-        response = requests.get(url, params=params)
-        if response.status_code == 200:
-            translated = response.json()[0][0][0].upper()
-            return translated
-        return word
-    except Exception as e:
-        print(f"Word translation error: {e}")
-        return word
-def normalize_quotes(text):
-    """따옴표 형식을 정규화하는 함수"""
-    # 따옴표가 없는 단어 끝에 붙은 따옴표 처리
-    text = re.sub(r"(\w+)'", r"'\1'", text)
-    # 한쪽 따옴표만 있는 경우 처리
-    text = re.sub(r"'(\w+)(?!')", r"'\1'", text)
-    text = re.sub(r"(?<!')(\w+)'", r"'\1'", text)
-    return text
-def normalize_quotes(text):
-    """따옴표 형식을 정규화하는 함수"""
-    # 따옴표가 없는 단어 끝에 붙은 따옴표 처리
-    text = re.sub(r"(\w+)'", r"'\1'", text)
-    # 한쪽 따옴표만 있는 경우 처리
-    text = re.sub(r"'(\w+)(?!')", r"'\1'", text)
-    text = re.sub(r"(?<!')(\w+)'", r"'\1'", text)
-    return text
 def is_english(text):
     """텍스트가 영어인지 확인하는 함수"""
-    # 영어 알파벳과 기본적인 문장부호만 포함되어 있는지 확인
     english_pattern = re.compile(r'^[A-Za-z\s\'".,!?-]+$')
     return bool(english_pattern.match(text.replace("'", "")))
@@ -89,6 +82,8 @@ def translate_korean_to_english(text):
         # 따옴표 안의 단어들 먼저 번역
         for word in quoted_words:
             url = "https://translate.googleapis.com/translate_a/single"
             params = {
                 "client": "gtx",
@@ -128,6 +123,10 @@ def translate_korean_to_english(text):
         print(f"Translation error: {e}")
         return text
 @app.route('/translate/', methods=['POST'])
 def result():
     if request.method == 'POST':
@@ -242,12 +241,6 @@ def generate_complete_video(gloss_list, dataset, list_2000_tokens):
         print(f"Error generating video: {str(e)}")
         raise
-@app.route('/')
-def index():
-    return render_template('index.html', title=app.config['TITLE'])
 @app.route('/video_feed')
 def video_feed():
     sentence = request.args.get('gloss_sentence_to_display', '')

 nlp, dict_docs_spacy = sp.load_spacy_values()
 dataset, list_2000_tokens = dg.load_data()
+def clean_quotes(text):
+    """Tidy up single quotes in text.
+
+    Collapses every run of consecutive single quotes into one quote, then
+    drops a single quote that sits directly between two word characters.
+    Returns the cleaned string.
+    """
+    # Collapse runs of consecutive quotes into a single quote
+    text = re.sub(r"'+", "'", text)
+    # Remove a quote found in the middle of a word
+    # NOTE(review): matches do not overlap, so in "a'b'c" only the first
+    # quote is removed. Also, assuming text is a str, \w covers Hangul, so a
+    # closing quote directly followed by a particle is dropped as well;
+    # confirm that this is intended.
+    text = re.sub(r"(\w)'(\w)", r"\1\2", text)
+    return text
+def normalize_quotes(text):
+    """Normalize single-quote usage in text.
+
+    The text is first passed through clean_quotes and then split on
+    whitespace. Each token that contains Hangul syllables or English letters
+    and has no single quote at either end is wrapped in single quotes. The
+    tokens are re-joined with a single space and returned.
+    """
+    # Clean up all quotes first
+    text = clean_quotes(text)
+    # Matches a run of Hangul syllables or English letters
+    pattern = r'([가-힣A-Za-z]+)'
+    # NOTE(review): process_match is never called within this function.
+    def process_match(match):
+        word = match.group(1)
+        # Leave the word alone if it is already wrapped in quotes
+        if not re.match(r"'.*'", word):
+            return f"'{word}'"
+        return word
+    # Process the text token by token
+    words = text.split()
+    processed_words = []
+    for word in words:
+        if re.search(pattern, word):
+            # Add quotes only when neither end has one; a token with a quote
+            # on just one side is kept unchanged
+            if not word.startswith("'") and not word.endswith("'"):
+                word = f"'{word}'"
+        processed_words.append(word)
+    return ' '.join(processed_words)
 def find_quoted_words(text):
     """Return the contents of every single-quoted span in text.

     Spans are matched left to right without overlapping; an empty pair of
     quotes yields an empty string.
     """
     quoted_span = re.compile(r"'([^']*)'")
     return [hit.group(1) for hit in quoted_span.finditer(text)]
 def spell_out_word(word):
     """Spell a word out character by character.

     The word is lower-cased and its characters are returned as one string
     with a single space between each of them.
     """
     lowered = word.lower()
     return ' '.join(lowered)
 def is_english(text):
     """Report whether text looks like English.

     Single quotes are removed first. The result is True only when what
     remains is non-empty and made up solely of the letters A-Z / a-z,
     whitespace, and the characters " . , ! ? -
     """
     without_apostrophes = text.replace("'", "")
     if re.match(r'^[A-Za-z\s\'".,!?-]+$', without_apostrophes) is None:
         return False
     return True
         # 따옴표 안의 단어들 먼저 번역
         for word in quoted_words:
+            if not word.strip():  # 빈 문자열 건너뛰기
+                continue
             url = "https://translate.googleapis.com/translate_a/single"
             params = {
                 "client": "gtx",
         print(f"Translation error: {e}")
         return text
+@app.route('/')
+def index():
+    """Serve the root URL by rendering index.html with the configured title."""
+    return render_template('index.html', title=app.config['TITLE'])
 @app.route('/translate/', methods=['POST'])
 def result():
     if request.method == 'POST':
         print(f"Error generating video: {str(e)}")
         raise
 @app.route('/video_feed')
 def video_feed():
     sentence = request.args.get('gloss_sentence_to_display', '')