Spaces:
Paused
Paused
Update src/ASL_gloss_functions.py
Browse files- src/ASL_gloss_functions.py +13 -8
src/ASL_gloss_functions.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
# Define a list of question adverbs
|
2 |
opened_question_adverbs = ["how", "when", "where", "why", "how much", "how many", "how often", "how long", "what", "which", "who", "whose", "whom"]
|
3 |
|
4 |
-
|
5 |
-
time_words = ["yesterday", "today", "tomorrow"]
|
6 |
|
7 |
# ASL glossing rules implemented in functions
|
8 |
def gloss_word(word):
|
@@ -36,14 +36,13 @@ def add_time_indicator(gloss_sentence_):
|
|
36 |
return f"{word.text.upper()} {gloss_sentence_.replace(word.text.upper(), '').strip()}"
|
37 |
return gloss_sentence_
|
38 |
|
39 |
-
|
40 |
def skip_stop_words(word):
    """Drop English articles, passing every other word through unchanged.

    Args:
        word: The word (or gloss) to filter; compared case-insensitively.

    Returns:
        An empty string when ``word`` is an article ("the", "a", "an"),
        otherwise ``word`` exactly as it was given.
    """
    # "an" is filtered together with "a": it is the same indefinite article
    # and would otherwise slip through into the result.
    if word.lower() in ('the', 'a', 'an'):
        return ''
    return word
|
45 |
|
46 |
-
## doc is a list of tokens
|
47 |
def question_type(doc):
|
48 |
try:
|
49 |
if doc[-1].text == '?':
|
@@ -52,11 +51,9 @@ def question_type(doc):
|
|
52 |
else:
|
53 |
return "yes-no-question"
|
54 |
return None
|
55 |
-
|
56 |
except IndexError:
|
57 |
return None
|
58 |
|
59 |
-
# Modified process_sentence function
|
60 |
def process_sentence(doc):
|
61 |
nms = {
|
62 |
"wh-question": "wh-q",
|
@@ -66,12 +63,19 @@ def process_sentence(doc):
|
|
66 |
"car": "CL:3",
|
67 |
"person": "CL:1"
|
68 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
glossed_sentence = []
|
71 |
for token in doc:
|
72 |
word = token.lemma_.lower()
|
73 |
|
74 |
-
# 고유명사 처리 추가
|
75 |
if token.pos_ == "PROPN":
|
76 |
glossed_word = token.text.upper()
|
77 |
elif word in ["i", "me"]:
|
@@ -80,13 +84,14 @@ def process_sentence(doc):
|
|
80 |
glossed_word = handle_indexing("YOU", 2)
|
81 |
elif word in classifiers:
|
82 |
glossed_word = classifiers[word]
|
|
|
|
|
83 |
else:
|
84 |
glossed_word = gloss_word(word)
|
85 |
|
86 |
glossed_word = skip_stop_words(glossed_word)
|
87 |
glossed_sentence.append(glossed_word)
|
88 |
|
89 |
-
# Move time words to beginning
|
90 |
for gloss in glossed_sentence:
|
91 |
if gloss.lower() in time_words:
|
92 |
glossed_sentence.insert(0, glossed_sentence.pop(glossed_sentence.index(gloss)))
|
|
|
1 |
# Define a list of question adverbs
|
2 |
opened_question_adverbs = ["how", "when", "where", "why", "how much", "how many", "how often", "how long", "what", "which", "who", "whose", "whom"]
|
3 |
|
4 |
+
# Time adverbs to be moved to the beginning of ASL gloss sentences
|
5 |
+
time_words = ["yesterday", "today", "tomorrow", "now", "before", "after", "morning", "afternoon", "evening", "night"]
|
6 |
|
7 |
# ASL glossing rules implemented in functions
|
8 |
def gloss_word(word):
|
|
|
36 |
return f"{word.text.upper()} {gloss_sentence_.replace(word.text.upper(), '').strip()}"
|
37 |
return gloss_sentence_
|
38 |
|
39 |
+
# skip stop_words
|
40 |
def skip_stop_words(word):
    """Drop English articles, passing every other word through unchanged.

    Args:
        word: The word (or gloss) to filter; compared case-insensitively.

    Returns:
        An empty string when ``word`` is an article ("the", "a", "an"),
        otherwise ``word`` exactly as it was given.
    """
    # "an" is filtered together with "a": it is the same indefinite article
    # and would otherwise slip through into the result.
    if word.lower() in ('the', 'a', 'an'):
        return ''
    return word
|
45 |
|
|
|
46 |
def question_type(doc):
|
47 |
try:
|
48 |
if doc[-1].text == '?':
|
|
|
51 |
else:
|
52 |
return "yes-no-question"
|
53 |
return None
|
|
|
54 |
except IndexError:
|
55 |
return None
|
56 |
|
|
|
57 |
def process_sentence(doc):
|
58 |
nms = {
|
59 |
"wh-question": "wh-q",
|
|
|
63 |
"car": "CL:3",
|
64 |
"person": "CL:1"
|
65 |
}
|
66 |
+
basic_verbs = {
|
67 |
+
"is": "BE",
|
68 |
+
"am": "BE",
|
69 |
+
"are": "BE",
|
70 |
+
"was": "BE",
|
71 |
+
"were": "BE",
|
72 |
+
"be": "BE"
|
73 |
+
}
|
74 |
|
75 |
glossed_sentence = []
|
76 |
for token in doc:
|
77 |
word = token.lemma_.lower()
|
78 |
|
|
|
79 |
if token.pos_ == "PROPN":
|
80 |
glossed_word = token.text.upper()
|
81 |
elif word in ["i", "me"]:
|
|
|
84 |
glossed_word = handle_indexing("YOU", 2)
|
85 |
elif word in classifiers:
|
86 |
glossed_word = classifiers[word]
|
87 |
+
elif word in basic_verbs:
|
88 |
+
glossed_word = basic_verbs[word]
|
89 |
else:
|
90 |
glossed_word = gloss_word(word)
|
91 |
|
92 |
glossed_word = skip_stop_words(glossed_word)
|
93 |
glossed_sentence.append(glossed_word)
|
94 |
|
|
|
95 |
for gloss in glossed_sentence:
|
96 |
if gloss.lower() in time_words:
|
97 |
glossed_sentence.insert(0, glossed_sentence.pop(glossed_sentence.index(gloss)))
|