Ashlee Kupor
committed on
Commit
·
a1cf18b
1
Parent(s):
c0139e5
Add truncate end of prior text
Browse files- handler.py +12 -3
handler.py
CHANGED
@@ -31,10 +31,10 @@ class EndpointHandler():
|
|
31 |
)
|
32 |
|
33 |
def utterance_to_str(self, utterance: Utterance) -> (List[str], str):
|
34 |
-
#
|
35 |
|
36 |
doc = nlp(utterance.text)
|
37 |
-
prior_text = self.get_prior_text(utterance)
|
38 |
|
39 |
if len(doc) > token_limit:
|
40 |
utterance_text_list = self.handle_long_utterances(doc)
|
@@ -46,6 +46,15 @@ class EndpointHandler():
|
|
46 |
else:
|
47 |
return [prior_text, utterance.text], 'single'
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
def format_speaker(self, speaker: str, source: str) -> str:
|
50 |
prior_text = ''
|
51 |
if speaker == 'student':
|
@@ -165,7 +174,7 @@ class EndpointHandler():
|
|
165 |
utterances_list.extend(utterance_str)
|
166 |
else:
|
167 |
utterances_list.append(utterance_str)
|
168 |
-
|
169 |
predictions, raw_outputs = self.model.predict(utterances_list)
|
170 |
|
171 |
return predictions
|
|
|
31 |
)
|
32 |
|
33 |
def utterance_to_str(self, utterance: Utterance) -> (List[str], str):
|
34 |
+
#probing using prior text and truncates end of the prior text
|
35 |
|
36 |
doc = nlp(utterance.text)
|
37 |
+
prior_text = self.truncate_end(self.get_prior_text(utterance))
|
38 |
|
39 |
if len(doc) > token_limit:
|
40 |
utterance_text_list = self.handle_long_utterances(doc)
|
|
|
46 |
else:
|
47 |
return [prior_text, utterance.text], 'single'
|
48 |
|
49 |
+
def truncate_end(self, prior_text: str, max_seq_length: int = 512) -> str:
    """Return at most the trailing half-window of ``prior_text``.

    Despite the name, the *end* of the prior text is what is kept: when the
    text is too long, characters are dropped from the beginning so that the
    portion closest to the current utterance survives.

    Args:
        prior_text: Text preceding the utterance being classified.
        max_seq_length: Total length budget shared by both text columns.
            Defaults to 512, the value previously hard-coded here.

    Returns:
        ``prior_text`` unchanged when it fits in ``max_seq_length // 2``
        characters, otherwise its last ``max_seq_length // 2`` characters.
        A non-positive budget yields an empty string.
    """
    # Divide by 2 because the budget is split across 2 columns.
    # NOTE(review): the limit is applied to characters, while
    # max_seq_length is presumably a token budget -- confirm.
    prior_text_max_length = max(max_seq_length // 2, 0)

    # Number of leading characters that do not fit in the budget.
    overflow = len(prior_text) - prior_text_max_length
    if overflow <= 0:
        return prior_text
    return prior_text[overflow:]
|
57 |
+
|
58 |
def format_speaker(self, speaker: str, source: str) -> str:
|
59 |
prior_text = ''
|
60 |
if speaker == 'student':
|
|
|
174 |
utterances_list.extend(utterance_str)
|
175 |
else:
|
176 |
utterances_list.append(utterance_str)
|
177 |
+
|
178 |
predictions, raw_outputs = self.model.predict(utterances_list)
|
179 |
|
180 |
return predictions
|