Ashlee Kupor committed on
Commit
a1cf18b
·
1 Parent(s): c0139e5

Add truncate end of prior text

Browse files
Files changed (1) hide show
  1. handler.py +12 -3
handler.py CHANGED
@@ -31,10 +31,10 @@ class EndpointHandler():
31
  )
32
 
33
  def utterance_to_str(self, utterance: Utterance) -> (List[str], str):
34
- #TODO: FOR PROBING - truncate end
35
 
36
  doc = nlp(utterance.text)
37
- prior_text = self.get_prior_text(utterance)
38
 
39
  if len(doc) > token_limit:
40
  utterance_text_list = self.handle_long_utterances(doc)
@@ -46,6 +46,15 @@ class EndpointHandler():
46
  else:
47
  return [prior_text, utterance.text], 'single'
48
 
 
 
 
 
 
 
 
 
 
49
  def format_speaker(self, speaker: str, source: str) -> str:
50
  prior_text = ''
51
  if speaker == 'student':
@@ -165,7 +174,7 @@ class EndpointHandler():
165
  utterances_list.extend(utterance_str)
166
  else:
167
  utterances_list.append(utterance_str)
168
-
169
  predictions, raw_outputs = self.model.predict(utterances_list)
170
 
171
  return predictions
 
31
  )
32
 
33
  def utterance_to_str(self, utterance: Utterance) -> (List[str], str):
34
+ # Probing: pair the utterance with its prior text, keeping only the trailing characters of the prior text (see truncate_end)
35
 
36
  doc = nlp(utterance.text)
37
+ prior_text = self.truncate_end(self.get_prior_text(utterance))
38
 
39
  if len(doc) > token_limit:
40
  utterance_text_list = self.handle_long_utterances(doc)
 
46
  else:
47
  return [prior_text, utterance.text], 'single'
48
 
49
def truncate_end(self, prior_text: str) -> str:
    """Return ``prior_text`` limited to its trailing 256 characters.

    The limit is half of a 512 budget because the input is split across
    two columns. Length is measured with ``len`` on the string, i.e. in
    characters. When the text is over the limit, the beginning is dropped
    and the end is kept; otherwise the text is returned unchanged.
    """
    sequence_budget = 512
    # Two columns share the budget, so the prior text gets half of it.
    tail_length = int(sequence_budget / 2)

    # Number of leading characters that do not fit in the allowance.
    overflow = len(prior_text) - tail_length
    if overflow <= 0:
        return prior_text
    return prior_text[overflow:]
57
+
58
  def format_speaker(self, speaker: str, source: str) -> str:
59
  prior_text = ''
60
  if speaker == 'student':
 
174
  utterances_list.extend(utterance_str)
175
  else:
176
  utterances_list.append(utterance_str)
177
+
178
  predictions, raw_outputs = self.model.predict(utterances_list)
179
 
180
  return predictions