aekupor
/

probing

Ashlee Kupor committed on Apr 19, 2023

Commit

a1cf18b

1 Parent(s): c0139e5

Add truncate end of prior text

Files changed (1) hide show

handler.py CHANGED Viewed

@@ -31,10 +31,10 @@ class EndpointHandler():
         )
     def utterance_to_str(self, utterance: Utterance) -> (List[str], str):
-        #TODO: FOR PROBING -  truncate end
         doc = nlp(utterance.text)
-        prior_text = self.get_prior_text(utterance)
         if len(doc) > token_limit:
             utterance_text_list = self.handle_long_utterances(doc)
@@ -46,6 +46,15 @@ class EndpointHandler():
         else:
             return [prior_text, utterance.text], 'single'
     def format_speaker(self, speaker: str, source: str) -> str:
         prior_text = ''
         if speaker == 'student':
@@ -165,7 +174,7 @@ class EndpointHandler():
                 utterances_list.extend(utterance_str)
             else:
                 utterances_list.append(utterance_str)
         predictions, raw_outputs = self.model.predict(utterances_list)
         return predictions

         )
     def utterance_to_str(self, utterance: Utterance) -> (List[str], str):
+        #probing using prior text and truncates end of the prior text
         doc = nlp(utterance.text)
+        prior_text = self.truncate_end(self.get_prior_text(utterance))
         if len(doc) > token_limit:
             utterance_text_list = self.handle_long_utterances(doc)
         else:
             return [prior_text, utterance.text], 'single'
+    def truncate_end(self, prior_text: str) -> str:
+        """Cap prior_text at half of max_seq_length, keeping its trailing part.
+
+        When prior_text is longer than the cap, the leading characters are
+        dropped and the last prior_text_max_length characters are returned
+        (so, despite the method name, it is the end of the text that is kept).
+        Shorter input is returned unchanged.
+
+        Args:
+            prior_text: Text to be shortened.
+
+        Returns:
+            prior_text itself, or its final prior_text_max_length characters.
+        """
+        # NOTE(review): presumably the model's maximum sequence length in
+        # tokens, but the checks below count characters (len of a str) --
+        # confirm which unit is intended.
+        max_seq_length = 512
+        # Half of the budget goes to the prior text; the "2 columns" are
+        # presumably the prior text and the utterance text -- confirm.
+        prior_text_max_length = int(max_seq_length / 2) #divide by 2 because 2 columns
+        if len(prior_text) > prior_text_max_length:
+            # Index of the first character to keep, so that exactly
+            # prior_text_max_length characters remain up to the end.
+            starting_index = len(prior_text) - prior_text_max_length
+            return prior_text[starting_index:]
+        return prior_text
     def format_speaker(self, speaker: str, source: str) -> str:
         prior_text = ''
         if speaker == 'student':
                 utterances_list.extend(utterance_str)
             else:
                 utterances_list.append(utterance_str)
         predictions, raw_outputs = self.model.predict(utterances_list)
         return predictions