Ashlee Kupor
committed on
Commit
·
a2b60e7
1
Parent(s):
a8d332e
Update handler to take in and process vtt file
Browse files- handler.py +88 -8
- test_run_handler.py +1 -29
handler.py
CHANGED
@@ -1,8 +1,21 @@
|
|
1 |
from simpletransformers.classification import ClassificationModel, ClassificationArgs
|
2 |
from typing import Dict, List, Any
|
3 |
import pandas as pd
|
|
|
|
|
4 |
import torch
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
class EndpointHandler():
|
8 |
def __init__(self, path="."):
|
@@ -11,17 +24,84 @@ class EndpointHandler():
|
|
11 |
self.model = ClassificationModel(
|
12 |
"roberta", path, use_cuda=cuda_available
|
13 |
)
|
14 |
-
|
15 |
|
16 |
-
def
|
17 |
-
#
|
|
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
#utterances_list = []
|
23 |
-
#utterances_list.append(utterances)
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
predictions, raw_outputs = self.model.predict(utterances_list)
|
26 |
|
27 |
return predictions
|
|
|
|
1 |
from simpletransformers.classification import ClassificationModel, ClassificationArgs
|
2 |
from typing import Dict, List, Any
|
3 |
import pandas as pd
|
4 |
+
import webvtt
|
5 |
+
from datetime import datetime
|
6 |
import torch
|
7 |
|
8 |
+
class Utterance:
    """One speaker turn of a transcript, linked to the two turns before it.

    Attributes are stored exactly as passed in; no validation is performed.
    """

    def __init__(self, starttime, endtime, speaker, text,
                 idx, prev_utterance, prev_prev_utterance):
        """Store the fields of a single utterance.

        Args:
            starttime: Start of the utterance.
            endtime: End of the utterance.
            speaker: Speaker label.
            text: What was said.
            idx: Position of the utterance within its transcript.
            prev_utterance: The utterance immediately before this one, or None.
            prev_prev_utterance: The utterance two before this one, or None.
        """
        self.starttime = starttime
        self.endtime = endtime
        self.speaker = speaker
        self.text = text
        self.idx = idx
        self.prev = prev_utterance
        self.prev_prev = prev_prev_utterance

    def __repr__(self):
        # Deliberately leaves out prev / prev_prev: following them would
        # print the whole chain of earlier utterances for every object.
        return (
            f"{type(self).__name__}(idx={self.idx!r}, speaker={self.speaker!r}, "
            f"starttime={self.starttime!r}, endtime={self.endtime!r}, "
            f"text={self.text!r})"
        )
|
19 |
|
20 |
class EndpointHandler():
|
21 |
def __init__(self, path="."):
|
|
|
24 |
self.model = ClassificationModel(
|
25 |
"roberta", path, use_cuda=cuda_available
|
26 |
)
|
|
|
27 |
|
28 |
+
def utterance_to_str(self, utterance: Utterance) -> str:
    """Return the model input string for an utterance.

    Only the utterance text is used for the eliciting model; speaker and
    timing are ignored.
    """
    model_input = utterance.text
    return model_input
|
31 |
|
32 |
+
def convert_time(self, time_str):
    """Convert an ``HH:MM:SS.fff`` timestamp string to milliseconds.

    Args:
        time_str: Timestamp matching the strptime format ``"%H:%M:%S.%f"``.

    Returns:
        Milliseconds elapsed since 00:00:00, as a float (the sub-second
        part is obtained by true division).

    Raises:
        ValueError: Raised by ``datetime.strptime`` when ``time_str`` does
            not match the expected format.
    """
    parsed = datetime.strptime(time_str, "%H:%M:%S.%f")
    whole_seconds = 3600 * parsed.hour + 60 * parsed.minute + parsed.second
    return 1000 * whole_seconds + parsed.microsecond / 1000
|
|
|
|
|
35 |
|
36 |
+
def process_vtt_transcript(self, vttfile) -> List[Utterance]:
    """Merge the captions of a WebVTT file into one Utterance per speaker turn.

    A caption whose text contains ":" is read as "<speaker>:<text>", split at
    the first colon only so later colons stay in the text. A caption with no
    ":" continues the current speaker. Consecutive captions from the same
    speaker are joined with single spaces into one utterance that spans from
    the first caption's start to the last caption's end.

    Args:
        vttfile: Passed unchanged to ``webvtt.read``.

    Returns:
        Utterances in transcript order. Each carries its index and references
        to the one and two utterances before it (None where absent). Times
        are converted with ``self.convert_time``.

    Note:
        Captions seen before any speaker label are folded into the first
        labelled speaker's utterance. If no caption ever carries a label the
        result is empty.
        NOTE(review): any caption text containing ":" (e.g. a URL) is taken
        as a speaker label - confirm this heuristic against real transcripts.
    """
    utterances_list = []

    def flush(start, end, speaker, parts):
        # The two most recently emitted utterances become prev / prev_prev,
        # and the running count of emitted utterances is the new index.
        previous = utterances_list[-1] if utterances_list else None
        before_previous = utterances_list[-2] if len(utterances_list) > 1 else None
        utterances_list.append(
            Utterance(starttime=self.convert_time(start),
                      endtime=self.convert_time(end),
                      speaker=speaker,
                      text=" ".join(parts).strip(),
                      idx=len(utterances_list),
                      prev_utterance=previous,
                      prev_prev_utterance=before_previous)
        )

    parts = []
    prev_speaker = None
    batch_start = None
    batch_end = "00:00:00.000"

    for caption in webvtt.read(vttfile):
        # Split on the first colon only, so "T: ratio is 3:4" keeps "3:4".
        label, colon, remainder = caption.text.partition(":")
        if colon:  # the speaker was changed or restated
            speaker, new_text = label, remainder
        else:
            speaker, new_text = prev_speaker, caption.text

        if prev_speaker is not None and speaker != prev_speaker:
            # Speaker changed: emit the finished turn and start a new batch.
            flush(batch_start, batch_end, prev_speaker, parts)
            parts = []
            batch_start = caption.start
        elif batch_start is None:
            # First caption of the file opens the first batch at its own
            # start time rather than at 00:00:00.000.
            batch_start = caption.start

        parts.append(new_text)
        batch_end = caption.end
        prev_speaker = speaker

    # Emit the turn that was still open when the captions ran out.
    if prev_speaker is not None:
        flush(batch_start, batch_end, prev_speaker, parts)

    return utterances_list
|
94 |
+
|
95 |
+
|
96 |
+
def __call__(self, data_file: str) -> List[Dict[str, Any]]:
    """Run the classifier over every utterance of a .vtt transcript.

    Args:
        data_file: Filename of a .vtt transcript.

    Returns:
        The predictions returned by ``self.model.predict`` for the utterance
        strings, in transcript order.
        NOTE(review): the annotation says List[Dict[str, Any]] but the value
        is whatever ``predict`` returns - confirm the actual shape.
    """
    # TODO: filter out to only have SL utterances
    utterances_list = [
        self.utterance_to_str(utterance)
        for utterance in self.process_vtt_transcript(data_file)
    ]

    # predict returns a pair; only the predictions are passed on.
    predictions, _raw_outputs = self.model.predict(utterances_list)
    return predictions
|
107 |
+
|
test_run_handler.py
CHANGED
@@ -4,35 +4,7 @@ from handler import EndpointHandler
|
|
4 |
my_handler = EndpointHandler(path=".")
|
5 |
|
6 |
# prepare sample payload
|
7 |
-
test_payload =
|
8 |
-
"We will start after about five minutes to get other students. Hello. Hello. Can you see the screen please?",
|
9 |
-
"Hello everyone. Welcome to the second section of our journey. I'm so excited and glad to see you today and thank you for coming. Last week we learned about KRL and this week we will learn about using Python. Today we will cover the proven point. We will start with the concepts. After that we will try to solve these two problems. Question and answer. Let's now talk about variables. What variables? A variable is a place to store information in a program. Basically our name location is to store data in a memory. It receives a new name with value so you can create a new variable by assigning a value. For example, x equals 10, the value can change with a new assignment like x equals 5. You can see the value using mathematical expression like x equals 5 plus 7. We will talk more about expressions in the next class. The variable name has properties so the variable names should be in a snack case. For example, name underscores students. The variable names are case sensitive.", "So hello here is not the same hello here. Capital, letter and small letter. The variable name has some rules. The variable name must be start with a letter or underscore. It contains only letter, digit or underscore. And it cannot be built in a command in Python. For example, we cannot use for or the variable name. It does not change throughout the code. Here we want to explain some type in Python. We will use int stands for integer. No decimal point. For example, x equals 10, y equals minus 2. We used float stands for real number value. For example, x equals 5. 0 or y equals minus 3. 7. We use string stands for text between single and double quotes. For example, x equals hello or y equals 10. Please note here the string y equals 10 is not the same y equals 10. There is a difference here. We use also bold stands for booriological value, true or false. For example, x equals true or y equals false.","If you ask why do we have int and float value, this slide will explain the difference. 
This is a question, how much do I weight? The answer can be a real value number. There is no next number, so this would be a float. But the question, how many children do I have? The answer is integer, so we will define next number. This would be an int. It's a clear or no?",
|
10 |
-
"To assign value to the variables, we use the other syntax, variable name equals value. For example, greeting equals hello, x equals minus 7. 6 or math formula 5. 7. Now let's go to solve this problem today, which are mass weight. The line weight on Mars is 37. 8% of the weight on Earth. We want to write a Python code that helps the user to enter their weight on Mars. Here is a sample run. This is a file name. The second two lines are a branded test. Enter weight on Earth. The user will type number. After that, the output will be like this, 45. 36. Now think about this point. Number one, what types of variable do we need? Write a code and a Python code. Let's solve this problem as a group. So just give me a moment. I think we are three, so let's do it together. No problem. Just give me a moment. To save the time here, a sample solution, I'll explain it.",
|
11 |
-
" And after that, I run this code on Python. Who's going to help me today? You can do it. I will help you. No one?",
|
12 |
-
" .. .. Okay, no problem. I will explain it for you. The first line here, we write the file name. After that, we use a constant because we don't want to change this number. This is a fixed number. After that, we define the main. We won't have this question. Enter a weight on Earth and we will store this value from user here. After that, we want to change the string into the float. So we will write value, float, and take this value from here. After that, we do our equation. So, Earth weight from here. And this is a fixed number, as I said. After that, we will bring this equation. Please give me a moment. I will show you the code. So here, the same code. I press run. Enter a weight on the Earth. I will have this number. Enter. So, as you see, you give me the value. Is it a clue? Do you have any question here?",
|
13 |
-
"Okay, let's move to the next slide. Now, let's go to the next problem today, which is 8-ball. The idea of 8-ball is you ask a question. Yes, no question. And it tells you the answer, random answer. Here is a sample run, the first line as a file name. After that, here, the user asks, asks, asks no question. For example, SQL Emerald. The question will be here, no, not a chance, which is a random selected. So, the same thing is here. Here is no question, random answer. And again, the same thing is here. It's clear for all of you. This is Sambara.",
|
14 |
-
"Okay, let's now go to solve this problem. We'll apply the same idea with numbers. Here are the steps of solving our problem. Step one, we will import the module, import random. After that, we write a function, which is random, randint, ab, because we want a random number of integers, value from a to b. So, the output will be, generates a random integer number between a and b. If you still don't understand the idea, don't worry. We'll solve it now, together. To save time here, the sample of our solution, I explained it, and again, after this, I'll run the code. I will give you about five minutes to read the code. After that, we'll discuss it. Thank you. Thank you. Do you finish now? Yes. Okay. Who's the one who helped me to explain this code? I can try.",
|
15 |
-
"Ok I'll explain the variables again",
|
16 |
-
"sys.exit for press enter to quit to finish the code",
|
17 |
-
"That's a try. The first line, you can start with the first line.",
|
18 |
-
"Excellent.",
|
19 |
-
"Excellent. Good job. Good answer. So I will run the code now. Can I have a question?",
|
20 |
-
"I don't understand the question, but I think you, you mean if I, if the user press enter, the program will finish.",
|
21 |
-
"So let's take a try now. So here, if I press enter, it's to make it exist.","So, you're welcome. Here I will run the question. How are you? No, my Is it clear for a for a pollute? If you have any question, please ask me. Is this clean?",
|
22 |
-
"What about now?",
|
23 |
-
"If you finish this, I send me to change the slide.",
|
24 |
-
"You're welcome. Okay. To sum up what we learned in our section today, we start our section by reviewing the concepts. After that, we solve together two problems, which are mass weight problem and eight ball problem. That's all for today. And if you have any question, please ask me. I think I see a question about variable. Do you want me to explain it?","Excellent question. This concept will be covering the letter and the class, but I will send you very useful resource for this for your question.",
|
25 |
-
"https://realpython.com/python-variables/",
|
26 |
-
"You're welcome. Please. If you have any question in any time, you can send me a rapid message on it. If you have more questions about anything, there is a tool by them, even if you don't discuss it. Do you have more questions now before we finish?",
|
27 |
-
"I'm not understand 100 best, but I think you see, you say if you want it, change the value from the main to the B.",
|
28 |
-
"Could you send me the code to see it to give you a full answer?",
|
29 |
-
"Could you please go to the lecture and just send me a picture? It's okay if you share me on a platform and I will give you the answer for all students on the thread.",
|
30 |
-
"Because I want to see the code to give you a full answer.","Okay, thank you so much.",
|
31 |
-
"Anyone have another question? Feel free to ask.",
|
32 |
-
"I will show you this is a very good and smart environment, which called Google Colab. It's very nice and I can share the information with my friends. I can import the old libraries without need to, to install up python or any libraries, or my device. I can choose the link. https://colab.research.google.com/",
|
33 |
-
"You're welcome. Any other question? Okay, thank you so much for coming today. It was nice to meet you and again, please, if you have any questions, send me a private message. You're welcome.",
|
34 |
-
"Goodbye. Goodbye. Have a good day. Thank you. Thank you."]
|
35 |
-
|
36 |
|
37 |
# test the handler
|
38 |
test_pred=my_handler(test_payload)
|
|
|
4 |
my_handler = EndpointHandler(path=".")
|
5 |
|
6 |
# prepare sample payload
|
7 |
+
test_payload = 'test.transcript.vtt'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# test the handler
|
10 |
test_pred=my_handler(test_payload)
|