Uan Sholanbayev
committed on
Commit
•
8f18779
1
Parent(s):
0efa6b7
add custom handler
Browse files- handler.py +36 -0
- main_test.py +57 -0
- requirements.txt +4 -0
handler.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Dict, Any
|
2 |
+
import numpy as np
|
3 |
+
from transformers import BertTokenizer, BertModel
|
4 |
+
import torch
|
5 |
+
import pickle
|
6 |
+
|
7 |
+
|
8 |
+
def unpickle_obj(filepath):
    """Load and return the object pickled in the file at ``filepath``.

    The file is opened in binary mode and a confirmation line is printed
    to stdout once the object has been read.

    NOTE(review): unpickling can execute arbitrary code, so ``filepath``
    must only ever point at a trusted artifact.
    """
    with open(filepath, 'rb') as stream:
        payload = pickle.loads(stream.read())
    print(f"unpickled {filepath}")
    return payload
|
13 |
+
|
14 |
+
|
15 |
+
class EndpointHandler():
    """Score (text, query) pairs with a pickled classifier over BERT embeddings.

    Each text and each query is embedded with ``bert-base-uncased``; the
    classifier is then fed the element-wise difference ``text - query`` for
    every (text, query) combination.
    """

    # File looked up when ``path`` is a directory rather than the pickle itself.
    MODEL_FILENAME = "bert_lr.pkl"

    def __init__(self, path=""):
        """Load the classifier, the tokenizer and the BERT encoder.

        Args:
            path: Location of the pickled classifier. Either the pickle file
                itself, or a directory that contains ``MODEL_FILENAME``
                (hosted endpoints hand the handler the repository directory).
        """
        # Function-scope import: ``os`` is not among the module-level imports.
        import os

        if os.path.isdir(path):
            path = os.path.join(path, self.MODEL_FILENAME)
        # NOTE(review): the classifier is loaded with pickle, which can run
        # arbitrary code; only point ``path`` at a trusted artifact.
        self.model = unpickle_obj(path)
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.bert = BertModel.from_pretrained('bert-base-uncased').to(self.device)

    def get_embeddings(self, texts: List[str]):
        """Return one mean-pooled BERT embedding per input text.

        Args:
            texts: Texts to embed. Inputs longer than 512 tokens are truncated.

        Returns:
            A NumPy array with one row per text.
        """
        inputs = self.tokenizer(texts, return_tensors='pt', truncation=True,
                                padding=True, max_length=512).to(self.device)
        with torch.no_grad():
            outputs = self.bert(**inputs)
        hidden = outputs.last_hidden_state
        # Average over real tokens only. A plain mean over the sequence axis
        # would also average the padding positions, making a text's embedding
        # depend on whatever else happens to be in the same batch.
        mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)
        pooled = (hidden * mask).sum(dim=1) / token_counts
        return pooled.cpu().numpy()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the classifier on every (text, query) combination.

        Args:
            data: Payload with a ``'queries'`` entry and a ``'texts'`` entry.
                Each may be a list of strings or a single string.

        Returns:
            Whatever ``self.model.predict_proba`` returns for the difference
            vectors. Rows are ordered text-major: all queries for the first
            text, then all queries for the second text, and so on.
            NOTE(review): for a scikit-learn classifier this is presumably an
            array rather than the annotated list of dicts; confirm it
            serializes as expected before relying on the annotation.

        Raises:
            KeyError: If ``'queries'`` or ``'texts'`` is missing from ``data``.
        """
        queries = data['queries']
        texts = data['texts']
        # Accept a bare string as a one-element batch instead of relying on
        # the tokenizer's single-string behaviour.
        if isinstance(queries, str):
            queries = [queries]
        if isinstance(texts, str):
            texts = [texts]
        queries_vec = np.asarray(self.get_embeddings(queries))
        texts_vec = np.asarray(self.get_embeddings(texts))
        # (n_texts, 1, dim) - (n_queries, dim) broadcasts to every pair, which
        # is then flattened to (n_texts * n_queries, dim).
        diff = (texts_vec[:, np.newaxis] - queries_vec)\
            .reshape(-1, queries_vec.shape[1])
        return self.model.predict_proba(diff)
|
main_test.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from handler import EndpointHandler
|
2 |
+
|
3 |
+
|
4 |
+
# Sample document used as the "texts" entry of the test payloads. Adjacent
# string literals inside the parentheses are concatenated by the parser.
summary = (
    "The article discusses the principles and process of doing "
    "great work, emphasizing the importance of choosing the "
    "right field, developing a deep interest, and working hard."
    " It also highlights the significance of curiosity, delight,"
    " and the desire to do something impressive. The author "
    "further discusses the challenges of figuring out what to"
    " work on, the dangers of procrastination, and the importance"
    " of consistency and exponential growth. The article "
    "concludes by highlighting the importance of intellectual "
    "honesty and avoiding affectation.\n\n• Choosing the right "
    "field\n • The author suggests that the work one chooses "
    "should be something they have a natural aptitude for, a deep"
    " interest in, and offers scope to do great work. The author"
    " also emphasizes the importance of working on one's own "
    "projects and following one's curiosity.\n\n• Figuring out what"
    " to work on\n • The author discusses the challenges of "
    "figuring out what to work on, especially when young and "
    "inexperienced. The author suggests trying lots of things, "
    "meeting lots of people, reading lots of books, and asking"
    " lots of questions to discover one's interests.\n\n• The "
    "dangers of procrastination\n • The author warns against "
    "procrastination, especially per-project procrastination, which"
    " can lead to significant delays in starting ambitious "
    "projects. The author suggests regularly asking oneself"
    " if they are working on what they most want to work on.\n\n•"
    " The importance of consistency and exponential growth\n • "
    "The author emphasizes the importance of consistency in work"
    " and the potential for exponential growth. The author "
    "suggests that work that compounds can lead to exponential "
    "growth, but warns that the early stages of exponential growth"
    " can feel flat and be undervalued.\n\n• Intellectual honesty "
    "and avoiding affectation\n • The author concludes by "
    "highlighting the importance of intellectual honesty and "
    "avoiding affectation. The author suggests that being earnest"
    " and intellectually honest can help one see new ideas and truths."
)

# Queries of decreasing relevance to the summary above.
query = "What are the principles of doing great work according to the article?"
ir_query = "What are the best recipes for a vegan diet?"
semi_relevant_query = "Did Cristiano Ronaldo did a great work in Saudi league?"
|
42 |
+
|
43 |
+
|
44 |
+
# init handler
my_handler = EndpointHandler(path="./bert_lr.pkl")

# prepare sample payloads: "queries" and "texts" are both lists of strings
relevant_payload = {"queries": [query], "texts": [summary]}
# The query is wrapped in a list so both payloads have the same shape; it was
# previously passed as a bare string.
# NOTE(review): `ir_query` is defined above but never used; confirm whether
# this payload was meant to use it instead of `semi_relevant_query`.
irrelevant_payload = {"queries": [semi_relevant_query], "texts": [summary]}

# test the handler
relevant_pred = my_handler(relevant_payload)
irrelevant_pred = my_handler(irrelevant_payload)

# show results
print("relevant_pred", relevant_pred)
print("irrelevant_pred", irrelevant_pred)
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy==1.26.0
|
2 |
+
torch==2.1.0
|
3 |
+
transformers==4.34.0
|
4 |
+
scikit-learn==1.3.1
|