arubenruben
committed on
Commit
•
9112af4
1
Parent(s):
2bbce4c
Update deploy_pipeline.py
Browse files- deploy_pipeline.py +10 -12
deploy_pipeline.py
CHANGED
@@ -14,7 +14,7 @@ class TokenizeAndAlignLabelsStep():
|
|
14 |
# Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
|
15 |
def tokenize_and_align_labels(self, examples, tokenizer):
|
16 |
|
17 |
-
tokenized_inputs = tokenizer(examples, padding='max_length', max_length=
|
18 |
|
19 |
# Map tokens to their respective word.
|
20 |
word_ids = tokenized_inputs.word_ids()
|
@@ -34,9 +34,7 @@ class TokenizeAndAlignLabelsStep():
|
|
34 |
|
35 |
previous_word_idx = word_idx
|
36 |
|
37 |
-
tokenized_inputs["tokens"] = examples
|
38 |
-
tokenized_inputs["ner_tags"] = []
|
39 |
-
tokenized_inputs["labels"] = []
|
40 |
tokenized_inputs["labels_mask"] = labels_mask
|
41 |
|
42 |
return tokenized_inputs
|
@@ -60,16 +58,16 @@ class BERT_CRF_Pipeline(Pipeline):
|
|
60 |
def _forward(self, tokenizer_results):
|
61 |
|
62 |
input_ids = torch.tensor(
|
63 |
-
tokenizer_results['input_ids'], dtype=torch.long).unsqueeze(0)
|
64 |
|
65 |
token_type_ids = torch.tensor(
|
66 |
-
tokenizer_results['token_type_ids'], dtype=torch.long).unsqueeze(0)
|
67 |
|
68 |
attention_mask = torch.tensor(
|
69 |
-
tokenizer_results['attention_mask'], dtype=torch.bool).unsqueeze(0)
|
70 |
|
71 |
labels_mask = torch.tensor(
|
72 |
-
tokenizer_results['labels_mask'], dtype=torch.bool).unsqueeze(0)
|
73 |
|
74 |
# input_ids, token_type_ids, attention_mask, labels, labels_mask
|
75 |
outputs = self.model(input_ids=input_ids, token_type_ids=token_type_ids,
|
@@ -87,12 +85,12 @@ class BERT_CRF_Pipeline(Pipeline):
|
|
87 |
|
88 |
def main():
|
89 |
|
90 |
-
PIPELINE_REGISTRY.register_pipeline("
|
91 |
pipeline_class=BERT_CRF_Pipeline,
|
92 |
pt_model=AutoModelForTokenClassification,
|
93 |
)
|
94 |
-
classifier = pipeline("
|
95 |
-
device=
|
96 |
out_path = os.path.join(sys.path[0], 'out', 'pipeline')
|
97 |
repo = Repository(
|
98 |
out_path, clone_from=f"arubenruben/PT-BERT-Large-CRF-HAREM-Selective", use_auth_token=True)
|
@@ -100,4 +98,4 @@ def main():
|
|
100 |
# repo.git_pull()
|
101 |
|
102 |
classifier.save_pretrained(out_path)
|
103 |
-
repo.push_to_hub()
|
|
|
14 |
# Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
|
15 |
def tokenize_and_align_labels(self, examples, tokenizer):
|
16 |
|
17 |
+
tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128)
|
18 |
|
19 |
# Map tokens to their respective word.
|
20 |
word_ids = tokenized_inputs.word_ids()
|
|
|
34 |
|
35 |
previous_word_idx = word_idx
|
36 |
|
37 |
+
tokenized_inputs["tokens"] = examples
|
|
|
|
|
38 |
tokenized_inputs["labels_mask"] = labels_mask
|
39 |
|
40 |
return tokenized_inputs
|
|
|
58 |
def _forward(self, tokenizer_results):
|
59 |
|
60 |
input_ids = torch.tensor(
|
61 |
+
tokenizer_results['input_ids'], dtype=torch.long, device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")).unsqueeze(0)
|
62 |
|
63 |
token_type_ids = torch.tensor(
|
64 |
+
tokenizer_results['token_type_ids'], dtype=torch.long, device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")).unsqueeze(0)
|
65 |
|
66 |
attention_mask = torch.tensor(
|
67 |
+
tokenizer_results['attention_mask'], dtype=torch.bool, device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")).unsqueeze(0)
|
68 |
|
69 |
labels_mask = torch.tensor(
|
70 |
+
tokenizer_results['labels_mask'], dtype=torch.bool, device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")).unsqueeze(0)
|
71 |
|
72 |
# input_ids, token_type_ids, attention_mask, labels, labels_mask
|
73 |
outputs = self.model(input_ids=input_ids, token_type_ids=token_type_ids,
|
|
|
85 |
|
86 |
def main():
|
87 |
|
88 |
+
PIPELINE_REGISTRY.register_pipeline("PT-BERT-Large-CRF-HAREM-Selective-pipeline",
|
89 |
pipeline_class=BERT_CRF_Pipeline,
|
90 |
pt_model=AutoModelForTokenClassification,
|
91 |
)
|
92 |
+
classifier = pipeline("PT-BERT-Large-CRF-HAREM-Selective-pipeline", model="arubenruben/PT-BERT-Large-CRF-HAREM-Selective",
|
93 |
+
device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"), trust_remote_code=True)
|
94 |
out_path = os.path.join(sys.path[0], 'out', 'pipeline')
|
95 |
repo = Repository(
|
96 |
out_path, clone_from=f"arubenruben/PT-BERT-Large-CRF-HAREM-Selective", use_auth_token=True)
|
|
|
98 |
# repo.git_pull()
|
99 |
|
100 |
classifier.save_pretrained(out_path)
|
101 |
+
repo.push_to_hub()
|