File size: 1,995 Bytes
07d13f3
38cc873
 
 
 
583b2ec
38cc873
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f09449e
38cc873
 
 
 
 
 
 
 
 
5571f23
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import torch
from utca.core import RenameAttribute,Flush
from utca.implementation.predictors import TokenSearcherPredictor, TokenSearcherPredictorConfig
from utca.implementation.tasks import TokenSearcherNER, TokenSearcherNERPostprocessor
from utca.implementation.predictors.token_searcher.token_searcher_pipeline import TokenClassificationPipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Module-level predictor shared by every pipeline that generate_pipeline()
# builds. It is configured with the "knowledgator/UTC-DeBERTa-large-v2"
# model name and the "cpu" device.
_predictor_config = TokenSearcherPredictorConfig(
    model_name="knowledgator/UTC-DeBERTa-large-v2",
    device="cpu",
)
predictor = TokenSearcherPredictor(_predictor_config)

def generate_pipeline(threshold: float = 0.5):
    """Assemble the NER pipeline around the module-level ``predictor``.

    The pipeline chains three steps with ``|``: a ``TokenSearcherNER`` task,
    ``Flush(keys=["labels"])`` and ``RenameAttribute("output", "entities")``.

    Args:
        threshold: Value forwarded to ``TokenSearcherNERPostprocessor``
            (presumably a minimum score for keeping an entity -- confirm
            against the utca documentation).

    Returns:
        The composed pipeline object produced by the ``|`` chain.
    """
    postprocessor = TokenSearcherNERPostprocessor(threshold=threshold)
    ner_step = TokenSearcherNER(predictor=predictor, postprocess=postprocessor)

    # NOTE(review): Flush appears to drop the "labels" key and RenameAttribute
    # to expose "output" as "entities" -- verify against the utca docs.
    return (
        ner_step
        | Flush(keys=["labels"])
        | RenameAttribute("output", "entities")
    )

# Tokenizer and token-classification model loaded from the same checkpoint
# name, then wrapped in the TokenClassificationPipeline imported from utca.
_CHECKPOINT = "knowledgator/UTC-DeBERTa-large-v2"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(_CHECKPOINT)
transformers_pipeline = TokenClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy='first',
)

if __name__ == "__main__":
    # Demo run: build the default pipeline and print what it returns for a
    # sample paragraph.
    # The result is bound to ``ner_pipeline`` rather than ``pipeline`` so the
    # ``pipeline`` name imported from transformers at the top of the file is
    # not rebound at module scope.
    ner_pipeline = generate_pipeline()
    result = ner_pipeline.run({
        "text": """Dr. Paul Hammond, a renowned neurologist at Johns Hopkins University, has recently published a paper in the prestigious journal "Nature Neuroscience". 
    His research focuses on a rare genetic mutation, found in less than 0.01% of the population, that appears to prevent the development of Alzheimer's disease. Collaborating with researchers at the University of California, San Francisco, the team is now working to understand the mechanism by which this mutation confers its protective effect. 
    Funded by the National Institutes of Health, their research could potentially open new avenues for Alzheimer's treatment."""
    })

    print(result)