File size: 661 Bytes
4b21134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c24ae0
4b21134
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import torch
from transformers import T5Tokenizer


def abstractive_summarizer(tokenizer, model, text: str) -> str:
    """Generate an abstractive summary of *text* with a T5-style model.

    The text is whitespace-normalised, prefixed with the ``"summarize: "``
    task prompt, encoded with *tokenizer*, and passed to ``model.generate``
    using beam search. The first returned sequence is decoded and returned.

    Args:
        tokenizer: Object exposing ``encode(text, return_tensors="pt")`` and
            ``decode(ids, skip_special_tokens=True)``.
        model: Object exposing ``generate(input_ids, **kwargs)``. If it has a
            ``device`` attribute, the encoded input is moved to that device.
        text: Raw document text; may span multiple lines.

    Returns:
        The decoded summary string.
    """
    # Run on whatever device the model lives on; fall back to CPU when the
    # model does not advertise one, which matches the previous behaviour.
    device = getattr(model, "device", None)
    if device is None:
        device = torch.device("cpu")

    # Collapse every whitespace run (newlines, "\r\n", tabs) into one space.
    # Simply deleting "\n" would glue the last word of a line onto the first
    # word of the next line and corrupt the model input.
    preprocess_text = " ".join(text.split())
    t5_prepared_text = "summarize: " + preprocess_text
    tokenized_text = tokenizer.encode(t5_prepared_text, return_tensors="pt").to(device)

    # summarize
    summary_ids = model.generate(
        tokenized_text,
        num_beams=4,
        no_repeat_ngram_size=2,
        min_length=30,
        max_length=300,
        early_stopping=True,
    )
    abs_summarized_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return abs_summarized_text