Example Usage

import torch
import jieba
import numpy as np
from classifier import BertForMaskClassification
from transformers import AutoTokenizer, AutoConfig, BertForTokenClassification

# Class names in model-output order: `punct` indexes this list with the
# arg-max class id predicted at each [MASK] position. "O" means that no
# punctuation mark is inserted at that position.
label_list = ["O", "COMMA", "PERIOD", "COLON"]

# Punctuation mark emitted for each non-"O" label. COMMA and PERIOD are the
# full-width CJK marks, matching the expected output documented with the
# usage example.
label2punct = {
    "COMMA": ",",
    "PERIOD": "。",
    "COLON": ":",
}

# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name_or_path = "pmp-h256"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = BertForMaskClassification.from_pretrained(model_name_or_path)
# NOTE(review): `device` is only computed here; nothing in this block moves
# the model onto it, so the model stays wherever `from_pretrained` put it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def punct(text):
    """Insert predicted punctuation marks into unpunctuated text.

    The text is segmented with ``jieba.lcut``; every word is split into
    single characters and followed by one ``[MASK]`` placeholder. The model
    predicts a label for each placeholder, and every label other than
    ``"O"`` is replaced by its mark from ``label2punct``.

    Args:
        text: A string, or an iterable of strings that is concatenated with
            ``"".join`` before segmentation.

    Returns:
        The input characters with the predicted punctuation marks inserted.
        Empty input yields an empty string without calling the model.
    """
    words = jieba.lcut("".join(text))
    if not words:
        # Nothing to punctuate; skip tokenization and the forward pass.
        return ""

    # One entry per character, plus a [MASK] slot after every word where the
    # model may place a punctuation mark.
    pieces = []
    for word in words:
        pieces.extend(word)
        pieces.append("[MASK]")

    encoded = tokenizer(pieces, is_split_into_words=True, return_tensors="pt")
    # Follow the model's own device so this works wherever the model lives.
    encoded = encoded.to(next(model.parameters()).device)
    with torch.no_grad():
        logits = model(**encoded).logits
    predictions = logits.argmax(-1)[0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(encoded["input_ids"][0].tolist())

    # Map every token back to the entry of `pieces` it was produced from, so
    # the original character is emitted instead of the vocabulary token
    # (which may be "[UNK]" or a normalised form). Slow tokenizers cannot
    # provide this mapping; in that case fall back to the token text.
    try:
        word_ids = encoded.word_ids(0)
    except ValueError:
        word_ids = [None] * len(tokens)

    result = []
    previous_word = None
    for token, prediction, word_id in zip(tokens, predictions, word_ids):
        if token in ("[CLS]", "[SEP]"):
            continue
        if token == "[MASK]":
            label = label_list[prediction]
            if label != "O":
                result.append(label2punct[label])
        elif word_id is None:
            result.append(token)
        elif word_id != previous_word:
            # Emit the source character once, even if it was split into
            # several sub-tokens.
            result.append(pieces[word_id])
        previous_word = word_id

    return "".join(result)

# Demo input: a run of Chinese text without any punctuation marks.
text = '肝浊音界正常肝上界位于锁骨中线第五肋间移动浊音阴性肾区无叩痛'
print(punct(text))

# Expected output:
# 肝浊音界正常,肝上界位于锁骨中线第五肋间,移动浊音阴性,肾区无叩痛。

Acknowledgments

This work was supported in part by the Shenzhen Science and Technology Program (No. JCYJ20210324135809025).

Citations

Coming Soon

License

MIT

Downloads last month
0
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for rickltt/pmp-h256

Unable to build the model tree, the base model loops to the model itself. Learn more.