# Spaces: Build error (hosting-page status banner captured with the file; not part of the script)
"""Restore casing and punctuation of a text file using a recasepunc model.

Usage: python <this script> INPUT_FILE

Reads INPUT_FILE, tokenizes it with a ``CasePuncPredictor`` built from the
local ``checkpoint`` path, applies the predicted case and punctuation labels
token by token, and prints the reassembled text to stdout.
"""
import sys
import time

from transformers import logging

from recasepunc import CasePuncPredictor
# NOTE(review): WordpieceTokenizer and Config are not referenced below;
# presumably they must be importable from this module for the checkpoint to
# load -- confirm before removing them.
from recasepunc import WordpieceTokenizer
from recasepunc import Config

# Only report errors from the transformers library.
logging.set_verbosity_error()

if len(sys.argv) < 2:
    # Fail with a usage message instead of a bare IndexError on sys.argv[1].
    sys.exit(f"usage: {sys.argv[0]} INPUT_FILE")

predictor = CasePuncPredictor('checkpoint', lang="en")

# Use a context manager so the input file is closed deterministically.
with open(sys.argv[1]) as source:
    # Lines keep their trailing newline and are additionally joined by a space.
    text = " ".join(source.readlines())

# Each token is paired with its index; the predictor is told to read the
# token string from position 1 of that pair.
tokens = list(enumerate(predictor.tokenize(text)))

results = ""
for token, case_label, punc_label in predictor.predict(tokens, lambda x: x[1]):
    piece = token[1]
    prediction = predictor.map_punc_label(
        predictor.map_case_label(piece, case_label), punc_label
    )

    # Attach without a space when the token starts with an apostrophe, when
    # the text so far ends with one, or when the token starts with '#'
    # (presumably a word-piece continuation marker -- confirm against the
    # tokenizer). Everything else is separated by a single space.
    touches_apostrophe = piece[0] == '\'' or results.endswith('\'')
    if touches_apostrophe or piece[0] == '#':
        results = results + prediction
    else:
        results = results + ' ' + prediction

print(results.strip())