import os import sys import re import torch import transformers import traceback from queue import Queue from threading import Thread def post_process_output(text): text = text.strip() pattern = re.compile( r"||||\[PAD\]|<\|endoftext\|>|\[UNK\]|\[CLS\]|\[MASK\]|<\|startofpiece\|>|<\|endofpiece\|>|\[gMASK\]|\[sMASK\]" ) text = pattern.sub("", text.strip()).strip() return text def post_process_code(code): sep = "\n```" if sep in code: blocks = code.split(sep) if len(blocks) % 2 == 1: for i in range(1, len(blocks), 2): blocks[i] = blocks[i].replace("\\_", "_") code = sep.join(blocks) return code class Stream(transformers.StoppingCriteria): def __init__(self, callback_func=None): self.callback_func = callback_func def __call__(self, input_ids, scores) -> bool: if self.callback_func is not None: self.callback_func(input_ids[0]) return False class Iteratorize: """ Transforms a function that takes a callback into a lazy iterator (generator). """ def __init__(self, func, kwargs={}, callback=None): self.mfunc = func self.c_callback = callback self.q = Queue() self.sentinel = object() self.kwargs = kwargs self.stop_now = False print('init') def _callback(val): if self.stop_now: raise ValueError self.q.put(val) def gentask(): try: ret = self.mfunc(callback=_callback, **self.kwargs) except ValueError: pass except: traceback.print_exc() pass self.q.put(self.sentinel) if self.c_callback: self.c_callback(ret) self.thread = Thread(target=gentask) self.thread.start() def __iter__(self): return self def __next__(self): obj = self.q.get(True, None) if obj is self.sentinel: print(obj) raise StopIteration else: return obj def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): #self.stop_now = True pass