---
license: apache-2.0
---

```python
import torch  # backend required by return_tensors="pt"
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
)

# Replace with a local directory or a Hub repository id.
model_path = "model_path"

# trust_remote_code=True executes modelling code shipped with the checkpoint;
# only enable it for checkpoints you trust.
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

_inputs = """your input"""

# Some tokenizers define no pad token; fall back to EOS so generate() always
# receives an explicit pad id.
pad_token_id = tokenizer.pad_token_id
if pad_token_id is None:
    pad_token_id = tokenizer.eos_token_id

# Greedy decoding. Only max_new_tokens is set: it bounds the number of
# generated tokens regardless of prompt length. Setting max_length as well
# conflicts with it (max_new_tokens wins and a warning is emitted).
gen_conf = GenerationConfig(
    num_beams=1,
    do_sample=False,
    max_new_tokens=128,
    no_repeat_ngram_size=4,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=pad_token_id,
)

# Tokenize straight to tensors (batch of one) and place them on the same
# device as the model.
encoded = tokenizer(_inputs, add_special_tokens=False, return_tensors="pt")
encoded = encoded.to(model.device)
input_ids = encoded["input_ids"]
input_ids_length = input_ids.shape[1]

print(_inputs)

outputs = model.generate(
    inputs=input_ids,
    # None is accepted when the tokenizer does not produce a mask.
    attention_mask=encoded.get("attention_mask"),
    generation_config=gen_conf,
)

# generate() returns prompt + continuation; slice off the prompt tokens so
# only the newly generated text is decoded.
outs = tokenizer.decode(outputs[0][input_ids_length:], skip_special_tokens=False)
print(outs)
```