"""Minimal Gradio text-generation demo backed by a GPT-2 style causal LM."""

from transformers import AutoModelForCausalLM, AutoTokenizer
# NOTE(review): nothing visible here uses `pd`, and the pandas-compatible API
# normally lives in `modin.pandas` -- confirm whether this import is needed.
import modin as pd
import gradio as gr

# Tokenizer and model weights are loaded once at import time and shared by
# every request handled by `chat`.
tokenizer = AutoTokenizer.from_pretrained("microsoft/lts-gpt2-sm")
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/lts-gpt2-sm",
    subfolder="gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3",
)


def chat(Prompt: str) -> str:
    """Generate text from *Prompt* with the module-level model.

    The parameter keeps its capitalised name because it is part of the
    function's public signature (Gradio may also derive the input label
    from it -- confirm before renaming).

    Args:
        Prompt: Text typed by the user.

    Returns:
        The decoded first generated sequence with special tokens removed,
        or an empty string when *Prompt* is empty.
    """
    # An empty prompt would hand generate() zero input tokens to condition on.
    if not Prompt:
        return ""

    encoded = tokenizer(Prompt, return_tensors="pt")
    # The original file had a malformed module-level `attention_mask` dict
    # (a syntax error). The apparent intent -- supplying the attention mask
    # and a pad token id to generate() -- is expressed here explicitly.
    generated_ids = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_length=128,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)


# Build the text-in / text-out UI and start serving immediately, as the
# original script did.
demo = gr.Interface(fn=chat, inputs='text', outputs='text')
demo.launch(debug=True)