"""Gradio chat demo for the janny127/autotrain-7qmts-cs1er causal LM.

Loads the model into a Hugging Face text-generation pipeline and serves it
through ``gr.ChatInterface`` using the ChatML-style
``<|im_start|>``/``<|im_end|>`` prompt format.
"""
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_ID = "janny127/autotrain-7qmts-cs1er"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Use a distinct name for the pipeline instance: the original rebound the
# name `pipeline`, shadowing the transformers factory function.
generator = pipeline(
    "text-generation",
    model=MODEL_ID,
    tokenizer=tokenizer,
    torch_dtype=torch.float32,  # `torch` was referenced but never imported in the original
    device_map="auto",
)

# End-of-turn token id for the ChatML prompt format. The original referenced
# an undefined CHAT_EOS_TOKEN_ID (NameError on first generation).
# NOTE(review): assumes "<|im_end|>" exists in the tokenizer vocabulary --
# confirm against this model's tokenizer config.
CHAT_EOS_TOKEN_ID = tokenizer.convert_tokens_to_ids("<|im_end|>")


def generate_answer(query, history=None, sample_num=3):
    """Generate a chat reply for *query*.

    Parameters
    ----------
    query : str
        The user's message.
    history : list | None
        Conversation history supplied by ``gr.ChatInterface`` (unused here).
        The original signature lacked this slot, so ChatInterface passed the
        history list into ``sample_num`` and generation crashed.
    sample_num : int
        Number of candidate completions to sample.

    Returns
    -------
    str
        The sampled answers joined by blank lines. ChatInterface expects a
        string reply; the original returned a list.
    """
    formatted_prompt = (
        f"<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"
    )
    sequences = generator(
        formatted_prompt,
        do_sample=True,
        top_k=50,
        top_p=0.9,
        num_return_sequences=sample_num,
        repetition_penalty=1.1,
        max_new_tokens=150,
        eos_token_id=CHAT_EOS_TOKEN_ID,
    )
    # Strip the echoed prompt so only the assistant's continuation remains.
    answers = [seq["generated_text"].replace(formatted_prompt, "") for seq in sequences]
    return "\n\n".join(answers)


interface = gr.ChatInterface(fn=generate_answer, stop_btn=None)

with gr.Blocks() as demo:
    interface.render()

if __name__ == "__main__":
    # Guarded so importing this module does not start the server.
    demo.launch()