import gradio as gr
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer
)
from peft import PeftModel
import torch

model_path = "Qwen1.5-1.8B-Chat"
lora_path = "."  # + "/checkpoint-100"

if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
)

config_kwargs = {"device_map": device}
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    **config_kwargs
)

# Load the LoRA adapter and merge it into the base weights for faster inference.
model = PeftModel.from_pretrained(model, lora_path)
model = model.merge_and_unload()
model.eval()

# model.config.use_cache = True
# model.to("cpu")
# model.save_pretrained("/data/ango/EssayGPT")
# tokenizer.save_pretrained("/data/ango/EssayGPT")

MAX_MATERIALS = 4


def call(related_materials, materials, question):
    # Build the prompt: the selected materials ("材料" = material) followed by
    # the question ("问题" = question).
    query_texts = [
        f"材料{i + 1}\n{material}"
        for i, material in enumerate(materials)
        if i in related_materials
    ]
    query_texts.append(f"问题:{question}")
    query = "\n".join(query_texts)

    messages = [
        # System prompt: "Please answer the question based on the materials provided below."
        {"role": "system", "content": "请你根据以下提供的材料来回答问题"},
        {"role": "user", "content": query}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    print(len(model_inputs.input_ids[0]))

    generated_ids = model.generate(
        model_inputs.input_ids,
        max_length=8096
    )
    # Strip the prompt tokens so only the newly generated answer is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response


def create_ui():
    with gr.Blocks() as app:
        gr.Markdown("""