import gradio as gr
import torch

# Force CPU-only execution by making CUDA appear unavailable.
torch.cuda.is_available = lambda: False

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers.generation import GenerationConfig

# Optional 4-bit quantization config (requires a CUDA GPU and bitsandbytes).
# Left disabled because this demo runs on CPU. Note: bnb_4bit_quant_type must
# be "fp4" or "nf4" ("int8" is not a valid value).
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     llm_int8_enable_fp32_cpu_offload=True,
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )

# Note: The default behavior now has injection attack prevention off.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)

# Load in half precision (the original chained .half() twice, which is redundant).
# On CPU, .float() may be needed instead if fp16 kernels are unsupported.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-7B-Chat", trust_remote_code=True
).half().eval()

# Specify hyperparameters for generation; different generation lengths, top_p,
# and other sampling hyperparameters can be set here.
model.generation_config = GenerationConfig.from_pretrained(
    "Qwen/Qwen-7B-Chat", trust_remote_code=True
)


def generate(text):
    # Single-turn chat: history is not carried across calls.
    response, history = model.chat(tokenizer, text, history=None)
    return response


examples = [
    ["The Moon's orbit around Earth has"],
    ["The smooth Borealis basin in the Northern Hemisphere covers 40%"],
]

demo = gr.Interface(
    fn=generate,
    # gr.inputs/gr.outputs were removed in Gradio 3+; use top-level components.
    inputs=gr.Textbox(lines=5, label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
    examples=examples,
)

demo.launch()
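
# --- Optional multi-turn sketch (an assumption, not part of the original demo) ---
# model.chat also returns the updated history, so conversational context can be
# kept by threading it through Gradio session state. The generate_chat function
# and gr.State wiring below are illustrative; verify against your Gradio version.
#
# def generate_chat(text, history):
#     response, history = model.chat(tokenizer, text, history=history)
#     return response, history
#
# chat_demo = gr.Interface(
#     fn=generate_chat,
#     inputs=[gr.Textbox(lines=5, label="Input Text"), gr.State(None)],
#     outputs=[gr.Textbox(label="Generated Text"), gr.State()],
# )
# chat_demo.launch()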