import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "dingckc/FineLlama-3.1-8B"

# Load the tokenizer, authenticating with the access token from the environment
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.getenv("ACCESS_KEY"))

# Load the model (the token is also needed here if the repo is gated)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=os.getenv("ACCESS_KEY"),
    device_map="auto",          # automatically map to the available device (CPU if no GPU)
    torch_dtype=torch.float16,  # reduce memory usage
)

# Define the inference function
def evaluate_essay(title, essay):
    input_text = f"""
Essay Title: {title}
Essay Rubric: Evaluate the argument based on clarity, coherence, lexical resource, and grammatical accuracy.
Essay: {essay}
Please generate a detailed evaluation based on the rubric provided above.
"""
    # Tokenize and move the inputs to the same device the model was mapped to
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=150,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Build the interface with Gradio
title_input = gr.Textbox(label="Essay Title")
essay_input = gr.Textbox(label="Essay Content", lines=10)
output_text = gr.Textbox(label="Evaluation Result")

gr.Interface(
    fn=evaluate_essay,
    inputs=[title_input, essay_input],
    outputs=output_text,
    title="Essay Evaluation",
    description="Enter the title and content of your essay to receive an evaluation.",
).launch(share=True)
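
# Optional sketch: if GPU memory is tight, the model can instead be loaded in
# 4-bit via bitsandbytes and transformers' BitsAndBytesConfig. This is a hedged
# alternative, not part of the app above; the settings shown are assumptions.
# To use it, replace the AutoModelForCausalLM.from_pretrained call near the top.
#
# from transformers import BitsAndBytesConfig
#
# quant_config = BitsAndBytesConfig(
#     load_in_4bit=True,                     # store weights in 4-bit
#     bnb_4bit_quant_type="nf4",             # NF4 quantization (assumed choice)
#     bnb_4bit_compute_dtype=torch.float16,  # compute in fp16
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     token=os.getenv("ACCESS_KEY"),
#     device_map="auto",
#     quantization_config=quant_config,
# )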