larry1129 committed
Commit efa5b9e · verified · Parent: 423e3f6

Update app.py

Files changed (1)
  1. app.py +34 -83
app.py CHANGED
@@ -39,93 +39,44 @@ def generate_response(instruction, input_text):
     global model, tokenizer
 
     if model is None:
-        # Import the GPU-dependent libraries inside the function
-        import torch
-        from transformers import AutoTokenizer, AutoModelForCausalLM
-
-        # Load the tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
-
-        # Load the model
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            device_map="auto",
-            torch_dtype=torch.float16,
-            use_auth_token=hf_token,
-        )
-
-        # Set the pad_token
-        tokenizer.pad_token = tokenizer.eos_token
-        model.config.pad_token_id = tokenizer.pad_token_id
-
-        # Switch to evaluation mode
-        model.eval()
-    else:import spaces  # must be imported at the very top
-    import gradio as gr
-    import os
-
-    # Get the Hugging Face access token
-    hf_token = os.getenv("HF_API_TOKEN")
-
-    # Define the base model name
-    base_model_name = "larry1129/meta-llama-3.1-8b-bnb-4bit"
-
-    # Define the adapter model name
-    adapter_model_name = "larry1129/WooWoof_AI"
-
-    # Define global variables to cache the model and tokenizer
-    model = None
-    tokenizer = None
-
-    # Define the prompt-building function
-    def generate_prompt(instruction, input_text=""):
-        if input_text:
-            prompt = f"""### Instruction:
-{instruction}
-### Input:
-{input_text}
-### Response:
-"""
+        print("Starting to load the model...")
+        try:
+            # Import the GPU-dependent libraries inside the function
+            import torch
+            from transformers import AutoTokenizer, AutoModelForCausalLM
+
+            # Load the tokenizer
+            tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
+            print("Tokenizer loaded successfully.")
+
+            # Load the model
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                device_map="auto",
+                torch_dtype=torch.float16,
+                use_auth_token=hf_token,
+            )
+            print("Model loaded successfully.")
+
+            # Set the pad_token
+            tokenizer.pad_token = tokenizer.eos_token
+            model.config.pad_token_id = tokenizer.pad_token_id
+
+            # Switch to evaluation mode
+            model.eval()
+            print("Model switched to evaluation mode.")
+        except Exception as e:
+            print("Error while loading the model:", e)
+            raise e  # Re-raise so the full error shows up in the logs
     else:
-            prompt = f"""### Instruction:
-{instruction}
-### Response:
-"""
-        return prompt
-
-    # Define the response-generation function, decorated with @spaces.GPU
-    @spaces.GPU(duration=120)
-    def generate_response(instruction, input_text):
-        global model, tokenizer
-
-        if model is None:
-            # Check whether bitsandbytes is installed
-            import importlib.util
-            if importlib.util.find_spec("bitsandbytes") is None:
-                import subprocess
-                subprocess.call(["pip", "install", "--upgrade", "bitsandbytes"])
-
-            # Import the GPU-dependent libraries inside the function
-            import torch
-            from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-            from peft import PeftModel
-
-            # Create the quantization config
-            bnb_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype=torch.float16
-            )
-
-            # Load the tokenizer
-            tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
-
-            # Load the base model
-
         # Import the needed libraries inside the function
         import torch
 
+    # Check that model and tokenizer were loaded correctly
+    if model is None or tokenizer is None:
+        print("Model or tokenizer was not loaded correctly.")
+        raise ValueError("Model or tokenizer was not loaded correctly.")
+
     # Build the prompt
     prompt = generate_prompt(instruction, input_text)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
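For reference, the added lines amount to a lazy-load pattern for a ZeroGPU Space: the model is loaded once inside the @spaces.GPU-decorated function, cached in module-level globals, and any failure is logged and re-raised so it appears in the Space logs. A minimal self-contained sketch of that pattern follows; the value of model_name and the module-level setup are assumptions, since the hunk only shows the function body.

import os
import spaces  # imported before any CUDA-touching library

hf_token = os.getenv("HF_API_TOKEN")
model_name = "larry1129/WooWoof_AI"  # assumption: the repo id actually used in app.py

# Module-level cache so the model is loaded only on the first call
model = None
tokenizer = None

@spaces.GPU(duration=120)
def generate_response(instruction, input_text):
    global model, tokenizer
    if model is None:
        try:
            # GPU-dependent imports stay inside the decorated function
            import torch
            from transformers import AutoTokenizer, AutoModelForCausalLM

            tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                torch_dtype=torch.float16,
                use_auth_token=hf_token,
            )
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = tokenizer.pad_token_id
            model.eval()
        except Exception as exc:
            print("Error while loading the model:", exc)
            raise
    ...  # prompt construction and generation follow, as in the rest of app.py

Deferring the imports and the load into the function is presumably deliberate: on ZeroGPU, CUDA work has to happen inside a @spaces.GPU call, so doing it at module import time would fail.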
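The removed branch was partway through a different loading path: a 4-bit quantized base model with a PEFT adapter on top. A completed version of that path would look roughly like the sketch below. Only the model names and the BitsAndBytesConfig come from the old file; the quantization_config wiring and the PeftModel.from_pretrained call are assumptions based on the standard transformers/peft APIs, because the removed code stops right after "# Load the base model".

import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

hf_token = os.getenv("HF_API_TOKEN")
base_model_name = "larry1129/meta-llama-3.1-8b-bnb-4bit"
adapter_model_name = "larry1129/WooWoof_AI"

# 4-bit NF4 quantization config, as in the removed code
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)

# Assumption: pass the config via quantization_config and let accelerate place the weights
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    use_auth_token=hf_token,
)

# Assumption: the WooWoof_AI repo is a PEFT adapter applied on top of the quantized base
model = PeftModel.from_pretrained(base_model, adapter_model_name)
model.eval()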