masonchu committed on
Commit 5d51e3b · verified · 1 Parent(s): 7d5dbda

Update app.py

Files changed (1)
  app.py  +15 -15
app.py CHANGED
@@ -1,22 +1,19 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
-from unsloth import FastLanguageModel
+import torch
 
 def load_model(model_name="masonchu/qwen2.5-7b-lora-unsloth_nomerge"):
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-
-    # Load model with appropriate settings for inference using unsloth
-    model, _ = FastLanguageModel.from_pretrained(
+
+    # Load model with standard transformers settings
+    model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        max_seq_length=2048,
-        dtype=None,
-        load_in_4bit=True,
+        device_map="auto",
+        torch_dtype=torch.float16,
+        trust_remote_code=True
     )
-
-    # Enable faster inference
-    FastLanguageModel.for_inference(model)
-
+
     return model, tokenizer
 
 def predict(message, history):
@@ -26,19 +23,22 @@ def predict(message, history):
         human, assistant = msg
         prompt += f"### Instruction:\n{human}\n\n### Input:\n\n### Response:\n{assistant}\n\n"
     prompt += f"### Instruction:\n{message}\n\n### Input:\n\n### Response:\n"
-
+
     # Generate response
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=512,
+        temperature=0,
+        top_p=0.9,
+        repetition_penalty=1.1,
         use_cache=True
     )
-
+
     # Get response
     full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     response = full_response.split("### Response:\n")[-1].strip()
-
+
     return response
 
 # Load model globally
@@ -51,7 +51,7 @@ demo = gr.ChatInterface(
     predict,
     title="春笋科技 AI 助手",
     description="基于 Qwen2.5 模型训练的企业智能助手",
-    examples=["春笋科技公司的创始人是谁?", "春笋科技员工有多少天带薪年假?", "公司的技术实力如何?"],
+    examples=["你是谁?", "介绍一下公司的福利政策", "公司的技术实力如何?"],
     theme="soft"
 )
 
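As a quick sanity check of the new loading and generation path, a one-off call to predict() could be added temporarily at the bottom of app.py. This snippet is hypothetical and not part of the commit; it relies only on what the diff already shows (the globally loaded model and tokenizer, and the new example question from the ChatInterface).

# Hypothetical one-off check (not in the commit): exercise the
# transformers-based load and the new generation settings directly.
# An empty history makes predict() build a single-turn Alpaca-style prompt.
print(predict("你是谁?", history=[]))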
57