NewBreaker committed
Commit 310cea3
2 Parent(s): 00b02de b689d9b

add app.py int4 cpu model

Files changed (4)
  1. .idea/chatglm-6b-int4.iml +1 -1
  2. app.py +10 -6
  3. app_local.py +2 -1
  4. demo_single_chat.py +10 -3
.idea/chatglm-6b-int4.iml CHANGED
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="Python 3.8" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyDocumentationSettings">
app.py CHANGED
@@ -1,16 +1,20 @@
 from transformers import AutoTokenizer, AutoModel
 import gradio as gr
 
-tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
-model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+# tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
+# model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+# chatglm-6b-int4 cuda, runs successfully locally
+# tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
+# model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+
 
+# chatglm-6b-int4 CPU
+tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
+model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").float()
 
-# from transformers import AutoTokenizer, AutoModel
-# tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
-# model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
-# model = model.eval()
 
 
+# chatglm-6b
 # kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
 # tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
 # model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
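The net effect in app.py: the int4 checkpoint is now loaded in fp32 on the CPU via .float(), with the fp16 CUDA path kept around as comments. A minimal sketch, assuming you want one loader for both cases (MODEL_PATH is a hypothetical name for the local path used in the diff; torch.cuda.is_available() is standard PyTorch):

    # Sketch only: folds the commented-out CUDA branch and the new CPU
    # branch of app.py into one device-aware loader.
    import torch
    from transformers import AutoTokenizer, AutoModel

    MODEL_PATH = ".\\models\\chatglm-6b-int4"  # local path from the diff above
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True, revision="")
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, revision="")
    if torch.cuda.is_available():
        model = model.half().cuda()  # fp16 on GPU, as in the commented-out lines
    else:
        model = model.float()        # fp32 on CPU, as in the new active lines
    model = model.eval()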
app_local.py CHANGED
@@ -2,7 +2,8 @@ from transformers import AutoTokenizer, AutoModel
 import gradio as gr
 
 tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
-model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half()
+# model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
 
 
 # from transformers import AutoTokenizer, AutoModel
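Note that app_local.py ends up with .half() and no .cuda(), i.e. fp16 weights on the CPU. The ChatGLM-6B docs recommend fp32 for CPU inference, so if this variant fails or is very slow locally, the CPU line from app.py is the safer fallback:

    # Fallback matching app.py's CPU branch (fp32 on CPU):
    model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").float()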
demo_single_chat.py CHANGED
@@ -1,8 +1,15 @@
 from transformers import AutoTokenizer, AutoModel
 tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
-model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
-kernel_file =
-model = model.quantize(bits=4, kernel_file=kernel)
+# model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").float()
+
+# tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+# model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+
+
+
+kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
+model = model.quantize(bits=4, kernel_file=kernel_file)
 model = model.eval()
 
 def parse_text(text):
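demo_single_chat.py also completes the previously broken kernel lines: kernel_file now points at the quantization_kernels.so shipped with the int4 checkpoint, and model.quantize(bits=4, kernel_file=kernel_file) loads that precompiled CPU kernel explicitly. A quick smoke test of the resulting model, assuming the standard ChatGLM chat API exposed by the checkpoint's remote code:

    # Sketch: one chat turn with the CPU model configured above.
    # model.chat() returns (response, updated_history).
    response, history = model.chat(tokenizer, "Hello", history=[])
    print(response)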