peterpeter8585 committed
Commit c8eccb8
Parent(s): 8fb847e

Update app.py

Files changed (1):
  1. app.py (+22, -2)

app.py CHANGED
@@ -6,6 +6,18 @@ import random
 from diffusers import DiffusionPipeline
 import torch
 import transformers
+
+from transformers import BitsAndBytesConfig
+
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.float16
+)
+
+
+
 from transformers import AutoModelForVision2Seq, AutoProcessor
 transformers.utils.move_cache()
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -59,7 +71,11 @@ def respond0(multimodal_input,password):
 model_id = "HuggingFaceM4/idefics2-8b"
 
 processor = AutoProcessor.from_pretrained(model_id)
-model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype=torch.float16)
+model = AutoModelForVision2Seq.from_pretrained(
+    "HuggingFaceM4/idefics2-8b",
+    torch_dtype=torch.float16,
+    quantization_config=quantization_config
+).to("cpu")
 prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
 inputs = processor(text=prompt, images=[images], return_tensors="pt")
 inputs = {k: v.to(model.device) for k, v in inputs.items()}
@@ -83,7 +99,11 @@ def respond0(multimodal_input,password):
 model_id = "HuggingFaceM4/idefics2-8b"
 
 processor = AutoProcessor.from_pretrained(model_id)
-model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype=torch.float16)
+model = AutoModelForVision2Seq.from_pretrained(
+    "HuggingFaceM4/idefics2-8b",
+    torch_dtype=torch.float16,
+    quantization_config=quantization_config
+).to("cpu")
 prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
 inputs = processor(text=prompt, images=[images], return_tensors="pt")
 inputs = {k: v.to(model.device) for k, v in inputs.items()}
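
For context: this commit replaces the plain fp16 load of HuggingFaceM4/idefics2-8b with a 4-bit NF4 quantized load via BitsAndBytesConfig (double quantization on, fp16 compute dtype), which cuts weight memory roughly in half versus fp16. One caveat worth flagging: bitsandbytes 4-bit weights are CUDA-only, and transformers rejects `.to()` calls on quantized models, so the trailing `.to("cpu")` in the new code is likely to raise an error at runtime. Below is a minimal sketch of the more conventional loading pattern; it assumes a CUDA GPU and the bitsandbytes and accelerate packages, and is not something this commit itself does:

# Sketch only: conventional 4-bit load for idefics2-8b (assumes a CUDA GPU
# plus the bitsandbytes and accelerate packages; not part of this commit).
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig

model_id = "HuggingFaceM4/idefics2-8b"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4-bit
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for matmuls at runtime
)

processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate place the quantized weights; no .to(...) call
)

With this pattern, the existing line `inputs = {k: v.to(model.device) for k, v in inputs.items()}` keeps working unchanged, since `model.device` reports wherever accelerate placed the weights.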