Brandon Royal committed
Commit 0ab4276
1 Parent(s): 221d1e4

updated README.md

Files changed (1)
  1. README.md +5 -75
README.md CHANGED
@@ -9,7 +9,7 @@ license_name: gemma-terms-of-use
  license_link: https://ai.google.dev/gemma/terms
  ---

- AWQ quantized version of gemma-2b model.
+ AWQ quantized version of [google/gemma-2b](https://huggingface.co/google/gemma-2b).

  ---

@@ -61,22 +61,6 @@ In that repository, we provide:



- #### Running the model on a CPU
-
-
- ```python
- from transformers import AutoTokenizer, AutoModelForCausalLM
-
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b")
-
- input_text = "Write me a poem about Machine Learning."
- input_ids = tokenizer(input_text, return_tensors="pt")
-
- outputs = model.generate(**input_ids)
- print(tokenizer.decode(outputs[0]))
- ```
-

  #### Running the model on a single / multi GPU

@@ -85,8 +69,8 @@ print(tokenizer.decode(outputs[0]))
  # pip install accelerate
  from transformers import AutoTokenizer, AutoModelForCausalLM

- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto")
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-AWQ")
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-AWQ", device_map="auto")

  input_text = "Write me a poem about Machine Learning."
  input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
@@ -104,62 +88,8 @@ print(tokenizer.decode(outputs[0]))
  # pip install accelerate
  from transformers import AutoTokenizer, AutoModelForCausalLM

- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto", torch_dtype=torch.float16)
-
- input_text = "Write me a poem about Machine Learning."
- input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
-
- outputs = model.generate(**input_ids)
- print(tokenizer.decode(outputs[0]))
- ```
-
- * _Using `torch.bfloat16`_
-
- ```python
- # pip install accelerate
- from transformers import AutoTokenizer, AutoModelForCausalLM
-
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto", torch_dtype=torch.bfloat16)
-
- input_text = "Write me a poem about Machine Learning."
- input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
-
- outputs = model.generate(**input_ids)
- print(tokenizer.decode(outputs[0]))
- ```
-
- #### Quantized Versions through `bitsandbytes`
-
- * _Using 8-bit precision (int8)_
-
- ```python
- # pip install bitsandbytes accelerate
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-
- quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", quantization_config=quantization_config)
-
- input_text = "Write me a poem about Machine Learning."
- input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
-
- outputs = model.generate(**input_ids)
- print(tokenizer.decode(outputs[0]))
- ```
-
- * _Using 4-bit precision_
-
- ```python
- # pip install bitsandbytes accelerate
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-
- quantization_config = BitsAndBytesConfig(load_in_4bit=True)
-
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", quantization_config=quantization_config)
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-AWQ")
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-AWQ", device_map="auto", torch_dtype=torch.float16)

  input_text = "Write me a poem about Machine Learning."
  input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
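
For reference, a minimal end-to-end sketch of the updated GPU snippet, assuming the repo id `google/gemma-2b-AWQ` used in the diff, a CUDA-capable GPU, and that `autoawq` is installed alongside `accelerate` so transformers can load the AWQ-quantized weights; note the `torch_dtype=torch.float16` argument also needs `torch` imported, which the snippet in the diff omits:

```python
# pip install accelerate autoawq
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Repo id taken from the diff above; adjust if the checkpoint lives elsewhere.
model_id = "google/gemma-2b-AWQ"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# The AWQ quantization settings are read from the checkpoint's quantization_config.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)

input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))
```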