lunahr committed on
Commit
8ea1897
·
unverified ·
1 Parent(s): f6c533e
Files changed (1) hide show
  1. README.md +20 -17
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  license: gemma
3
  library_name: transformers
4
  pipeline_tag: text-generation
@@ -8,10 +9,13 @@ extra_gated_prompt: >-
8
  Google’s usage license. To do this, please ensure you’re logged in to Hugging
9
  Face and click below. Requests are processed immediately.
10
  extra_gated_button_content: Acknowledge license
 
 
11
  ---
12
 
13
 
14
- # Gemma 2 model card
 
15
 
16
  **Model Page**: [Gemma](https://ai.google.dev/gemma/docs)
17
 
@@ -21,7 +25,7 @@ extra_gated_button_content: Acknowledge license
21
  * [Gemma on Kaggle][kaggle-gemma]
22
  * [Gemma on Vertex Model Garden][vertex-mg-gemma]
23
 
24
- **Terms of Use**: [Terms](https://www.kaggle.com/models/google/gemma/license/consent/verify/huggingface?returnModelRepoId=google/gemma-2-27b-it)
25
 
26
  **Authors**: Google
27
 
@@ -58,7 +62,7 @@ from transformers import pipeline
58
 
59
  pipe = pipeline(
60
  "text-generation",
61
- model="google/gemma-2-27b-it",
62
  model_kwargs={"torch_dtype": torch.bfloat16},
63
  device="cuda", # replace with "mps" to run on a Mac device
64
  )
@@ -80,9 +84,9 @@ print(assistant_response)
80
  from transformers import AutoTokenizer, AutoModelForCausalLM
81
  import torch
82
 
83
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
84
  model = AutoModelForCausalLM.from_pretrained(
85
- "google/gemma-2-27b-it",
86
  device_map="auto",
87
  torch_dtype=torch.bfloat16,
88
  )
@@ -118,9 +122,9 @@ You can also use `float32` if you skip the dtype, but no precision increase will
118
  # pip install accelerate
119
  from transformers import AutoTokenizer, AutoModelForCausalLM
120
 
121
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
122
  model = AutoModelForCausalLM.from_pretrained(
123
- "google/gemma-2-27b-it",
124
  device_map="auto",
125
  )
126
 
@@ -138,7 +142,7 @@ for running Gemma 2 through a command line interface, or CLI. Follow the [instal
138
  for getting started, then launch the CLI through the following command:
139
 
140
  ```shell
141
- local-gemma --model 27b --preset speed
142
  ```
143
 
144
  #### Quantized Versions through `bitsandbytes`
@@ -154,9 +158,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
154
 
155
  quantization_config = BitsAndBytesConfig(load_in_8bit=True)
156
 
157
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
158
  model = AutoModelForCausalLM.from_pretrained(
159
- "google/gemma-2-27b-it",
160
  quantization_config=quantization_config,
161
  )
162
 
@@ -179,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
179
 
180
  quantization_config = BitsAndBytesConfig(load_in_4bit=True)
181
 
182
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
183
  model = AutoModelForCausalLM.from_pretrained(
184
- "google/gemma-2-27b-it",
185
  quantization_config=quantization_config,
186
  )
187
 
@@ -216,8 +220,8 @@ import torch
216
  torch.set_float32_matmul_precision("high")
217
 
218
  # load the model + tokenizer
219
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
220
- model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-27b-it", torch_dtype=torch.bfloat16)
221
  model.to("cuda")
222
 
223
  # apply the torch compile transformation
@@ -267,15 +271,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
267
  import transformers
268
  import torch
269
 
270
- model_id = "google/gemma-2-27b-it"
271
  dtype = torch.bfloat16
272
 
273
  tokenizer = AutoTokenizer.from_pretrained(model_id)
274
  model = AutoModelForCausalLM.from_pretrained(
275
  model_id,
276
  device_map="cuda",
277
- torch_dtype=dtype,
278
- )
279
 
280
  chat = [
281
  { "role": "user", "content": "Write a hello world program" },
 
1
  ---
2
+ base_model: google/gemma-2-27b-it
3
  license: gemma
4
  library_name: transformers
5
  pipeline_tag: text-generation
 
9
  Google’s usage license. To do this, please ensure you’re logged in to Hugging
10
  Face and click below. Requests are processed immediately.
11
  extra_gated_button_content: Acknowledge license
12
+ tags:
13
+ - conversational
14
  ---
15
 
16
 
17
+ # SystemGemma2 27B model card
18
+ This is a version of [Gemma 2 27B](https://huggingface.co/google/gemma-2-27b-it) with system prompts enabled.
19
 
20
  **Model Page**: [Gemma](https://ai.google.dev/gemma/docs)
21
 
 
25
  * [Gemma on Kaggle][kaggle-gemma]
26
  * [Gemma on Vertex Model Garden][vertex-mg-gemma]
27
 
28
+ **Terms of Use**: [Terms](https://www.kaggle.com/models/google/gemma/license/consent/verify/huggingface?returnModelRepoId=google/gemma-2-27b-it)
29
 
30
  **Authors**: Google
31
 
 
62
 
63
  pipe = pipeline(
64
  "text-generation",
65
+ model="google/gemma-2-9b-it",
66
  model_kwargs={"torch_dtype": torch.bfloat16},
67
  device="cuda", # replace with "mps" to run on a Mac device
68
  )
 
84
  from transformers import AutoTokenizer, AutoModelForCausalLM
85
  import torch
86
 
87
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
88
  model = AutoModelForCausalLM.from_pretrained(
89
+ "google/gemma-2-9b-it",
90
  device_map="auto",
91
  torch_dtype=torch.bfloat16,
92
  )
 
122
  # pip install accelerate
123
  from transformers import AutoTokenizer, AutoModelForCausalLM
124
 
125
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
126
  model = AutoModelForCausalLM.from_pretrained(
127
+ "google/gemma-2-9b-it",
128
  device_map="auto",
129
  )
130
 
 
142
  for getting started, then launch the CLI through the following command:
143
 
144
  ```shell
145
+ local-gemma --model 9b --preset speed
146
  ```
147
 
148
  #### Quantized Versions through `bitsandbytes`
 
158
 
159
  quantization_config = BitsAndBytesConfig(load_in_8bit=True)
160
 
161
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
162
  model = AutoModelForCausalLM.from_pretrained(
163
+ "google/gemma-2-9b-it",
164
  quantization_config=quantization_config,
165
  )
166
 
 
183
 
184
  quantization_config = BitsAndBytesConfig(load_in_4bit=True)
185
 
186
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
187
  model = AutoModelForCausalLM.from_pretrained(
188
+ "google/gemma-2-9b-it",
189
  quantization_config=quantization_config,
190
  )
191
 
 
220
  torch.set_float32_matmul_precision("high")
221
 
222
  # load the model + tokenizer
223
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
224
+ model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-9b-it", torch_dtype=torch.bfloat16)
225
  model.to("cuda")
226
 
227
  # apply the torch compile transformation
 
271
  import transformers
272
  import torch
273
 
274
+ model_id = "google/gemma-2-9b-it"
275
  dtype = torch.bfloat16
276
 
277
  tokenizer = AutoTokenizer.from_pretrained(model_id)
278
  model = AutoModelForCausalLM.from_pretrained(
279
  model_id,
280
  device_map="cuda",
281
+ torch_dtype=dtype,)
 
282
 
283
  chat = [
284
  { "role": "user", "content": "Write a hello world program" },