namratanwani committed on
Commit
840fa68
1 Parent(s): 7088ce0

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +23 -23
README.md CHANGED
@@ -109,29 +109,29 @@ Use the code below to get started with the model.
109
 
110
  - **Training regime:**
111
 
112
- max_seq_length = 2000
113
- trainer = SFTTrainer(
114
- model = model,
115
- tokenizer = tokenizer,
116
- train_dataset = train,
117
- dataset_text_field = "text",
118
- max_seq_length = max_seq_length,
119
- dataset_num_proc = 2,
120
- packing = False, # Can make training 5x faster for short sequences.
121
- args = TrainingArguments(
122
- per_device_train_batch_size = 2,
123
- gradient_accumulation_steps = 4,
124
- warmup_steps = 5,
125
- max_steps = 50,
126
- learning_rate = 2e-4,
127
- fp16 = not is_bfloat16_supported(),
128
- bf16 = is_bfloat16_supported(),
129
- logging_steps = 1,
130
- optim = "adamw_8bit",
131
- weight_decay = 0.01,
132
- lr_scheduler_type = "linear",
133
- seed = 3407,
134
- output_dir = "outputs",
135
  ),
136
  )
137
  #### Speeds, Sizes, Times [optional]
 
109
 
110
  - **Training regime:**
111
 
112
+ ```python
+ max_seq_length = 2000
113
+ trainer = SFTTrainer(
114
+ model = model,
115
+ tokenizer = tokenizer,
116
+ train_dataset = train,
117
+ dataset_text_field = "text",
118
+ max_seq_length = max_seq_length,
119
+ dataset_num_proc = 2,
120
+ packing = False, # Can make training 5x faster for short sequences.
121
+ args = TrainingArguments(
122
+ per_device_train_batch_size = 2,
123
+ gradient_accumulation_steps = 4,
124
+ warmup_steps = 5,
125
+ max_steps = 50,
126
+ learning_rate = 2e-4,
127
+ fp16 = not is_bfloat16_supported(),
128
+ bf16 = is_bfloat16_supported(),
129
+ logging_steps = 1,
130
+ optim = "adamw_8bit",
131
+ weight_decay = 0.01,
132
+ lr_scheduler_type = "linear",
133
+ seed = 3407,
134
+ output_dir = "outputs",
135
  ),
136
  )
+ ```
137
  #### Speeds, Sizes, Times [optional]