  ## Model Details

Training configuration for supervised fine-tuning (SFT) with LoRA:

```python
from dataclasses import dataclass

from peft import LoraConfig
from transformers import TrainingArguments


@dataclass
class SFTConfig:
    sft_model_name: str = 'facebook/opt-350m'
    sft_dataset_path: str = 'train.csv'
    sft_model_cache_dir: str = 'cache'
    sft_output_dir: str = '.'
    hf_key: str = ''


peft_config = LoraConfig(
    r=6,  # TODO: play with this number
    lora_alpha=11,  # TODO: play with this number
    target_modules=['q_proj', 'v_proj', 'k_proj'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",  # see peft's TaskType enum: https://github.com/huggingface/peft/blob/3d2bf9a8b261ed2960f26e61246cf0aa624a6115/src/peft/utils/peft_types.py#L67
)

training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    gradient_checkpointing=False,
    max_grad_norm=0.3,
    num_train_epochs=1,  # TODO: play with this number
    save_steps=100,
    learning_rate=0.0004,  # TODO: play with this number
    bf16=True,
    save_total_limit=3,
    logging_steps=10,
    output_dir='./sft_models',
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    remove_unused_columns=False,
    report_to="none",
)

# Maximum number of tokens to produce at generation time.
generate_max_length: int = 64
```
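
For reference, here is a minimal sketch of how these objects could be wired together. It is not the exact training script for this model: the CSV loading via `datasets`, the `text` column name, and the use of `Trainer` with a causal-LM collator are assumptions.

```python
from datasets import load_dataset
from peft import get_peft_model
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          DataCollatorForLanguageModeling, Trainer)

config = SFTConfig()

tokenizer = AutoTokenizer.from_pretrained(config.sft_model_name, cache_dir=config.sft_model_cache_dir)
model = AutoModelForCausalLM.from_pretrained(config.sft_model_name, cache_dir=config.sft_model_cache_dir)

# Wrap the base model with the LoRA adapters configured above;
# only the adapter weights are trained.
model = get_peft_model(model, peft_config)

# Assumes the CSV has a 'text' column holding the training examples.
dataset = load_dataset('csv', data_files=config.sft_dataset_path)['train']

def tokenize(batch):
    return tokenizer(batch['text'], truncation=True, max_length=512)

dataset = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    # mlm=False gives standard next-token (causal) language-modeling labels.
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
trainer.save_model(config.sft_output_dir)
```
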
  ### Model Description
 
  <!-- Provide a longer summary of what this model is. -->