  ## Model Details

Training configuration for supervised fine-tuning (SFT) with LoRA:

```python
from dataclasses import dataclass

from peft import LoraConfig
from transformers import TrainingArguments


@dataclass
class SFTConfig:
    sft_model_name: str = 'facebook/opt-350m'
    sft_dataset_path: str = 'train.csv'
    sft_model_cache_dir: str = 'cache'
    sft_output_dir: str = '.'
    hf_key: str = ''


peft_config = LoraConfig(
    r=6,  # TODO: play with this number
    lora_alpha=11,  # TODO: play with this number
    target_modules=['q_proj', 'v_proj', 'k_proj'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",  # see peft's TaskType enum: https://github.com/huggingface/peft/blob/3d2bf9a8b261ed2960f26e61246cf0aa624a6115/src/peft/utils/peft_types.py#L67
)

training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    gradient_checkpointing=False,
    max_grad_norm=0.3,
    num_train_epochs=1,  # TODO: play with this number
    save_steps=100,
    learning_rate=0.0004,  # TODO: play with this number
    bf16=True,
    save_total_limit=3,
    logging_steps=10,
    output_dir='./sft_models',
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    remove_unused_columns=False,
    report_to="none",
)

# Maximum number of tokens to produce at generation time.
generate_max_length: int = 64
```
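
For reference, here is a minimal sketch of how these objects could be wired together. It is not the exact training script for this model: the CSV loading via `datasets`, the `text` column name, and the use of `Trainer` with a causal-LM collator are assumptions.

```python
from datasets import load_dataset
from peft import get_peft_model
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          DataCollatorForLanguageModeling, Trainer)

config = SFTConfig()

tokenizer = AutoTokenizer.from_pretrained(config.sft_model_name, cache_dir=config.sft_model_cache_dir)
model = AutoModelForCausalLM.from_pretrained(config.sft_model_name, cache_dir=config.sft_model_cache_dir)

# Wrap the base model with the LoRA adapters configured above;
# only the adapter weights are trained.
model = get_peft_model(model, peft_config)

# Assumes the CSV has a 'text' column holding the training examples.
dataset = load_dataset('csv', data_files=config.sft_dataset_path)['train']

def tokenize(batch):
    return tokenizer(batch['text'], truncation=True, max_length=512)

dataset = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    # mlm=False gives standard next-token (causal) language-modeling labels.
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
trainer.save_model(config.sft_output_dir)
```
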
  ### Model Description
 
  <!-- Provide a longer summary of what this model is. -->