dnau6 committed on
Commit 768c381 · verified · 1 parent: 548ff12

End of training

README.md CHANGED
@@ -34,13 +34,15 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
- - train_batch_size: 1
+ - train_batch_size: 2
 - eval_batch_size: 8
 - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 4
 - optimizer: Use paged_adamw_8bit with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
- - lr_scheduler_warmup_steps: 50
- - num_epochs: 1
+ - lr_scheduler_warmup_steps: 5
+ - num_epochs: 4
 
 ### Training results
 
@@ -48,8 +50,8 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
- - PEFT 0.13.2
+ - PEFT 0.14.0
 - Transformers 4.46.3
- - Pytorch 2.5.1+cu121
- - Datasets 3.2.0
+ - Pytorch 2.4.0
+ - Datasets 3.1.0
 - Tokenizers 0.20.3
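
For reference, a minimal sketch of how the updated hyperparameters map onto `transformers.TrainingArguments`; the `output_dir` is hypothetical, all other values are taken from the README diff above. The effective train batch size works out to 2 per device × 2 accumulation steps = 4, matching the reported `total_train_batch_size`.

```python
# Sketch only: output_dir is a hypothetical name; the remaining values
# mirror the hyperparameters listed in the README diff above.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="smolvlm-base-lora",   # hypothetical
    learning_rate=1e-4,
    per_device_train_batch_size=2,    # was 1 before this commit
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,    # new in this commit
    seed=42,
    optim="paged_adamw_8bit",
    lr_scheduler_type="linear",
    warmup_steps=5,                   # was 50 before this commit
    num_train_epochs=4,               # was 1 before this commit
)
```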
adapter_config.json CHANGED
@@ -6,6 +6,8 @@
 },
 "base_model_name_or_path": "HuggingFaceTB/SmolVLM-Base",
 "bias": "none",
+ "eva_config": null,
+ "exclude_modules": null,
 "fan_in_fan_out": false,
 "inference_mode": true,
 "init_lora_weights": "gaussian",
@@ -14,6 +16,7 @@
 "layers_to_transform": null,
 "loftq_config": {},
 "lora_alpha": 8,
+ "lora_bias": false,
 "lora_dropout": 0.1,
 "megatron_config": null,
 "megatron_core": "megatron.core",
@@ -23,13 +26,13 @@
 "rank_pattern": {},
 "revision": null,
 "target_modules": [
- "k_proj",
- "down_proj",
- "gate_proj",
+ "o_proj",
 "up_proj",
 "q_proj",
+ "gate_proj",
 "v_proj",
- "o_proj"
+ "down_proj",
+ "k_proj"
 ],
 "task_type": null,
 "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:bc308e7e5d3758906672e5d5b6b8ca1b9ab291f2623f312a87251847cff3be56
+ oid sha256:69711bcbe4e93aadc10e787e7348e7917e097ca29a22cc017cdf7967b3d9cb26
 size 42220792
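
A minimal sketch of loading the retrained adapter on top of its base model. The base model id comes from `adapter_config.json` above; the adapter repo id is hypothetical, since the repository name is not part of this commit:

```python
from transformers import AutoModelForVision2Seq
from peft import PeftModel

# Load the frozen base model, then attach the LoRA adapter weights.
base = AutoModelForVision2Seq.from_pretrained("HuggingFaceTB/SmolVLM-Base")
model = PeftModel.from_pretrained(base, "dnau6/smolvlm-base-lora")  # hypothetical repo id
```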
runs/Dec12_03-58-02_a4b7fc4453ef/events.out.tfevents.1733975888.a4b7fc4453ef.23.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5e726d014d871627550db7266b8740140df2a6712ee2bce8515bd7d016998f4
+ size 12063
runs/Dec12_03-58-58_a4b7fc4453ef/events.out.tfevents.1733975942.a4b7fc4453ef.23.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d213a1783c0228dfcedc23cdbfe3fdd8d98d00d66181e5eb1b9e3ab9437498c0
+ size 13032
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ea08b947363a05a7fed65154db61c35c9a084843877643918b3a19fdf356d305
+ oid sha256:c5742b0ee48ad4d56f4811e3242db0ba06b5f81e5d5d0cdb4b576164bb707ad2
 size 5304
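
Each LFS pointer above stores the SHA-256 of the tracked file as its `oid`, so a download can be verified directly. A minimal sketch using `training_args.bin` and the new oid from this commit:

```python
import hashlib

# Compare a local download against the oid recorded in the LFS pointer.
with open("training_args.bin", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

print(digest == "c5742b0ee48ad4d56f4811e3242db0ba06b5f81e5d5d0cdb4b576164bb707ad2")
```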