saim1212 committed on Mar 15

Commit

2fa44d9

verified ·

1 Parent(s): fd79b14

second model upload

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +3 -0
README.md +63 -0
adapter_config.json +463 -0
adapter_model.safetensors +3 -0
added_tokens.json +16 -0
all_results.json +8 -0
chat_template.json +3 -0
checkpoint-1000/README.md +202 -0
checkpoint-1000/adapter_config.json +463 -0
checkpoint-1000/adapter_model.safetensors +3 -0
checkpoint-1000/added_tokens.json +16 -0
checkpoint-1000/chat_template.json +3 -0
checkpoint-1000/merges.txt +0 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/preprocessor_config.json +29 -0
checkpoint-1000/rng_state_0.pth +3 -0
checkpoint-1000/rng_state_1.pth +3 -0
checkpoint-1000/scaler.pt +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/special_tokens_map.json +31 -0
checkpoint-1000/tokenizer.json +3 -0
checkpoint-1000/tokenizer_config.json +148 -0
checkpoint-1000/trainer_state.json +733 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-1000/vocab.json +0 -0
checkpoint-1550/README.md +202 -0
checkpoint-1550/adapter_config.json +463 -0
checkpoint-1550/adapter_model.safetensors +3 -0
checkpoint-1550/added_tokens.json +16 -0
checkpoint-1550/chat_template.json +3 -0
checkpoint-1550/merges.txt +0 -0
checkpoint-1550/optimizer.pt +3 -0
checkpoint-1550/preprocessor_config.json +29 -0
checkpoint-1550/rng_state_0.pth +3 -0
checkpoint-1550/rng_state_1.pth +3 -0
checkpoint-1550/scaler.pt +3 -0
checkpoint-1550/scheduler.pt +3 -0
checkpoint-1550/special_tokens_map.json +31 -0
checkpoint-1550/tokenizer.json +3 -0
checkpoint-1550/tokenizer_config.json +148 -0
checkpoint-1550/trainer_state.json +1118 -0
checkpoint-1550/training_args.bin +3 -0
checkpoint-1550/vocab.json +0 -0
merges.txt +0 -0
preprocessor_config.json +29 -0
runs/Mar14_21-25-37_36c244e9105b/events.out.tfevents.1741987616.36c244e9105b.153.0 +3 -0
special_tokens_map.json +31 -0
tokenizer.json +3 -0
tokenizer_config.json +148 -0
train_results.json +8 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-1550/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,63 @@

+---
+library_name: peft
+license: other
+base_model: saim1212/penguin2
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+model-index:
+- name: qwen2vl_lora_16lr_7b
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# qwen2vl_lora_16lr_7b
+This model is a fine-tuned version of [saim1212/penguin2](https://huggingface.co/saim1212/penguin2) on the talk2car dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 2
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 8
+- total_eval_batch_size: 16
+- optimizer: adamw_torch with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 25.0
+- mixed_precision_training: Native AMP
+### Training results
+### Framework versions
+- PEFT 0.12.0
+- Transformers 4.49.0
+- Pytorch 2.4.1+cu121
+- Datasets 3.2.0
+- Tokenizers 0.21.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,463 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "saim1212/penguin2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "model.layers.26.self_attn.o_proj",
+    "model.layers.19.self_attn.o_proj",
+    "visual.blocks.22.mlp.fc1",
+    "model.layers.1.self_attn.o_proj",
+    "model.layers.9.mlp.up_proj",
+    "model.layers.23.self_attn_text.q_proj",
+    "model.layers.24.self_attn_text.o_proj",
+    "model.layers.15.self_attn_text.o_proj",
+    "model.layers.27.self_attn.v_proj",
+    "model.layers.8.self_attn_text.k_proj",
+    "visual.blocks.10.attn.proj",
+    "visual.blocks.28.mlp.fc2",
+    "model.layers.3.self_attn_text.k_proj",
+    "model.layers.12.self_attn.v_proj",
+    "model.layers.18.self_attn_text.o_proj",
+    "visual.blocks.5.mlp.fc2",
+    "model.layers.13.self_attn.q_proj",
+    "visual.blocks.7.mlp.fc1",
+    "model.layers.27.mlp.down_proj",
+    "visual.blocks.1.mlp.fc1",
+    "model.layers.12.mlp.up_proj",
+    "model.layers.5.self_attn.o_proj",
+    "model.layers.15.self_attn_text.q_proj",
+    "model.layers.2.self_attn_text.k_proj",
+    "model.layers.3.self_attn_text.q_proj",
+    "model.layers.12.mlp.down_proj",
+    "model.layers.14.self_attn_text.q_proj",
+    "model.layers.19.self_attn_text.o_proj",
+    "visual.blocks.23.attn.proj",
+    "model.layers.20.self_attn.o_proj",
+    "model.layers.5.self_attn_text.k_proj",
+    "model.layers.26.mlp.gate_proj",
+    "model.layers.8.self_attn.q_proj",
+    "model.layers.20.mlp.gate_proj",
+    "model.layers.16.self_attn_text.k_proj",
+    "model.layers.20.self_attn.k_proj",
+    "visual.blocks.9.attn.qkv",
+    "model.layers.4.self_attn_text.k_proj",
+    "model.layers.4.mlp.gate_proj",
+    "model.layers.6.self_attn.v_proj",
+    "model.layers.1.self_attn_text.o_proj",
+    "model.layers.16.mlp.up_proj",
+    "visual.blocks.16.mlp.fc2",
+    "model.layers.10.self_attn.v_proj",
+    "model.layers.17.self_attn_text.o_proj",
+    "model.layers.17.self_attn.v_proj",
+    "visual.blocks.9.mlp.fc1",
+    "model.layers.25.mlp.gate_proj",
+    "model.layers.25.self_attn_text.q_proj",
+    "model.layers.9.self_attn.k_proj",
+    "model.layers.18.self_attn.q_proj",
+    "visual.blocks.9.attn.proj",
+    "visual.blocks.14.mlp.fc1",
+    "model.layers.13.self_attn.o_proj",
+    "model.layers.24.self_attn.v_proj",
+    "model.layers.11.mlp.down_proj",
+    "model.layers.27.self_attn_text.v_proj",
+    "model.layers.16.self_attn_text.o_proj",
+    "model.layers.25.mlp.down_proj",
+    "visual.blocks.4.mlp.fc2",
+    "model.layers.27.self_attn.q_proj",
+    "visual.blocks.5.attn.proj",
+    "model.layers.19.mlp.gate_proj",
+    "model.layers.14.self_attn.o_proj",
+    "model.layers.19.self_attn.v_proj",
+    "model.layers.13.mlp.gate_proj",
+    "model.layers.18.self_attn.o_proj",
+    "model.layers.18.self_attn.k_proj",
+    "model.layers.26.self_attn.k_proj",
+    "model.layers.9.self_attn_text.o_proj",
+    "model.layers.26.self_attn.v_proj",
+    "model.layers.27.self_attn.k_proj",
+    "model.layers.25.self_attn.o_proj",
+    "visual.blocks.20.attn.proj",
+    "visual.blocks.26.attn.qkv",
+    "model.layers.23.self_attn_text.v_proj",
+    "visual.blocks.14.attn.qkv",
+    "model.layers.19.self_attn.k_proj",
+    "model.layers.13.self_attn_text.q_proj",
+    "model.layers.13.mlp.down_proj",
+    "model.layers.21.self_attn.k_proj",
+    "model.layers.0.self_attn_text.o_proj",
+    "model.layers.6.self_attn.k_proj",
+    "visual.blocks.31.attn.proj",
+    "model.layers.16.self_attn.v_proj",
+    "model.layers.20.mlp.up_proj",
+    "visual.blocks.3.mlp.fc2",
+    "model.layers.3.self_attn.k_proj",
+    "visual.blocks.12.attn.qkv",
+    "model.layers.10.self_attn.k_proj",
+    "model.layers.12.self_attn_text.k_proj",
+    "visual.blocks.22.mlp.fc2",
+    "model.layers.11.self_attn.q_proj",
+    "visual.blocks.19.mlp.fc1",
+    "visual.blocks.2.mlp.fc1",
+    "model.layers.26.self_attn_text.k_proj",
+    "model.layers.5.self_attn.q_proj",
+    "model.layers.7.self_attn.q_proj",
+    "visual.blocks.27.attn.proj",
+    "model.layers.8.self_attn_text.v_proj",
+    "model.layers.12.mlp.gate_proj",
+    "model.layers.27.self_attn_text.q_proj",
+    "visual.blocks.1.attn.proj",
+    "model.layers.4.self_attn_text.o_proj",
+    "visual.blocks.6.mlp.fc2",
+    "model.layers.26.self_attn_text.v_proj",
+    "visual.blocks.6.mlp.fc1",
+    "visual.blocks.31.mlp.fc1",
+    "model.layers.8.mlp.gate_proj",
+    "visual.blocks.18.mlp.fc1",
+    "visual.blocks.14.attn.proj",
+    "model.layers.15.self_attn.o_proj",
+    "model.layers.16.self_attn.q_proj",
+    "visual.blocks.7.mlp.fc2",
+    "model.layers.11.self_attn.k_proj",
+    "model.layers.7.mlp.up_proj",
+    "model.layers.10.self_attn_text.v_proj",
+    "model.layers.23.self_attn.k_proj",
+    "visual.blocks.11.attn.qkv",
+    "visual.blocks.5.attn.qkv",
+    "model.layers.15.self_attn_text.v_proj",
+    "visual.blocks.21.attn.proj",
+    "model.layers.10.mlp.gate_proj",
+    "model.layers.5.self_attn.v_proj",
+    "model.layers.6.mlp.down_proj",
+    "model.layers.9.self_attn_text.v_proj",
+    "model.layers.4.self_attn_text.q_proj",
+    "model.layers.21.self_attn.v_proj",
+    "model.layers.8.mlp.down_proj",
+    "visual.blocks.8.mlp.fc2",
+    "model.layers.23.self_attn_text.o_proj",
+    "model.layers.1.self_attn.q_proj",
+    "model.layers.20.self_attn_text.k_proj",
+    "model.layers.8.self_attn.o_proj",
+    "model.layers.20.self_attn_text.o_proj",
+    "model.layers.6.mlp.up_proj",
+    "model.layers.1.mlp.down_proj",
+    "model.layers.18.mlp.down_proj",
+    "model.layers.18.mlp.gate_proj",
+    "model.layers.11.mlp.up_proj",
+    "visual.blocks.2.attn.proj",
+    "model.layers.0.mlp.down_proj",
+    "visual.blocks.0.mlp.fc2",
+    "visual.blocks.25.attn.proj",
+    "model.layers.0.self_attn.k_proj",
+    "model.layers.27.self_attn_text.k_proj",
+    "visual.blocks.12.mlp.fc1",
+    "model.layers.9.self_attn.q_proj",
+    "visual.blocks.17.attn.qkv",
+    "model.layers.17.self_attn_text.q_proj",
+    "model.layers.15.mlp.gate_proj",
+    "visual.blocks.21.attn.qkv",
+    "model.layers.16.mlp.gate_proj",
+    "model.layers.19.self_attn_text.v_proj",
+    "model.layers.24.self_attn_text.q_proj",
+    "visual.blocks.8.mlp.fc1",
+    "visual.blocks.30.mlp.fc2",
+    "model.layers.10.self_attn.q_proj",
+    "model.layers.14.mlp.gate_proj",
+    "model.layers.5.self_attn_text.q_proj",
+    "visual.blocks.26.mlp.fc2",
+    "model.layers.1.self_attn_text.k_proj",
+    "visual.blocks.29.mlp.fc1",
+    "model.layers.18.self_attn.v_proj",
+    "model.layers.23.mlp.gate_proj",
+    "visual.blocks.13.mlp.fc1",
+    "model.layers.5.self_attn_text.o_proj",
+    "model.layers.14.mlp.up_proj",
+    "visual.blocks.6.attn.qkv",
+    "model.layers.23.mlp.up_proj",
+    "model.layers.14.self_attn_text.v_proj",
+    "visual.blocks.4.mlp.fc1",
+    "visual.blocks.20.attn.qkv",
+    "model.layers.6.self_attn_text.q_proj",
+    "visual.blocks.25.attn.qkv",
+    "visual.blocks.15.attn.qkv",
+    "model.layers.1.self_attn.k_proj",
+    "model.layers.19.self_attn.q_proj",
+    "model.layers.4.self_attn.o_proj",
+    "model.layers.8.self_attn.v_proj",
+    "visual.blocks.23.attn.qkv",
+    "model.layers.3.self_attn.q_proj",
+    "model.layers.5.mlp.gate_proj",
+    "model.layers.1.mlp.up_proj",
+    "model.layers.11.mlp.gate_proj",
+    "visual.blocks.24.mlp.fc2",
+    "model.layers.1.mlp.gate_proj",
+    "visual.blocks.20.mlp.fc1",
+    "visual.blocks.13.mlp.fc2",
+    "visual.blocks.14.mlp.fc2",
+    "visual.blocks.3.attn.qkv",
+    "model.layers.12.self_attn_text.q_proj",
+    "model.layers.25.self_attn_text.o_proj",
+    "visual.blocks.19.attn.proj",
+    "visual.blocks.23.mlp.fc1",
+    "model.layers.14.mlp.down_proj",
+    "visual.blocks.25.mlp.fc2",
+    "model.layers.0.self_attn_text.q_proj",
+    "model.layers.23.self_attn_text.k_proj",
+    "model.layers.12.self_attn.k_proj",
+    "model.layers.4.self_attn.k_proj",
+    "visual.blocks.28.mlp.fc1",
+    "model.layers.21.self_attn_text.v_proj",
+    "model.layers.10.mlp.down_proj",
+    "visual.blocks.18.attn.qkv",
+    "model.layers.5.mlp.up_proj",
+    "model.layers.23.self_attn.v_proj",
+    "visual.blocks.31.mlp.fc2",
+    "model.layers.3.mlp.down_proj",
+    "visual.blocks.2.mlp.fc2",
+    "visual.blocks.10.mlp.fc2",
+    "model.layers.27.self_attn.o_proj",
+    "model.layers.11.self_attn_text.v_proj",
+    "model.layers.17.self_attn_text.k_proj",
+    "visual.blocks.25.mlp.fc1",
+    "visual.blocks.3.attn.proj",
+    "model.layers.2.self_attn.q_proj",
+    "model.layers.26.self_attn_text.o_proj",
+    "model.layers.9.self_attn.v_proj",
+    "model.layers.7.self_attn_text.o_proj",
+    "model.layers.20.self_attn.q_proj",
+    "model.layers.21.mlp.down_proj",
+    "model.layers.17.self_attn.q_proj",
+    "visual.blocks.17.attn.proj",
+    "model.layers.7.mlp.down_proj",
+    "model.layers.21.mlp.gate_proj",
+    "model.layers.20.mlp.down_proj",
+    "model.layers.7.self_attn.o_proj",
+    "model.layers.6.self_attn_text.o_proj",
+    "model.layers.5.self_attn_text.v_proj",
+    "model.layers.22.mlp.gate_proj",
+    "model.layers.7.self_attn_text.k_proj",
+    "model.layers.19.mlp.down_proj",
+    "model.layers.6.self_attn_text.k_proj",
+    "model.layers.9.self_attn_text.k_proj",
+    "visual.blocks.15.attn.proj",
+    "visual.blocks.6.attn.proj",
+    "model.layers.22.self_attn.k_proj",
+    "visual.blocks.13.attn.proj",
+    "model.layers.0.mlp.gate_proj",
+    "model.layers.13.self_attn.v_proj",
+    "model.layers.22.self_attn.q_proj",
+    "model.layers.19.self_attn_text.k_proj",
+    "model.layers.10.self_attn_text.q_proj",
+    "model.layers.2.mlp.down_proj",
+    "visual.blocks.10.attn.qkv",
+    "model.layers.4.mlp.up_proj",
+    "visual.blocks.16.attn.qkv",
+    "model.layers.13.self_attn_text.o_proj",
+    "model.layers.21.self_attn.o_proj",
+    "model.layers.13.mlp.up_proj",
+    "model.layers.7.self_attn_text.q_proj",
+    "visual.blocks.0.attn.proj",
+    "visual.blocks.17.mlp.fc1",
+    "model.layers.25.self_attn_text.v_proj",
+    "model.layers.3.self_attn.o_proj",
+    "visual.blocks.30.attn.proj",
+    "model.layers.16.self_attn.o_proj",
+    "model.layers.23.self_attn.o_proj",
+    "model.layers.4.mlp.down_proj",
+    "model.layers.17.self_attn_text.v_proj",
+    "model.layers.12.self_attn.q_proj",
+    "visual.blocks.3.mlp.fc1",
+    "visual.blocks.26.attn.proj",
+    "model.layers.21.self_attn.q_proj",
+    "visual.blocks.27.attn.qkv",
+    "model.layers.17.mlp.gate_proj",
+    "model.layers.23.mlp.down_proj",
+    "visual.blocks.18.mlp.fc2",
+    "model.layers.2.self_attn.k_proj",
+    "model.layers.9.mlp.down_proj",
+    "model.layers.6.mlp.gate_proj",
+    "visual.blocks.17.mlp.fc2",
+    "model.layers.0.self_attn.v_proj",
+    "visual.blocks.30.attn.qkv",
+    "model.layers.3.self_attn_text.o_proj",
+    "visual.blocks.4.attn.qkv",
+    "model.layers.10.mlp.up_proj",
+    "model.layers.2.self_attn.v_proj",
+    "visual.blocks.5.mlp.fc1",
+    "model.layers.0.self_attn_text.k_proj",
+    "model.layers.25.self_attn_text.k_proj",
+    "visual.blocks.19.attn.qkv",
+    "model.layers.2.mlp.gate_proj",
+    "model.layers.16.self_attn_text.q_proj",
+    "visual.blocks.0.mlp.fc1",
+    "model.layers.3.mlp.up_proj",
+    "visual.blocks.30.mlp.fc1",
+    "model.layers.2.mlp.up_proj",
+    "visual.blocks.29.attn.qkv",
+    "model.layers.27.mlp.gate_proj",
+    "model.layers.21.self_attn_text.o_proj",
+    "model.layers.21.mlp.up_proj",
+    "model.layers.1.self_attn.v_proj",
+    "visual.blocks.29.attn.proj",
+    "model.layers.8.self_attn_text.q_proj",
+    "model.layers.3.self_attn_text.v_proj",
+    "model.layers.1.self_attn_text.v_proj",
+    "visual.blocks.21.mlp.fc2",
+    "model.layers.3.self_attn.v_proj",
+    "visual.blocks.4.attn.proj",
+    "model.layers.4.self_attn.v_proj",
+    "model.layers.7.self_attn_text.v_proj",
+    "model.layers.22.self_attn_text.v_proj",
+    "model.layers.20.self_attn.v_proj",
+    "model.layers.21.self_attn_text.q_proj",
+    "model.layers.12.self_attn.o_proj",
+    "visual.blocks.27.mlp.fc2",
+    "model.layers.18.self_attn_text.k_proj",
+    "model.layers.24.self_attn_text.v_proj",
+    "model.layers.26.mlp.up_proj",
+    "model.layers.8.self_attn_text.o_proj",
+    "visual.blocks.11.mlp.fc1",
+    "model.layers.1.self_attn_text.q_proj",
+    "model.layers.7.self_attn.v_proj",
+    "visual.blocks.26.mlp.fc1",
+    "model.layers.11.self_attn.v_proj",
+    "model.layers.13.self_attn.k_proj",
+    "model.layers.10.self_attn.o_proj",
+    "model.layers.15.mlp.up_proj",
+    "visual.blocks.15.mlp.fc1",
+    "model.layers.22.mlp.down_proj",
+    "model.layers.24.mlp.up_proj",
+    "visual.blocks.15.mlp.fc2",
+    "model.layers.10.self_attn_text.o_proj",
+    "model.layers.15.self_attn_text.k_proj",
+    "visual.blocks.1.attn.qkv",
+    "model.layers.11.self_attn_text.o_proj",
+    "visual.blocks.10.mlp.fc1",
+    "model.layers.17.mlp.down_proj",
+    "visual.blocks.24.attn.qkv",
+    "model.layers.24.mlp.gate_proj",
+    "visual.blocks.7.attn.qkv",
+    "model.layers.5.self_attn.k_proj",
+    "model.layers.23.self_attn.q_proj",
+    "model.layers.0.mlp.up_proj",
+    "model.layers.22.self_attn_text.q_proj",
+    "visual.blocks.12.mlp.fc2",
+    "model.layers.3.mlp.gate_proj",
+    "model.layers.18.self_attn_text.v_proj",
+    "model.layers.12.self_attn_text.o_proj",
+    "model.layers.5.mlp.down_proj",
+    "model.layers.10.self_attn_text.k_proj",
+    "visual.blocks.24.attn.proj",
+    "model.layers.11.self_attn_text.q_proj",
+    "model.layers.25.self_attn.v_proj",
+    "model.layers.17.mlp.up_proj",
+    "visual.blocks.23.mlp.fc2",
+    "model.layers.22.self_attn.o_proj",
+    "model.layers.14.self_attn_text.o_proj",
+    "model.layers.19.mlp.up_proj",
+    "model.layers.14.self_attn.k_proj",
+    "visual.blocks.31.attn.qkv",
+    "model.layers.13.self_attn_text.v_proj",
+    "model.layers.16.mlp.down_proj",
+    "model.layers.16.self_attn_text.v_proj",
+    "model.layers.24.self_attn_text.k_proj",
+    "model.layers.26.self_attn_text.q_proj",
+    "visual.blocks.16.attn.proj",
+    "visual.blocks.22.attn.qkv",
+    "model.layers.27.self_attn_text.o_proj",
+    "visual.blocks.27.mlp.fc1",
+    "visual.blocks.12.attn.proj",
+    "visual.blocks.28.attn.proj",
+    "model.layers.21.self_attn_text.k_proj",
+    "visual.blocks.28.attn.qkv",
+    "visual.blocks.21.mlp.fc1",
+    "model.layers.27.mlp.up_proj",
+    "model.layers.15.self_attn.v_proj",
+    "model.layers.24.self_attn.k_proj",
+    "model.layers.2.self_attn_text.q_proj",
+    "model.layers.15.self_attn.q_proj",
+    "visual.blocks.29.mlp.fc2",
+    "visual.blocks.13.attn.qkv",
+    "visual.blocks.24.mlp.fc1",
+    "model.layers.11.self_attn.o_proj",
+    "model.layers.2.self_attn_text.o_proj",
+    "visual.blocks.7.attn.proj",
+    "model.layers.6.self_attn.o_proj",
+    "model.layers.9.self_attn_text.q_proj",
+    "model.layers.0.self_attn.o_proj",
+    "model.layers.9.mlp.gate_proj",
+    "visual.blocks.0.attn.qkv",
+    "model.layers.2.self_attn_text.v_proj",
+    "model.layers.8.mlp.up_proj",
+    "visual.blocks.8.attn.proj",
+    "visual.blocks.18.attn.proj",
+    "model.layers.4.self_attn_text.v_proj",
+    "model.layers.17.self_attn.o_proj",
+    "visual.blocks.22.attn.proj",
+    "model.layers.9.self_attn.o_proj",
+    "model.layers.26.self_attn.q_proj",
+    "visual.blocks.11.mlp.fc2",
+    "model.layers.22.mlp.up_proj",
+    "model.layers.18.mlp.up_proj",
+    "model.layers.14.self_attn_text.k_proj",
+    "visual.blocks.9.mlp.fc2",
+    "visual.blocks.11.attn.proj",
+    "model.layers.17.self_attn.k_proj",
+    "model.layers.8.self_attn.k_proj",
+    "model.layers.12.self_attn_text.v_proj",
+    "model.layers.26.mlp.down_proj",
+    "model.layers.14.self_attn.v_proj",
+    "model.layers.22.self_attn_text.o_proj",
+    "model.layers.0.self_attn_text.v_proj",
+    "model.layers.7.mlp.gate_proj",
+    "model.layers.22.self_attn.v_proj",
+    "model.layers.24.mlp.down_proj",
+    "model.layers.20.self_attn_text.q_proj",
+    "model.layers.2.self_attn.o_proj",
+    "model.layers.11.self_attn_text.k_proj",
+    "model.layers.24.self_attn.q_proj",
+    "model.layers.18.self_attn_text.q_proj",
+    "model.layers.6.self_attn_text.v_proj",
+    "model.layers.0.self_attn.q_proj",
+    "model.layers.25.self_attn.q_proj",
+    "model.layers.19.self_attn_text.q_proj",
+    "visual.blocks.20.mlp.fc2",
+    "model.layers.13.self_attn_text.k_proj",
+    "model.layers.25.mlp.up_proj",
+    "model.layers.20.self_attn_text.v_proj",
+    "visual.blocks.8.attn.qkv",
+    "visual.blocks.16.mlp.fc1",
+    "model.layers.25.self_attn.k_proj",
+    "model.layers.22.self_attn_text.k_proj",
+    "model.layers.16.self_attn.k_proj",
+    "model.layers.24.self_attn.o_proj",
+    "model.layers.15.self_attn.k_proj",
+    "visual.blocks.1.mlp.fc2",
+    "model.layers.6.self_attn.q_proj",
+    "model.layers.15.mlp.down_proj",
+    "visual.blocks.2.attn.qkv",
+    "model.layers.14.self_attn.q_proj",
+    "model.layers.4.self_attn.q_proj",
+    "visual.blocks.19.mlp.fc2",
+    "model.layers.7.self_attn.k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:845f34c1f221b697726779b3fd71e1029e7d68df2e0d70cd0bb291bb74d0558a
+size 133350944

added_tokens.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 24.608,
+    "total_flos": 1.324081921088553e+17,
+    "train_loss": 0.672794044127147,
+    "train_runtime": 33908.1665,
+    "train_samples_per_second": 0.369,
+    "train_steps_per_second": 0.046
+}

chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+}

checkpoint-1000/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: saim1212/penguin2
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

checkpoint-1000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,463 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "saim1212/penguin2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "model.layers.26.self_attn.o_proj",
+    "model.layers.19.self_attn.o_proj",
+    "visual.blocks.22.mlp.fc1",
+    "model.layers.1.self_attn.o_proj",
+    "model.layers.9.mlp.up_proj",
+    "model.layers.23.self_attn_text.q_proj",
+    "model.layers.24.self_attn_text.o_proj",
+    "model.layers.15.self_attn_text.o_proj",
+    "model.layers.27.self_attn.v_proj",
+    "model.layers.8.self_attn_text.k_proj",
+    "visual.blocks.10.attn.proj",
+    "visual.blocks.28.mlp.fc2",
+    "model.layers.3.self_attn_text.k_proj",
+    "model.layers.12.self_attn.v_proj",
+    "model.layers.18.self_attn_text.o_proj",
+    "visual.blocks.5.mlp.fc2",
+    "model.layers.13.self_attn.q_proj",
+    "visual.blocks.7.mlp.fc1",
+    "model.layers.27.mlp.down_proj",
+    "visual.blocks.1.mlp.fc1",
+    "model.layers.12.mlp.up_proj",
+    "model.layers.5.self_attn.o_proj",
+    "model.layers.15.self_attn_text.q_proj",
+    "model.layers.2.self_attn_text.k_proj",
+    "model.layers.3.self_attn_text.q_proj",
+    "model.layers.12.mlp.down_proj",
+    "model.layers.14.self_attn_text.q_proj",
+    "model.layers.19.self_attn_text.o_proj",
+    "visual.blocks.23.attn.proj",
+    "model.layers.20.self_attn.o_proj",
+    "model.layers.5.self_attn_text.k_proj",
+    "model.layers.26.mlp.gate_proj",
+    "model.layers.8.self_attn.q_proj",
+    "model.layers.20.mlp.gate_proj",
+    "model.layers.16.self_attn_text.k_proj",
+    "model.layers.20.self_attn.k_proj",
+    "visual.blocks.9.attn.qkv",
+    "model.layers.4.self_attn_text.k_proj",
+    "model.layers.4.mlp.gate_proj",
+    "model.layers.6.self_attn.v_proj",
+    "model.layers.1.self_attn_text.o_proj",
+    "model.layers.16.mlp.up_proj",
+    "visual.blocks.16.mlp.fc2",
+    "model.layers.10.self_attn.v_proj",
+    "model.layers.17.self_attn_text.o_proj",
+    "model.layers.17.self_attn.v_proj",
+    "visual.blocks.9.mlp.fc1",
+    "model.layers.25.mlp.gate_proj",
+    "model.layers.25.self_attn_text.q_proj",
+    "model.layers.9.self_attn.k_proj",
+    "model.layers.18.self_attn.q_proj",
+    "visual.blocks.9.attn.proj",
+    "visual.blocks.14.mlp.fc1",
+    "model.layers.13.self_attn.o_proj",
+    "model.layers.24.self_attn.v_proj",
+    "model.layers.11.mlp.down_proj",
+    "model.layers.27.self_attn_text.v_proj",
+    "model.layers.16.self_attn_text.o_proj",
+    "model.layers.25.mlp.down_proj",
+    "visual.blocks.4.mlp.fc2",
+    "model.layers.27.self_attn.q_proj",
+    "visual.blocks.5.attn.proj",
+    "model.layers.19.mlp.gate_proj",
+    "model.layers.14.self_attn.o_proj",
+    "model.layers.19.self_attn.v_proj",
+    "model.layers.13.mlp.gate_proj",
+    "model.layers.18.self_attn.o_proj",
+    "model.layers.18.self_attn.k_proj",
+    "model.layers.26.self_attn.k_proj",
+    "model.layers.9.self_attn_text.o_proj",
+    "model.layers.26.self_attn.v_proj",
+    "model.layers.27.self_attn.k_proj",
+    "model.layers.25.self_attn.o_proj",
+    "visual.blocks.20.attn.proj",
+    "visual.blocks.26.attn.qkv",
+    "model.layers.23.self_attn_text.v_proj",
+    "visual.blocks.14.attn.qkv",
+    "model.layers.19.self_attn.k_proj",
+    "model.layers.13.self_attn_text.q_proj",
+    "model.layers.13.mlp.down_proj",
+    "model.layers.21.self_attn.k_proj",
+    "model.layers.0.self_attn_text.o_proj",
+    "model.layers.6.self_attn.k_proj",
+    "visual.blocks.31.attn.proj",
+    "model.layers.16.self_attn.v_proj",
+    "model.layers.20.mlp.up_proj",
+    "visual.blocks.3.mlp.fc2",
+    "model.layers.3.self_attn.k_proj",
+    "visual.blocks.12.attn.qkv",
+    "model.layers.10.self_attn.k_proj",
+    "model.layers.12.self_attn_text.k_proj",
+    "visual.blocks.22.mlp.fc2",
+    "model.layers.11.self_attn.q_proj",
+    "visual.blocks.19.mlp.fc1",
+    "visual.blocks.2.mlp.fc1",
+    "model.layers.26.self_attn_text.k_proj",
+    "model.layers.5.self_attn.q_proj",
+    "model.layers.7.self_attn.q_proj",
+    "visual.blocks.27.attn.proj",
+    "model.layers.8.self_attn_text.v_proj",
+    "model.layers.12.mlp.gate_proj",
+    "model.layers.27.self_attn_text.q_proj",
+    "visual.blocks.1.attn.proj",
+    "model.layers.4.self_attn_text.o_proj",
+    "visual.blocks.6.mlp.fc2",
+    "model.layers.26.self_attn_text.v_proj",
+    "visual.blocks.6.mlp.fc1",
+    "visual.blocks.31.mlp.fc1",
+    "model.layers.8.mlp.gate_proj",
+    "visual.blocks.18.mlp.fc1",
+    "visual.blocks.14.attn.proj",
+    "model.layers.15.self_attn.o_proj",
+    "model.layers.16.self_attn.q_proj",
+    "visual.blocks.7.mlp.fc2",
+    "model.layers.11.self_attn.k_proj",
+    "model.layers.7.mlp.up_proj",
+    "model.layers.10.self_attn_text.v_proj",
+    "model.layers.23.self_attn.k_proj",
+    "visual.blocks.11.attn.qkv",
+    "visual.blocks.5.attn.qkv",
+    "model.layers.15.self_attn_text.v_proj",
+    "visual.blocks.21.attn.proj",
+    "model.layers.10.mlp.gate_proj",
+    "model.layers.5.self_attn.v_proj",
+    "model.layers.6.mlp.down_proj",
+    "model.layers.9.self_attn_text.v_proj",
+    "model.layers.4.self_attn_text.q_proj",
+    "model.layers.21.self_attn.v_proj",
+    "model.layers.8.mlp.down_proj",
+    "visual.blocks.8.mlp.fc2",
+    "model.layers.23.self_attn_text.o_proj",
+    "model.layers.1.self_attn.q_proj",
+    "model.layers.20.self_attn_text.k_proj",
+    "model.layers.8.self_attn.o_proj",
+    "model.layers.20.self_attn_text.o_proj",
+    "model.layers.6.mlp.up_proj",
+    "model.layers.1.mlp.down_proj",
+    "model.layers.18.mlp.down_proj",
+    "model.layers.18.mlp.gate_proj",
+    "model.layers.11.mlp.up_proj",
+    "visual.blocks.2.attn.proj",
+    "model.layers.0.mlp.down_proj",
+    "visual.blocks.0.mlp.fc2",
+    "visual.blocks.25.attn.proj",
+    "model.layers.0.self_attn.k_proj",
+    "model.layers.27.self_attn_text.k_proj",
+    "visual.blocks.12.mlp.fc1",
+    "model.layers.9.self_attn.q_proj",
+    "visual.blocks.17.attn.qkv",
+    "model.layers.17.self_attn_text.q_proj",
+    "model.layers.15.mlp.gate_proj",
+    "visual.blocks.21.attn.qkv",
+    "model.layers.16.mlp.gate_proj",
+    "model.layers.19.self_attn_text.v_proj",
+    "model.layers.24.self_attn_text.q_proj",
+    "visual.blocks.8.mlp.fc1",
+    "visual.blocks.30.mlp.fc2",
+    "model.layers.10.self_attn.q_proj",
+    "model.layers.14.mlp.gate_proj",
+    "model.layers.5.self_attn_text.q_proj",
+    "visual.blocks.26.mlp.fc2",
+    "model.layers.1.self_attn_text.k_proj",
+    "visual.blocks.29.mlp.fc1",
+    "model.layers.18.self_attn.v_proj",
+    "model.layers.23.mlp.gate_proj",
+    "visual.blocks.13.mlp.fc1",
+    "model.layers.5.self_attn_text.o_proj",
+    "model.layers.14.mlp.up_proj",
+    "visual.blocks.6.attn.qkv",
+    "model.layers.23.mlp.up_proj",
+    "model.layers.14.self_attn_text.v_proj",
+    "visual.blocks.4.mlp.fc1",
+    "visual.blocks.20.attn.qkv",
+    "model.layers.6.self_attn_text.q_proj",
+    "visual.blocks.25.attn.qkv",
+    "visual.blocks.15.attn.qkv",
+    "model.layers.1.self_attn.k_proj",
+    "model.layers.19.self_attn.q_proj",
+    "model.layers.4.self_attn.o_proj",
+    "model.layers.8.self_attn.v_proj",
+    "visual.blocks.23.attn.qkv",
+    "model.layers.3.self_attn.q_proj",
+    "model.layers.5.mlp.gate_proj",
+    "model.layers.1.mlp.up_proj",
+    "model.layers.11.mlp.gate_proj",
+    "visual.blocks.24.mlp.fc2",
+    "model.layers.1.mlp.gate_proj",
+    "visual.blocks.20.mlp.fc1",
+    "visual.blocks.13.mlp.fc2",
+    "visual.blocks.14.mlp.fc2",
+    "visual.blocks.3.attn.qkv",
+    "model.layers.12.self_attn_text.q_proj",
+    "model.layers.25.self_attn_text.o_proj",
+    "visual.blocks.19.attn.proj",
+    "visual.blocks.23.mlp.fc1",
+    "model.layers.14.mlp.down_proj",
+    "visual.blocks.25.mlp.fc2",
+    "model.layers.0.self_attn_text.q_proj",
+    "model.layers.23.self_attn_text.k_proj",
+    "model.layers.12.self_attn.k_proj",
+    "model.layers.4.self_attn.k_proj",
+    "visual.blocks.28.mlp.fc1",
+    "model.layers.21.self_attn_text.v_proj",
+    "model.layers.10.mlp.down_proj",
+    "visual.blocks.18.attn.qkv",
+    "model.layers.5.mlp.up_proj",
+    "model.layers.23.self_attn.v_proj",
+    "visual.blocks.31.mlp.fc2",
+    "model.layers.3.mlp.down_proj",
+    "visual.blocks.2.mlp.fc2",
+    "visual.blocks.10.mlp.fc2",
+    "model.layers.27.self_attn.o_proj",
+    "model.layers.11.self_attn_text.v_proj",
+    "model.layers.17.self_attn_text.k_proj",
+    "visual.blocks.25.mlp.fc1",
+    "visual.blocks.3.attn.proj",
+    "model.layers.2.self_attn.q_proj",
+    "model.layers.26.self_attn_text.o_proj",
+    "model.layers.9.self_attn.v_proj",
+    "model.layers.7.self_attn_text.o_proj",
+    "model.layers.20.self_attn.q_proj",
+    "model.layers.21.mlp.down_proj",
+    "model.layers.17.self_attn.q_proj",
+    "visual.blocks.17.attn.proj",
+    "model.layers.7.mlp.down_proj",
+    "model.layers.21.mlp.gate_proj",
+    "model.layers.20.mlp.down_proj",
+    "model.layers.7.self_attn.o_proj",
+    "model.layers.6.self_attn_text.o_proj",
+    "model.layers.5.self_attn_text.v_proj",
+    "model.layers.22.mlp.gate_proj",
+    "model.layers.7.self_attn_text.k_proj",
+    "model.layers.19.mlp.down_proj",
+    "model.layers.6.self_attn_text.k_proj",
+    "model.layers.9.self_attn_text.k_proj",
+    "visual.blocks.15.attn.proj",
+    "visual.blocks.6.attn.proj",
+    "model.layers.22.self_attn.k_proj",
+    "visual.blocks.13.attn.proj",
+    "model.layers.0.mlp.gate_proj",
+    "model.layers.13.self_attn.v_proj",
+    "model.layers.22.self_attn.q_proj",
+    "model.layers.19.self_attn_text.k_proj",
+    "model.layers.10.self_attn_text.q_proj",
+    "model.layers.2.mlp.down_proj",
+    "visual.blocks.10.attn.qkv",
+    "model.layers.4.mlp.up_proj",
+    "visual.blocks.16.attn.qkv",
+    "model.layers.13.self_attn_text.o_proj",
+    "model.layers.21.self_attn.o_proj",
+    "model.layers.13.mlp.up_proj",
+    "model.layers.7.self_attn_text.q_proj",
+    "visual.blocks.0.attn.proj",
+    "visual.blocks.17.mlp.fc1",
+    "model.layers.25.self_attn_text.v_proj",
+    "model.layers.3.self_attn.o_proj",
+    "visual.blocks.30.attn.proj",
+    "model.layers.16.self_attn.o_proj",
+    "model.layers.23.self_attn.o_proj",
+    "model.layers.4.mlp.down_proj",
+    "model.layers.17.self_attn_text.v_proj",
+    "model.layers.12.self_attn.q_proj",
+    "visual.blocks.3.mlp.fc1",
+    "visual.blocks.26.attn.proj",
+    "model.layers.21.self_attn.q_proj",
+    "visual.blocks.27.attn.qkv",
+    "model.layers.17.mlp.gate_proj",
+    "model.layers.23.mlp.down_proj",
+    "visual.blocks.18.mlp.fc2",
+    "model.layers.2.self_attn.k_proj",
+    "model.layers.9.mlp.down_proj",
+    "model.layers.6.mlp.gate_proj",
+    "visual.blocks.17.mlp.fc2",
+    "model.layers.0.self_attn.v_proj",
+    "visual.blocks.30.attn.qkv",
+    "model.layers.3.self_attn_text.o_proj",
+    "visual.blocks.4.attn.qkv",
+    "model.layers.10.mlp.up_proj",
+    "model.layers.2.self_attn.v_proj",
+    "visual.blocks.5.mlp.fc1",
+    "model.layers.0.self_attn_text.k_proj",
+    "model.layers.25.self_attn_text.k_proj",
+    "visual.blocks.19.attn.qkv",
+    "model.layers.2.mlp.gate_proj",
+    "model.layers.16.self_attn_text.q_proj",
+    "visual.blocks.0.mlp.fc1",
+    "model.layers.3.mlp.up_proj",
+    "visual.blocks.30.mlp.fc1",
+    "model.layers.2.mlp.up_proj",
+    "visual.blocks.29.attn.qkv",
+    "model.layers.27.mlp.gate_proj",
+    "model.layers.21.self_attn_text.o_proj",
+    "model.layers.21.mlp.up_proj",
+    "model.layers.1.self_attn.v_proj",
+    "visual.blocks.29.attn.proj",
+    "model.layers.8.self_attn_text.q_proj",
+    "model.layers.3.self_attn_text.v_proj",
+    "model.layers.1.self_attn_text.v_proj",
+    "visual.blocks.21.mlp.fc2",
+    "model.layers.3.self_attn.v_proj",
+    "visual.blocks.4.attn.proj",
+    "model.layers.4.self_attn.v_proj",
+    "model.layers.7.self_attn_text.v_proj",
+    "model.layers.22.self_attn_text.v_proj",
+    "model.layers.20.self_attn.v_proj",
+    "model.layers.21.self_attn_text.q_proj",
+    "model.layers.12.self_attn.o_proj",
+    "visual.blocks.27.mlp.fc2",
+    "model.layers.18.self_attn_text.k_proj",
+    "model.layers.24.self_attn_text.v_proj",
+    "model.layers.26.mlp.up_proj",
+    "model.layers.8.self_attn_text.o_proj",
+    "visual.blocks.11.mlp.fc1",
+    "model.layers.1.self_attn_text.q_proj",
+    "model.layers.7.self_attn.v_proj",
+    "visual.blocks.26.mlp.fc1",
+    "model.layers.11.self_attn.v_proj",
+    "model.layers.13.self_attn.k_proj",
+    "model.layers.10.self_attn.o_proj",
+    "model.layers.15.mlp.up_proj",
+    "visual.blocks.15.mlp.fc1",
+    "model.layers.22.mlp.down_proj",
+    "model.layers.24.mlp.up_proj",
+    "visual.blocks.15.mlp.fc2",
+    "model.layers.10.self_attn_text.o_proj",
+    "model.layers.15.self_attn_text.k_proj",
+    "visual.blocks.1.attn.qkv",
+    "model.layers.11.self_attn_text.o_proj",
+    "visual.blocks.10.mlp.fc1",
+    "model.layers.17.mlp.down_proj",
+    "visual.blocks.24.attn.qkv",
+    "model.layers.24.mlp.gate_proj",
+    "visual.blocks.7.attn.qkv",
+    "model.layers.5.self_attn.k_proj",
+    "model.layers.23.self_attn.q_proj",
+    "model.layers.0.mlp.up_proj",
+    "model.layers.22.self_attn_text.q_proj",
+    "visual.blocks.12.mlp.fc2",
+    "model.layers.3.mlp.gate_proj",
+    "model.layers.18.self_attn_text.v_proj",
+    "model.layers.12.self_attn_text.o_proj",
+    "model.layers.5.mlp.down_proj",
+    "model.layers.10.self_attn_text.k_proj",
+    "visual.blocks.24.attn.proj",
+    "model.layers.11.self_attn_text.q_proj",
+    "model.layers.25.self_attn.v_proj",
+    "model.layers.17.mlp.up_proj",
+    "visual.blocks.23.mlp.fc2",
+    "model.layers.22.self_attn.o_proj",
+    "model.layers.14.self_attn_text.o_proj",
+    "model.layers.19.mlp.up_proj",
+    "model.layers.14.self_attn.k_proj",
+    "visual.blocks.31.attn.qkv",
+    "model.layers.13.self_attn_text.v_proj",
+    "model.layers.16.mlp.down_proj",
+    "model.layers.16.self_attn_text.v_proj",
+    "model.layers.24.self_attn_text.k_proj",
+    "model.layers.26.self_attn_text.q_proj",
+    "visual.blocks.16.attn.proj",
+    "visual.blocks.22.attn.qkv",
+    "model.layers.27.self_attn_text.o_proj",
+    "visual.blocks.27.mlp.fc1",
+    "visual.blocks.12.attn.proj",
+    "visual.blocks.28.attn.proj",
+    "model.layers.21.self_attn_text.k_proj",
+    "visual.blocks.28.attn.qkv",
+    "visual.blocks.21.mlp.fc1",
+    "model.layers.27.mlp.up_proj",
+    "model.layers.15.self_attn.v_proj",
+    "model.layers.24.self_attn.k_proj",
+    "model.layers.2.self_attn_text.q_proj",
+    "model.layers.15.self_attn.q_proj",
+    "visual.blocks.29.mlp.fc2",
+    "visual.blocks.13.attn.qkv",
+    "visual.blocks.24.mlp.fc1",
+    "model.layers.11.self_attn.o_proj",
+    "model.layers.2.self_attn_text.o_proj",
+    "visual.blocks.7.attn.proj",
+    "model.layers.6.self_attn.o_proj",
+    "model.layers.9.self_attn_text.q_proj",
+    "model.layers.0.self_attn.o_proj",
+    "model.layers.9.mlp.gate_proj",
+    "visual.blocks.0.attn.qkv",
+    "model.layers.2.self_attn_text.v_proj",
+    "model.layers.8.mlp.up_proj",
+    "visual.blocks.8.attn.proj",
+    "visual.blocks.18.attn.proj",
+    "model.layers.4.self_attn_text.v_proj",
+    "model.layers.17.self_attn.o_proj",
+    "visual.blocks.22.attn.proj",
+    "model.layers.9.self_attn.o_proj",
+    "model.layers.26.self_attn.q_proj",
+    "visual.blocks.11.mlp.fc2",
+    "model.layers.22.mlp.up_proj",
+    "model.layers.18.mlp.up_proj",
+    "model.layers.14.self_attn_text.k_proj",
+    "visual.blocks.9.mlp.fc2",
+    "visual.blocks.11.attn.proj",
+    "model.layers.17.self_attn.k_proj",
+    "model.layers.8.self_attn.k_proj",
+    "model.layers.12.self_attn_text.v_proj",
+    "model.layers.26.mlp.down_proj",
+    "model.layers.14.self_attn.v_proj",
+    "model.layers.22.self_attn_text.o_proj",
+    "model.layers.0.self_attn_text.v_proj",
+    "model.layers.7.mlp.gate_proj",
+    "model.layers.22.self_attn.v_proj",
+    "model.layers.24.mlp.down_proj",
+    "model.layers.20.self_attn_text.q_proj",
+    "model.layers.2.self_attn.o_proj",
+    "model.layers.11.self_attn_text.k_proj",
+    "model.layers.24.self_attn.q_proj",
+    "model.layers.18.self_attn_text.q_proj",
+    "model.layers.6.self_attn_text.v_proj",
+    "model.layers.0.self_attn.q_proj",
+    "model.layers.25.self_attn.q_proj",
+    "model.layers.19.self_attn_text.q_proj",
+    "visual.blocks.20.mlp.fc2",
+    "model.layers.13.self_attn_text.k_proj",
+    "model.layers.25.mlp.up_proj",
+    "model.layers.20.self_attn_text.v_proj",
+    "visual.blocks.8.attn.qkv",
+    "visual.blocks.16.mlp.fc1",
+    "model.layers.25.self_attn.k_proj",
+    "model.layers.22.self_attn_text.k_proj",
+    "model.layers.16.self_attn.k_proj",
+    "model.layers.24.self_attn.o_proj",
+    "model.layers.15.self_attn.k_proj",
+    "visual.blocks.1.mlp.fc2",
+    "model.layers.6.self_attn.q_proj",
+    "model.layers.15.mlp.down_proj",
+    "visual.blocks.2.attn.qkv",
+    "model.layers.14.self_attn.q_proj",
+    "model.layers.4.self_attn.q_proj",
+    "visual.blocks.19.mlp.fc2",
+    "model.layers.7.self_attn.k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-1000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d448e42aa2230d31621b33f5ddf10de783f06ca70e9b34e23d996b4d244955bb
+size 133350944

checkpoint-1000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

checkpoint-1000/chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+}

checkpoint-1000/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35293326f76b61bf8e57bcdc349c1db4e135861ccbdfc311558a5d388c64ed9f
+size 267205066

checkpoint-1000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 12845056,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "longest_edge": 12845056,
+    "shortest_edge": 3136
+  },
+  "temporal_patch_size": 2
+}

checkpoint-1000/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9281be04f2316562db36bd289b226e07237cabe0e64c821e5b8cfedcb5f17669
+size 14512

checkpoint-1000/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:658f78ca761388a441a3eaf439ede5c9b4dc1f731762e906f81abe29d7755ff1
+size 14512

checkpoint-1000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9cb47022c4ef6f7ab1976ffb75cb22e35906b122b7a825388edd1a2193daa457
+size 988

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:537ce0ef560212676d0f3429491464ff9ae6f29d92c07b4446b30e769d7c02f4
+size 1064

checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-1000/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:091aa7594dc2fcfbfa06b9e3c22a5f0562ac14f30375c13af7309407a0e67b8a
+size 11420371

checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,148 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "max_length": null,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "Qwen2VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,733 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 15.88,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.16,
+      "grad_norm": 155.76007080078125,
+      "learning_rate": 9.032258064516129e-07,
+      "loss": 12.094,
+      "step": 10
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 37.119384765625,
+      "learning_rate": 2.1935483870967745e-06,
+      "loss": 7.0819,
+      "step": 20
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 14.752822875976562,
+      "learning_rate": 3.4838709677419357e-06,
+      "loss": 4.4657,
+      "step": 30
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 11.597710609436035,
+      "learning_rate": 4.774193548387097e-06,
+      "loss": 3.5378,
+      "step": 40
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 15.393077850341797,
+      "learning_rate": 6.064516129032259e-06,
+      "loss": 2.8862,
+      "step": 50
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 23.84307861328125,
+      "learning_rate": 7.35483870967742e-06,
+      "loss": 2.6138,
+      "step": 60
+    },
+    {
+      "epoch": 1.112,
+      "grad_norm": 13.163668632507324,
+      "learning_rate": 8.64516129032258e-06,
+      "loss": 2.3109,
+      "step": 70
+    },
+    {
+      "epoch": 1.272,
+      "grad_norm": 12.168913841247559,
+      "learning_rate": 9.935483870967742e-06,
+      "loss": 2.2949,
+      "step": 80
+    },
+    {
+      "epoch": 1.432,
+      "grad_norm": 10.725199699401855,
+      "learning_rate": 1.1225806451612904e-05,
+      "loss": 2.3399,
+      "step": 90
+    },
+    {
+      "epoch": 1.592,
+      "grad_norm": 8.531355857849121,
+      "learning_rate": 1.2516129032258067e-05,
+      "loss": 2.217,
+      "step": 100
+    },
+    {
+      "epoch": 1.752,
+      "grad_norm": 6.670936584472656,
+      "learning_rate": 1.3806451612903227e-05,
+      "loss": 2.1938,
+      "step": 110
+    },
+    {
+      "epoch": 1.912,
+      "grad_norm": 5.666457653045654,
+      "learning_rate": 1.5096774193548389e-05,
+      "loss": 2.0994,
+      "step": 120
+    },
+    {
+      "epoch": 2.064,
+      "grad_norm": 7.0824384689331055,
+      "learning_rate": 1.638709677419355e-05,
+      "loss": 2.0094,
+      "step": 130
+    },
+    {
+      "epoch": 2.224,
+      "grad_norm": 5.3269195556640625,
+      "learning_rate": 1.7677419354838713e-05,
+      "loss": 1.8313,
+      "step": 140
+    },
+    {
+      "epoch": 2.384,
+      "grad_norm": 3.4799787998199463,
+      "learning_rate": 1.896774193548387e-05,
+      "loss": 1.8772,
+      "step": 150
+    },
+    {
+      "epoch": 2.544,
+      "grad_norm": 4.512059211730957,
+      "learning_rate": 1.9999898566691428e-05,
+      "loss": 1.7948,
+      "step": 160
+    },
+    {
+      "epoch": 2.7039999999999997,
+      "grad_norm": 9.884415626525879,
+      "learning_rate": 1.9996348616949673e-05,
+      "loss": 1.7994,
+      "step": 170
+    },
+    {
+      "epoch": 2.864,
+      "grad_norm": 3.1838889122009277,
+      "learning_rate": 1.998772905933476e-05,
+      "loss": 1.8654,
+      "step": 180
+    },
+    {
+      "epoch": 3.016,
+      "grad_norm": 3.452301263809204,
+      "learning_rate": 1.9974044265220564e-05,
+      "loss": 1.6745,
+      "step": 190
+    },
+    {
+      "epoch": 3.176,
+      "grad_norm": 3.3805224895477295,
+      "learning_rate": 1.995530117479521e-05,
+      "loss": 1.5509,
+      "step": 200
+    },
+    {
+      "epoch": 3.336,
+      "grad_norm": 6.541603088378906,
+      "learning_rate": 1.993150929354139e-05,
+      "loss": 1.4749,
+      "step": 210
+    },
+    {
+      "epoch": 3.496,
+      "grad_norm": 2.95489764213562,
+      "learning_rate": 1.9902680687415704e-05,
+      "loss": 1.4165,
+      "step": 220
+    },
+    {
+      "epoch": 3.656,
+      "grad_norm": 3.144228458404541,
+      "learning_rate": 1.9868829976729444e-05,
+      "loss": 1.3226,
+      "step": 230
+    },
+    {
+      "epoch": 3.816,
+      "grad_norm": 3.747593641281128,
+      "learning_rate": 1.982997432873397e-05,
+      "loss": 1.5257,
+      "step": 240
+    },
+    {
+      "epoch": 3.976,
+      "grad_norm": 2.2221176624298096,
+      "learning_rate": 1.978613344891441e-05,
+      "loss": 1.4218,
+      "step": 250
+    },
+    {
+      "epoch": 4.128,
+      "grad_norm": 2.854719877243042,
+      "learning_rate": 1.9737329570996098e-05,
+      "loss": 1.2454,
+      "step": 260
+    },
+    {
+      "epoch": 4.288,
+      "grad_norm": 3.9374194145202637,
+      "learning_rate": 1.968358744566884e-05,
+      "loss": 1.2503,
+      "step": 270
+    },
+    {
+      "epoch": 4.448,
+      "grad_norm": 4.536250591278076,
+      "learning_rate": 1.9624934328034673e-05,
+      "loss": 1.2983,
+      "step": 280
+    },
+    {
+      "epoch": 4.608,
+      "grad_norm": 4.311966419219971,
+      "learning_rate": 1.9561399963785586e-05,
+      "loss": 1.2944,
+      "step": 290
+    },
+    {
+      "epoch": 4.768,
+      "grad_norm": 4.188143253326416,
+      "learning_rate": 1.9493016574118103e-05,
+      "loss": 1.2997,
+      "step": 300
+    },
+    {
+      "epoch": 4.928,
+      "grad_norm": 5.04379415512085,
+      "learning_rate": 1.9419818839392408e-05,
+      "loss": 1.2976,
+      "step": 310
+    },
+    {
+      "epoch": 5.08,
+      "grad_norm": 4.528952598571777,
+      "learning_rate": 1.9341843881544372e-05,
+      "loss": 1.1579,
+      "step": 320
+    },
+    {
+      "epoch": 5.24,
+      "grad_norm": 4.810428142547607,
+      "learning_rate": 1.9259131245259293e-05,
+      "loss": 1.13,
+      "step": 330
+    },
+    {
+      "epoch": 5.4,
+      "grad_norm": 3.7566370964050293,
+      "learning_rate": 1.917172287791698e-05,
+      "loss": 1.1387,
+      "step": 340
+    },
+    {
+      "epoch": 5.5600000000000005,
+      "grad_norm": 3.8142237663269043,
+      "learning_rate": 1.9079663108318304e-05,
+      "loss": 1.1176,
+      "step": 350
+    },
+    {
+      "epoch": 5.72,
+      "grad_norm": 4.0017619132995605,
+      "learning_rate": 1.8982998624204016e-05,
+      "loss": 1.1042,
+      "step": 360
+    },
+    {
+      "epoch": 5.88,
+      "grad_norm": 3.9953103065490723,
+      "learning_rate": 1.8881778448577274e-05,
+      "loss": 1.1386,
+      "step": 370
+    },
+    {
+      "epoch": 6.032,
+      "grad_norm": 3.269265651702881,
+      "learning_rate": 1.877605391484179e-05,
+      "loss": 0.9651,
+      "step": 380
+    },
+    {
+      "epoch": 6.192,
+      "grad_norm": 5.4509172439575195,
+      "learning_rate": 1.8665878640768332e-05,
+      "loss": 0.9487,
+      "step": 390
+    },
+    {
+      "epoch": 6.352,
+      "grad_norm": 3.8790087699890137,
+      "learning_rate": 1.855130850130267e-05,
+      "loss": 0.9193,
+      "step": 400
+    },
+    {
+      "epoch": 6.5120000000000005,
+      "grad_norm": 5.1756110191345215,
+      "learning_rate": 1.8432401600228823e-05,
+      "loss": 0.9112,
+      "step": 410
+    },
+    {
+      "epoch": 6.672,
+      "grad_norm": 4.771461009979248,
+      "learning_rate": 1.8309218240701973e-05,
+      "loss": 0.9371,
+      "step": 420
+    },
+    {
+      "epoch": 6.832,
+      "grad_norm": 4.88088846206665,
+      "learning_rate": 1.818182089466595e-05,
+      "loss": 1.0264,
+      "step": 430
+    },
+    {
+      "epoch": 6.992,
+      "grad_norm": 4.158401012420654,
+      "learning_rate": 1.8050274171170835e-05,
+      "loss": 0.9534,
+      "step": 440
+    },
+    {
+      "epoch": 7.144,
+      "grad_norm": 5.25468635559082,
+      "learning_rate": 1.791464478360676e-05,
+      "loss": 0.7345,
+      "step": 450
+    },
+    {
+      "epoch": 7.304,
+      "grad_norm": 4.713033676147461,
+      "learning_rate": 1.7775001515870466e-05,
+      "loss": 0.8399,
+      "step": 460
+    },
+    {
+      "epoch": 7.464,
+      "grad_norm": 5.714450359344482,
+      "learning_rate": 1.7631415187481818e-05,
+      "loss": 0.7525,
+      "step": 470
+    },
+    {
+      "epoch": 7.624,
+      "grad_norm": 6.085780143737793,
+      "learning_rate": 1.7483958617668e-05,
+      "loss": 0.7276,
+      "step": 480
+    },
+    {
+      "epoch": 7.784,
+      "grad_norm": 4.569671630859375,
+      "learning_rate": 1.733270658843351e-05,
+      "loss": 0.8071,
+      "step": 490
+    },
+    {
+      "epoch": 7.944,
+      "grad_norm": 6.115426540374756,
+      "learning_rate": 1.717773580663479e-05,
+      "loss": 0.7683,
+      "step": 500
+    },
+    {
+      "epoch": 8.096,
+      "grad_norm": 4.305016040802002,
+      "learning_rate": 1.7019124865078625e-05,
+      "loss": 0.6376,
+      "step": 510
+    },
+    {
+      "epoch": 8.256,
+      "grad_norm": 6.470266342163086,
+      "learning_rate": 1.6856954202664158e-05,
+      "loss": 0.6286,
+      "step": 520
+    },
+    {
+      "epoch": 8.416,
+      "grad_norm": 6.055320739746094,
+      "learning_rate": 1.6691306063588583e-05,
+      "loss": 0.6196,
+      "step": 530
+    },
+    {
+      "epoch": 8.576,
+      "grad_norm": 6.73253870010376,
+      "learning_rate": 1.652226445563737e-05,
+      "loss": 0.564,
+      "step": 540
+    },
+    {
+      "epoch": 8.736,
+      "grad_norm": 5.043179512023926,
+      "learning_rate": 1.634991510758003e-05,
+      "loss": 0.6122,
+      "step": 550
+    },
+    {
+      "epoch": 8.896,
+      "grad_norm": 6.78087854385376,
+      "learning_rate": 1.617434542569313e-05,
+      "loss": 0.6173,
+      "step": 560
+    },
+    {
+      "epoch": 9.048,
+      "grad_norm": 6.2355146408081055,
+      "learning_rate": 1.5995644449432538e-05,
+      "loss": 0.5342,
+      "step": 570
+    },
+    {
+      "epoch": 9.208,
+      "grad_norm": 5.987257480621338,
+      "learning_rate": 1.5813902806277445e-05,
+      "loss": 0.4269,
+      "step": 580
+    },
+    {
+      "epoch": 9.368,
+      "grad_norm": 5.455114364624023,
+      "learning_rate": 1.562921266576898e-05,
+      "loss": 0.4548,
+      "step": 590
+    },
+    {
+      "epoch": 9.528,
+      "grad_norm": 5.296268463134766,
+      "learning_rate": 1.5441667692766805e-05,
+      "loss": 0.4038,
+      "step": 600
+    },
+    {
+      "epoch": 9.688,
+      "grad_norm": 5.551358699798584,
+      "learning_rate": 1.5251362999947386e-05,
+      "loss": 0.4015,
+      "step": 610
+    },
+    {
+      "epoch": 9.848,
+      "grad_norm": 4.464796543121338,
+      "learning_rate": 1.5058395099567935e-05,
+      "loss": 0.4353,
+      "step": 620
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 3.268158197402954,
+      "learning_rate": 1.4862861854520652e-05,
+      "loss": 0.3927,
+      "step": 630
+    },
+    {
+      "epoch": 10.16,
+      "grad_norm": 8.046059608459473,
+      "learning_rate": 1.4664862428701925e-05,
+      "loss": 0.2612,
+      "step": 640
+    },
+    {
+      "epoch": 10.32,
+      "grad_norm": 4.157690048217773,
+      "learning_rate": 1.4464497236721779e-05,
+      "loss": 0.2621,
+      "step": 650
+    },
+    {
+      "epoch": 10.48,
+      "grad_norm": 5.3797688484191895,
+      "learning_rate": 1.4261867892979e-05,
+      "loss": 0.263,
+      "step": 660
+    },
+    {
+      "epoch": 10.64,
+      "grad_norm": 4.068567276000977,
+      "learning_rate": 1.4057077160127806e-05,
+      "loss": 0.2492,
+      "step": 670
+    },
+    {
+      "epoch": 10.8,
+      "grad_norm": 5.405711650848389,
+      "learning_rate": 1.3850228896962178e-05,
+      "loss": 0.2523,
+      "step": 680
+    },
+    {
+      "epoch": 10.96,
+      "grad_norm": 4.762354373931885,
+      "learning_rate": 1.3641428005744308e-05,
+      "loss": 0.2586,
+      "step": 690
+    },
+    {
+      "epoch": 11.112,
+      "grad_norm": 5.127146244049072,
+      "learning_rate": 1.3430780379003814e-05,
+      "loss": 0.1699,
+      "step": 700
+    },
+    {
+      "epoch": 11.272,
+      "grad_norm": 3.0993189811706543,
+      "learning_rate": 1.3218392845834789e-05,
+      "loss": 0.1514,
+      "step": 710
+    },
+    {
+      "epoch": 11.432,
+      "grad_norm": 5.754135608673096,
+      "learning_rate": 1.300437311771785e-05,
+      "loss": 0.1432,
+      "step": 720
+    },
+    {
+      "epoch": 11.592,
+      "grad_norm": 4.12827730178833,
+      "learning_rate": 1.2788829733894698e-05,
+      "loss": 0.1512,
+      "step": 730
+    },
+    {
+      "epoch": 11.752,
+      "grad_norm": 4.6962175369262695,
+      "learning_rate": 1.257187200632289e-05,
+      "loss": 0.1534,
+      "step": 740
+    },
+    {
+      "epoch": 11.912,
+      "grad_norm": 6.317523002624512,
+      "learning_rate": 1.2353609964238686e-05,
+      "loss": 0.1452,
+      "step": 750
+    },
+    {
+      "epoch": 12.064,
+      "grad_norm": 2.793424367904663,
+      "learning_rate": 1.213415429835621e-05,
+      "loss": 0.1167,
+      "step": 760
+    },
+    {
+      "epoch": 12.224,
+      "grad_norm": 3.816258668899536,
+      "learning_rate": 1.1913616304731064e-05,
+      "loss": 0.0785,
+      "step": 770
+    },
+    {
+      "epoch": 12.384,
+      "grad_norm": 3.989567518234253,
+      "learning_rate": 1.1692107828317014e-05,
+      "loss": 0.0857,
+      "step": 780
+    },
+    {
+      "epoch": 12.544,
+      "grad_norm": 4.456111431121826,
+      "learning_rate": 1.1469741206244249e-05,
+      "loss": 0.0862,
+      "step": 790
+    },
+    {
+      "epoch": 12.704,
+      "grad_norm": 4.539771556854248,
+      "learning_rate": 1.1246629210848062e-05,
+      "loss": 0.0949,
+      "step": 800
+    },
+    {
+      "epoch": 12.864,
+      "grad_norm": 2.4530129432678223,
+      "learning_rate": 1.1022884992476826e-05,
+      "loss": 0.0928,
+      "step": 810
+    },
+    {
+      "epoch": 13.016,
+      "grad_norm": 2.042999267578125,
+      "learning_rate": 1.0821068423364156e-05,
+      "loss": 0.0951,
+      "step": 820
+    },
+    {
+      "epoch": 13.176,
+      "grad_norm": 2.9049434661865234,
+      "learning_rate": 1.0596435812513276e-05,
+      "loss": 0.0483,
+      "step": 830
+    },
+    {
+      "epoch": 13.336,
+      "grad_norm": 2.3502166271209717,
+      "learning_rate": 1.037150072164626e-05,
+      "loss": 0.0559,
+      "step": 840
+    },
+    {
+      "epoch": 13.496,
+      "grad_norm": 2.2428765296936035,
+      "learning_rate": 1.0146377225686996e-05,
+      "loss": 0.0801,
+      "step": 850
+    },
+    {
+      "epoch": 13.656,
+      "grad_norm": 5.673745155334473,
+      "learning_rate": 9.921179495108249e-06,
+      "loss": 0.0683,
+      "step": 860
+    },
+    {
+      "epoch": 13.816,
+      "grad_norm": 3.9386937618255615,
+      "learning_rate": 9.696021738030575e-06,
+      "loss": 0.0616,
+      "step": 870
+    },
+    {
+      "epoch": 13.975999999999999,
+      "grad_norm": 4.362432479858398,
+      "learning_rate": 9.471018142302127e-06,
+      "loss": 0.058,
+      "step": 880
+    },
+    {
+      "epoch": 14.128,
+      "grad_norm": 2.225241184234619,
+      "learning_rate": 9.24628281758876e-06,
+      "loss": 0.0356,
+      "step": 890
+    },
+    {
+      "epoch": 14.288,
+      "grad_norm": 4.0786356925964355,
+      "learning_rate": 9.021929737503757e-06,
+      "loss": 0.0458,
+      "step": 900
+    },
+    {
+      "epoch": 14.448,
+      "grad_norm": 2.464179277420044,
+      "learning_rate": 8.79807268180658e-06,
+      "loss": 0.0531,
+      "step": 910
+    },
+    {
+      "epoch": 14.608,
+      "grad_norm": 2.679661273956299,
+      "learning_rate": 8.574825178699935e-06,
+      "loss": 0.0359,
+      "step": 920
+    },
+    {
+      "epoch": 14.768,
+      "grad_norm": 2.0911498069763184,
+      "learning_rate": 8.352300447254372e-06,
+      "loss": 0.0362,
+      "step": 930
+    },
+    {
+      "epoch": 14.928,
+      "grad_norm": 2.3030571937561035,
+      "learning_rate": 8.130611339989731e-06,
+      "loss": 0.0292,
+      "step": 940
+    },
+    {
+      "epoch": 15.08,
+      "grad_norm": 1.6733816862106323,
+      "learning_rate": 7.909870285642403e-06,
+      "loss": 0.0241,
+      "step": 950
+    },
+    {
+      "epoch": 15.24,
+      "grad_norm": 1.4519929885864258,
+      "learning_rate": 7.690189232147566e-06,
+      "loss": 0.0264,
+      "step": 960
+    },
+    {
+      "epoch": 15.4,
+      "grad_norm": 1.980666995048523,
+      "learning_rate": 7.4716795898652615e-06,
+      "loss": 0.0231,
+      "step": 970
+    },
+    {
+      "epoch": 15.56,
+      "grad_norm": 2.6794183254241943,
+      "learning_rate": 7.2544521750790345e-06,
+      "loss": 0.0243,
+      "step": 980
+    },
+    {
+      "epoch": 15.72,
+      "grad_norm": 1.8193122148513794,
+      "learning_rate": 7.038617153795948e-06,
+      "loss": 0.0226,
+      "step": 990
+    },
+    {
+      "epoch": 15.88,
+      "grad_norm": 2.1489455699920654,
+      "learning_rate": 6.82428398587631e-06,
+      "loss": 0.0321,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1550,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 25,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.544789130431693e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc45890bd2d24eb38ee6085d083cd1874d1991cf87176f31b08f0cafc9576e6c
+size 5688

checkpoint-1000/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1550/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: saim1212/penguin2
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

checkpoint-1550/adapter_config.json ADDED Viewed

	@@ -0,0 +1,463 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "saim1212/penguin2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "model.layers.26.self_attn.o_proj",
+    "model.layers.19.self_attn.o_proj",
+    "visual.blocks.22.mlp.fc1",
+    "model.layers.1.self_attn.o_proj",
+    "model.layers.9.mlp.up_proj",
+    "model.layers.23.self_attn_text.q_proj",
+    "model.layers.24.self_attn_text.o_proj",
+    "model.layers.15.self_attn_text.o_proj",
+    "model.layers.27.self_attn.v_proj",
+    "model.layers.8.self_attn_text.k_proj",
+    "visual.blocks.10.attn.proj",
+    "visual.blocks.28.mlp.fc2",
+    "model.layers.3.self_attn_text.k_proj",
+    "model.layers.12.self_attn.v_proj",
+    "model.layers.18.self_attn_text.o_proj",
+    "visual.blocks.5.mlp.fc2",
+    "model.layers.13.self_attn.q_proj",
+    "visual.blocks.7.mlp.fc1",
+    "model.layers.27.mlp.down_proj",
+    "visual.blocks.1.mlp.fc1",
+    "model.layers.12.mlp.up_proj",
+    "model.layers.5.self_attn.o_proj",
+    "model.layers.15.self_attn_text.q_proj",
+    "model.layers.2.self_attn_text.k_proj",
+    "model.layers.3.self_attn_text.q_proj",
+    "model.layers.12.mlp.down_proj",
+    "model.layers.14.self_attn_text.q_proj",
+    "model.layers.19.self_attn_text.o_proj",
+    "visual.blocks.23.attn.proj",
+    "model.layers.20.self_attn.o_proj",
+    "model.layers.5.self_attn_text.k_proj",
+    "model.layers.26.mlp.gate_proj",
+    "model.layers.8.self_attn.q_proj",
+    "model.layers.20.mlp.gate_proj",
+    "model.layers.16.self_attn_text.k_proj",
+    "model.layers.20.self_attn.k_proj",
+    "visual.blocks.9.attn.qkv",
+    "model.layers.4.self_attn_text.k_proj",
+    "model.layers.4.mlp.gate_proj",
+    "model.layers.6.self_attn.v_proj",
+    "model.layers.1.self_attn_text.o_proj",
+    "model.layers.16.mlp.up_proj",
+    "visual.blocks.16.mlp.fc2",
+    "model.layers.10.self_attn.v_proj",
+    "model.layers.17.self_attn_text.o_proj",
+    "model.layers.17.self_attn.v_proj",
+    "visual.blocks.9.mlp.fc1",
+    "model.layers.25.mlp.gate_proj",
+    "model.layers.25.self_attn_text.q_proj",
+    "model.layers.9.self_attn.k_proj",
+    "model.layers.18.self_attn.q_proj",
+    "visual.blocks.9.attn.proj",
+    "visual.blocks.14.mlp.fc1",
+    "model.layers.13.self_attn.o_proj",
+    "model.layers.24.self_attn.v_proj",
+    "model.layers.11.mlp.down_proj",
+    "model.layers.27.self_attn_text.v_proj",
+    "model.layers.16.self_attn_text.o_proj",
+    "model.layers.25.mlp.down_proj",
+    "visual.blocks.4.mlp.fc2",
+    "model.layers.27.self_attn.q_proj",
+    "visual.blocks.5.attn.proj",
+    "model.layers.19.mlp.gate_proj",
+    "model.layers.14.self_attn.o_proj",
+    "model.layers.19.self_attn.v_proj",
+    "model.layers.13.mlp.gate_proj",
+    "model.layers.18.self_attn.o_proj",
+    "model.layers.18.self_attn.k_proj",
+    "model.layers.26.self_attn.k_proj",
+    "model.layers.9.self_attn_text.o_proj",
+    "model.layers.26.self_attn.v_proj",
+    "model.layers.27.self_attn.k_proj",
+    "model.layers.25.self_attn.o_proj",
+    "visual.blocks.20.attn.proj",
+    "visual.blocks.26.attn.qkv",
+    "model.layers.23.self_attn_text.v_proj",
+    "visual.blocks.14.attn.qkv",
+    "model.layers.19.self_attn.k_proj",
+    "model.layers.13.self_attn_text.q_proj",
+    "model.layers.13.mlp.down_proj",
+    "model.layers.21.self_attn.k_proj",
+    "model.layers.0.self_attn_text.o_proj",
+    "model.layers.6.self_attn.k_proj",
+    "visual.blocks.31.attn.proj",
+    "model.layers.16.self_attn.v_proj",
+    "model.layers.20.mlp.up_proj",
+    "visual.blocks.3.mlp.fc2",
+    "model.layers.3.self_attn.k_proj",
+    "visual.blocks.12.attn.qkv",
+    "model.layers.10.self_attn.k_proj",
+    "model.layers.12.self_attn_text.k_proj",
+    "visual.blocks.22.mlp.fc2",
+    "model.layers.11.self_attn.q_proj",
+    "visual.blocks.19.mlp.fc1",
+    "visual.blocks.2.mlp.fc1",
+    "model.layers.26.self_attn_text.k_proj",
+    "model.layers.5.self_attn.q_proj",
+    "model.layers.7.self_attn.q_proj",
+    "visual.blocks.27.attn.proj",
+    "model.layers.8.self_attn_text.v_proj",
+    "model.layers.12.mlp.gate_proj",
+    "model.layers.27.self_attn_text.q_proj",
+    "visual.blocks.1.attn.proj",
+    "model.layers.4.self_attn_text.o_proj",
+    "visual.blocks.6.mlp.fc2",
+    "model.layers.26.self_attn_text.v_proj",
+    "visual.blocks.6.mlp.fc1",
+    "visual.blocks.31.mlp.fc1",
+    "model.layers.8.mlp.gate_proj",
+    "visual.blocks.18.mlp.fc1",
+    "visual.blocks.14.attn.proj",
+    "model.layers.15.self_attn.o_proj",
+    "model.layers.16.self_attn.q_proj",
+    "visual.blocks.7.mlp.fc2",
+    "model.layers.11.self_attn.k_proj",
+    "model.layers.7.mlp.up_proj",
+    "model.layers.10.self_attn_text.v_proj",
+    "model.layers.23.self_attn.k_proj",
+    "visual.blocks.11.attn.qkv",
+    "visual.blocks.5.attn.qkv",
+    "model.layers.15.self_attn_text.v_proj",
+    "visual.blocks.21.attn.proj",
+    "model.layers.10.mlp.gate_proj",
+    "model.layers.5.self_attn.v_proj",
+    "model.layers.6.mlp.down_proj",
+    "model.layers.9.self_attn_text.v_proj",
+    "model.layers.4.self_attn_text.q_proj",
+    "model.layers.21.self_attn.v_proj",
+    "model.layers.8.mlp.down_proj",
+    "visual.blocks.8.mlp.fc2",
+    "model.layers.23.self_attn_text.o_proj",
+    "model.layers.1.self_attn.q_proj",
+    "model.layers.20.self_attn_text.k_proj",
+    "model.layers.8.self_attn.o_proj",
+    "model.layers.20.self_attn_text.o_proj",
+    "model.layers.6.mlp.up_proj",
+    "model.layers.1.mlp.down_proj",
+    "model.layers.18.mlp.down_proj",
+    "model.layers.18.mlp.gate_proj",
+    "model.layers.11.mlp.up_proj",
+    "visual.blocks.2.attn.proj",
+    "model.layers.0.mlp.down_proj",
+    "visual.blocks.0.mlp.fc2",
+    "visual.blocks.25.attn.proj",
+    "model.layers.0.self_attn.k_proj",
+    "model.layers.27.self_attn_text.k_proj",
+    "visual.blocks.12.mlp.fc1",
+    "model.layers.9.self_attn.q_proj",
+    "visual.blocks.17.attn.qkv",
+    "model.layers.17.self_attn_text.q_proj",
+    "model.layers.15.mlp.gate_proj",
+    "visual.blocks.21.attn.qkv",
+    "model.layers.16.mlp.gate_proj",
+    "model.layers.19.self_attn_text.v_proj",
+    "model.layers.24.self_attn_text.q_proj",
+    "visual.blocks.8.mlp.fc1",
+    "visual.blocks.30.mlp.fc2",
+    "model.layers.10.self_attn.q_proj",
+    "model.layers.14.mlp.gate_proj",
+    "model.layers.5.self_attn_text.q_proj",
+    "visual.blocks.26.mlp.fc2",
+    "model.layers.1.self_attn_text.k_proj",
+    "visual.blocks.29.mlp.fc1",
+    "model.layers.18.self_attn.v_proj",
+    "model.layers.23.mlp.gate_proj",
+    "visual.blocks.13.mlp.fc1",
+    "model.layers.5.self_attn_text.o_proj",
+    "model.layers.14.mlp.up_proj",
+    "visual.blocks.6.attn.qkv",
+    "model.layers.23.mlp.up_proj",
+    "model.layers.14.self_attn_text.v_proj",
+    "visual.blocks.4.mlp.fc1",
+    "visual.blocks.20.attn.qkv",
+    "model.layers.6.self_attn_text.q_proj",
+    "visual.blocks.25.attn.qkv",
+    "visual.blocks.15.attn.qkv",
+    "model.layers.1.self_attn.k_proj",
+    "model.layers.19.self_attn.q_proj",
+    "model.layers.4.self_attn.o_proj",
+    "model.layers.8.self_attn.v_proj",
+    "visual.blocks.23.attn.qkv",
+    "model.layers.3.self_attn.q_proj",
+    "model.layers.5.mlp.gate_proj",
+    "model.layers.1.mlp.up_proj",
+    "model.layers.11.mlp.gate_proj",
+    "visual.blocks.24.mlp.fc2",
+    "model.layers.1.mlp.gate_proj",
+    "visual.blocks.20.mlp.fc1",
+    "visual.blocks.13.mlp.fc2",
+    "visual.blocks.14.mlp.fc2",
+    "visual.blocks.3.attn.qkv",
+    "model.layers.12.self_attn_text.q_proj",
+    "model.layers.25.self_attn_text.o_proj",
+    "visual.blocks.19.attn.proj",
+    "visual.blocks.23.mlp.fc1",
+    "model.layers.14.mlp.down_proj",
+    "visual.blocks.25.mlp.fc2",
+    "model.layers.0.self_attn_text.q_proj",
+    "model.layers.23.self_attn_text.k_proj",
+    "model.layers.12.self_attn.k_proj",
+    "model.layers.4.self_attn.k_proj",
+    "visual.blocks.28.mlp.fc1",
+    "model.layers.21.self_attn_text.v_proj",
+    "model.layers.10.mlp.down_proj",
+    "visual.blocks.18.attn.qkv",
+    "model.layers.5.mlp.up_proj",
+    "model.layers.23.self_attn.v_proj",
+    "visual.blocks.31.mlp.fc2",
+    "model.layers.3.mlp.down_proj",
+    "visual.blocks.2.mlp.fc2",
+    "visual.blocks.10.mlp.fc2",
+    "model.layers.27.self_attn.o_proj",
+    "model.layers.11.self_attn_text.v_proj",
+    "model.layers.17.self_attn_text.k_proj",
+    "visual.blocks.25.mlp.fc1",
+    "visual.blocks.3.attn.proj",
+    "model.layers.2.self_attn.q_proj",
+    "model.layers.26.self_attn_text.o_proj",
+    "model.layers.9.self_attn.v_proj",
+    "model.layers.7.self_attn_text.o_proj",
+    "model.layers.20.self_attn.q_proj",
+    "model.layers.21.mlp.down_proj",
+    "model.layers.17.self_attn.q_proj",
+    "visual.blocks.17.attn.proj",
+    "model.layers.7.mlp.down_proj",
+    "model.layers.21.mlp.gate_proj",
+    "model.layers.20.mlp.down_proj",
+    "model.layers.7.self_attn.o_proj",
+    "model.layers.6.self_attn_text.o_proj",
+    "model.layers.5.self_attn_text.v_proj",
+    "model.layers.22.mlp.gate_proj",
+    "model.layers.7.self_attn_text.k_proj",
+    "model.layers.19.mlp.down_proj",
+    "model.layers.6.self_attn_text.k_proj",
+    "model.layers.9.self_attn_text.k_proj",
+    "visual.blocks.15.attn.proj",
+    "visual.blocks.6.attn.proj",
+    "model.layers.22.self_attn.k_proj",
+    "visual.blocks.13.attn.proj",
+    "model.layers.0.mlp.gate_proj",
+    "model.layers.13.self_attn.v_proj",
+    "model.layers.22.self_attn.q_proj",
+    "model.layers.19.self_attn_text.k_proj",
+    "model.layers.10.self_attn_text.q_proj",
+    "model.layers.2.mlp.down_proj",
+    "visual.blocks.10.attn.qkv",
+    "model.layers.4.mlp.up_proj",
+    "visual.blocks.16.attn.qkv",
+    "model.layers.13.self_attn_text.o_proj",
+    "model.layers.21.self_attn.o_proj",
+    "model.layers.13.mlp.up_proj",
+    "model.layers.7.self_attn_text.q_proj",
+    "visual.blocks.0.attn.proj",
+    "visual.blocks.17.mlp.fc1",
+    "model.layers.25.self_attn_text.v_proj",
+    "model.layers.3.self_attn.o_proj",
+    "visual.blocks.30.attn.proj",
+    "model.layers.16.self_attn.o_proj",
+    "model.layers.23.self_attn.o_proj",
+    "model.layers.4.mlp.down_proj",
+    "model.layers.17.self_attn_text.v_proj",
+    "model.layers.12.self_attn.q_proj",
+    "visual.blocks.3.mlp.fc1",
+    "visual.blocks.26.attn.proj",
+    "model.layers.21.self_attn.q_proj",
+    "visual.blocks.27.attn.qkv",
+    "model.layers.17.mlp.gate_proj",
+    "model.layers.23.mlp.down_proj",
+    "visual.blocks.18.mlp.fc2",
+    "model.layers.2.self_attn.k_proj",
+    "model.layers.9.mlp.down_proj",
+    "model.layers.6.mlp.gate_proj",
+    "visual.blocks.17.mlp.fc2",
+    "model.layers.0.self_attn.v_proj",
+    "visual.blocks.30.attn.qkv",
+    "model.layers.3.self_attn_text.o_proj",
+    "visual.blocks.4.attn.qkv",
+    "model.layers.10.mlp.up_proj",
+    "model.layers.2.self_attn.v_proj",
+    "visual.blocks.5.mlp.fc1",
+    "model.layers.0.self_attn_text.k_proj",
+    "model.layers.25.self_attn_text.k_proj",
+    "visual.blocks.19.attn.qkv",
+    "model.layers.2.mlp.gate_proj",
+    "model.layers.16.self_attn_text.q_proj",
+    "visual.blocks.0.mlp.fc1",
+    "model.layers.3.mlp.up_proj",
+    "visual.blocks.30.mlp.fc1",
+    "model.layers.2.mlp.up_proj",
+    "visual.blocks.29.attn.qkv",
+    "model.layers.27.mlp.gate_proj",
+    "model.layers.21.self_attn_text.o_proj",
+    "model.layers.21.mlp.up_proj",
+    "model.layers.1.self_attn.v_proj",
+    "visual.blocks.29.attn.proj",
+    "model.layers.8.self_attn_text.q_proj",
+    "model.layers.3.self_attn_text.v_proj",
+    "model.layers.1.self_attn_text.v_proj",
+    "visual.blocks.21.mlp.fc2",
+    "model.layers.3.self_attn.v_proj",
+    "visual.blocks.4.attn.proj",
+    "model.layers.4.self_attn.v_proj",
+    "model.layers.7.self_attn_text.v_proj",
+    "model.layers.22.self_attn_text.v_proj",
+    "model.layers.20.self_attn.v_proj",
+    "model.layers.21.self_attn_text.q_proj",
+    "model.layers.12.self_attn.o_proj",
+    "visual.blocks.27.mlp.fc2",
+    "model.layers.18.self_attn_text.k_proj",
+    "model.layers.24.self_attn_text.v_proj",
+    "model.layers.26.mlp.up_proj",
+    "model.layers.8.self_attn_text.o_proj",
+    "visual.blocks.11.mlp.fc1",
+    "model.layers.1.self_attn_text.q_proj",
+    "model.layers.7.self_attn.v_proj",
+    "visual.blocks.26.mlp.fc1",
+    "model.layers.11.self_attn.v_proj",
+    "model.layers.13.self_attn.k_proj",
+    "model.layers.10.self_attn.o_proj",
+    "model.layers.15.mlp.up_proj",
+    "visual.blocks.15.mlp.fc1",
+    "model.layers.22.mlp.down_proj",
+    "model.layers.24.mlp.up_proj",
+    "visual.blocks.15.mlp.fc2",
+    "model.layers.10.self_attn_text.o_proj",
+    "model.layers.15.self_attn_text.k_proj",
+    "visual.blocks.1.attn.qkv",
+    "model.layers.11.self_attn_text.o_proj",
+    "visual.blocks.10.mlp.fc1",
+    "model.layers.17.mlp.down_proj",
+    "visual.blocks.24.attn.qkv",
+    "model.layers.24.mlp.gate_proj",
+    "visual.blocks.7.attn.qkv",
+    "model.layers.5.self_attn.k_proj",
+    "model.layers.23.self_attn.q_proj",
+    "model.layers.0.mlp.up_proj",
+    "model.layers.22.self_attn_text.q_proj",
+    "visual.blocks.12.mlp.fc2",
+    "model.layers.3.mlp.gate_proj",
+    "model.layers.18.self_attn_text.v_proj",
+    "model.layers.12.self_attn_text.o_proj",
+    "model.layers.5.mlp.down_proj",
+    "model.layers.10.self_attn_text.k_proj",
+    "visual.blocks.24.attn.proj",
+    "model.layers.11.self_attn_text.q_proj",
+    "model.layers.25.self_attn.v_proj",
+    "model.layers.17.mlp.up_proj",
+    "visual.blocks.23.mlp.fc2",
+    "model.layers.22.self_attn.o_proj",
+    "model.layers.14.self_attn_text.o_proj",
+    "model.layers.19.mlp.up_proj",
+    "model.layers.14.self_attn.k_proj",
+    "visual.blocks.31.attn.qkv",
+    "model.layers.13.self_attn_text.v_proj",
+    "model.layers.16.mlp.down_proj",
+    "model.layers.16.self_attn_text.v_proj",
+    "model.layers.24.self_attn_text.k_proj",
+    "model.layers.26.self_attn_text.q_proj",
+    "visual.blocks.16.attn.proj",
+    "visual.blocks.22.attn.qkv",
+    "model.layers.27.self_attn_text.o_proj",
+    "visual.blocks.27.mlp.fc1",
+    "visual.blocks.12.attn.proj",
+    "visual.blocks.28.attn.proj",
+    "model.layers.21.self_attn_text.k_proj",
+    "visual.blocks.28.attn.qkv",
+    "visual.blocks.21.mlp.fc1",
+    "model.layers.27.mlp.up_proj",
+    "model.layers.15.self_attn.v_proj",
+    "model.layers.24.self_attn.k_proj",
+    "model.layers.2.self_attn_text.q_proj",
+    "model.layers.15.self_attn.q_proj",
+    "visual.blocks.29.mlp.fc2",
+    "visual.blocks.13.attn.qkv",
+    "visual.blocks.24.mlp.fc1",
+    "model.layers.11.self_attn.o_proj",
+    "model.layers.2.self_attn_text.o_proj",
+    "visual.blocks.7.attn.proj",
+    "model.layers.6.self_attn.o_proj",
+    "model.layers.9.self_attn_text.q_proj",
+    "model.layers.0.self_attn.o_proj",
+    "model.layers.9.mlp.gate_proj",
+    "visual.blocks.0.attn.qkv",
+    "model.layers.2.self_attn_text.v_proj",
+    "model.layers.8.mlp.up_proj",
+    "visual.blocks.8.attn.proj",
+    "visual.blocks.18.attn.proj",
+    "model.layers.4.self_attn_text.v_proj",
+    "model.layers.17.self_attn.o_proj",
+    "visual.blocks.22.attn.proj",
+    "model.layers.9.self_attn.o_proj",
+    "model.layers.26.self_attn.q_proj",
+    "visual.blocks.11.mlp.fc2",
+    "model.layers.22.mlp.up_proj",
+    "model.layers.18.mlp.up_proj",
+    "model.layers.14.self_attn_text.k_proj",
+    "visual.blocks.9.mlp.fc2",
+    "visual.blocks.11.attn.proj",
+    "model.layers.17.self_attn.k_proj",
+    "model.layers.8.self_attn.k_proj",
+    "model.layers.12.self_attn_text.v_proj",
+    "model.layers.26.mlp.down_proj",
+    "model.layers.14.self_attn.v_proj",
+    "model.layers.22.self_attn_text.o_proj",
+    "model.layers.0.self_attn_text.v_proj",
+    "model.layers.7.mlp.gate_proj",
+    "model.layers.22.self_attn.v_proj",
+    "model.layers.24.mlp.down_proj",
+    "model.layers.20.self_attn_text.q_proj",
+    "model.layers.2.self_attn.o_proj",
+    "model.layers.11.self_attn_text.k_proj",
+    "model.layers.24.self_attn.q_proj",
+    "model.layers.18.self_attn_text.q_proj",
+    "model.layers.6.self_attn_text.v_proj",
+    "model.layers.0.self_attn.q_proj",
+    "model.layers.25.self_attn.q_proj",
+    "model.layers.19.self_attn_text.q_proj",
+    "visual.blocks.20.mlp.fc2",
+    "model.layers.13.self_attn_text.k_proj",
+    "model.layers.25.mlp.up_proj",
+    "model.layers.20.self_attn_text.v_proj",
+    "visual.blocks.8.attn.qkv",
+    "visual.blocks.16.mlp.fc1",
+    "model.layers.25.self_attn.k_proj",
+    "model.layers.22.self_attn_text.k_proj",
+    "model.layers.16.self_attn.k_proj",
+    "model.layers.24.self_attn.o_proj",
+    "model.layers.15.self_attn.k_proj",
+    "visual.blocks.1.mlp.fc2",
+    "model.layers.6.self_attn.q_proj",
+    "model.layers.15.mlp.down_proj",
+    "visual.blocks.2.attn.qkv",
+    "model.layers.14.self_attn.q_proj",
+    "model.layers.4.self_attn.q_proj",
+    "visual.blocks.19.mlp.fc2",
+    "model.layers.7.self_attn.k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-1550/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:845f34c1f221b697726779b3fd71e1029e7d68df2e0d70cd0bb291bb74d0558a
+size 133350944

checkpoint-1550/added_tokens.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

checkpoint-1550/chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+}

checkpoint-1550/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1550/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62e3c5c599d5aaa75e6dcff11ab64bf776776e62bf32286668cb1e63b02c27a6
+size 267205066

checkpoint-1550/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 12845056,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "longest_edge": 12845056,
+    "shortest_edge": 3136
+  },
+  "temporal_patch_size": 2
+}

checkpoint-1550/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a456c014f4062602da085bb6b03f6bda6f86ab8b2dcf1a268dd903ed52f3fb91
+size 14512

checkpoint-1550/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74ea19755c03db3ccc062fd1a38208daf2c7258b6dd1d567b2257f1306869548
+size 14512

checkpoint-1550/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c97d38d3bd4ed40c71b688ea05ca6d304a5bf700ddc5a8af6bedb520bae088a
+size 988

checkpoint-1550/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a770130995005184d2e5d60f8a10bfcce2b010542c31499a57165f764fe2b67
+size 1064

checkpoint-1550/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-1550/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:091aa7594dc2fcfbfa06b9e3c22a5f0562ac14f30375c13af7309407a0e67b8a
+size 11420371

checkpoint-1550/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,148 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "max_length": null,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "Qwen2VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

checkpoint-1550/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1118 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 24.608,
+  "eval_steps": 500,
+  "global_step": 1550,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.16,
+      "grad_norm": 155.76007080078125,
+      "learning_rate": 9.032258064516129e-07,
+      "loss": 12.094,
+      "step": 10
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 37.119384765625,
+      "learning_rate": 2.1935483870967745e-06,
+      "loss": 7.0819,
+      "step": 20
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 14.752822875976562,
+      "learning_rate": 3.4838709677419357e-06,
+      "loss": 4.4657,
+      "step": 30
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 11.597710609436035,
+      "learning_rate": 4.774193548387097e-06,
+      "loss": 3.5378,
+      "step": 40
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 15.393077850341797,
+      "learning_rate": 6.064516129032259e-06,
+      "loss": 2.8862,
+      "step": 50
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 23.84307861328125,
+      "learning_rate": 7.35483870967742e-06,
+      "loss": 2.6138,
+      "step": 60
+    },
+    {
+      "epoch": 1.112,
+      "grad_norm": 13.163668632507324,
+      "learning_rate": 8.64516129032258e-06,
+      "loss": 2.3109,
+      "step": 70
+    },
+    {
+      "epoch": 1.272,
+      "grad_norm": 12.168913841247559,
+      "learning_rate": 9.935483870967742e-06,
+      "loss": 2.2949,
+      "step": 80
+    },
+    {
+      "epoch": 1.432,
+      "grad_norm": 10.725199699401855,
+      "learning_rate": 1.1225806451612904e-05,
+      "loss": 2.3399,
+      "step": 90
+    },
+    {
+      "epoch": 1.592,
+      "grad_norm": 8.531355857849121,
+      "learning_rate": 1.2516129032258067e-05,
+      "loss": 2.217,
+      "step": 100
+    },
+    {
+      "epoch": 1.752,
+      "grad_norm": 6.670936584472656,
+      "learning_rate": 1.3806451612903227e-05,
+      "loss": 2.1938,
+      "step": 110
+    },
+    {
+      "epoch": 1.912,
+      "grad_norm": 5.666457653045654,
+      "learning_rate": 1.5096774193548389e-05,
+      "loss": 2.0994,
+      "step": 120
+    },
+    {
+      "epoch": 2.064,
+      "grad_norm": 7.0824384689331055,
+      "learning_rate": 1.638709677419355e-05,
+      "loss": 2.0094,
+      "step": 130
+    },
+    {
+      "epoch": 2.224,
+      "grad_norm": 5.3269195556640625,
+      "learning_rate": 1.7677419354838713e-05,
+      "loss": 1.8313,
+      "step": 140
+    },
+    {
+      "epoch": 2.384,
+      "grad_norm": 3.4799787998199463,
+      "learning_rate": 1.896774193548387e-05,
+      "loss": 1.8772,
+      "step": 150
+    },
+    {
+      "epoch": 2.544,
+      "grad_norm": 4.512059211730957,
+      "learning_rate": 1.9999898566691428e-05,
+      "loss": 1.7948,
+      "step": 160
+    },
+    {
+      "epoch": 2.7039999999999997,
+      "grad_norm": 9.884415626525879,
+      "learning_rate": 1.9996348616949673e-05,
+      "loss": 1.7994,
+      "step": 170
+    },
+    {
+      "epoch": 2.864,
+      "grad_norm": 3.1838889122009277,
+      "learning_rate": 1.998772905933476e-05,
+      "loss": 1.8654,
+      "step": 180
+    },
+    {
+      "epoch": 3.016,
+      "grad_norm": 3.452301263809204,
+      "learning_rate": 1.9974044265220564e-05,
+      "loss": 1.6745,
+      "step": 190
+    },
+    {
+      "epoch": 3.176,
+      "grad_norm": 3.3805224895477295,
+      "learning_rate": 1.995530117479521e-05,
+      "loss": 1.5509,
+      "step": 200
+    },
+    {
+      "epoch": 3.336,
+      "grad_norm": 6.541603088378906,
+      "learning_rate": 1.993150929354139e-05,
+      "loss": 1.4749,
+      "step": 210
+    },
+    {
+      "epoch": 3.496,
+      "grad_norm": 2.95489764213562,
+      "learning_rate": 1.9902680687415704e-05,
+      "loss": 1.4165,
+      "step": 220
+    },
+    {
+      "epoch": 3.656,
+      "grad_norm": 3.144228458404541,
+      "learning_rate": 1.9868829976729444e-05,
+      "loss": 1.3226,
+      "step": 230
+    },
+    {
+      "epoch": 3.816,
+      "grad_norm": 3.747593641281128,
+      "learning_rate": 1.982997432873397e-05,
+      "loss": 1.5257,
+      "step": 240
+    },
+    {
+      "epoch": 3.976,
+      "grad_norm": 2.2221176624298096,
+      "learning_rate": 1.978613344891441e-05,
+      "loss": 1.4218,
+      "step": 250
+    },
+    {
+      "epoch": 4.128,
+      "grad_norm": 2.854719877243042,
+      "learning_rate": 1.9737329570996098e-05,
+      "loss": 1.2454,
+      "step": 260
+    },
+    {
+      "epoch": 4.288,
+      "grad_norm": 3.9374194145202637,
+      "learning_rate": 1.968358744566884e-05,
+      "loss": 1.2503,
+      "step": 270
+    },
+    {
+      "epoch": 4.448,
+      "grad_norm": 4.536250591278076,
+      "learning_rate": 1.9624934328034673e-05,
+      "loss": 1.2983,
+      "step": 280
+    },
+    {
+      "epoch": 4.608,
+      "grad_norm": 4.311966419219971,
+      "learning_rate": 1.9561399963785586e-05,
+      "loss": 1.2944,
+      "step": 290
+    },
+    {
+      "epoch": 4.768,
+      "grad_norm": 4.188143253326416,
+      "learning_rate": 1.9493016574118103e-05,
+      "loss": 1.2997,
+      "step": 300
+    },
+    {
+      "epoch": 4.928,
+      "grad_norm": 5.04379415512085,
+      "learning_rate": 1.9419818839392408e-05,
+      "loss": 1.2976,
+      "step": 310
+    },
+    {
+      "epoch": 5.08,
+      "grad_norm": 4.528952598571777,
+      "learning_rate": 1.9341843881544372e-05,
+      "loss": 1.1579,
+      "step": 320
+    },
+    {
+      "epoch": 5.24,
+      "grad_norm": 4.810428142547607,
+      "learning_rate": 1.9259131245259293e-05,
+      "loss": 1.13,
+      "step": 330
+    },
+    {
+      "epoch": 5.4,
+      "grad_norm": 3.7566370964050293,
+      "learning_rate": 1.917172287791698e-05,
+      "loss": 1.1387,
+      "step": 340
+    },
+    {
+      "epoch": 5.5600000000000005,
+      "grad_norm": 3.8142237663269043,
+      "learning_rate": 1.9079663108318304e-05,
+      "loss": 1.1176,
+      "step": 350
+    },
+    {
+      "epoch": 5.72,
+      "grad_norm": 4.0017619132995605,
+      "learning_rate": 1.8982998624204016e-05,
+      "loss": 1.1042,
+      "step": 360
+    },
+    {
+      "epoch": 5.88,
+      "grad_norm": 3.9953103065490723,
+      "learning_rate": 1.8881778448577274e-05,
+      "loss": 1.1386,
+      "step": 370
+    },
+    {
+      "epoch": 6.032,
+      "grad_norm": 3.269265651702881,
+      "learning_rate": 1.877605391484179e-05,
+      "loss": 0.9651,
+      "step": 380
+    },
+    {
+      "epoch": 6.192,
+      "grad_norm": 5.4509172439575195,
+      "learning_rate": 1.8665878640768332e-05,
+      "loss": 0.9487,
+      "step": 390
+    },
+    {
+      "epoch": 6.352,
+      "grad_norm": 3.8790087699890137,
+      "learning_rate": 1.855130850130267e-05,
+      "loss": 0.9193,
+      "step": 400
+    },
+    {
+      "epoch": 6.5120000000000005,
+      "grad_norm": 5.1756110191345215,
+      "learning_rate": 1.8432401600228823e-05,
+      "loss": 0.9112,
+      "step": 410
+    },
+    {
+      "epoch": 6.672,
+      "grad_norm": 4.771461009979248,
+      "learning_rate": 1.8309218240701973e-05,
+      "loss": 0.9371,
+      "step": 420
+    },
+    {
+      "epoch": 6.832,
+      "grad_norm": 4.88088846206665,
+      "learning_rate": 1.818182089466595e-05,
+      "loss": 1.0264,
+      "step": 430
+    },
+    {
+      "epoch": 6.992,
+      "grad_norm": 4.158401012420654,
+      "learning_rate": 1.8050274171170835e-05,
+      "loss": 0.9534,
+      "step": 440
+    },
+    {
+      "epoch": 7.144,
+      "grad_norm": 5.25468635559082,
+      "learning_rate": 1.791464478360676e-05,
+      "loss": 0.7345,
+      "step": 450
+    },
+    {
+      "epoch": 7.304,
+      "grad_norm": 4.713033676147461,
+      "learning_rate": 1.7775001515870466e-05,
+      "loss": 0.8399,
+      "step": 460
+    },
+    {
+      "epoch": 7.464,
+      "grad_norm": 5.714450359344482,
+      "learning_rate": 1.7631415187481818e-05,
+      "loss": 0.7525,
+      "step": 470
+    },
+    {
+      "epoch": 7.624,
+      "grad_norm": 6.085780143737793,
+      "learning_rate": 1.7483958617668e-05,
+      "loss": 0.7276,
+      "step": 480
+    },
+    {
+      "epoch": 7.784,
+      "grad_norm": 4.569671630859375,
+      "learning_rate": 1.733270658843351e-05,
+      "loss": 0.8071,
+      "step": 490
+    },
+    {
+      "epoch": 7.944,
+      "grad_norm": 6.115426540374756,
+      "learning_rate": 1.717773580663479e-05,
+      "loss": 0.7683,
+      "step": 500
+    },
+    {
+      "epoch": 8.096,
+      "grad_norm": 4.305016040802002,
+      "learning_rate": 1.7019124865078625e-05,
+      "loss": 0.6376,
+      "step": 510
+    },
+    {
+      "epoch": 8.256,
+      "grad_norm": 6.470266342163086,
+      "learning_rate": 1.6856954202664158e-05,
+      "loss": 0.6286,
+      "step": 520
+    },
+    {
+      "epoch": 8.416,
+      "grad_norm": 6.055320739746094,
+      "learning_rate": 1.6691306063588583e-05,
+      "loss": 0.6196,
+      "step": 530
+    },
+    {
+      "epoch": 8.576,
+      "grad_norm": 6.73253870010376,
+      "learning_rate": 1.652226445563737e-05,
+      "loss": 0.564,
+      "step": 540
+    },
+    {
+      "epoch": 8.736,
+      "grad_norm": 5.043179512023926,
+      "learning_rate": 1.634991510758003e-05,
+      "loss": 0.6122,
+      "step": 550
+    },
+    {
+      "epoch": 8.896,
+      "grad_norm": 6.78087854385376,
+      "learning_rate": 1.617434542569313e-05,
+      "loss": 0.6173,
+      "step": 560
+    },
+    {
+      "epoch": 9.048,
+      "grad_norm": 6.2355146408081055,
+      "learning_rate": 1.5995644449432538e-05,
+      "loss": 0.5342,
+      "step": 570
+    },
+    {
+      "epoch": 9.208,
+      "grad_norm": 5.987257480621338,
+      "learning_rate": 1.5813902806277445e-05,
+      "loss": 0.4269,
+      "step": 580
+    },
+    {
+      "epoch": 9.368,
+      "grad_norm": 5.455114364624023,
+      "learning_rate": 1.562921266576898e-05,
+      "loss": 0.4548,
+      "step": 590
+    },
+    {
+      "epoch": 9.528,
+      "grad_norm": 5.296268463134766,
+      "learning_rate": 1.5441667692766805e-05,
+      "loss": 0.4038,
+      "step": 600
+    },
+    {
+      "epoch": 9.688,
+      "grad_norm": 5.551358699798584,
+      "learning_rate": 1.5251362999947386e-05,
+      "loss": 0.4015,
+      "step": 610
+    },
+    {
+      "epoch": 9.848,
+      "grad_norm": 4.464796543121338,
+      "learning_rate": 1.5058395099567935e-05,
+      "loss": 0.4353,
+      "step": 620
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 3.268158197402954,
+      "learning_rate": 1.4862861854520652e-05,
+      "loss": 0.3927,
+      "step": 630
+    },
+    {
+      "epoch": 10.16,
+      "grad_norm": 8.046059608459473,
+      "learning_rate": 1.4664862428701925e-05,
+      "loss": 0.2612,
+      "step": 640
+    },
+    {
+      "epoch": 10.32,
+      "grad_norm": 4.157690048217773,
+      "learning_rate": 1.4464497236721779e-05,
+      "loss": 0.2621,
+      "step": 650
+    },
+    {
+      "epoch": 10.48,
+      "grad_norm": 5.3797688484191895,
+      "learning_rate": 1.4261867892979e-05,
+      "loss": 0.263,
+      "step": 660
+    },
+    {
+      "epoch": 10.64,
+      "grad_norm": 4.068567276000977,
+      "learning_rate": 1.4057077160127806e-05,
+      "loss": 0.2492,
+      "step": 670
+    },
+    {
+      "epoch": 10.8,
+      "grad_norm": 5.405711650848389,
+      "learning_rate": 1.3850228896962178e-05,
+      "loss": 0.2523,
+      "step": 680
+    },
+    {
+      "epoch": 10.96,
+      "grad_norm": 4.762354373931885,
+      "learning_rate": 1.3641428005744308e-05,
+      "loss": 0.2586,
+      "step": 690
+    },
+    {
+      "epoch": 11.112,
+      "grad_norm": 5.127146244049072,
+      "learning_rate": 1.3430780379003814e-05,
+      "loss": 0.1699,
+      "step": 700
+    },
+    {
+      "epoch": 11.272,
+      "grad_norm": 3.0993189811706543,
+      "learning_rate": 1.3218392845834789e-05,
+      "loss": 0.1514,
+      "step": 710
+    },
+    {
+      "epoch": 11.432,
+      "grad_norm": 5.754135608673096,
+      "learning_rate": 1.300437311771785e-05,
+      "loss": 0.1432,
+      "step": 720
+    },
+    {
+      "epoch": 11.592,
+      "grad_norm": 4.12827730178833,
+      "learning_rate": 1.2788829733894698e-05,
+      "loss": 0.1512,
+      "step": 730
+    },
+    {
+      "epoch": 11.752,
+      "grad_norm": 4.6962175369262695,
+      "learning_rate": 1.257187200632289e-05,
+      "loss": 0.1534,
+      "step": 740
+    },
+    {
+      "epoch": 11.912,
+      "grad_norm": 6.317523002624512,
+      "learning_rate": 1.2353609964238686e-05,
+      "loss": 0.1452,
+      "step": 750
+    },
+    {
+      "epoch": 12.064,
+      "grad_norm": 2.793424367904663,
+      "learning_rate": 1.213415429835621e-05,
+      "loss": 0.1167,
+      "step": 760
+    },
+    {
+      "epoch": 12.224,
+      "grad_norm": 3.816258668899536,
+      "learning_rate": 1.1913616304731064e-05,
+      "loss": 0.0785,
+      "step": 770
+    },
+    {
+      "epoch": 12.384,
+      "grad_norm": 3.989567518234253,
+      "learning_rate": 1.1692107828317014e-05,
+      "loss": 0.0857,
+      "step": 780
+    },
+    {
+      "epoch": 12.544,
+      "grad_norm": 4.456111431121826,
+      "learning_rate": 1.1469741206244249e-05,
+      "loss": 0.0862,
+      "step": 790
+    },
+    {
+      "epoch": 12.704,
+      "grad_norm": 4.539771556854248,
+      "learning_rate": 1.1246629210848062e-05,
+      "loss": 0.0949,
+      "step": 800
+    },
+    {
+      "epoch": 12.864,
+      "grad_norm": 2.4530129432678223,
+      "learning_rate": 1.1022884992476826e-05,
+      "loss": 0.0928,
+      "step": 810
+    },
+    {
+      "epoch": 13.016,
+      "grad_norm": 2.042999267578125,
+      "learning_rate": 1.0821068423364156e-05,
+      "loss": 0.0951,
+      "step": 820
+    },
+    {
+      "epoch": 13.176,
+      "grad_norm": 2.9049434661865234,
+      "learning_rate": 1.0596435812513276e-05,
+      "loss": 0.0483,
+      "step": 830
+    },
+    {
+      "epoch": 13.336,
+      "grad_norm": 2.3502166271209717,
+      "learning_rate": 1.037150072164626e-05,
+      "loss": 0.0559,
+      "step": 840
+    },
+    {
+      "epoch": 13.496,
+      "grad_norm": 2.2428765296936035,
+      "learning_rate": 1.0146377225686996e-05,
+      "loss": 0.0801,
+      "step": 850
+    },
+    {
+      "epoch": 13.656,
+      "grad_norm": 5.673745155334473,
+      "learning_rate": 9.921179495108249e-06,
+      "loss": 0.0683,
+      "step": 860
+    },
+    {
+      "epoch": 13.816,
+      "grad_norm": 3.9386937618255615,
+      "learning_rate": 9.696021738030575e-06,
+      "loss": 0.0616,
+      "step": 870
+    },
+    {
+      "epoch": 13.975999999999999,
+      "grad_norm": 4.362432479858398,
+      "learning_rate": 9.471018142302127e-06,
+      "loss": 0.058,
+      "step": 880
+    },
+    {
+      "epoch": 14.128,
+      "grad_norm": 2.225241184234619,
+      "learning_rate": 9.24628281758876e-06,
+      "loss": 0.0356,
+      "step": 890
+    },
+    {
+      "epoch": 14.288,
+      "grad_norm": 4.0786356925964355,
+      "learning_rate": 9.021929737503757e-06,
+      "loss": 0.0458,
+      "step": 900
+    },
+    {
+      "epoch": 14.448,
+      "grad_norm": 2.464179277420044,
+      "learning_rate": 8.79807268180658e-06,
+      "loss": 0.0531,
+      "step": 910
+    },
+    {
+      "epoch": 14.608,
+      "grad_norm": 2.679661273956299,
+      "learning_rate": 8.574825178699935e-06,
+      "loss": 0.0359,
+      "step": 920
+    },
+    {
+      "epoch": 14.768,
+      "grad_norm": 2.0911498069763184,
+      "learning_rate": 8.352300447254372e-06,
+      "loss": 0.0362,
+      "step": 930
+    },
+    {
+      "epoch": 14.928,
+      "grad_norm": 2.3030571937561035,
+      "learning_rate": 8.130611339989731e-06,
+      "loss": 0.0292,
+      "step": 940
+    },
+    {
+      "epoch": 15.08,
+      "grad_norm": 1.6733816862106323,
+      "learning_rate": 7.909870285642403e-06,
+      "loss": 0.0241,
+      "step": 950
+    },
+    {
+      "epoch": 15.24,
+      "grad_norm": 1.4519929885864258,
+      "learning_rate": 7.690189232147566e-06,
+      "loss": 0.0264,
+      "step": 960
+    },
+    {
+      "epoch": 15.4,
+      "grad_norm": 1.980666995048523,
+      "learning_rate": 7.4716795898652615e-06,
+      "loss": 0.0231,
+      "step": 970
+    },
+    {
+      "epoch": 15.56,
+      "grad_norm": 2.6794183254241943,
+      "learning_rate": 7.2544521750790345e-06,
+      "loss": 0.0243,
+      "step": 980
+    },
+    {
+      "epoch": 15.72,
+      "grad_norm": 1.8193122148513794,
+      "learning_rate": 7.038617153795948e-06,
+      "loss": 0.0226,
+      "step": 990
+    },
+    {
+      "epoch": 15.88,
+      "grad_norm": 2.1489455699920654,
+      "learning_rate": 6.82428398587631e-06,
+      "loss": 0.0321,
+      "step": 1000
+    },
+    {
+      "epoch": 16.032,
+      "grad_norm": 0.9566267728805542,
+      "learning_rate": 6.611561369521546e-06,
+      "loss": 0.019,
+      "step": 1010
+    },
+    {
+      "epoch": 16.192,
+      "grad_norm": 0.45050784945487976,
+      "learning_rate": 6.400557186148371e-06,
+      "loss": 0.0101,
+      "step": 1020
+    },
+    {
+      "epoch": 16.352,
+      "grad_norm": 3.0079352855682373,
+      "learning_rate": 6.191378445677125e-06,
+      "loss": 0.0139,
+      "step": 1030
+    },
+    {
+      "epoch": 16.512,
+      "grad_norm": 1.0027068853378296,
+      "learning_rate": 5.984131232262167e-06,
+      "loss": 0.0264,
+      "step": 1040
+    },
+    {
+      "epoch": 16.672,
+      "grad_norm": 0.34918779134750366,
+      "learning_rate": 5.7789206504916815e-06,
+      "loss": 0.0123,
+      "step": 1050
+    },
+    {
+      "epoch": 16.832,
+      "grad_norm": 1.0329653024673462,
+      "learning_rate": 5.5758507720843425e-06,
+      "loss": 0.0115,
+      "step": 1060
+    },
+    {
+      "epoch": 16.992,
+      "grad_norm": 1.9161659479141235,
+      "learning_rate": 5.375024583109745e-06,
+      "loss": 0.0135,
+      "step": 1070
+    },
+    {
+      "epoch": 17.144,
+      "grad_norm": 0.42554718255996704,
+      "learning_rate": 5.176543931759447e-06,
+      "loss": 0.005,
+      "step": 1080
+    },
+    {
+      "epoch": 17.304,
+      "grad_norm": 0.7298970818519592,
+      "learning_rate": 4.980509476695043e-06,
+      "loss": 0.0096,
+      "step": 1090
+    },
+    {
+      "epoch": 17.464,
+      "grad_norm": 2.393183946609497,
+      "learning_rate": 4.7870206359995815e-06,
+      "loss": 0.0148,
+      "step": 1100
+    },
+    {
+      "epoch": 17.624,
+      "grad_norm": 0.4778424799442291,
+      "learning_rate": 4.596175536758024e-06,
+      "loss": 0.0067,
+      "step": 1110
+    },
+    {
+      "epoch": 17.784,
+      "grad_norm": 0.22980810701847076,
+      "learning_rate": 4.408070965292534e-06,
+      "loss": 0.0053,
+      "step": 1120
+    },
+    {
+      "epoch": 17.944,
+      "grad_norm": 0.18192055821418762,
+      "learning_rate": 4.222802318077664e-06,
+      "loss": 0.0079,
+      "step": 1130
+    },
+    {
+      "epoch": 18.096,
+      "grad_norm": 0.6733874678611755,
+      "learning_rate": 4.040463553360431e-06,
+      "loss": 0.0039,
+      "step": 1140
+    },
+    {
+      "epoch": 18.256,
+      "grad_norm": 0.17421452701091766,
+      "learning_rate": 3.861147143509754e-06,
+      "loss": 0.0023,
+      "step": 1150
+    },
+    {
+      "epoch": 18.416,
+      "grad_norm": 0.15809208154678345,
+      "learning_rate": 3.6849440281194813e-06,
+      "loss": 0.006,
+      "step": 1160
+    },
+    {
+      "epoch": 18.576,
+      "grad_norm": 0.06922920793294907,
+      "learning_rate": 3.5119435678887328e-06,
+      "loss": 0.0023,
+      "step": 1170
+    },
+    {
+      "epoch": 18.736,
+      "grad_norm": 0.08193696290254593,
+      "learning_rate": 3.342233499302985e-06,
+      "loss": 0.003,
+      "step": 1180
+    },
+    {
+      "epoch": 18.896,
+      "grad_norm": 0.0757126435637474,
+      "learning_rate": 3.175899890138858e-06,
+      "loss": 0.002,
+      "step": 1190
+    },
+    {
+      "epoch": 19.048,
+      "grad_norm": 0.057399798184633255,
+      "learning_rate": 3.0130270958152196e-06,
+      "loss": 0.0022,
+      "step": 1200
+    },
+    {
+      "epoch": 19.208,
+      "grad_norm": 0.068113774061203,
+      "learning_rate": 2.8536977166126234e-06,
+      "loss": 0.0022,
+      "step": 1210
+    },
+    {
+      "epoch": 19.368,
+      "grad_norm": 0.06517008692026138,
+      "learning_rate": 2.697992555782969e-06,
+      "loss": 0.0016,
+      "step": 1220
+    },
+    {
+      "epoch": 19.528,
+      "grad_norm": 0.07533544301986694,
+      "learning_rate": 2.545990578570404e-06,
+      "loss": 0.0015,
+      "step": 1230
+    },
+    {
+      "epoch": 19.688,
+      "grad_norm": 0.08159100264310837,
+      "learning_rate": 2.397768872164462e-06,
+      "loss": 0.0018,
+      "step": 1240
+    },
+    {
+      "epoch": 19.848,
+      "grad_norm": 0.05212102085351944,
+      "learning_rate": 2.253402606605577e-06,
+      "loss": 0.0014,
+      "step": 1250
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 0.038643430918455124,
+      "learning_rate": 2.1129649966629185e-06,
+      "loss": 0.0013,
+      "step": 1260
+    },
+    {
+      "epoch": 20.16,
+      "grad_norm": 0.040117453783750534,
+      "learning_rate": 1.9765272647038038e-06,
+      "loss": 0.0013,
+      "step": 1270
+    },
+    {
+      "epoch": 20.32,
+      "grad_norm": 0.03363404422998428,
+      "learning_rate": 1.8441586045735737e-06,
+      "loss": 0.0011,
+      "step": 1280
+    },
+    {
+      "epoch": 20.48,
+      "grad_norm": 0.055696483701467514,
+      "learning_rate": 1.7159261465041954e-06,
+      "loss": 0.0013,
+      "step": 1290
+    },
+    {
+      "epoch": 20.64,
+      "grad_norm": 0.0553043931722641,
+      "learning_rate": 1.5918949230694635e-06,
+      "loss": 0.0014,
+      "step": 1300
+    },
+    {
+      "epoch": 20.8,
+      "grad_norm": 0.049317434430122375,
+      "learning_rate": 1.4721278362039626e-06,
+      "loss": 0.0011,
+      "step": 1310
+    },
+    {
+      "epoch": 20.96,
+      "grad_norm": 0.07064161449670792,
+      "learning_rate": 1.356685625302625e-06,
+      "loss": 0.0012,
+      "step": 1320
+    },
+    {
+      "epoch": 21.112,
+      "grad_norm": 0.0384482778608799,
+      "learning_rate": 1.2456268364169853e-06,
+      "loss": 0.0011,
+      "step": 1330
+    },
+    {
+      "epoch": 21.272,
+      "grad_norm": 0.04504753276705742,
+      "learning_rate": 1.1390077925637865e-06,
+      "loss": 0.0011,
+      "step": 1340
+    },
+    {
+      "epoch": 21.432,
+      "grad_norm": 0.04046454280614853,
+      "learning_rate": 1.0368825651609893e-06,
+      "loss": 0.001,
+      "step": 1350
+    },
+    {
+      "epoch": 21.592,
+      "grad_norm": 0.04408493638038635,
+      "learning_rate": 9.393029466056714e-07,
+      "loss": 0.0012,
+      "step": 1360
+    },
+    {
+      "epoch": 21.752,
+      "grad_norm": 0.03646273910999298,
+      "learning_rate": 8.463184240077172e-07,
+      "loss": 0.0012,
+      "step": 1370
+    },
+    {
+      "epoch": 21.912,
+      "grad_norm": 0.03203440457582474,
+      "learning_rate": 7.579761540926434e-07,
+      "loss": 0.0011,
+      "step": 1380
+    },
+    {
+      "epoch": 22.064,
+      "grad_norm": 0.03588934242725372,
+      "learning_rate": 6.743209392862349e-07,
+      "loss": 0.001,
+      "step": 1390
+    },
+    {
+      "epoch": 22.224,
+      "grad_norm": 0.0342290997505188,
+      "learning_rate": 5.953952049931999e-07,
+      "loss": 0.0011,
+      "step": 1400
+    },
+    {
+      "epoch": 22.384,
+      "grad_norm": 0.036632440984249115,
+      "learning_rate": 5.212389780812733e-07,
+      "loss": 0.001,
+      "step": 1410
+    },
+    {
+      "epoch": 22.544,
+      "grad_norm": 0.03759520500898361,
+      "learning_rate": 4.518898665817695e-07,
+      "loss": 0.0011,
+      "step": 1420
+    },
+    {
+      "epoch": 22.704,
+      "grad_norm": 0.03835231438279152,
+      "learning_rate": 3.8738304061681107e-07,
+      "loss": 0.0011,
+      "step": 1430
+    },
+    {
+      "epoch": 22.864,
+      "grad_norm": 0.042444001883268356,
+      "learning_rate": 3.2775121456295024e-07,
+      "loss": 0.0011,
+      "step": 1440
+    },
+    {
+      "epoch": 23.016,
+      "grad_norm": 0.033434733748435974,
+      "learning_rate": 2.730246304601991e-07,
+      "loss": 0.001,
+      "step": 1450
+    },
+    {
+      "epoch": 23.176,
+      "grad_norm": 0.03470597416162491,
+      "learning_rate": 2.2323104267490404e-07,
+      "loss": 0.0011,
+      "step": 1460
+    },
+    {
+      "epoch": 23.336,
+      "grad_norm": 0.04532945156097412,
+      "learning_rate": 1.783957038242279e-07,
+      "loss": 0.001,
+      "step": 1470
+    },
+    {
+      "epoch": 23.496,
+      "grad_norm": 0.035716019570827484,
+      "learning_rate": 1.3854135196939345e-07,
+      "loss": 0.001,
+      "step": 1480
+    },
+    {
+      "epoch": 23.656,
+      "grad_norm": 0.03435162454843521,
+      "learning_rate": 1.0368819908415983e-07,
+      "loss": 0.0011,
+      "step": 1490
+    },
+    {
+      "epoch": 23.816,
+      "grad_norm": 0.04788799211382866,
+      "learning_rate": 7.385392080440535e-08,
+      "loss": 0.0011,
+      "step": 1500
+    },
+    {
+      "epoch": 23.976,
+      "grad_norm": 0.037617627531290054,
+      "learning_rate": 4.905364746400021e-08,
+      "loss": 0.0011,
+      "step": 1510
+    },
+    {
+      "epoch": 24.128,
+      "grad_norm": 0.04006591811776161,
+      "learning_rate": 2.929995642151906e-08,
+      "loss": 0.001,
+      "step": 1520
+    },
+    {
+      "epoch": 24.288,
+      "grad_norm": 0.03150051832199097,
+      "learning_rate": 1.4602865681682122e-08,
+      "loss": 0.001,
+      "step": 1530
+    },
+    {
+      "epoch": 24.448,
+      "grad_norm": 0.04720960184931755,
+      "learning_rate": 4.969828814767042e-09,
+      "loss": 0.001,
+      "step": 1540
+    },
+    {
+      "epoch": 24.608,
+      "grad_norm": 0.0407867431640625,
+      "learning_rate": 4.0573117655595684e-10,
+      "loss": 0.001,
+      "step": 1550
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1550,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 25,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.324081921088553e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1550/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc45890bd2d24eb38ee6085d083cd1874d1991cf87176f31b08f0cafc9576e6c
+size 5688

checkpoint-1550/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 12845056,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "longest_edge": 12845056,
+    "shortest_edge": 3136
+  },
+  "temporal_patch_size": 2
+}

runs/Mar14_21-25-37_36c244e9105b/events.out.tfevents.1741987616.36c244e9105b.153.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:80083da1c9fae8677b49dd927b26fa083b186b01c1061ad3e277e9a6cca56c4a
+size 39394

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:091aa7594dc2fcfbfa06b9e3c22a5f0562ac14f30375c13af7309407a0e67b8a
+size 11420371

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,148 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "max_length": null,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "Qwen2VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 24.608,
+    "total_flos": 1.324081921088553e+17,
+    "train_loss": 0.672794044127147,
+    "train_runtime": 33908.1665,
+    "train_samples_per_second": 0.369,
+    "train_steps_per_second": 0.046
+}