Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- adapter_config.json +26 -0
- adapter_model.safetensors +3 -0
- git_hash.txt +1 -0
- preprocessor_config.json +40 -0
- results.json +1 -0
- special_tokens_map.json +39 -0
- tokenizer.json +3 -0
- tokenizer_config.json +0 -0
- training_config.yml +41 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "./models/paligemma-3b-mix-448",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": false,
|
8 |
+
"init_lora_weights": "gaussian",
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 32,
|
14 |
+
"lora_dropout": 0.1,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 32,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": "(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)",
|
23 |
+
"task_type": "FEATURE_EXTRACTION",
|
24 |
+
"use_dora": false,
|
25 |
+
"use_rslora": false
|
26 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f96fe700542c8012c3282fb5b86b83ef27cfaf77c2ecc89d56e8b5744d0d3fd
|
3 |
+
size 78625112
|
git_hash.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
c8a18e7425c52fda841cf3cc6c70a710a0fe5bb6
|
preprocessor_config.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_valid_processor_keys": [
|
3 |
+
"images",
|
4 |
+
"do_resize",
|
5 |
+
"size",
|
6 |
+
"resample",
|
7 |
+
"do_rescale",
|
8 |
+
"rescale_factor",
|
9 |
+
"do_normalize",
|
10 |
+
"image_mean",
|
11 |
+
"image_std",
|
12 |
+
"return_tensors",
|
13 |
+
"data_format",
|
14 |
+
"input_data_format",
|
15 |
+
"do_convert_rgb"
|
16 |
+
],
|
17 |
+
"do_convert_rgb": null,
|
18 |
+
"do_normalize": true,
|
19 |
+
"do_rescale": true,
|
20 |
+
"do_resize": true,
|
21 |
+
"image_mean": [
|
22 |
+
0.5,
|
23 |
+
0.5,
|
24 |
+
0.5
|
25 |
+
],
|
26 |
+
"image_processor_type": "SiglipImageProcessor",
|
27 |
+
"image_seq_length": 1024,
|
28 |
+
"image_std": [
|
29 |
+
0.5,
|
30 |
+
0.5,
|
31 |
+
0.5
|
32 |
+
],
|
33 |
+
"processor_class": "PaliGemmaProcessor",
|
34 |
+
"resample": 3,
|
35 |
+
"rescale_factor": 0.00392156862745098,
|
36 |
+
"size": {
|
37 |
+
"height": 448,
|
38 |
+
"width": 448
|
39 |
+
}
|
40 |
+
}
|
results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation_set": {"ndcg_at_1": 0.766, "ndcg_at_3": 0.81507, "ndcg_at_5": 0.8272, "ndcg_at_10": 0.83188, "ndcg_at_20": 0.8394, "ndcg_at_100": 0.84661, "ndcg_at_1000": 0.8515, "map_at_1": 0.766, "map_at_3": 0.803, "map_at_5": 0.8096, "map_at_10": 0.81162, "map_at_20": 0.81365, "map_at_100": 0.81456, "map_at_1000": 0.81479, "recall_at_1": 0.766, "recall_at_3": 0.85, "recall_at_5": 0.88, "recall_at_10": 0.894, "recall_at_20": 0.924, "recall_at_100": 0.964, "recall_at_1000": 1.0, "precision_at_1": 0.766, "precision_at_3": 0.28333, "precision_at_5": 0.176, "precision_at_10": 0.0894, "precision_at_20": 0.0462, "precision_at_100": 0.00964, "precision_at_1000": 0.001, "mrr_at_1": 0.768, "mrr_at_3": 0.803, "mrr_at_5": 0.8105999999999998, "mrr_at_10": 0.8131071428571427, "mrr_at_20": 0.8150872402451348, "mrr_at_100": 0.8158956494654525, "mrr_at_1000": 0.8161641241736527, "naucs_at_1_max": -0.05647265550841267, "naucs_at_1_std": 0.21091483847610842, "naucs_at_1_diff1": 0.9486531949368422, "naucs_at_3_max": -0.2782557761145449, "naucs_at_3_std": 0.4861112918971709, "naucs_at_3_diff1": 0.9356329319882861, "naucs_at_5_max": -0.3757795100222721, "naucs_at_5_std": 0.5670378619153679, "naucs_at_5_diff1": 0.9350540884505252, "naucs_at_10_max": -0.3661590614628277, "naucs_at_10_std": 0.5820776316490065, "naucs_at_10_diff1": 0.9313579326624427, "naucs_at_20_max": -0.4687945353580084, "naucs_at_20_std": 0.823246842596686, "naucs_at_20_diff1": 0.9514103887168898, "naucs_at_100_max": -0.8629785247432413, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 0.9627814088598399, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_energy": {"ndcg_at_1": 0.89, "ndcg_at_3": 0.92024, "ndcg_at_5": 0.93703, "ndcg_at_10": 0.93703, "ndcg_at_20": 0.93703, "ndcg_at_100": 0.94099, "ndcg_at_1000": 0.94099, "map_at_1": 0.89, "map_at_3": 0.91333, "map_at_5": 0.92283, "map_at_10": 0.92283, "map_at_20": 0.92283, "map_at_100": 0.92348, "map_at_1000": 0.92348, "recall_at_1": 0.89, "recall_at_3": 0.94, "recall_at_5": 0.98, "recall_at_10": 0.98, "recall_at_20": 0.98, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.89, "precision_at_3": 0.31333, "precision_at_5": 0.196, "precision_at_10": 0.098, "precision_at_20": 0.049, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.89, "mrr_at_3": 0.9183333333333333, "mrr_at_5": 0.9253333333333333, "mrr_at_10": 0.9253333333333333, "mrr_at_20": 0.9259583333333333, "mrr_at_100": 0.9261544117647058, "mrr_at_1000": 0.9261544117647058, "naucs_at_1_max": 0.20560948372132942, "naucs_at_1_std": -0.40249978524181756, "naucs_at_1_diff1": 0.8973455888669352, "naucs_at_3_max": 0.012138188608777013, "naucs_at_3_std": -0.41970121381886066, "naucs_at_3_diff1": 0.8821195144724526, "naucs_at_5_max": 0.27544351073763346, "naucs_at_5_std": 0.1914098972922579, "naucs_at_5_diff1": 0.9346405228758136, "naucs_at_10_max": 0.27544351073763346, "naucs_at_10_std": 0.1914098972922579, "naucs_at_10_diff1": 0.9346405228758136, "naucs_at_20_max": 0.27544351073763346, "naucs_at_20_std": 0.1914098972922579, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_healthcare_industry": {"ndcg_at_1": 0.9, "ndcg_at_3": 0.94524, "ndcg_at_5": 0.94524, "ndcg_at_10": 0.94524, "ndcg_at_20": 0.94768, "ndcg_at_100": 0.94989, "ndcg_at_1000": 0.94989, "map_at_1": 0.9, "map_at_3": 0.93333, "map_at_5": 0.93333, "map_at_10": 0.93333, "map_at_20": 0.93396, "map_at_100": 0.93441, "map_at_1000": 0.93441, "recall_at_1": 0.9, "recall_at_3": 0.98, "recall_at_5": 0.98, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.9, "precision_at_3": 0.32667, "precision_at_5": 0.196, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.91, "mrr_at_3": 0.9383333333333332, "mrr_at_5": 0.9383333333333332, "mrr_at_10": 0.9383333333333332, "mrr_at_20": 0.939047619047619, "mrr_at_100": 0.9395021645021644, "mrr_at_1000": 0.9395021645021644, "naucs_at_1_max": 0.5474789915966383, "naucs_at_1_std": -0.2496265172735772, "naucs_at_1_diff1": 0.9330065359477123, "naucs_at_3_max": 0.7957516339869218, "naucs_at_3_std": -0.6909430438842241, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 0.7957516339869297, "naucs_at_5_std": -0.690943043884218, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.7957516339869297, "naucs_at_10_std": -0.690943043884218, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.7222222222222276, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.93, "ndcg_at_3": 0.96155, "ndcg_at_5": 0.96585, "ndcg_at_10": 0.96585, "ndcg_at_20": 0.96585, "ndcg_at_100": 0.9678, "ndcg_at_1000": 0.9678, "map_at_1": 0.93, "map_at_3": 0.955, "map_at_5": 0.9575, "map_at_10": 0.9575, "map_at_20": 0.9575, "map_at_100": 0.95779, "map_at_1000": 0.95779, "recall_at_1": 0.93, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.93, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.9683333333333333, "mrr_at_5": 0.9683333333333333, "mrr_at_10": 0.9683333333333333, "mrr_at_20": 0.9683333333333333, "mrr_at_100": 0.9686363636363636, "mrr_at_1000": 0.9686363636363636, "naucs_at_1_max": 0.3985594237695081, "naucs_at_1_std": -0.13618780845671735, "naucs_at_1_diff1": 0.9416433239962672, "naucs_at_3_max": -0.5144724556489392, "naucs_at_3_std": -1.7399626517273692, "naucs_at_3_diff1": 0.9346405228758099, "naucs_at_5_max": -1.1517273576097316, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 0.8692810457516413, "naucs_at_10_max": -1.1517273576097316, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 0.8692810457516413, "naucs_at_20_max": -1.1517273576097316, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 0.8692810457516413, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports": {"ndcg_at_1": 0.85, "ndcg_at_3": 0.91547, "ndcg_at_5": 0.91934, "ndcg_at_10": 0.92893, "ndcg_at_20": 0.92893, "ndcg_at_100": 0.92893, "ndcg_at_1000": 0.92893, "map_at_1": 0.85, "map_at_3": 0.9, "map_at_5": 0.902, "map_at_10": 0.90589, "map_at_20": 0.90589, "map_at_100": 0.90589, "map_at_1000": 0.90589, "recall_at_1": 0.85, "recall_at_3": 0.96, "recall_at_5": 0.97, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.85, "precision_at_3": 0.32, "precision_at_5": 0.194, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.85, "mrr_at_3": 0.9, "mrr_at_5": 0.9045000000000001, "mrr_at_10": 0.9074166666666666, "mrr_at_20": 0.9074166666666666, "mrr_at_100": 0.9074166666666666, "mrr_at_1000": 0.9074166666666666, "naucs_at_1_max": 0.1951513179303619, "naucs_at_1_std": 0.1834038398958682, "naucs_at_1_diff1": 0.8705174096973645, "naucs_at_3_max": 0.195028011204482, "naucs_at_3_std": 0.8651960784313714, "naucs_at_3_diff1": 0.9346405228758151, "naucs_at_5_max": 0.317149081854964, "naucs_at_5_std": 0.8638344226579531, "naucs_at_5_diff1": 0.9128540305010848, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_subsampled": {"ndcg_at_1": 0.762, "ndcg_at_3": 0.82238, "ndcg_at_5": 0.83133, "ndcg_at_10": 0.8417, "ndcg_at_20": 0.84798, "ndcg_at_100": 0.8546, "ndcg_at_1000": 0.8572, "map_at_1": 0.762, "map_at_3": 0.80733, "map_at_5": 0.81223, "map_at_10": 0.81653, "map_at_20": 0.81837, "map_at_100": 0.81925, "map_at_1000": 0.81936, "recall_at_1": 0.762, "recall_at_3": 0.866, "recall_at_5": 0.888, "recall_at_10": 0.92, "recall_at_20": 0.944, "recall_at_100": 0.98, "recall_at_1000": 1.0, "precision_at_1": 0.762, "precision_at_3": 0.28867, "precision_at_5": 0.1776, "precision_at_10": 0.092, "precision_at_20": 0.0472, "precision_at_100": 0.0098, "precision_at_1000": 0.001, "mrr_at_1": 0.762, "mrr_at_3": 0.8056666666666665, "mrr_at_5": 0.8115666666666665, "mrr_at_10": 0.8159404761904762, "mrr_at_20": 0.8181074314574313, "mrr_at_100": 0.8189443570619399, "mrr_at_1000": 0.8190295305603392, "naucs_at_1_max": 0.1767170601891172, "naucs_at_1_std": 0.14054871010875725, "naucs_at_1_diff1": 0.8664114324042982, "naucs_at_3_max": 0.184513599607939, "naucs_at_3_std": 0.2742191216109084, "naucs_at_3_diff1": 0.8105423987776925, "naucs_at_5_max": 0.2880140692640653, "naucs_at_5_std": 0.3452634604978336, "naucs_at_5_diff1": 0.8091771509740256, "naucs_at_10_max": 0.35270774976657293, "naucs_at_10_std": 0.3808006535947707, "naucs_at_10_diff1": 0.7957633053221298, "naucs_at_20_max": 0.5644924636521266, "naucs_at_20_std": 0.6023409363745509, "naucs_at_20_diff1": 0.8036714685874354, "naucs_at_100_max": 0.540943043884213, "naucs_at_100_std": 0.8345004668534008, "naucs_at_100_diff1": 0.8160597572362192, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_subsampled": {"ndcg_at_1": 0.45, "ndcg_at_3": 0.51869, "ndcg_at_5": 0.54253, "ndcg_at_10": 0.55896, "ndcg_at_20": 0.58117, "ndcg_at_100": 0.60301, "ndcg_at_1000": 0.61773, "map_at_1": 0.45, "map_at_3": 0.50167, "map_at_5": 0.51487, "map_at_10": 0.52142, "map_at_20": 0.52749, "map_at_100": 0.53036, "map_at_1000": 0.53093, "recall_at_1": 0.45, "recall_at_3": 0.568, "recall_at_5": 0.626, "recall_at_10": 0.678, "recall_at_20": 0.766, "recall_at_100": 0.886, "recall_at_1000": 1.0, "precision_at_1": 0.45, "precision_at_3": 0.18933, "precision_at_5": 0.1252, "precision_at_10": 0.0678, "precision_at_20": 0.0383, "precision_at_100": 0.00886, "precision_at_1000": 0.001, "mrr_at_1": 0.45, "mrr_at_3": 0.5040000000000002, "mrr_at_5": 0.5152, "mrr_at_10": 0.5227079365079365, "mrr_at_20": 0.5284917879420201, "mrr_at_100": 0.5313597415923945, "mrr_at_1000": 0.5319774760028352, "naucs_at_1_max": 0.1185333595233417, "naucs_at_1_std": 0.29580567013684284, "naucs_at_1_diff1": 0.8310823021017479, "naucs_at_3_max": 0.1068842862543385, "naucs_at_3_std": 0.3483268990640717, "naucs_at_3_diff1": 0.727578385437549, "naucs_at_5_max": 0.1941680224220685, "naucs_at_5_std": 0.35541840737365077, "naucs_at_5_diff1": 0.6995137944871821, "naucs_at_10_max": 0.1797100551604054, "naucs_at_10_std": 0.38631553954309467, "naucs_at_10_diff1": 0.6701937668397527, "naucs_at_20_max": 0.20515763866507056, "naucs_at_20_std": 0.5216976675393697, "naucs_at_20_diff1": 0.6193404827394373, "naucs_at_100_max": 0.12443604641483673, "naucs_at_100_std": 0.8357417550402053, "naucs_at_100_diff1": 0.6719080360264361, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "arxivqa_subsampled": {"ndcg_at_1": 0.698, "ndcg_at_3": 0.7569, "ndcg_at_5": 0.77464, "ndcg_at_10": 0.78696, "ndcg_at_20": 0.79616, "ndcg_at_100": 0.80911, "ndcg_at_1000": 0.81176, "map_at_1": 0.698, "map_at_3": 0.74267, "map_at_5": 0.75277, "map_at_10": 0.75787, "map_at_20": 0.76044, "map_at_100": 0.76248, "map_at_1000": 0.76259, "recall_at_1": 0.698, "recall_at_3": 0.798, "recall_at_5": 0.84, "recall_at_10": 0.878, "recall_at_20": 0.914, "recall_at_100": 0.98, "recall_at_1000": 1.0, "precision_at_1": 0.698, "precision_at_3": 0.266, "precision_at_5": 0.168, "precision_at_10": 0.0878, "precision_at_20": 0.0457, "precision_at_100": 0.0098, "precision_at_1000": 0.001, "mrr_at_1": 0.696, "mrr_at_3": 0.7413333333333333, "mrr_at_5": 0.7505333333333332, "mrr_at_10": 0.7552285714285714, "mrr_at_20": 0.7583260209948288, "mrr_at_100": 0.7604873731476858, "mrr_at_1000": 0.7605635278418731, "naucs_at_1_max": -0.055869870164348266, "naucs_at_1_std": 0.10823823173816223, "naucs_at_1_diff1": 0.8869543691723738, "naucs_at_3_max": -0.07860339930705217, "naucs_at_3_std": 0.17561103758668112, "naucs_at_3_diff1": 0.8355266744982577, "naucs_at_5_max": -0.11532891507118516, "naucs_at_5_std": 0.19644084437898846, "naucs_at_5_diff1": 0.8057376043200786, "naucs_at_10_max": -0.1035255757081758, "naucs_at_10_std": 0.23232117383329737, "naucs_at_10_diff1": 0.7871016287564068, "naucs_at_20_max": -0.023179814561485438, "naucs_at_20_std": 0.25375111284823754, "naucs_at_20_diff1": 0.7496145745119744, "naucs_at_100_max": 0.06652661064425251, "naucs_at_100_std": 0.687488328664789, "naucs_at_100_diff1": 0.5939309056955991, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "tabfquad_subsampled": {"ndcg_at_1": 0.78929, "ndcg_at_3": 0.84141, "ndcg_at_5": 0.85463, "ndcg_at_10": 0.86561, "ndcg_at_20": 0.87555, "ndcg_at_100": 0.87843, "ndcg_at_1000": 0.87843, "map_at_1": 0.78929, "map_at_3": 0.82857, "map_at_5": 0.83589, "map_at_10": 0.84009, "map_at_20": 0.84283, "map_at_100": 0.8433, "map_at_1000": 0.8433, "recall_at_1": 0.78929, "recall_at_3": 0.87857, "recall_at_5": 0.91071, "recall_at_10": 0.94643, "recall_at_20": 0.98571, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.78929, "precision_at_3": 0.29286, "precision_at_5": 0.18214, "precision_at_10": 0.09464, "precision_at_20": 0.04929, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.7857142857142857, "mrr_at_3": 0.8285714285714286, "mrr_at_5": 0.8351785714285715, "mrr_at_10": 0.8397760770975058, "mrr_at_20": 0.8421972319629608, "mrr_at_100": 0.8426666387082893, "mrr_at_1000": 0.8426666387082893, "naucs_at_1_max": 0.3559822873930443, "naucs_at_1_std": 0.14919530975649437, "naucs_at_1_diff1": 0.7722114206915455, "naucs_at_3_max": 0.40353910159104817, "naucs_at_3_std": 0.3278377466689149, "naucs_at_3_diff1": 0.753359194917635, "naucs_at_5_max": 0.4595704948646128, "naucs_at_5_std": 0.3973482726423888, "naucs_at_5_diff1": 0.7223529411764712, "naucs_at_10_max": 0.3345471521942117, "naucs_at_10_std": 0.534049175225648, "naucs_at_10_diff1": 0.7597572362278245, "naucs_at_20_max": 0.6095938375350239, "naucs_at_20_std": 0.5682773109243794, "naucs_at_20_diff1": 0.8190943043884236, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tatdqa": {"ndcg_at_1": 0.5003, "ndcg_at_3": 0.5982, "ndcg_at_5": 0.6328, "ndcg_at_10": 0.66317, "ndcg_at_20": 0.67789, "ndcg_at_100": 0.69246, "ndcg_at_1000": 0.69503, "map_at_1": 0.5003, "map_at_3": 0.57406, "map_at_5": 0.59322, "map_at_10": 0.60577, "map_at_20": 0.60988, "map_at_100": 0.61195, "map_at_1000": 0.61207, "recall_at_1": 0.5003, "recall_at_3": 0.66807, "recall_at_5": 0.75225, "recall_at_10": 0.84606, "recall_at_20": 0.90379, "recall_at_100": 0.98136, "recall_at_1000": 1.0, "precision_at_1": 0.5003, "precision_at_3": 0.22269, "precision_at_5": 0.15045, "precision_at_10": 0.08461, "precision_at_20": 0.04519, "precision_at_100": 0.00981, "precision_at_1000": 0.001, "mrr_at_1": 0.49428743235117256, "mrr_at_3": 0.572359190218482, "mrr_at_5": 0.5900681499298476, "mrr_at_10": 0.6031252087926785, "mrr_at_20": 0.6073244235411775, "mrr_at_100": 0.6093170014664278, "mrr_at_1000": 0.6094377023930486, "naucs_at_1_max": 0.09792216312008274, "naucs_at_1_std": -0.11842420918666108, "naucs_at_1_diff1": 0.6834139343029724, "naucs_at_3_max": 0.08535691556922033, "naucs_at_3_std": -0.10421284851346817, "naucs_at_3_diff1": 0.5834079272394407, "naucs_at_5_max": 0.10226322306161462, "naucs_at_5_std": -0.06500024199152869, "naucs_at_5_diff1": 0.5485720805238943, "naucs_at_10_max": 0.0904215671816798, "naucs_at_10_std": 0.0785183407230477, "naucs_at_10_diff1": 0.5093558783340201, "naucs_at_20_max": 0.10634128534944767, "naucs_at_20_std": 0.17739180108969987, "naucs_at_20_diff1": 0.48084477027507894, "naucs_at_100_max": 0.054336329119673545, "naucs_at_100_std": 0.4229461634336187, "naucs_at_100_diff1": 0.5690631836637791, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shift_project": {"ndcg_at_1": 0.6, "ndcg_at_3": 0.71595, "ndcg_at_5": 0.76544, "ndcg_at_10": 0.77838, "ndcg_at_20": 0.78108, "ndcg_at_100": 0.78658, "ndcg_at_1000": 0.78807, "map_at_1": 0.6, "map_at_3": 0.69, "map_at_5": 0.7175, "map_at_10": 0.72285, "map_at_20": 0.72368, "map_at_100": 0.72441, "map_at_1000": 0.72451, "recall_at_1": 0.6, "recall_at_3": 0.79, "recall_at_5": 0.91, "recall_at_10": 0.95, "recall_at_20": 0.96, "recall_at_100": 0.99, "recall_at_1000": 1.0, "precision_at_1": 0.6, "precision_at_3": 0.26333, "precision_at_5": 0.182, "precision_at_10": 0.095, "precision_at_20": 0.048, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.61, "mrr_at_3": 0.705, "mrr_at_5": 0.7264999999999999, "mrr_at_10": 0.7333730158730158, "mrr_at_20": 0.7333730158730158, "mrr_at_100": 0.7342514817574138, "mrr_at_1000": 0.7342514817574138, "naucs_at_1_max": -0.23055802908875025, "naucs_at_1_std": -0.3323538141881866, "naucs_at_1_diff1": 0.5277382012466599, "naucs_at_3_max": 0.034361699569576964, "naucs_at_3_std": -0.4302786928607498, "naucs_at_3_diff1": 0.5138384591338641, "naucs_at_5_max": -0.054310613134142136, "naucs_at_5_std": -0.7573918456271357, "naucs_at_5_diff1": 0.45834630148355543, "naucs_at_10_max": -0.21027077497664926, "naucs_at_10_std": -0.6262371615312674, "naucs_at_10_diff1": 0.6242763772175585, "naucs_at_20_max": -0.22000466853407427, "naucs_at_20_std": -0.49486461251166075, "naucs_at_20_diff1": 0.5303454715219444, "naucs_at_100_max": -1.7399626517273008, "naucs_at_100_std": -1.7399626517273008, "naucs_at_100_diff1": 0.7222222222222041, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "<image>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
}
|
10 |
+
],
|
11 |
+
"bos_token": {
|
12 |
+
"content": "<bos>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false
|
17 |
+
},
|
18 |
+
"eos_token": {
|
19 |
+
"content": "<eos>",
|
20 |
+
"lstrip": false,
|
21 |
+
"normalized": false,
|
22 |
+
"rstrip": false,
|
23 |
+
"single_word": false
|
24 |
+
},
|
25 |
+
"pad_token": {
|
26 |
+
"content": "<pad>",
|
27 |
+
"lstrip": false,
|
28 |
+
"normalized": false,
|
29 |
+
"rstrip": false,
|
30 |
+
"single_word": false
|
31 |
+
},
|
32 |
+
"unk_token": {
|
33 |
+
"content": "<unk>",
|
34 |
+
"lstrip": false,
|
35 |
+
"normalized": false,
|
36 |
+
"rstrip": false,
|
37 |
+
"single_word": false
|
38 |
+
}
|
39 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1df2ab04780faccf51a881d7c5a7026cc6f979083af2eebf709d051b8d47134b
|
3 |
+
size 17763458
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training_config.yml
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
config:
|
2 |
+
(): colpali_engine.utils.train_colpali_engine_models.ColModelTrainingConfig
|
3 |
+
output_dir: !path ../../../models/train_colpali-docmatix-3b-mix-448
|
4 |
+
processor:
|
5 |
+
() : colpali_engine.utils.wrapper.AutoProcessorWrapper
|
6 |
+
pretrained_model_name_or_path: "./models/paligemma-3b-mix-448"
|
7 |
+
max_length: 50
|
8 |
+
model:
|
9 |
+
(): colpali_engine.utils.wrapper.AutoColModelWrapper
|
10 |
+
pretrained_model_name_or_path: "./models/paligemma-3b-mix-448"
|
11 |
+
training_objective: "colbertv1"
|
12 |
+
# attn_implementation: "eager"
|
13 |
+
torch_dtype: !ext torch.bfloat16
|
14 |
+
# device_map: "auto"
|
15 |
+
# quantization_config:
|
16 |
+
# (): transformers.BitsAndBytesConfig
|
17 |
+
# load_in_4bit: true
|
18 |
+
# bnb_4bit_quant_type: "nf4"
|
19 |
+
# bnb_4bit_compute_dtype: "bfloat16"
|
20 |
+
# bnb_4bit_use_double_quant: true
|
21 |
+
|
22 |
+
dataset_loading_func: !ext colpali_engine.utils.dataset_transformation.load_train_set_with_docmatix
|
23 |
+
eval_dataset_loader: !import ../data/test_data.yaml
|
24 |
+
|
25 |
+
max_length: 50
|
26 |
+
run_eval: true
|
27 |
+
add_suffix: true
|
28 |
+
loss_func:
|
29 |
+
(): colpali_engine.loss.colbert_loss.ColbertPairwiseCELoss
|
30 |
+
tr_args: !import ../tr_args/default_tr_args.yaml
|
31 |
+
peft_config:
|
32 |
+
(): peft.LoraConfig
|
33 |
+
r: 32
|
34 |
+
lora_alpha: 32
|
35 |
+
lora_dropout: 0.1
|
36 |
+
init_lora_weights: "gaussian"
|
37 |
+
bias: "none"
|
38 |
+
task_type: "FEATURE_EXTRACTION"
|
39 |
+
target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
|
40 |
+
# target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
|
41 |
+
|