{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ceph can not be used\n", "ATTENTION_MODE: math\n" ] } ], "source": [ "import shutil\n", "import os\n", "import torch\n", "import pickle, easydict\n", "\n", "# if mp.get_start_method(allow_none=True) != 'spawn':\n", "# mp.set_start_method('spawn')\n", "from core.config import Config_Hulk as Config\n", "from core.solvers import solver_entry\n", "# %pip install pip easydict timm json_tricks xtcocotools pycocotools dict_recursive_update scikit-learn numpy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[2024-09-20 02:38:11,099][ solver.py][line: 88][ INFO] auto_denan disabled!\n", "[2024-09-20 02:38:11,103][ solver_deter.py][line: 58][ INFO] deterministic mode, seed: 233, worker_rank: True, cudnn_deterministic: False\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "backbone of task5 has been overided to {'type': 'vit_base_patch16_mask', 'kwargs': {'task_sp_list': ['rel_pos_h', 'rel_pos_w'], 'pretrained': True, 'lms_checkpoint_train': 'fairscale', 'window': False, 'test_pos_mode': False, 'learnable_pos': True, 'drop_path_rate': 0.2, 'img_size': 1344, 'num_encoded_tokens': 192, 'vis_patch_token_ratio': 1, 'vis_label_token_ratio': 0.0}}\n", "decoder of task5 has been overided to {'type': 'UniHCPv2_Head', 'kwargs': {'predictor': 'hulk', 'task': 'recons', 'modality_share_list': ['predictor.mask_token'], 'task_sp_list': ['predictor.query_embed_patch', 'predictor.query_embed_label', 'predictor.class_embed', 'predictor.fc_bias'], 'loss_weight': 1.0, 'transformer_predictor_cfg': {'hidden_dim': 256, 'num_queries': 20, 'nheads': 8, 'dim_feedforward': 2048, 'dec_layers': 9, 'pre_norm': False, 'arch': 'fan_in', 'enforce_input_project': False, 'mask_on': False, 'num_feature_levels': 1, 'cross_pos_embed': 'anchor', 'self_attn_mask_type': 'patch_diag_label_row', 'cls_out_dim': 1, 'detach_from_peddet': True}, 'loss_cfg': {'type': 'CEL_Sigmoid'}}}\n", "dataset of task5 has been overided to {'type': 'MultiAttrDataset', 'kwargs': {'text_label_return': True, 'task_spec': {'dataset': ['rap2', 'PA_100k', 'parse27k', 'market', 'HARDHC'], 'data_path': ['/mnt/path...to.../pedattr_public/rap2/dataset.pkl', '/mnt/path...to.../pedattr_public/PA-100k/dataset.pkl', '/mnt/path...to.../pedattr_public/Parse27k/parse27k/parse27k/dataset.pkl', '/mnt/path...to.../pedattr_public/market/dataset.pkl', '/mnt/path...to.../pedattr_public/HARDHC/dataset.pkl'], 'root_path': ['/mnt/path...to.../pedattr_public/rap2/RAP_dataset/', '/mnt/path...to.../pedattr_public/PA-100k/data/', '/mnt/path...to.../pedattr_public/Parse27k/parse27k/parse27k/images', '/mnt/path...to.../pedattr_public/market/bounding_box_train', '/mnt/path...to.../pedattr_public/HARDHC/croped_image/']}, 'augmentation': {'height': 256, 'width': 192}}}\n", "sampler of task5 has been overided to {'batch_size': 147, 'shuffle_strategy': 1}\n", "patch_neck of task5 has been overided to {'type': 'MAEdecoder_proj_neck', 'kwargs': {'mask_dim': 256, 'modality': 'rgb'}}\n", "patch_adapter of task5 has been overided to {'type': 'rgb_adapter', 'kwargs': {'pretrained': True, 'stride_level': 1, 'in_chans': 3, 'learnable_pos': False, 'test_pos_mode': False, 'img_size': [256, 192], 'task_sp_list': ['pos_embed']}}\n", "patch_proj of task5 has been overided to {'type': 'rgb_projector', 'kwargs': {'loss_cfg': {'type': 'MaskedMSELoss', 'kwargs': {'stride': 
1, 'norm_pix_loss': True, 'pix_loss': True, 'pix_loss_weight': 1.0, 'norm_pix_loss_weight': 1.0}}}}\n", "label_neck of task5 has been overided to {'type': 'MAEdecoder_proj_neck', 'kwargs': {'mask_dim': 256, 'modality': 'text'}}\n", "label_adapter of task5 has been overided to {'type': 'text_adapter', 'kwargs': {'pretrained': True, 'task_sp_list': ['text_vectors'], 'one_way_semantics': True, 'description_dict_name': 'multi_rap2_PA_100k_parse27k_market_HARDHC_attr_name'}}\n", "label_proj of task5 has been overided to {'type': 'text_projector', 'kwargs': {'task_sp_list': ['text_vectors', 'translate_weight', 'translate_bias', 'post_mul_norm'], 'one_way_semantics': True, 'post_mul_norm': True, 'replace_post_mul_norm': False, 'translate_weight_scale': 5, 'description_dict_name': 'multi_rap2_PA_100k_parse27k_market_HARDHC_attr_name', 'pre_proj_type': '', 'loss_cfg': {'type': 'MaskedOneSideBCELoss', 'kwargs': {'use_focal_weight': True, 'loss_weight': 1.0, 'dataset_weight': [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'sample_weight': [0.00172477, 0.05791431, 0.2792891, 0.00459644, 0.01987675, 0.06484867, 0.02327336, 0.01420398, 0.06937013, 0.03476447, 0.08533858, 0.0091179, 0.0125145, 0.02894172, 0.00816949, 0.17255632, 0.00890175, 0.00613153, 0.00838123, 0.07975844, 0.03529381, 0.07885856, 0.06067129, 0.02532455, 0.00429207, 0.06790121, 0.02532014, 0.00639179, 0.02070164, 0.00790041, 0.01142935, 0.00823125, 0.00310547, 0.00732696, 0.08890281, 0.00265994, 0.12081324, 0.16404275, 0.010578, 0.09486231, 0.040896, 0.23313939, 0.02223673, 0.28135352, 0.01603462, 0.01012806, 0.00799305, 0.01450835, 0.00697848, 0.00314958, 0.00536399, 0.00762692, 0.03982408, 0.00306577, 0.01728739, 0.0714522, 0.23161312, 0.16539257, 0.01964296, 0.0599655, 0.04277957, 0.01663895, 0.00187475, 0.00670499, 0.0128674, 0.28255336, 0.06885843, 0.0455939, 0.00238203, 0.07344605, 0.07651623, 0.06356061, 0.00378038, 0.00534193, 0.36698324, 0.02468052, 0.18279907, 0.14001068, 0.1169667, 0.14002832, 0.00080283, 0.04727897, 0.05596016, 0.00868119, 0.00850474, 0.00013234, 0.02891966, 0.0113279, 0.00466261, 0.00932522, 0.04154444, 0.00932522, 0.00466261, 0.0113279, 0.0128277, 0.05136371, 0.05703648, 0.00839005, 0.00951049, 0.10332735, 0.04794505, 0.01736679, 0.05591605, 0.04794505, 0.01736679, 0.05591605, 0.04949779, 0.01482155, 0.05690856, 0.04949779, 0.01482155, 0.05690856, 0.00515225, 0.00014998, 0.11592566, 0.02974014, 0.00336131, 0.08812644, 0.00546986, 0.00292902, 0.11282902, 0.03215746, 0.00087341, 0.08819702, 0.01577436, 0.01377169, 0.00681968, 0.02183531, 0.00826654, 0.00613153, 0.0091179, 0.00096605, 0.00241732, 0.00012792, 0.00481259, 0.00091752, 0.00754752, 0.00346277, 0.00502433, 0.00635209, 0.00219676, 0.00692113, 
0.01726093, 0.00282756, 0.04876553, 0.03532027, 0.05422657, 0.01836813, 0.00129247, 0.0237233, 0.00093958, 0.04455727, 0.01074562, 0.00082048, 0.07086552, 0.02805507, 0.0062771, 0.02825357, 0.0273978, 0.05809076, 0.00874295, 0.01927683, 0.01020305, 0.04525424, 0.01257185, 0.00412004, 0.03352934, 0.00677998]}}}}\n", "override tensor.cuda() to preserve task_specific flag\n", "override tensor.half() to preserve task_specific flag\n" ] } ], "source": [ "# set config path\n", "# config_path = '/dscilab_dungvo/workspace/BA-PRE_THESIS/my_source/OpenGVBackbone/PATH/experiments/L2_full_setting_joint_v100_32g/v100_32g_vitbase_size224.yaml'\n", "config_path = \"/dscilab_dungvo/workspace/BA-PRE_THESIS/my_source/OpenGVBackbone/Hulk/experiments/release/Hulk_vit-B.yaml\"\n", "C = Config(config_path)\n", "\n", "# Disable parameter for folder path\n", "\n", "# if not os.path.exists(\"./temp\"):\n", "# os.makedirs(\"./temp\")\n", "# pickle.dump(\n", "# easydict.EasyDict({\"image_name\": \"temp\", \"label\": \"temp\", \"partition\": {\"test\": \"temp\"}}),\n", "# open(\"./temp/temp.pkl\", \"wb\"),\n", "# )\n", "# C.config[\"common\"][\"dataset\"][\"kwargs\"][\"task_spec\"][\"data_path\"] = \"./temp/temp.pkl\"\n", "# C.config[\"common\"][\"dataset\"][\"kwargs\"][\"task_spec\"][\"root_path\"] = \"./\"\n", "C.config[\"expname\"] = \"TEMP_EXPERIMENT\"\n", "\n", "# Disable parameter for slurm\n", "C.ginfo.neck_share_group = None\n", "C.ginfo.group = None\n", "C.ginfo.decoder_share_group = None\n", "\n", "# # Set pretrained model path\n", "# C.config[\"common\"][\"backbone\"][\"kwargs\"][\n", "# \"pretrain_path\"\n", "# ] = '/dscilab_dungvo/workspace/BA-PRE_THESIS/my_source/OpenGVBackbone/Hulk/hulk_checkpoint/Pretrain/ckpt_task5_iter_newest.pth.tar'\n", "C.config[\"tasks\"][5]['backbone']['kwargs']['pretrained'] = False\n", "C.config[\"tasks\"][5]['patch_adapter']['kwargs']['pretrained'] = False\n", "C.config[\"tasks\"][5]['label_adapter']['kwargs']['pretrained'] = False\n", "\n", "S = solver_entry(C)\n", "# S.config.dataset.train=False\n", "# pseudo_dataset = S.create_dataset()\n", "# transform = pseudo_dataset.transform\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Rank 0] fairscale checkpoint success\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/dscilab_dungvo/workspace/BA-PRE_THESIS/my_source/OpenGVBackbone/Hulk/core/models/input_adapter/text_adapter.py:70: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
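To double-check that the overrides echoed above actually landed in the task-5 entry, the configuration can be read back before moving on. This is only a sketch: the key paths mirror the C.config["tasks"][5][...] accesses used in the cell above, and the exact nesting is an assumption about the Hulk experiment YAML layout.

```python
# Read-back sanity check (sketch). Key paths follow the C.config["tasks"][5][...]
# pattern used above; the nesting is assumed from the override log, not verified.
task5 = C.config["tasks"][5]
print(task5["backbone"]["kwargs"]["pretrained"])     # False after the override above
print(task5["sampler"]["batch_size"])                # 147 according to the override log
print(task5["patch_adapter"]["kwargs"]["img_size"])  # [256, 192] pedestrian-attribute crop size
```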
Please open an issue on GitHub for any issues related to this experimental feature.\n", " text_vectors = torch.load(f'./{description_dict_name}.pth')\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Number of conv/bn params: 0.00M\n", "Number of linear params: 85.02M\n", "Position interpolate from (14, 14) to [16, 12]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/dscilab_dungvo/workspace/BA-PRE_THESIS/my_source/OpenGVBackbone/Hulk/core/models/output_projector/text_projector.py:79: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", " text_vectors = torch.load(f'./{description_dict_name}.pth')\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "add param pos_embed as task_specific\n", "add param proj.weight as rgb_specific\n", "add param proj.bias as rgb_specific\n", "add param type_embed.weight as rgb_specific\n", "add param text_vectors as task_specific\n", "add param output_proj.weight as rgb_specific\n", "add param output_proj.bias as rgb_specific\n", "add param text_vectors as task_specific\n", "add param patch_proj.weight as text_specific\n", "add param class_proj.weight as text_specific\n", "add param post_mul_norm.weight as task_specific\n", "add param post_mul_norm.bias as task_specific\n", "add param global_tokens as backbone_specific\n", "add param blocks.0.norm1.weight as backbone_specific\n", "add param blocks.0.norm1.bias as backbone_specific\n", "add param blocks.0.attn.qkv.weight as backbone_specific\n", "add param blocks.0.attn.qkv.bias as backbone_specific\n", "add param blocks.0.attn.proj.weight as backbone_specific\n", "add param blocks.0.attn.proj.bias as backbone_specific\n", "add param blocks.0.norm2.weight as backbone_specific\n", "add param blocks.0.norm2.bias as backbone_specific\n", "add param blocks.0.mlp.fc1.weight as backbone_specific\n", "add param blocks.0.mlp.fc1.bias as backbone_specific\n", "add param blocks.0.mlp.fc2.weight as backbone_specific\n", "add param blocks.0.mlp.fc2.bias as backbone_specific\n", "add param blocks.1.norm1.weight as backbone_specific\n", "add param blocks.1.norm1.bias as backbone_specific\n", "add param blocks.1.attn.qkv.weight as backbone_specific\n", "add param blocks.1.attn.qkv.bias as backbone_specific\n", "add param blocks.1.attn.proj.weight as backbone_specific\n", "add param blocks.1.attn.proj.bias as backbone_specific\n", "add param blocks.1.norm2.weight as backbone_specific\n", "add param blocks.1.norm2.bias as backbone_specific\n", "add param blocks.1.mlp.fc1.weight as backbone_specific\n", "add param blocks.1.mlp.fc1.bias as backbone_specific\n", "add param blocks.1.mlp.fc2.weight as backbone_specific\n", "add param blocks.1.mlp.fc2.bias as backbone_specific\n", "add 
param blocks.2.norm1.weight as backbone_specific\n", "add param blocks.2.norm1.bias as backbone_specific\n", "add param blocks.2.attn.qkv.weight as backbone_specific\n", "add param blocks.2.attn.qkv.bias as backbone_specific\n", "add param blocks.2.attn.proj.weight as backbone_specific\n", "add param blocks.2.attn.proj.bias as backbone_specific\n", "add param blocks.2.norm2.weight as backbone_specific\n", "add param blocks.2.norm2.bias as backbone_specific\n", "add param blocks.2.mlp.fc1.weight as backbone_specific\n", "add param blocks.2.mlp.fc1.bias as backbone_specific\n", "add param blocks.2.mlp.fc2.weight as backbone_specific\n", "add param blocks.2.mlp.fc2.bias as backbone_specific\n", "add param blocks.3.norm1.weight as backbone_specific\n", "add param blocks.3.norm1.bias as backbone_specific\n", "add param blocks.3.attn.qkv.weight as backbone_specific\n", "add param blocks.3.attn.qkv.bias as backbone_specific\n", "add param blocks.3.attn.proj.weight as backbone_specific\n", "add param blocks.3.attn.proj.bias as backbone_specific\n", "add param blocks.3.norm2.weight as backbone_specific\n", "add param blocks.3.norm2.bias as backbone_specific\n", "add param blocks.3.mlp.fc1.weight as backbone_specific\n", "add param blocks.3.mlp.fc1.bias as backbone_specific\n", "add param blocks.3.mlp.fc2.weight as backbone_specific\n", "add param blocks.3.mlp.fc2.bias as backbone_specific\n", "add param blocks.4.norm1.weight as backbone_specific\n", "add param blocks.4.norm1.bias as backbone_specific\n", "add param blocks.4.attn.qkv.weight as backbone_specific\n", "add param blocks.4.attn.qkv.bias as backbone_specific\n", "add param blocks.4.attn.proj.weight as backbone_specific\n", "add param blocks.4.attn.proj.bias as backbone_specific\n", "add param blocks.4.norm2.weight as backbone_specific\n", "add param blocks.4.norm2.bias as backbone_specific\n", "add param blocks.4.mlp.fc1.weight as backbone_specific\n", "add param blocks.4.mlp.fc1.bias as backbone_specific\n", "add param blocks.4.mlp.fc2.weight as backbone_specific\n", "add param blocks.4.mlp.fc2.bias as backbone_specific\n", "add param blocks.5.norm1.weight as backbone_specific\n", "add param blocks.5.norm1.bias as backbone_specific\n", "add param blocks.5.attn.qkv.weight as backbone_specific\n", "add param blocks.5.attn.qkv.bias as backbone_specific\n", "add param blocks.5.attn.proj.weight as backbone_specific\n", "add param blocks.5.attn.proj.bias as backbone_specific\n", "add param blocks.5.norm2.weight as backbone_specific\n", "add param blocks.5.norm2.bias as backbone_specific\n", "add param blocks.5.mlp.fc1.weight as backbone_specific\n", "add param blocks.5.mlp.fc1.bias as backbone_specific\n", "add param blocks.5.mlp.fc2.weight as backbone_specific\n", "add param blocks.5.mlp.fc2.bias as backbone_specific\n", "add param blocks.6.norm1.weight as backbone_specific\n", "add param blocks.6.norm1.bias as backbone_specific\n", "add param blocks.6.attn.qkv.weight as backbone_specific\n", "add param blocks.6.attn.qkv.bias as backbone_specific\n", "add param blocks.6.attn.proj.weight as backbone_specific\n", "add param blocks.6.attn.proj.bias as backbone_specific\n", "add param blocks.6.norm2.weight as backbone_specific\n", "add param blocks.6.norm2.bias as backbone_specific\n", "add param blocks.6.mlp.fc1.weight as backbone_specific\n", "add param blocks.6.mlp.fc1.bias as backbone_specific\n", "add param blocks.6.mlp.fc2.weight as backbone_specific\n", "add param blocks.6.mlp.fc2.bias as backbone_specific\n", "add param 
blocks.7.norm1.weight as backbone_specific\n", "add param blocks.7.norm1.bias as backbone_specific\n", "add param blocks.7.attn.qkv.weight as backbone_specific\n", "add param blocks.7.attn.qkv.bias as backbone_specific\n", "add param blocks.7.attn.proj.weight as backbone_specific\n", "add param blocks.7.attn.proj.bias as backbone_specific\n", "add param blocks.7.norm2.weight as backbone_specific\n", "add param blocks.7.norm2.bias as backbone_specific\n", "add param blocks.7.mlp.fc1.weight as backbone_specific\n", "add param blocks.7.mlp.fc1.bias as backbone_specific\n", "add param blocks.7.mlp.fc2.weight as backbone_specific\n", "add param blocks.7.mlp.fc2.bias as backbone_specific\n", "add param blocks.8.norm1.weight as backbone_specific\n", "add param blocks.8.norm1.bias as backbone_specific\n", "add param blocks.8.attn.qkv.weight as backbone_specific\n", "add param blocks.8.attn.qkv.bias as backbone_specific\n", "add param blocks.8.attn.proj.weight as backbone_specific\n", "add param blocks.8.attn.proj.bias as backbone_specific\n", "add param blocks.8.norm2.weight as backbone_specific\n", "add param blocks.8.norm2.bias as backbone_specific\n", "add param blocks.8.mlp.fc1.weight as backbone_specific\n", "add param blocks.8.mlp.fc1.bias as backbone_specific\n", "add param blocks.8.mlp.fc2.weight as backbone_specific\n", "add param blocks.8.mlp.fc2.bias as backbone_specific\n", "add param blocks.9.norm1.weight as backbone_specific\n", "add param blocks.9.norm1.bias as backbone_specific\n", "add param blocks.9.attn.qkv.weight as backbone_specific\n", "add param blocks.9.attn.qkv.bias as backbone_specific\n", "add param blocks.9.attn.proj.weight as backbone_specific\n", "add param blocks.9.attn.proj.bias as backbone_specific\n", "add param blocks.9.norm2.weight as backbone_specific\n", "add param blocks.9.norm2.bias as backbone_specific\n", "add param blocks.9.mlp.fc1.weight as backbone_specific\n", "add param blocks.9.mlp.fc1.bias as backbone_specific\n", "add param blocks.9.mlp.fc2.weight as backbone_specific\n", "add param blocks.9.mlp.fc2.bias as backbone_specific\n", "add param blocks.10.norm1.weight as backbone_specific\n", "add param blocks.10.norm1.bias as backbone_specific\n", "add param blocks.10.attn.qkv.weight as backbone_specific\n", "add param blocks.10.attn.qkv.bias as backbone_specific\n", "add param blocks.10.attn.proj.weight as backbone_specific\n", "add param blocks.10.attn.proj.bias as backbone_specific\n", "add param blocks.10.norm2.weight as backbone_specific\n", "add param blocks.10.norm2.bias as backbone_specific\n", "add param blocks.10.mlp.fc1.weight as backbone_specific\n", "add param blocks.10.mlp.fc1.bias as backbone_specific\n", "add param blocks.10.mlp.fc2.weight as backbone_specific\n", "add param blocks.10.mlp.fc2.bias as backbone_specific\n", "add param blocks.11.norm1.weight as backbone_specific\n", "add param blocks.11.norm1.bias as backbone_specific\n", "add param blocks.11.attn.qkv.weight as backbone_specific\n", "add param blocks.11.attn.qkv.bias as backbone_specific\n", "add param blocks.11.attn.proj.weight as backbone_specific\n", "add param blocks.11.attn.proj.bias as backbone_specific\n", "add param blocks.11.norm2.weight as backbone_specific\n", "add param blocks.11.norm2.bias as backbone_specific\n", "add param blocks.11.mlp.fc1.weight as backbone_specific\n", "add param blocks.11.mlp.fc1.bias as backbone_specific\n", "add param blocks.11.mlp.fc2.weight as backbone_specific\n", "add param blocks.11.mlp.fc2.bias as backbone_specific\n", "add param 
norm.weight as backbone_specific\n", "add param norm.bias as backbone_specific\n", "add param predictor.mask_token as modality_share\n", "add param predictor.query_embed_patch as task_specific\n", "add param predictor.query_embed_label as task_specific\n", "add param predictor.transformer_self_attention_layers.0.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.0.self_attn.in_proj_bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.0.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.0.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.0.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.0.norm.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.1.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.1.self_attn.in_proj_bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.1.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.1.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.1.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.1.norm.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.2.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.2.self_attn.in_proj_bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.2.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.2.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.2.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.2.norm.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.3.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.3.self_attn.in_proj_bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.3.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.3.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.3.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.3.norm.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.4.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.4.self_attn.in_proj_bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.4.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.4.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.4.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.4.norm.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.5.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.5.self_attn.in_proj_bias as decoder_specific\n", "add param 
predictor.transformer_self_attention_layers.5.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.5.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.5.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.5.norm.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.6.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.6.self_attn.in_proj_bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.6.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.6.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.6.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.6.norm.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.7.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.7.self_attn.in_proj_bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.7.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.7.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.7.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.7.norm.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.8.self_attn.in_proj_weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.8.self_attn.in_proj_bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.8.self_attn.out_proj.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.8.self_attn.out_proj.bias as decoder_specific\n", "add param predictor.transformer_self_attention_layers.8.norm.weight as decoder_specific\n", "add param predictor.transformer_self_attention_layers.8.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.0.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.0.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.0.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.0.linear2.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.0.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.0.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.1.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.1.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.1.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.1.linear2.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.1.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.1.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.2.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.2.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.2.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.2.linear2.bias as decoder_specific\n", "add param 
predictor.transformer_ffn_layers.2.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.2.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.3.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.3.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.3.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.3.linear2.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.3.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.3.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.4.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.4.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.4.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.4.linear2.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.4.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.4.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.5.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.5.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.5.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.5.linear2.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.5.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.5.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.6.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.6.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.6.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.6.linear2.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.6.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.6.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.7.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.7.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.7.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.7.linear2.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.7.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.7.norm.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.8.linear1.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.8.linear1.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.8.linear2.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.8.linear2.bias as decoder_specific\n", "add param predictor.transformer_ffn_layers.8.norm.weight as decoder_specific\n", "add param predictor.transformer_ffn_layers.8.norm.bias as decoder_specific\n", "add param predictor.decoder_norm.weight as decoder_specific\n", "add param predictor.decoder_norm.bias as decoder_specific\n", "add param predictor.level_embed.weight as decoder_specific\n", "add param predictor.class_embed.weight as task_specific\n", "add param predictor.class_embed.bias as task_specific\n", "add param predictor.mask_embed.layers.0.weight as decoder_specific\n", "add param 
predictor.mask_embed.layers.0.bias as decoder_specific\n", "add param predictor.mask_embed.layers.1.weight as decoder_specific\n", "add param predictor.mask_embed.layers.1.bias as decoder_specific\n", "add param predictor.mask_embed.layers.2.weight as decoder_specific\n", "add param predictor.mask_embed.layers.2.bias as decoder_specific\n", "add param predictor.adapt_pos2d.0.weight as decoder_specific\n", "add param predictor.adapt_pos2d.0.bias as decoder_specific\n", "add param predictor.adapt_pos2d.2.weight as decoder_specific\n", "add param predictor.adapt_pos2d.2.bias as decoder_specific\n", "add param predictor.adapt_pos1d.0.weight as decoder_specific\n", "add param predictor.adapt_pos1d.0.bias as decoder_specific\n", "add param predictor.adapt_pos1d.2.weight as decoder_specific\n", "add param predictor.adapt_pos1d.2.bias as decoder_specific\n", "add param mask_map.0.weight as decoder_specific\n", "add param mask_map.0.bias as decoder_specific\n", "add param mask_map.0.weight as decoder_specific\n", "add param mask_map.0.bias as decoder_specific\n", "aio_entry_v2mae_shareneck(\n", " (backbone_module): maskViT(\n", " (blocks): ModuleList(\n", " (0): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): Identity()\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (1): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.0181818176060915)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (2): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.036363635212183)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (3): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.05454545468091965)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, 
out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (4): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.072727270424366)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (5): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.09090908616781235)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (6): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.10909091681241989)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (7): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.12727272510528564)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (8): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.1454545557498932)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (9): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.16363637149333954)\n", " (norm2): LayerNorm((768,), eps=1e-06, 
elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (10): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.1818181872367859)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (11): Block(\n", " (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (attn): Attention(\n", " (qkv): Linear(in_features=768, out_features=2304, bias=True)\n", " (proj): Linear(in_features=768, out_features=768, bias=True)\n", " )\n", " (drop_path): DropPath(p=0.20000000298023224)\n", " (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " (mlp): Mlp(\n", " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", " (act): GELU(approximate='none')\n", " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", " (drop): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " )\n", " (ln_pre): Identity()\n", " (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True)\n", " )\n", " (decoder_module): UniHCPv2_Head(\n", " (predictor): Hulk_Decoder(\n", " (transformer_self_attention_layers): ModuleList(\n", " (0-8): 9 x SelfAttentionLayer(\n", " (self_attn): MultiheadAttention(\n", " (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)\n", " )\n", " (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)\n", " (dropout): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " (transformer_cross_attention_layers): ModuleList()\n", " (transformer_ffn_layers): ModuleList(\n", " (0-8): 9 x FFNLayer(\n", " (linear1): Linear(in_features=256, out_features=2048, bias=True)\n", " (dropout): Dropout(p=0.0, inplace=False)\n", " (linear2): Linear(in_features=2048, out_features=256, bias=True)\n", " (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)\n", " )\n", " )\n", " (decoder_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)\n", " (level_embed): Embedding(1, 256)\n", " (class_embed): Linear(in_features=256, out_features=1, bias=True)\n", " (mask_embed): MLP(\n", " (layers): ModuleList(\n", " (0-2): 3 x Linear(in_features=256, out_features=256, bias=True)\n", " )\n", " )\n", " (adapt_pos2d): Sequential(\n", " (0): Linear(in_features=256, out_features=256, bias=True)\n", " (1): ReLU()\n", " (2): Linear(in_features=256, out_features=256, bias=True)\n", " )\n", " (adapt_pos1d): Sequential(\n", " (0): Linear(in_features=256, out_features=256, bias=True)\n", " (1): ReLU()\n", " (2): Linear(in_features=256, out_features=256, bias=True)\n", " )\n", " )\n", " )\n", " (adapter_rgb): RGBAdapter(\n", " (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))\n", " (type_embed): Embedding(1, 768)\n", " )\n", " (adapter_text): TextAdapter()\n", " (neck_patch): MAEdecoder_proj_neck(\n", " (mask_map): Sequential(\n", " (0): Linear(in_features=768, out_features=256, bias=True)\n", " )\n", " )\n", " (neck_label): 
MAEdecoder_proj_neck(\n", " (mask_map): Sequential(\n", " (0): Linear(in_features=768, out_features=256, bias=True)\n", " )\n", " )\n", " (proj_rgb): RGBProjector(\n", " (output_proj): Linear(in_features=256, out_features=768, bias=True)\n", " (loss_fn): MaskedMSELoss()\n", " )\n", " (proj_text): TextProjector(\n", " (patch_proj): Linear(in_features=256, out_features=256, bias=False)\n", " (class_proj): Linear(in_features=768, out_features=256, bias=False)\n", " (post_mul_norm): LayerNorm((168,), eps=1e-06, elementwise_affine=True)\n", " (loss_fn): MaskedOneSideBCELoss()\n", " )\n", ")\n" ] } ], "source": [ "model = S.create_model().cuda()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_1514780/183409731.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", " checkpoint = torch.load(a, map_location='cpu')['state_dict']\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "a = '/dscilab_dungvo/workspace/BA-PRE_THESIS/my_source/OpenGVBackbone/Hulk/hulk_checkpoint/Pretrain/ckpt_task5_iter_newest.pth.tar'\n", "checkpoint = torch.load(a, map_location='cpu')['state_dict']\n", "new_checkpoint = {}\n", "for key in checkpoint.keys():\n", " new_key = '.'.join(key.split('.')[1:])\n", " new_checkpoint[new_key] = checkpoint[key]\n", "\n", "model.load_state_dict(new_checkpoint, strict=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'pred': {'logit': tensor([[-2.2218, -0.5062, 0.1172, -3.4788, -1.5123, -0.4237, -1.8674, -4.5272,\n", " -1.5651, -0.3951, 0.0449, -2.3393, -2.2826, -2.6548, -2.2335, -0.1697,\n", " -1.7212, -0.9769, -1.3352, -0.7082, 0.1437, -0.1626, -1.5070, -1.3349,\n", " -2.5496, -1.7293, -1.8053, -2.9313, -1.4735, -1.7461, -1.5343, -1.0918,\n", " -1.7906, -0.8116, -1.8156, -2.7187, -0.5243, 0.4708, -1.5717, 0.0535,\n", " -0.9919, 0.7430, -0.4153, -0.2946, -1.9957, -3.2230, -1.1970, -0.5759,\n", " -2.1625, -2.5628, -3.2574, -4.7830, -1.0126, -2.9042, -1.2592, -1.3643,\n", " -0.0944, 0.2616, -3.3232, -3.1323, -1.9324, -1.8244, -3.1203, -1.3809,\n", " 1.6132, 1.6032, 0.5312, -0.8020, 0.3160, 0.0911, 0.2025, -1.7385,\n", " -1.7807, 0.5448, 1.7459, 1.0756, 0.9205, -0.1745, 0.2260, -0.6294,\n", " -2.7521, -2.2809, 0.4320, 0.9061, 0.7648, -0.9934, -0.4588, -0.3298,\n", " -0.4703, -0.2817, 1.5337, 0.3704, -0.2654, 0.0082, -0.9022, 3.5166,\n", " 0.6163, 0.2661, 0.0712, 1.6265, 2.0602, -0.7545, -0.7451, 1.3032,\n", " -2.1849, -0.8359, 1.5142, 0.5111, 1.2664, 1.2274, 1.5934, 1.5000,\n", " 1.4737, -1.3929, 0.7773, 0.4433, -2.3016, 
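The FutureWarning printed for the checkpoint-loading cell comes from torch.load defaulting to weights_only=False. A minimal variant of the same load is sketched below; it assumes the .pth.tar archive stores only tensors and plain containers under 'state_dict' (otherwise keep weights_only=False), and that the leading key component being stripped is the usual 'module.' prefix added by distributed wrappers.

```python
import torch

# Sketch of the checkpoint load above, using the stricter torch.load mode.
ckpt_path = '/dscilab_dungvo/workspace/BA-PRE_THESIS/my_source/OpenGVBackbone/Hulk/hulk_checkpoint/Pretrain/ckpt_task5_iter_newest.pth.tar'
state_dict = torch.load(ckpt_path, map_location='cpu', weights_only=True)['state_dict']
# Drop the leading wrapper component from every key
# (e.g. 'module.backbone_module.x' -> 'backbone_module.x').
state_dict = {'.'.join(k.split('.')[1:]): v for k, v in state_dict.items()}
model.load_state_dict(state_dict, strict=True)
```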
1.4572, 1.2776, -0.8464,\n", " 0.3172, 1.4062, -2.7292, -0.5261, 0.2714, 0.8413, -3.5620, 2.7011,\n", " -4.3869, -0.2446, 0.7477, -2.5714, -2.2399, -1.7987, -0.1741, 0.4717,\n", " 2.0034, -0.2145, -1.4264, -1.0940, -0.1281, 1.1754, 2.9553, 0.4460,\n", " 4.8391, 1.8169, 1.7823, 0.6396, -0.4456, 0.7234, -2.0964, 2.2630,\n", " -0.5462, -2.2564, 0.1688, 0.1309, -1.7053, -0.7602, -1.0549, 0.3514,\n", " 0.1672, 0.2859, -0.3508, 1.0844, -0.8712, -1.5186, -1.9469, -2.0822],\n", " [-2.2319, -0.5566, 0.0089, -3.5002, -1.5644, -0.4519, -1.8587, -4.5616,\n", " -1.5709, -0.3607, 0.0640, -2.2981, -2.2856, -2.6779, -2.1960, -0.0651,\n", " -1.7547, -0.9967, -1.2952, -0.7235, 0.1489, -0.0958, -1.4841, -1.1847,\n", " -2.4874, -1.6476, -1.7825, -2.9891, -1.5076, -1.7210, -1.5455, -1.0676,\n", " -1.8151, -0.8846, -1.7584, -2.8153, -0.5414, 0.4700, -1.5771, 0.0185,\n", " -1.0439, 0.7461, -0.3977, -0.2824, -2.1439, -3.1303, -1.2088, -0.5887,\n", " -2.0821, -2.6190, -3.2053, -4.7425, -1.0118, -2.9543, -1.2188, -1.3243,\n", " -0.0184, 0.1311, -3.3758, -3.0484, -1.8978, -1.8257, -3.0822, -1.4603,\n", " 1.7308, 1.6246, 0.5772, -0.8086, 0.3031, 0.0611, 0.2495, -1.6257,\n", " -1.7668, 0.4822, 1.7537, 1.0424, 0.9526, -0.1731, 0.2616, -0.6955,\n", " -2.9455, -2.2574, 0.4462, 1.0054, 0.7926, -0.9714, -0.5716, -0.3358,\n", " -0.4972, -0.3308, 1.4862, 0.3106, -0.2885, -0.0276, -0.9534, 3.5435,\n", " 0.5876, 0.2969, 0.1102, 1.6368, 2.0143, -0.7822, -0.7776, 1.3940,\n", " -2.2183, -0.9234, 1.5020, 0.4569, 1.1889, 1.1872, 1.4828, 1.5134,\n", " 1.4606, -1.3943, 0.7237, 0.3983, -2.3357, 1.4066, 1.3249, -0.9234,\n", " 0.3022, 1.2987, -2.8364, -0.4627, 0.3730, 0.9911, -3.4243, 2.7276,\n", " -4.5621, -0.1635, 0.8058, -2.6526, -2.0604, -1.7936, -0.1731, 0.3731,\n", " 2.0274, -0.1653, -1.4223, -1.1692, -0.0788, 1.0808, 2.9956, 0.6498,\n", " 4.8383, 2.0417, 1.7968, 0.7522, -0.4896, 0.6901, -2.0934, 2.4697,\n", " -0.6123, -2.1438, 0.1554, 0.1590, -1.7057, -0.7536, -1.0268, 0.3042,\n", " 0.2302, 0.3152, -0.3793, 1.1670, -0.8979, -1.5231, -1.9642, -2.1138]],\n", " device='cuda:0', grad_fn=)},\n", " 'pred_patch': {'rgb_pred': tensor([[[[-8.5102e-03, -6.8588e-02, -9.7623e-02, ..., -1.6556e-01,\n", " -2.6847e-02, 4.6890e-02],\n", " [-1.4040e-02, 2.5890e-01, -8.2376e-02, ..., 1.0461e-02,\n", " 9.8500e-02, -2.6721e-01],\n", " [ 1.7644e-01, 1.1620e-01, -1.3907e-01, ..., 6.8931e-02,\n", " -1.9410e-01, 1.0879e-01],\n", " ...,\n", " [ 2.0557e-01, -1.6737e-02, 4.3652e-02, ..., 9.2938e-02,\n", " -4.5724e-02, -9.8340e-02],\n", " [-2.3391e-01, -2.3155e-02, 1.1485e-01, ..., -8.0746e-02,\n", " -1.4743e-01, 9.7193e-02],\n", " [ 1.4937e-02, 4.5658e-02, 6.9093e-02, ..., 1.4171e-01,\n", " 3.5637e-01, -1.6270e-01]],\n", " \n", " [[-2.0647e-01, -3.2552e-03, 2.5316e-02, ..., -7.9657e-02,\n", " 6.7470e-03, 3.3327e-02],\n", " [ 1.1477e-01, -5.2536e-02, -3.5990e-03, ..., -3.2594e-02,\n", " 3.9591e-02, 1.6122e-02],\n", " [-3.4281e-03, 2.8897e-02, -9.8309e-02, ..., 9.1921e-02,\n", " 1.1264e-01, 9.3303e-02],\n", " ...,\n", " [ 1.3253e-01, 1.2553e-01, -2.2823e-02, ..., 1.2933e-01,\n", " 6.2961e-02, -6.9660e-02],\n", " [ 2.9101e-01, -5.2820e-02, 1.1581e-01, ..., -2.0848e-01,\n", " 1.5017e-01, -1.7899e-01],\n", " [ 5.7759e-02, 2.2556e-02, 5.3684e-02, ..., 1.1290e-02,\n", " 1.8113e-01, 5.0692e-02]],\n", " \n", " [[ 1.0415e-02, -2.9931e-01, -2.1340e-01, ..., 5.8440e-02,\n", " 2.0779e-01, 9.1390e-03],\n", " [ 1.1809e-02, -5.4814e-03, -8.2221e-02, ..., 8.6488e-02,\n", " -6.2388e-02, -3.1765e-02],\n", " [ 1.4851e-02, -4.3017e-02, 1.7274e-01, ..., 9.4139e-02,\n", " 
-1.2032e-02, -8.8854e-02],\n", " ...,\n", " [-1.6830e-01, 5.9033e-02, 3.3503e-02, ..., -2.0820e-02,\n", " 2.3758e-01, 1.0992e-01],\n", " [-4.5974e-02, 2.3484e-01, 9.7915e-02, ..., -2.1677e-02,\n", " 7.1220e-02, -1.7327e-02],\n", " [ 1.5916e-01, -3.6206e-02, -1.2113e-01, ..., -2.3734e-01,\n", " -3.6329e-02, 6.1633e-02]]],\n", " \n", " \n", " [[[-5.6990e-03, -7.1931e-02, -1.0655e-01, ..., -1.4504e-01,\n", " -3.4703e-02, 2.7221e-02],\n", " [-4.1942e-02, 2.5141e-01, -8.8802e-02, ..., 7.9501e-03,\n", " 1.2653e-01, -2.1218e-01],\n", " [ 1.7890e-01, 1.1225e-01, -1.3310e-01, ..., 9.7320e-02,\n", " -1.5944e-01, 1.1648e-01],\n", " ...,\n", " [ 2.1935e-01, -2.7660e-02, 3.3882e-02, ..., 9.6232e-02,\n", " -3.5784e-02, -1.0041e-01],\n", " [-2.2615e-01, -2.6685e-02, 1.2721e-01, ..., -8.1273e-02,\n", " -1.4737e-01, 8.6605e-02],\n", " [ 2.9079e-02, 5.1478e-02, 6.9770e-02, ..., 1.4124e-01,\n", " 3.6884e-01, -1.6504e-01]],\n", " \n", " [[-2.0840e-01, -3.8706e-03, 2.8289e-02, ..., -1.9337e-01,\n", " -5.0421e-02, 2.4232e-02],\n", " [ 1.1224e-01, -5.1248e-02, 3.3924e-03, ..., -3.8077e-03,\n", " 5.1067e-02, 5.8321e-02],\n", " [-1.3546e-02, 4.5307e-02, -1.0708e-01, ..., 1.1599e-01,\n", " 8.9323e-02, 1.1020e-01],\n", " ...,\n", " [ 1.4373e-01, 1.1806e-01, -1.7927e-02, ..., 1.0038e-01,\n", " 6.2575e-02, -7.4748e-02],\n", " [ 3.0052e-01, -5.1276e-02, 1.0790e-01, ..., -2.1327e-01,\n", " 1.3945e-01, -1.9314e-01],\n", " [ 5.2478e-02, 2.8049e-02, 2.8527e-02, ..., 2.0264e-02,\n", " 1.8108e-01, 4.4090e-02]],\n", " \n", " [[ 2.3006e-02, -3.0091e-01, -2.1528e-01, ..., 2.3178e-02,\n", " 1.9272e-01, 2.9400e-02],\n", " [ 1.5069e-02, 1.8067e-04, -7.9107e-02, ..., 8.2706e-02,\n", " -7.5338e-02, -5.2059e-02],\n", " [ 1.2606e-02, -5.1216e-02, 1.7120e-01, ..., 7.1463e-02,\n", " 3.3204e-02, -1.0136e-01],\n", " ...,\n", " [-1.7523e-01, 5.4408e-02, 2.5346e-02, ..., -1.9007e-02,\n", " 2.4373e-01, 1.1366e-01],\n", " [-5.1120e-02, 2.2612e-01, 9.6984e-02, ..., -5.0884e-03,\n", " 5.1531e-02, -2.0926e-02],\n", " [ 1.5899e-01, -1.9930e-02, -1.0935e-01, ..., -2.3639e-01,\n", " -4.2045e-02, 5.9707e-02]]]], device='cuda:0',\n", " grad_fn=)}}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from easydict import EasyDict\n", "model.eval()\n", "model.forward_default_test(\n", " input_var=EasyDict({\n", " \"image\": torch.randn(2, 3, 256, 192).cuda(),\n", " \"label\": torch.Tensor([[1] * 168, [1] * 168]).cuda(),\n", " \"backbone_output\": True,\n", " }),\n", " current_step=None,\n", ")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'label': tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 
0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]),\n", " 'adapter_output_text': {'tokens': tensor([[[-0.8803, -0.2713, -0.1661, ..., 0.0369, -0.6509, 0.8961],\n", " [-0.8889, -0.2478, -0.2297, ..., -0.1141, -0.6723, 0.8933],\n", " [-0.9295, -0.3889, -0.6972, ..., -0.5067, -0.7337, 0.9315],\n", " ...,\n", " [-0.8847, -0.2829, -0.1915, ..., 0.0122, -0.6125, 0.9035],\n", " [-0.8326, -0.2373, 0.2839, ..., 0.2495, -0.5819, 0.8366],\n", " [-0.6779, -0.1466, 0.6792, ..., 0.6245, -0.5224, 0.7505]],\n", " \n", " [[-0.8803, -0.2713, -0.1661, ..., 0.0369, -0.6509, 0.8961],\n", " [-0.8889, -0.2478, -0.2297, ..., -0.1141, -0.6723, 0.8933],\n", " [-0.9295, -0.3889, -0.6972, ..., -0.5067, -0.7337, 0.9315],\n", " ...,\n", " [-0.8847, -0.2829, -0.1915, ..., 0.0122, -0.6125, 0.9035],\n", " [-0.8326, -0.2373, 0.2839, ..., 0.2495, -0.5819, 0.8366],\n", " [-0.6779, -0.1466, 0.6792, ..., 0.6245, -0.5224, 0.7505]]],\n", " device='cuda:0'),\n", " 'Bs': 2,\n", " 'N_H': 1,\n", " 'N_W': 168,\n", " 'attn_mask': None}}" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.adapter_text({\"label\": torch.Tensor([[0] * 168, [0] * 168])})" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [], "source": [ "# %pip install datasets\n", "# %pip install datasets\n", "import datasets\n", "dataset_dict = datasets.load_dataset('tuandunghcmut/PA-100K')" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [], "source": [ "import torchvision.transforms as T\n", "class ReidTestAugmentation(object):\n", " def __init__(self, height=256, width=192, vit=False):\n", "\n", " normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],\n", " std=[0.229, 0.224, 0.225])\n", "\n", " if not vit:\n", " self.test_transformer = T.Compose([\n", " T.Resize((height, width)),\n", " T.ToTensor(),\n", " normalizer,\n", " ])\n", " else:\n", " self.test_transformer = T.Compose([\n", " T.Resize((height, width)),\n", " T.PILToTensor(),\n", " ])\n", "\n", " def __call__(self, img):\n", " ## transform\n", " return self.test_transformer(img)\n", "\n", "transform = ReidTestAugmentation(vit=True)" ] }, { "cell_type": "code", "execution_count": 184, "metadata": {}, "outputs": [], "source": [ "model.cuda().eval()\n", "\n", "def get_image_result(pil_image):\n", " assert model.training == False\n", " image = transform(pil_image).unsqueeze(0)\n", " with torch.no_grad():\n", " result = model.forward_default_test(\n", " input_var=EasyDict({\n", " \"image\": image.cuda(),\n", " \"label\": torch.Tensor([[1] * 168]).cuda(), # pseudo label for not causing error\n", " # \"label\": torch.Tensor([]).cuda(), # pseudo label for not causing error\n", " }),\n", " current_step=None,\n", " )\n", " cosine_logits = result['pred']['logit'] # use as probs\n", " probs = torch.nn.functional.sigmoid(cosine_logits)\n", " probs = torch.round(probs * 10000) / 10000\n", " return probs.cpu().numpy()\n" ] }, { "cell_type": "code", "execution_count": 171, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'HARDHC', 'PA_100k', 'market', 
'parse27k', 'rap2'}" ] }, "execution_count": 171, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# load panda table info \"table_label_info.pkl\"\n", "import pandas as pd\n", "table_label_info = pd.read_pickle('./table_label_info.pkl')\n", "table_label_info\n", "# dataset_name\tlocal_label_id\tglobal_label_id\tpositive_meaning\tnegative_meaning\n", "# 0\trap2\t0\t0\twith a bald head\twithout a bald head\n", "# 1\trap2\t1\t1\twith long hair\twith short hair\n", "# 2\trap2\t2\t2\twith black hair\twith non-black hair\n", "# 3\trap2\t3\t3\twith a hat\twithout a hat\n", "# 4\trap2\t4\t4\twith glasses\twithout glasses\n", "# ...\t...\t...\t...\t...\t...\n", "# 163\tHARDHC\t9\t163\twith long pants\twithout long pants\n", "# 164\tHARDHC\t10\t164\twith skirt\twithout skirt\n", "# 165\tHARDHC\t11\t165\twith face mask\twithout face mask\n", "# 166\tHARDHC\t12\t166\twith logo clothes\twithout logo clothes\n", "# 167\tHARDHC\t13\t167\twith stripe clothes\twithout stripe clothes\n", "set_dataset_name = set(table_label_info['dataset_name'])\n", "set_dataset_name" ] }, { "cell_type": "code", "execution_count": 172, "metadata": {}, "outputs": [], "source": [ "# %pip install plotly\n", "import matplotlib.pyplot as plt\n", "import copy\n", "from IPython.display import display, HTML\n", "def interpret_result(pil_img, table_label_info=table_label_info, dataset_name='PA_100k', num_class_to_show=30):\n", " # plt.imshow(pil_img)\n", " # plt.show()\n", " result = get_image_result(pil_img)\n", " list_probs = result[0].tolist()\n", " result_table = copy.deepcopy(table_label_info[table_label_info['dataset_name'] == dataset_name])\n", " start_local_label_id = result_table['local_label_id'].iloc[0]\n", " end_index = result_table['local_label_id'].iloc[-1]\n", " list_probs = list_probs[start_local_label_id:end_index+1]\n", " # append \"probabilities\" column\n", " result_table['probabilities'] = list_probs\n", " # remove dataset_name\tlocal_label_id\tglobal_label_id\tnegative_meaning\n", " result_table = result_table.drop(columns=['dataset_name', 'local_label_id', 'global_label_id', 'negative_meaning'])\n", " result_table = result_table.head(num_class_to_show)\n", " # display as HTML have two parts: display image in the right side, and display the table in the left side\n", " display(HTML('
<div style=\"display: flex;\">'\n",
"                 '<div style=\"flex: 50%;\">' + result_table.to_html(index=False) + '</div>'\n",
"                 '<div style=\"flex: 50%;\">' + '<img src=\"data:image/png;base64,' + pil_img_to_base64(pil_img) + '\">' + '</div>'\n",
"                 '</div>'))\n",
"\n",
"def pil_img_to_base64(pil_img):\n",
"    import base64\n",
"    from io import BytesIO\n",
"    buffered = BytesIO()\n",
"    pil_img.save(buffered, format=\"PNG\")\n",
"    img_str = base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n",
"    return img_str" ] },
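{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# A minimal sketch (not part of the original evaluation code): list the top-k most\n",
"# confident positive attributes for a single image. It reuses get_image_result and\n",
"# table_label_info defined above and indexes the 168-dim output by global_label_id;\n",
"# the helper name top_k_attributes is ours, not from the Hulk codebase.\n",
"def top_k_attributes(pil_img, k=10, dataset_name='rap2'):\n",
"    probs = get_image_result(pil_img)[0]  # shape (168,), one sigmoid probability per global label\n",
"    rows = table_label_info[table_label_info['dataset_name'] == dataset_name]\n",
"    scored = [(row['positive_meaning'], float(probs[row['global_label_id']]))\n",
"              for _, row in rows.iterrows()]\n",
"    return sorted(scored, key=lambda x: x[1], reverse=True)[:k]\n",
"\n",
"# example: top_k_attributes(dataset_dict['test'][0]['image'], k=5, dataset_name='PA_100k')"
] }, { "cell_type": "code", "execution_count": 183, "metadata": {}, "outputs": [ { "data": { "text/html": [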
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
positive_meaningprobabilities
with a bald head0.2944
with long hair0.2309
with black hair0.5587
with a hat0.8535
with glasses0.3999
with a shirt0.4297
with a sweater0.1584
with a vest0.1850
with a t-shirt0.3502
with cotton0.2963
with a jacket0.2452
with formal wear0.1194
with tight clothes0.3670
with short sleeves0.7235
with other upper-body clothing0.2671
with long trousers0.6702
with a skirt0.2249
with a short skirt0.2047
with a dress0.2871
with jeans0.1894
with tight trousers0.1321
with leather shoes0.3236
with sport shoes0.5417
with boots0.1984
with cloth shoes0.1947
with casual shoes0.6028
with other shoes0.4483
with a backpack0.1184
with a shoulder bag0.3393
with a handbag0.1225
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import random\n", "test_dataset = dataset_dict['test']\n", "attr_name = 'Hat'\n", "attr_dataset = test_dataset.filter(lambda x: x[attr_name] == 1)\n", "\n", "random_index = random.randint(0, len(attr_dataset))\n", "image = attr_dataset[random_index]['image']\n", "interpret_result(image, table_label_info, 'rap2')" ] }, { "cell_type": "code", "execution_count": 214, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████| 10/10 [00:47<00:00, 4.73s/it]\n" ] } ], "source": [ "BATCH_SIZE = 1024\n", "import tqdm\n", "def get_batch_tfresult(list_pil_image):\n", " assert model.training == False\n", " images = torch.stack([transform(img) for img in list_pil_image])\n", " with torch.no_grad():\n", " result = model.forward_default_test(\n", " input_var=EasyDict({\n", " \"image\": images.cuda(),\n", " \"label\": torch.Tensor([[0] * 168] * len(list_pil_image)).cuda(), # pseudo label for not causing error\n", " }),\n", " current_step=None,\n", " )\n", " cosine_logits = result['pred']['logit'] # use as probs\n", " probs = torch.nn.functional.sigmoid(cosine_logits)\n", " # tf_matrix = probs > threshold\n", " return probs.cpu().numpy()\n", "\n", "list_result = []\n", "for i in tqdm.tqdm(range(0, len(test_dataset), BATCH_SIZE)):\n", " list_pil_image = test_dataset[i:i+BATCH_SIZE]['image']\n", " list_result.extend(get_batch_tfresult(list_pil_image))" ] }, { "cell_type": "code", "execution_count": 232, "metadata": {}, "outputs": [], "source": [ "# table_label_info dump to table_label_info.txt\n", "# %pip install prettytable\n", "import prettytable\n", "\n", "prettytable_info = prettytable.PrettyTable()\n", "prettytable_info.field_names = table_label_info.columns\n", "for index, row in table_label_info.iterrows():\n", " prettytable_info.add_row(row)\n", " \n", "with open('table_label_info.txt', 'w') as f:\n", " f.write(str(prettytable_info))\n", "\n" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dataset_namelocal_label_idglobal_label_idpositive_meaningnegative_meaning
54PA_100k054with a hatwithout a hat
55PA_100k155with glasseswithout glasses
56PA_100k256with short sleeveswithout short sleeves
57PA_100k357with long sleeveswithout long sleeves
58PA_100k458with stripe upper-clotheswithout stripe upper-clothes
59PA_100k559with logo upper-clotheswithout logo upper-clothes
60PA_100k660with plaid upper-clotheswithout plaid upper-clothes
61PA_100k761with splice upper-clotheswithout splice upper-clothes
62PA_100k862with stripe lower-clotheswithout stripe lower-clothes
63PA_100k963with pattern lower-clotheswithout pattern lower-clothes
64PA_100k1064with long coatwithout long coat
65PA_100k1165with long trouserswithout long trousers
66PA_100k1266with short trouserswithout short trousers
67PA_100k1367with skirt or dresswithout skirt or dress
68PA_100k1468with bootswithout boots
69PA_100k1569with a handbagwithout a handbag
70PA_100k1670with a shoulder bagwithout a shoulder bag
71PA_100k1771with a backpackwithout a backpack
72PA_100k1872hold objects in frontnot hold objects in front
73PA_100k1973age greater than 60age less than or equal to 60
74PA_100k2074age between 18 and 60age less than 18 or greater than 60
75PA_100k2175age less than 18age greater than or equal to 18
76PA_100k2276femalemale
77PA_100k2377in the front positionnot in the front position
78PA_100k2478in the side positionnot in the side position
79PA_100k2579in the back positionnot in the back position
\n", "
" ], "text/plain": [ " dataset_name local_label_id global_label_id positive_meaning \\\n", "54 PA_100k 0 54 with a hat \n", "55 PA_100k 1 55 with glasses \n", "56 PA_100k 2 56 with short sleeves \n", "57 PA_100k 3 57 with long sleeves \n", "58 PA_100k 4 58 with stripe upper-clothes \n", "59 PA_100k 5 59 with logo upper-clothes \n", "60 PA_100k 6 60 with plaid upper-clothes \n", "61 PA_100k 7 61 with splice upper-clothes \n", "62 PA_100k 8 62 with stripe lower-clothes \n", "63 PA_100k 9 63 with pattern lower-clothes \n", "64 PA_100k 10 64 with long coat \n", "65 PA_100k 11 65 with long trousers \n", "66 PA_100k 12 66 with short trousers \n", "67 PA_100k 13 67 with skirt or dress \n", "68 PA_100k 14 68 with boots \n", "69 PA_100k 15 69 with a handbag \n", "70 PA_100k 16 70 with a shoulder bag \n", "71 PA_100k 17 71 with a backpack \n", "72 PA_100k 18 72 hold objects in front \n", "73 PA_100k 19 73 age greater than 60 \n", "74 PA_100k 20 74 age between 18 and 60 \n", "75 PA_100k 21 75 age less than 18 \n", "76 PA_100k 22 76 female \n", "77 PA_100k 23 77 in the front position \n", "78 PA_100k 24 78 in the side position \n", "79 PA_100k 25 79 in the back position \n", "\n", " negative_meaning \n", "54 without a hat \n", "55 without glasses \n", "56 without short sleeves \n", "57 without long sleeves \n", "58 without stripe upper-clothes \n", "59 without logo upper-clothes \n", "60 without plaid upper-clothes \n", "61 without splice upper-clothes \n", "62 without stripe lower-clothes \n", "63 without pattern lower-clothes \n", "64 without long coat \n", "65 without long trousers \n", "66 without short trousers \n", "67 without skirt or dress \n", "68 without boots \n", "69 without a handbag \n", "70 without a shoulder bag \n", "71 without a backpack \n", "72 not hold objects in front \n", "73 age less than or equal to 60 \n", "74 age less than 18 or greater than 60 \n", "75 age greater than or equal to 18 \n", "76 male \n", "77 not in the front position \n", "78 not in the side position \n", "79 not in the back position " ] }, "execution_count": 201, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table_label_info[table_label_info['dataset_name'] == 'PA_100k']" ] }, { "cell_type": "code", "execution_count": 217, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Female', 'AgeOver60', 'Age18-60', 'AgeLess18', 'Front', 'Side', 'Back', 'Hat', 'Glasses', 'HandBag', 'ShoulderBag', 'Backpack', 'HoldObjectsInFront', 'ShortSleeve', 'LongSleeve', 'UpperStride', 'UpperLogo', 'UpperPlaid', 'UpperSplice', 'LowerStripe', 'LowerPattern', 'LongCoat', 'Trousers', 'Shorts', 'Skirt&Dress', 'boots']\n" ] } ], "source": [ "print(list(dataset_dict['test'].features.keys())[3:])" ] }, { "cell_type": "code", "execution_count": 228, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
precisionrecallf1-scoresupport
00.9205360.9042430.9123174407.0000
10.9255860.9384950.9319965593.0000
accuracy0.9234000.9234000.9234000.9234
macro avg0.9230610.9213690.92215610000.0000
weighted avg0.9233610.9234000.92332310000.0000
\n", "
" ], "text/plain": [ " precision recall f1-score support\n", "0 0.920536 0.904243 0.912317 4407.0000\n", "1 0.925586 0.938495 0.931996 5593.0000\n", "accuracy 0.923400 0.923400 0.923400 0.9234\n", "macro avg 0.923061 0.921369 0.922156 10000.0000\n", "weighted avg 0.923361 0.923400 0.923323 10000.0000" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import sklearn.metrics\n", "\n", "def get_report(list_result, attr_name_in_test_dataset, hulk_index_of_class, threshold=0.5):\n", " np_result = np.array(list_result) # 10000, 168\n", " pred_result = np_result[:, hulk_index_of_class] > threshold\n", " ground_true_hat = np.array([x[attr_name_in_test_dataset] for x in test_dataset])\n", " res = sklearn.metrics.classification_report(ground_true_hat, pred_result, output_dict=True)\n", " table_res = pd.DataFrame(res).transpose()\n", " display(table_res)\n", " \n", "get_report(list_result, 'LongSleeve', 57, 0.5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "path", "language": "python", "name": "path" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 4 }