{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "ename": "AttributeError", "evalue": "module 'torch' has no attribute 'set_grad_enabled'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[1], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdistributed\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mdist\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvlmeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m supported_VLM\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvlmeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m build_dataset\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvlmeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minference\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m infer_data_job\n", "File \u001b[0;32m/dscilab_dungvo/workspace/VLMEvalKit/vlmeval/__init__.py:7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 6\u001b[0m 
\u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msmp\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n", "File \u001b[0;32m/dscilab_dungvo/workspace/VLMEvalKit/vlmeval/api/__init__.py:4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhf_chat_model\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HFChatModel\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgemini\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GeminiWrapper, GeminiProVision\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mqwen_vl_api\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m QwenVLWrapper, QwenVLAPI, Qwen2VLAPI\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mqwen_api\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m QwenAPI\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclaude\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Claude_Wrapper, Claude3V\n", "File \u001b[0;32m/dscilab_dungvo/workspace/VLMEvalKit/vlmeval/api/qwen_vl_api.py:8\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mvlmeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msmp\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvlmeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BaseAPI\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvlmeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvlm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mqwen2_vl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprompt\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Qwen2VLPromptMixin\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mensure_image_url\u001b[39m(image: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 12\u001b[0m prefixes \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttp://\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfile://\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata:image;\u001b[39m\u001b[38;5;124m'\u001b[39m]\n", "File \u001b[0;32m/dscilab_dungvo/workspace/VLMEvalKit/vlmeval/vlm/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mset_grad_enabled\u001b[49m(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 4\u001b[0m torch\u001b[38;5;241m.\u001b[39mmanual_seed(\u001b[38;5;241m1234\u001b[39m)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01maria\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Aria\n", "\u001b[0;31mAttributeError\u001b[0m: module 'torch' has no attribute 'set_grad_enabled'" ] } ], "source": [ "import torch\n", "import torch.distributed as dist\n", "\n", "from vlmeval.config import supported_VLM\n", "from vlmeval.dataset import build_dataset\n", "from vlmeval.inference import infer_data_job\n", "from vlmeval.inference_video import infer_data_job_video\n", "from vlmeval.inference_mt import infer_data_job_mt\n", "from vlmeval.smp import *\n", "from vlmeval.utils.result_transfer import MMMU_result_transfer, MMTBench_result_transfer" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found existing installation: torch 2.5.1\n", "Uninstalling torch-2.5.1:\n", " Successfully uninstalled torch-2.5.1\n", "Found existing installation: torchvision 0.20.1\n", "Uninstalling torchvision-0.20.1:\n", " Successfully uninstalled torchvision-0.20.1\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable.It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. 
Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", "\u001b[0m" ] } ], "source": [ "!pip uninstall -y torch torchvision" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "RuntimeError", "evalue": "operator torchvision::nms does not exist", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorchvision\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtransforms\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mtransforms\u001b[39;00m\n", "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages/torchvision/__init__.py:10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Don't re-order these, we need to load the _C extension (done when importing\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# .extensions) before entering _meta_registrations.\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mextension\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _HAS_OPS \u001b[38;5;66;03m# usort:skip\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorchvision\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _meta_registrations, datasets, io, models, ops, transforms, utils \u001b[38;5;66;03m# usort:skip\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mversion\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m __version__ \u001b[38;5;66;03m# noqa: 
F401\u001b[39;00m\n", "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages/torchvision/_meta_registrations.py:164\u001b[0m\n\u001b[1;32m 153\u001b[0m torch\u001b[38;5;241m.\u001b[39m_check(\n\u001b[1;32m 154\u001b[0m grad\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m rois\u001b[38;5;241m.\u001b[39mdtype,\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m: (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 158\u001b[0m ),\n\u001b[1;32m 159\u001b[0m )\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m grad\u001b[38;5;241m.\u001b[39mnew_empty((batch_size, channels, height, width))\n\u001b[1;32m 163\u001b[0m \u001b[38;5;129;43m@torch\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlibrary\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mregister_fake\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtorchvision::nms\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m--> 164\u001b[0m \u001b[38;5;28;43;01mdef\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;21;43mmeta_nms\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdets\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mscores\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miou_threshold\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdim\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mboxes should be a 2d tensor, got 
\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mdets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdim\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43mD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mboxes should have 4 elements in dimension 1, got \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mdets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages/torch/library.py:795\u001b[0m, in \u001b[0;36mregister_fake..register\u001b[0;34m(func)\u001b[0m\n\u001b[1;32m 793\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 794\u001b[0m use_lib \u001b[38;5;241m=\u001b[39m lib\n\u001b[0;32m--> 795\u001b[0m \u001b[43muse_lib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_register_fake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_stacklevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstacklevel\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func\n", "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages/torch/library.py:184\u001b[0m, in \u001b[0;36mLibrary._register_fake\u001b[0;34m(self, op_name, fn, _stacklevel)\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 182\u001b[0m func_to_register \u001b[38;5;241m=\u001b[39m fn\n\u001b[0;32m--> 184\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[43mentry\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfake_impl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mregister\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc_to_register\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_registration_handles\u001b[38;5;241m.\u001b[39mappend(handle)\n", "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages/torch/_library/fake_impl.py:31\u001b[0m, in \u001b[0;36mFakeImplHolder.register\u001b[0;34m(self, func, source)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkernel \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 27\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregister_fake(...): the operator \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mqualname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 28\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124malready has an fake impl registered at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkernel\u001b[38;5;241m.\u001b[39msource\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 30\u001b[0m )\n\u001b[0;32m---> 31\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_C\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dispatch_has_kernel_for_dispatch_key\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mqualname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mMeta\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 33\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregister_fake(...): the operator \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mqualname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124malready has an DispatchKey::Meta implementation via a \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregister_fake.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 38\u001b[0m )\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39m_dispatch_has_kernel_for_dispatch_key(\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mqualname, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompositeImplicitAutograd\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 42\u001b[0m ):\n", "\u001b[0;31mRuntimeError\u001b[0m: operator torchvision::nms does not exist" ] } ], "source": [ "import torchvision.transforms as transforms" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[33mWARNING: Ignoring invalid distribution -ransformers (/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages)\u001b[0m\u001b[33m\n", "\u001b[0mRequirement already satisfied: torch in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (2.5.1)\n", "Requirement already satisfied: torchvision in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (0.20.1)\n", "Requirement already satisfied: filelock in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (3.13.1)\n", "Requirement already satisfied: typing-extensions>=4.8.0 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (4.11.0)\n", "Requirement already satisfied: networkx in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (3.3)\n", "Requirement already satisfied: jinja2 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (3.1.4)\n", "Requirement already satisfied: fsspec in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (2024.6.1)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in 
/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (12.4.127)\n", "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (9.1.0.70)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (12.4.5.8)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (11.2.1.3)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (10.3.5.147)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (11.6.1.9)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (12.3.1.170)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (12.4.127)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (12.4.127)\n", "Requirement already satisfied: triton==3.1.0 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (3.1.0)\n", "Requirement already satisfied: sympy==1.13.1 in 
/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torch) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from sympy==1.13.1->torch) (1.3.0)\n", "Requirement already satisfied: numpy in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torchvision) (1.26.4)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from torchvision) (11.0.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages (from jinja2->torch) (2.1.3)\n", "\u001b[33mWARNING: Ignoring invalid distribution -ransformers (/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages)\u001b[0m\u001b[33m\n", "\u001b[0m\u001b[33mWARNING: Ignoring invalid distribution -ransformers (/dscilab_dungvo/workspace/bin/envs/vlmeval/lib/python3.10/site-packages)\u001b[0m\u001b[33m\n", "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable.It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. 
Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", "\u001b[0m" ] } ], "source": [ "!pip install --upgrade torch torchvision " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [
 "\n",
 "def build_model_from_config(cfg):\n",
 "    \"\"\"Instantiate the model class named in a config dict.\n",
 "\n",
 "    Args:\n",
 "        cfg (dict): Must contain a 'class' key naming an attribute of\n",
 "            `vlmeval.api` or `vlmeval.vlm`. Every other key is passed to that\n",
 "            class as a keyword argument. `cfg` is deep-copied, not modified.\n",
 "\n",
 "    Returns:\n",
 "        The object returned by calling the resolved class.\n",
 "\n",
 "    Raises:\n",
 "        AssertionError: If `cfg` has no 'class' key.\n",
 "        ValueError: If neither module has an attribute with that name.\n",
 "    \"\"\"\n",
 "    # `copy` is imported here because the original used a `cp` alias that no\n",
 "    # cell defines; it could only arrive through `from vlmeval.smp import *`.\n",
 "    import copy\n",
 "\n",
 "    import vlmeval.api\n",
 "    import vlmeval.vlm\n",
 "\n",
 "    config = copy.deepcopy(cfg)\n",
 "    assert 'class' in config, 'The config must contain a `class` key'\n",
 "    cls_name = config.pop('class')\n",
 "    # `vlmeval.api` is checked first, so it wins if both modules expose the name.\n",
 "    for module in (vlmeval.api, vlmeval.vlm):\n",
 "        if hasattr(module, cls_name):\n",
 "            return getattr(module, cls_name)(**config)\n",
 "    raise ValueError(f'Class {cls_name} is not supported in `vlmeval.api` or `vlmeval.vlm`')\n",
 "\n",
 "\n",
 "def build_dataset_from_config(cfg):\n",
 "    \"\"\"Instantiate the dataset class named in a config dict.\n",
 "\n",
 "    Args:\n",
 "        cfg (dict): Must contain a 'class' key naming an attribute of\n",
 "            `vlmeval.dataset`. Every other key is passed to that class as a\n",
 "            keyword argument. `cfg` is deep-copied, not modified.\n",
 "\n",
 "    Returns:\n",
 "        The object returned by calling the resolved class.\n",
 "\n",
 "    Raises:\n",
 "        AssertionError: If `cfg` has no 'class' key.\n",
 "        ValueError: If `vlmeval.dataset` has no attribute with that name.\n",
 "    \"\"\"\n",
 "    # Local import for the same reason as in `build_model_from_config`: the\n",
 "    # `cp` alias is not defined by any cell of this notebook.\n",
 "    import copy\n",
 "\n",
 "    import vlmeval.dataset\n",
 "\n",
 "    config = copy.deepcopy(cfg)\n",
 "    assert 'class' in config, 'The config must contain a `class` key'\n",
 "    cls_name = config.pop('class')\n",
 "    if not hasattr(vlmeval.dataset, cls_name):\n",
 "        raise ValueError(f'Class {cls_name} is not supported in `vlmeval.dataset`')\n",
 "    return getattr(vlmeval.dataset, cls_name)(**config)"
 ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [
 "\n",
 "\n",
 "def parse_args(argv=None):\n",
 "    \"\"\"Parse the evaluation command line.\n",
 "\n",
 "    Args:\n",
 "        argv (list[str] | None): Tokens to parse. With the default `None`,\n",
 "            argparse reads `sys.argv[1:]`, exactly as the former zero-argument\n",
 "            version did. Passing a list lets a notebook parse a command\n",
 "            without touching `sys.argv`.\n",
 "\n",
 "    Returns:\n",
 "        argparse.Namespace: The parsed arguments.\n",
 "    \"\"\"\n",
 "    # Imported locally: no cell imports the `argparse` module itself, so the\n",
 "    # name could otherwise only come from a star import.\n",
 "    import argparse\n",
 "\n",
 "    help_msg = \"\"\"\\\n",
 "You can launch the evaluation by setting either --data and --model or --config.\n",
 "\n",
 "--data and --model:\n",
 " Each Arg should be a list of strings, specifying the names of datasets and models.\n",
 " To find all supported model names, please refer to the `vlmeval/config.py` of check the output of the command \\\n",
 " `vlmutil mlist all` in the terminal (you should first have vlmeval installed).\n",
 " To find all supported dataset names, please refer to the `vlmeval/dataset/__init__.py` file. The python script \\\n",
 " to print all supported dataset names is as follows:\n",
 " ```python\n",
 " from vlmeval.dataset import SUPPORTED_DATASETS\n",
 " print(SUPPORTED_DATASETS)\n",
 " ```\n",
 " or you can check the output of the command `vlmutil dlist all` in the terminal.\n",
 "\n",
 "--config:\n",
 " Launch the evaluation by specifying the path to the config json file. Sample Json Content:\n",
 " ```json\n",
 " {\n",
 " \"model\": {\n",
 " \"GPT4o_20240806_T00_HIGH\": {\n",
 " \"class\": \"GPT4V\",\n",
 " \"model\": \"gpt-4o-2024-08-06\",\n",
 " \"temperature\": 0,\n",
 " \"img_detail\": \"high\"\n",
 " },\n",
 " \"GPT4o_20240806_T10_Low\": {\n",
 " \"class\": \"GPT4V\",\n",
 " \"model\": \"gpt-4o-2024-08-06\",\n",
 " \"temperature\": 1.0,\n",
 " \"img_detail\": \"low\"\n",
 " }\n",
 " },\n",
 " \"data\": {\n",
 " \"MME-RealWorld-Lite\": {\n",
 " \"class\": \"MMERealWorld\",\n",
 " \"dataset\": \"MME-RealWorld-Lite\"\n",
 " },\n",
 " \"MMBench_DEV_EN_V11\": {\n",
 " \"class\": \"ImageMCQDataset\",\n",
 " \"dataset\": \"MMBench_DEV_EN_V11\"\n",
 " }\n",
 " }\n",
 " }\n",
 " ```\n",
 " Currently, only `model` and `data` are supported fields. The content of each field is a dictionary.\n",
 " For `model`, the key is the name of the model, and the value is a dictionary containing the following keys:\n",
 " - `class`: The class name of the model, which should be a class in `vlmeval.vlm` or `vlmeval.api`.\n",
 " - Other keys are specific to the model, please refer to the corresponding class.\n",
 " For `data`, the key is the name of the dataset (should be the same as the `dataset` field in most cases, \\\n",
 " except for video datasets), and the value is a dictionary containing the following keys:\n",
 " - `class`: The class name of the dataset, which should be a class in `vlmeval.dataset`.\n",
 " - `dataset`: The name of the dataset, which should be a string that is accepted by the `dataset` argument of the \\\n",
 " corresponding class.\n",
 " - Other keys are specific to the dataset, please refer to the corresponding class.\n",
 "\n",
 " The keys in the `model` and `data` fields will be used for naming the prediction files and evaluation results.\n",
 " When launching with `--config`, args for video datasets, such as `--nframe`, `--pack`, `--use-subtitle`, `--fps`, \\\n",
 " and args for API VLMs, such as `--retry`, `--verbose`, will be ignored.\n",
 "\"\"\"\n",
 "    parser = argparse.ArgumentParser(description=help_msg, formatter_class=argparse.RawTextHelpFormatter)\n",
 "    # Essential Args, Setting the Names of Datasets and Models\n",
 "    parser.add_argument('--data', type=str, nargs='+', help='Names of Datasets')\n",
 "    parser.add_argument('--model', type=str, nargs='+', help='Names of Models')\n",
 "    parser.add_argument('--config', type=str, help='Path to the Config Json File', default=None)\n",
 "    # Args that only apply to Video Dataset\n",
 "    parser.add_argument('--nframe', type=int, default=8)\n",
 "    parser.add_argument('--pack', action='store_true')\n",
 "    parser.add_argument('--use-subtitle', action='store_true')\n",
 "    parser.add_argument('--fps', type=float, default=-1)\n",
 "    # Work Dir\n",
 "    parser.add_argument('--work-dir', type=str, default='./outputs', help='select the output directory')\n",
 "    # Infer + Eval or Infer Only\n",
 "    parser.add_argument('--mode', type=str, default='all', choices=['all', 'infer'])\n",
 "    # API Kwargs, Apply to API VLMs and Judge API LLMs\n",
 "    parser.add_argument('--nproc', type=int, default=4, help='Parallel API calling')\n",
 "    parser.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs')\n",
 "    # Explicitly Set the Judge Model\n",
 "    parser.add_argument('--judge', type=str, default=None)\n",
 "    # Logging Utils\n",
 "    parser.add_argument('--verbose', action='store_true')\n",
 "    # Configuration for Resume\n",
 "    # Ignore: will not rerun failed VLM inference\n",
 "    parser.add_argument('--ignore', action='store_true', help='Ignore failed indices. ')\n",
 "    # Reuse: will reuse the existing prediction files\n",
 "    parser.add_argument('--reuse', action='store_true')\n",
 "\n",
 "    # `argv=None` makes argparse fall back to sys.argv[1:].\n",
 "    return parser.parse_args(argv)"
 ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'parse_args' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[2], line 27\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m args\n\u001b[1;32m 23\u001b[0m \u001b[38;5;66;03m# command2args('notebook --arg1 10 --arg2 hello', parser)\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# python run.py --data MMBench_DEV_EN MME SEEDBench_IMG --model idefics_80b_instruct --verbose\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# args = command2args('python run.py --data MMBench_DEV_EN MME SEEDBench_IMG --model SmolVLM --verbose')\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m args \u001b[38;5;241m=\u001b[39m 
\u001b[43mcommand2args\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpython run.py --data COCO_VAL --model SmolVLM --verbose\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn[2], line 21\u001b[0m, in \u001b[0;36mcommand2args\u001b[0;34m(command)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcommand2args\u001b[39m(command):\n\u001b[1;32m 15\u001b[0m \n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# remove file name\u001b[39;00m\n\u001b[1;32m 19\u001b[0m sys\u001b[38;5;241m.\u001b[39margv \u001b[38;5;241m=\u001b[39m command\u001b[38;5;241m.\u001b[39msplit()[\u001b[38;5;241m1\u001b[39m:]\n\u001b[0;32m---> 21\u001b[0m args \u001b[38;5;241m=\u001b[39m \u001b[43mparse_args\u001b[49m()\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m args\n", "\u001b[0;31mNameError\u001b[0m: name 'parse_args' is not defined" ] } ], "source": [
 "import sys\n",
 "from argparse import ArgumentParser\n",
 "\n",
 "# parser = ArgumentParser(description=\"Example argparse in Jupyter Notebook\")\n",
 "# parser.add_argument('--arg1', type=int, help='An integer argument')\n",
 "# parser.add_argument('--arg2', type=str, help='A string argument')\n",
 "# sys.argv = ['notebook', '--arg1', '10', '--arg2', 'hello']\n",
 "# args = parser.parse_args()\n",
 "# print(f\"arg1: {args.arg1}, arg2: {args.arg2}\")\n",
 "\n",
 "\n",
 "# notebook --arg1 10 --arg2 hello\n",
 "\n",
 "def command2args(command):\n",
 "    \"\"\"Parse a shell-style command string with `parse_args`.\n",
 "\n",
 "    Args:\n",
 "        command (str): A full command line such as\n",
 "            'python run.py --data COCO_VAL --model SmolVLM'. The first token\n",
 "            (the interpreter) is dropped; the second token (the script name)\n",
 "            lands in the `sys.argv[0]` slot that argparse skips.\n",
 "\n",
 "    Returns:\n",
 "        Whatever `parse_args()` returns for the remaining tokens.\n",
 "    \"\"\"\n",
 "    import shlex\n",
 "\n",
 "    # shlex keeps quoted values (e.g. --work-dir \"my dir\") as one token,\n",
 "    # which a plain str.split() would break apart.\n",
 "    tokens = shlex.split(command)[1:]\n",
 "\n",
 "    saved_argv = sys.argv\n",
 "    sys.argv = tokens\n",
 "    try:\n",
 "        return parse_args()\n",
 "    finally:\n",
 "        # Restore the kernel's own argv so later cells do not see the fake one.\n",
 "        sys.argv = saved_argv\n",
 "# command2args('notebook --arg1 10 --arg2 hello', parser)\n",
 "\n",
 "# python run.py --data MMBench_DEV_EN MME SEEDBench_IMG --model idefics_80b_instruct --verbose\n",
 "# args = command2args('python run.py --data MMBench_DEV_EN MME SEEDBench_IMG --model SmolVLM --verbose')\n",
 "args = command2args('python run.py --data COCO_VAL --model SmolVLM --verbose')\n"
 ]
}, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[2024-12-13 07:39:07,083] WARNING - RUN - 245429849.py: - 14: --reuse is not set, will not reuse previous (before one day) temporary files\n", "[2024-12-13 07:39:07] WARNING - 245429849.py: - 14: --reuse is not set, will not reuse previous (before one day) temporary files\n" ] } ], "source": [
 "logger = get_logger('RUN')\n",
 "rank, world_size = get_rank_and_world_size()\n",
 "\n",
 "# The defaults are assigned BEFORE the --config branch. They used to be\n",
 "# assigned after it, which reset `use_config` to False even with --config.\n",
 "use_config, cfg = False, None\n",
 "if args.config is not None:\n",
 "    assert args.data is None and args.model is None, '--data and --model should not be set when using --config'\n",
 "    use_config, cfg = True, load(args.config)\n",
 "    args.model = list(cfg['model'].keys())\n",
 "    args.data = list(cfg['data'].keys())\n",
 "else:\n",
 "    # `args.data` may be None here, so test truthiness: len(None) would raise\n",
 "    # a TypeError instead of showing the message below.\n",
 "    assert args.data, '--data should be a list of data files'\n",
 "\n",
 "if rank == 0:\n",
 "    if not args.reuse:\n",
 "        logger.warning('--reuse is not set, will not reuse previous (before one day) temporary files')\n",
 "    else:\n",
 "        logger.warning('--reuse is set, will reuse the latest prediction & temporary pickle files')\n",
 "\n",
 "# An MMEVAL_ROOT environment variable overrides --work-dir.\n",
 "if 'MMEVAL_ROOT' in os.environ:\n",
 "    args.work_dir = os.environ['MMEVAL_ROOT']\n",
 "\n",
 "if not use_config:\n",
 "    # Push --retry / --verbose into every registered entry that exposes a\n",
 "    # `keywords` mapping containing those keys.\n",
 "    for k, v in supported_VLM.items():\n",
 "        if hasattr(v, 'keywords') and 'retry' in v.keywords and args.retry is not None:\n",
 "            v.keywords['retry'] = args.retry\n",
 "            supported_VLM[k] = v\n",
 "        if hasattr(v, 'keywords') and 'verbose' in v.keywords and args.verbose is not None:\n",
 "            v.keywords['verbose'] = args.verbose\n",
 "            supported_VLM[k] = v\n"
 ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['COCO_VAL']" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "args.data" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "dataset_name = args.data[0]" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/dscilab_dungvo/workspace/VLMEvalKit/vlmeval/dataset/image_base.py:93: UserWarning: The dataset tsv is not downloaded\n", " warnings.warn('The dataset tsv is not downloaded')\n", "COCO_VAL.tsv: 345MB [00:48, 7.09MB/s] \n" ] } ], "source": [
 "list_datasets = []\n",
 "for args_data in args.data:\n",
 "    # The kwargs depend on which dataset is being built, so they are derived\n",
 "    # per dataset. They used to be computed once from `dataset_name` (the\n",
 "    # first entry only) and then passed to every dataset.\n",
 "    dataset_kwargs = {}\n",
 "    if args_data in ['MMLongBench_DOC', 'DUDE', 'DUDE_MINI', 'SLIDEVQA', 'SLIDEVQA_MINI']:\n",
 "        # `model_name` was never defined in this notebook (NameError); the\n",
 "        # first requested model is used instead.\n",
 "        # NOTE(review): confirm args.model[0] is the model these datasets expect.\n",
 "        dataset_kwargs['model'] = args.model[0]\n",
 "    if args_data == 'MMBench-Video':\n",
 "        dataset_kwargs['pack'] = args.pack\n",
 "    if args_data == 'Video-MME':\n",
 "        dataset_kwargs['use_subtitle'] = args.use_subtitle\n",
 "    list_datasets.append(build_dataset(args_data, **dataset_kwargs))\n"
 ] } ], "metadata": { "kernelspec": { "display_name": "vlmeval", "language": "python", "name": "vlmeval" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 2 }