In [1]:
import torch
import torch.distributed as dist

from vlmeval.config import supported_VLM
from vlmeval.dataset import build_dataset
from vlmeval.inference import infer_data_job
from vlmeval.inference_video import infer_data_job_video
from vlmeval.inference_mt import infer_data_job_mt
from vlmeval.smp import *
from vlmeval.utils.result_transfer import MMMU_result_transfer, MMTBench_result_transfer

 from .autonotebook import tqdm as notebook_tqdm


AttributeError: module 'torch' has no attribute 'set_grad_enabled'

In [4]:
# Remove the installed torch/torchvision pair so it can be reinstalled cleanly.
# %pip (rather than !pip) guarantees the command acts on the environment of the
# running kernel, not on whichever `pip` happens to be first on PATH.
%pip uninstall -y torch torchvision

Found existing installation: torch 2.5.1
Uninstalling torch-2.5.1:
 Successfully uninstalled torch-2.5.1
Found existing installation: torchvision 0.20.1
Uninstalling torchvision-0.20.1:
 Successfully uninstalled torchvision-0.20.1
[0m

In [2]:
import torchvision.transforms as transforms

RuntimeError: operator torchvision::nms does not exist

In [1]:
# Reinstall torch and torchvision together so the two builds match each other.
# %pip targets the running kernel's environment; restart the kernel afterwards
# so the freshly installed packages are the ones that get imported.
%pip install --upgrade torch torchvision

[0m

In [10]:

def build_model_from_config(cfg):
    """Instantiate the model described by one config entry.

    ``cfg`` is deep-copied (so the caller's mapping is left untouched), its
    ``'class'`` item is removed and used as the class name, and every remaining
    item is forwarded to the constructor as a keyword argument.  The class is
    looked up in ``vlmeval.api`` first and in ``vlmeval.vlm`` second.

    Raises:
        AssertionError: ``cfg`` has no ``'class'`` key.
        ValueError: neither module exposes an attribute with that name.
    """
    import vlmeval.api
    import vlmeval.vlm

    kwargs = cp.deepcopy(cfg)
    assert 'class' in kwargs
    cls_name = kwargs.pop('class')

    # Search order matters: an API wrapper wins over a local VLM of the same name.
    for namespace in (vlmeval.api, vlmeval.vlm):
        if hasattr(namespace, cls_name):
            return getattr(namespace, cls_name)(**kwargs)

    raise ValueError(f'Class {cls_name} is not supported in `vlmeval.api` or `vlmeval.vlm`')
 

def build_dataset_from_config(cfg):
    """Instantiate the dataset described by one config entry.

    ``cfg`` is deep-copied, its ``'class'`` item is removed and used as the
    name of a class in ``vlmeval.dataset``, and the remaining items are passed
    to that class as keyword arguments.

    Raises:
        AssertionError: ``cfg`` has no ``'class'`` key.
        ValueError: ``vlmeval.dataset`` has no attribute with that name.
    """
    import vlmeval.dataset

    kwargs = cp.deepcopy(cfg)
    assert 'class' in kwargs
    cls_name = kwargs.pop('class')

    # Guard clause: reject unknown class names before touching the module attribute.
    if not hasattr(vlmeval.dataset, cls_name):
        raise ValueError(f'Class {cls_name} is not supported in `vlmeval.dataset`')

    dataset_cls = getattr(vlmeval.dataset, cls_name)
    return dataset_cls(**kwargs)

In [11]:


def parse_args(argv=None):
    """Parse the evaluation launcher's command-line options.

    Args:
        argv: Optional list of argument strings (without the program name),
            e.g. ``['--data', 'COCO_VAL', '--model', 'SmolVLM']``.  When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            which is the original behaviour.  Passing a list lets notebook
            code parse options without modifying ``sys.argv``.

    Returns:
        argparse.Namespace: the parsed options.
    """
    # Imported locally so the function works even when the enclosing notebook
    # only did `from argparse import ArgumentParser`.
    import argparse

    help_msg = """\
You can launch the evaluation by setting either --data and --model or --config.

--data and --model:
 Each Arg should be a list of strings, specifying the names of datasets and models.
 To find all supported model names, please refer to the `vlmeval/config.py` or check the output of the command \
 `vlmutil mlist all` in the terminal (you should first have vlmeval installed).
 To find all supported dataset names, please refer to the `vlmeval/dataset/__init__.py` file. The python script \
 to print all supported dataset names is as follows:
 ```python
 from vlmeval.dataset import SUPPORTED_DATASETS
 print(SUPPORTED_DATASETS)
 ```
 or you can check the output of the command `vlmutil dlist all` in the terminal.

--config:
 Launch the evaluation by specifying the path to the config json file. Sample Json Content:
 ```json
 {
 "model": {
 "GPT4o_20240806_T00_HIGH": {
 "class": "GPT4V",
 "model": "gpt-4o-2024-08-06",
 "temperature": 0,
 "img_detail": "high"
 },
 "GPT4o_20240806_T10_Low": {
 "class": "GPT4V",
 "model": "gpt-4o-2024-08-06",
 "temperature": 1.0,
 "img_detail": "low"
 }
 },
 "data": {
 "MME-RealWorld-Lite": {
 "class": "MMERealWorld",
 "dataset": "MME-RealWorld-Lite"
 },
 "MMBench_DEV_EN_V11": {
 "class": "ImageMCQDataset",
 "dataset": "MMBench_DEV_EN_V11"
 }
 }
 }
 ```
 Currently, only `model` and `data` are supported fields. The content of each field is a dictionary.
 For `model`, the key is the name of the model, and the value is a dictionary containing the following keys:
 - `class`: The class name of the model, which should be a class in `vlmeval.vlm` or `vlmeval.api`.
 - Other keys are specific to the model, please refer to the corresponding class.
 For `data`, the key is the name of the dataset (should be the same as the `dataset` field in most cases, \
 except for video datasets), and the value is a dictionary containing the following keys:
 - `class`: The class name of the dataset, which should be a class in `vlmeval.dataset`.
 - `dataset`: The name of the dataset, which should be a string that is accepted by the `dataset` argument of the \
 corresponding class.
 - Other keys are specific to the dataset, please refer to the corresponding class.

 The keys in the `model` and `data` fields will be used for naming the prediction files and evaluation results.
 When launching with `--config`, args for video datasets, such as `--nframe`, `--pack`, `--use-subtitle`, `--fps`, \
 and args for API VLMs, such as `--retry`, `--verbose`, will be ignored.
"""
    # RawTextHelpFormatter keeps the hand-formatted description above verbatim.
    parser = argparse.ArgumentParser(description=help_msg, formatter_class=argparse.RawTextHelpFormatter)
    # Essential Args, Setting the Names of Datasets and Models
    parser.add_argument('--data', type=str, nargs='+', help='Names of Datasets')
    parser.add_argument('--model', type=str, nargs='+', help='Names of Models')
    parser.add_argument('--config', type=str, help='Path to the Config Json File', default=None)
    # Args that only apply to Video Dataset
    parser.add_argument('--nframe', type=int, default=8)
    parser.add_argument('--pack', action='store_true')
    parser.add_argument('--use-subtitle', action='store_true')
    parser.add_argument('--fps', type=float, default=-1)
    # Work Dir
    parser.add_argument('--work-dir', type=str, default='./outputs', help='select the output directory')
    # Infer + Eval or Infer Only
    parser.add_argument('--mode', type=str, default='all', choices=['all', 'infer'])
    # API Kwargs, Apply to API VLMs and Judge API LLMs
    parser.add_argument('--nproc', type=int, default=4, help='Parallel API calling')
    parser.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs')
    # Explicitly Set the Judge Model
    parser.add_argument('--judge', type=str, default=None)
    # Logging Utils
    parser.add_argument('--verbose', action='store_true')
    # Configuration for Resume
    # Ignore: will not rerun failed VLM inference
    parser.add_argument('--ignore', action='store_true', help='Ignore failed indices. ')
    # Reuse: will reuse the existing prediction files
    parser.add_argument('--reuse', action='store_true')

    # argv=None makes argparse read sys.argv[1:], preserving the old call style.
    args = parser.parse_args(argv)
    return args

In [2]:
import sys
from argparse import ArgumentParser

# parser = ArgumentParser(description="Example argparse in Jupyter Notebook")
# parser.add_argument('--arg1', type=int, help='An integer argument')
# parser.add_argument('--arg2', type=str, help='A string argument')
# sys.argv = ['notebook', '--arg1', '10', '--arg2', 'hello']
# args = parser.parse_args()
# print(f"arg1: {args.arg1}, arg2: {args.arg2}")


# notebook --arg1 10 --arg2 hello

def command2args(command):
    """Parse a shell-style command line through ``parse_args``.

    Args:
        command: The full command as one string, for example
            ``'python run.py --data COCO_VAL --model SmolVLM --verbose'``.
            The first token (the interpreter) is discarded, which leaves the
            script name in ``sys.argv[0]`` and the options after it.

    Returns:
        Whatever ``parse_args()`` returns for that command line.
    """
    import shlex  # local import keeps this cell self-contained

    # shlex.split honours shell quoting, so a value such as
    # --work-dir "my outputs" stays a single token; str.split() would break it.
    tokens = shlex.split(command)

    saved_argv = sys.argv
    # Drop the interpreter token ("python"); what remains mimics a real argv.
    sys.argv = tokens[1:]
    try:
        return parse_args()
    finally:
        # Restore the kernel's real argv so the fake command line does not
        # leak into later cells as hidden state.
        sys.argv = saved_argv
# command2args('notebook --arg1 10 --arg2 hello', parser)

# Example of the equivalent terminal invocation for a multi-dataset run:
#   python run.py --data MMBench_DEV_EN MME SEEDBench_IMG --model idefics_80b_instruct --verbose
# Multi-dataset variant of the call below, kept for reference:
# args = command2args('python run.py --data MMBench_DEV_EN MME SEEDBench_IMG --model SmolVLM --verbose')
# Build the `args` namespace read by the following cells: one dataset, one model, verbose on.
args = command2args('python run.py --data COCO_VAL --model SmolVLM --verbose')


NameError: name 'parse_args' is not defined

In [24]:
# Run set-up: logger, distributed rank, validation of the parsed `args`, and
# propagation of --retry / --verbose into the registered model factories.
logger = get_logger('RUN')
rank, world_size = get_rank_and_world_size()

# The defaults must be assigned BEFORE the --config branch. Assigning them
# afterwards would overwrite use_config=True and discard the loaded cfg.
use_config, cfg = False, None
if args.config is not None:
    assert args.data is None and args.model is None, '--data and --model should not be set when using --config'
    use_config, cfg = True, load(args.config)
    # With a config file, model and dataset names come from its top-level keys.
    args.model = list(cfg['model'].keys())
    args.data = list(cfg['data'].keys())
else:
    # Truthiness check covers both a missing option (None) and an empty list,
    # giving a readable AssertionError instead of a TypeError from len(None).
    assert args.data, '--data should be a list of data files'
    assert args.model, '--model should be a list of model names'

if rank == 0:
    # Only the first rank reports, to avoid duplicated warnings.
    if not args.reuse:
        logger.warning('--reuse is not set, will not reuse previous (before one day) temporary files')
    else:
        logger.warning('--reuse is set, will reuse the latest prediction & temporary pickle files')

# An environment-provided root takes precedence over --work-dir.
if 'MMEVAL_ROOT' in os.environ:
    args.work_dir = os.environ['MMEVAL_ROOT']

if not use_config:
    # Push CLI-level retry/verbose settings into every registered entry that
    # exposes a `keywords` mapping containing those keys.
    # NOTE(review): entries look like functools.partial objects - confirm.
    for k, v in supported_VLM.items():
        if not hasattr(v, 'keywords'):
            continue
        if 'retry' in v.keywords and args.retry is not None:
            v.keywords['retry'] = args.retry
            supported_VLM[k] = v
        if 'verbose' in v.keywords and args.verbose is not None:
            v.keywords['verbose'] = args.verbose
            supported_VLM[k] = v




In [25]:
args.data

['COCO_VAL']

In [26]:
# Keep only the first requested dataset name; any further entries of
# args.data are not reflected in `dataset_name`.
dataset_name = args.data[0]

In [27]:
def _dataset_kwargs_for(name):
    """Return the extra ``build_dataset`` keyword arguments needed by dataset ``name``."""
    kwargs = {}
    if name in ['MMLongBench_DOC', 'DUDE', 'DUDE_MINI', 'SLIDEVQA', 'SLIDEVQA_MINI']:
        # NOTE(review): `model_name` is not defined in any cell visible here;
        # it must exist before one of these datasets is requested - confirm.
        kwargs['model'] = model_name
    if name == 'MMBench-Video':
        kwargs['pack'] = args.pack
    if name == 'Video-MME':
        kwargs['use_subtitle'] = args.use_subtitle
    return kwargs


# Still exposed under its original name for any cell that reads it.
dataset_kwargs = _dataset_kwargs_for(dataset_name)

# Each dataset gets the keyword arguments that belong to IT. Reusing the
# kwargs derived from the first dataset would pass e.g. `use_subtitle` to
# datasets that do not accept it.
list_datasets = []
for args_data in args.data:
    list_datasets.append(build_dataset(args_data, **_dataset_kwargs_for(args_data)))


COCO_VAL.tsv: 345MB [00:48, 7.09MB/s] 
