Upload folder using huggingface_hub
- __init__.py +9 -9
- __pycache__/base_model.cpython-310.pyc +0 -0
- __pycache__/interfaces.cpython-310.pyc +0 -0
- __pycache__/mini_gpt4_llama_v2.cpython-310.pyc +0 -0
- __pycache__/modeling_llama_v2.cpython-310.pyc +0 -0
- __pycache__/modeling_mistral.cpython-310.pyc +0 -0
- __pycache__/utils.cpython-310.pyc +0 -0
- base_model.py +2 -2
- blip2.py +7 -7
- interfaces.py +190 -0
- logger.py +1 -1
- mini_gpt4_llama_v2.py +16 -10
- utils.py +180 -1
__init__.py
CHANGED
@@ -9,15 +9,15 @@ import logging
 import torch
 from omegaconf import OmegaConf

-from …
-from …
-from …
-from …
-from …
-from …
-from …
-from …
-from …
+from .registry import registry
+from .base_model import BaseModel
+from .base_processor import BaseProcessor
+from .blip_processors import *
+from .blip2 import Blip2Base
+from .clip_vision_encoder import *
+from .config import *
+from .eva_vit import *
+from .mini_gpt4_llama_v2 import MiniGPT4_Video
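Every intra-repo import in this commit follows the same pattern: an absolute import is rewritten as a package-relative one, presumably so the uploaded folder resolves its own modules when fetched from the Hub rather than installed as a named package. A hedged sketch of the kind of loading this enables (the repository id is taken from a push_to_hub call further down in mini_gpt4_llama_v2.py, and the presence of an auto_map entry for remote code is an assumption):

# Hedged sketch, not from this repo's documentation.
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "Vision-CAIR/MiniGPT4-video-hf",  # assumed repo id (appears in a push_to_hub call below)
    trust_remote_code=True,           # run the repo's own modelling code, with its relative imports
)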
__pycache__/base_model.cpython-310.pyc
CHANGED
Binary files a/__pycache__/base_model.cpython-310.pyc and b/__pycache__/base_model.cpython-310.pyc differ

__pycache__/interfaces.cpython-310.pyc
ADDED
Binary file (5.29 kB)

__pycache__/mini_gpt4_llama_v2.cpython-310.pyc
CHANGED
Binary files a/__pycache__/mini_gpt4_llama_v2.cpython-310.pyc and b/__pycache__/mini_gpt4_llama_v2.cpython-310.pyc differ

__pycache__/modeling_llama_v2.cpython-310.pyc
CHANGED
Binary files a/__pycache__/modeling_llama_v2.cpython-310.pyc and b/__pycache__/modeling_llama_v2.cpython-310.pyc differ

__pycache__/modeling_mistral.cpython-310.pyc
ADDED
Binary file (39.2 kB)

__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ
base_model.py
CHANGED
@@ -11,8 +11,8 @@ import os
 import numpy as np
 import torch
 import torch.nn as nn
-from …
-from …
+from .dist_utils import download_cached_file, is_dist_avail_and_initialized
+from .utils import get_abs_path, is_url
 from omegaconf import OmegaConf

 from huggingface_hub import PyTorchModelHubMixin
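base_model.py already imports PyTorchModelHubMixin (visible in the unchanged context line above); that mixin is one way a plain nn.Module gains save_pretrained / from_pretrained / push_to_hub. A generic sketch of the pattern, using a toy class rather than anything from this repo:

import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin

class TinyModel(nn.Module, PyTorchModelHubMixin):
    """Toy example only: the mixin supplies save_pretrained / from_pretrained / push_to_hub."""

    def __init__(self, hidden: int = 8):
        super().__init__()
        self.proj = nn.Linear(hidden, hidden)

    def forward(self, x):
        return self.proj(x)

m = TinyModel()
m.save_pretrained("tiny-model")               # writes the weights (and, in recent versions, a config) locally
m2 = TinyModel.from_pretrained("tiny-model")  # reloads from the local folder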
blip2.py
CHANGED
@@ -15,13 +15,13 @@ import torch.nn as nn
 import torch.distributed as dist
 import torch.nn.functional as F

-…
-from …
-from …
-from …
-from …
-from …
-from …
+import dist_utils as dist_utils
+from .dist_utils import download_cached_file
+from .utils import is_url
+from .logger import MetricLogger
+from .base_model import BaseModel
+from .Qformer import BertConfig, BertLMHeadModel
+from .eva_vit import create_eva_vit_g
 from transformers import BertTokenizer
interfaces.py
ADDED
@@ -0,0 +1,190 @@
from typing import (ClassVar, Dict, List, Literal, Optional, Protocol, Type,
                    Union, overload, runtime_checkable)

from typing_extensions import TypeGuard

from vllm.config import LoRAConfig, MultiModalConfig, SchedulerConfig
from vllm.logger import init_logger

logger = init_logger(__name__)


@runtime_checkable
class SupportsVision(Protocol):
    """The interface required for all vision language models (VLMs)."""

    supports_vision: ClassVar[Literal[True]] = True
    """
    A flag that indicates this model supports vision inputs.

    Note:
        There is no need to redefine this flag if this class is in the
        MRO of your model class.
    """

    def __init__(self, *, multimodal_config: MultiModalConfig) -> None:
        ...


# We can't use runtime_checkable with ClassVar for issubclass checks
# so we need to treat the class as an instance and use isinstance instead
@runtime_checkable
class _SupportsVisionType(Protocol):
    supports_vision: Literal[True]

    def __call__(self, *, multimodal_config: MultiModalConfig) -> None:
        ...


@overload
def supports_vision(model: Type[object]) -> TypeGuard[Type[SupportsVision]]:
    ...


@overload
def supports_vision(model: object) -> TypeGuard[SupportsVision]:
    ...


def supports_vision(
    model: Union[Type[object], object],
) -> Union[TypeGuard[Type[SupportsVision]], TypeGuard[SupportsVision]]:
    if isinstance(model, type):
        return isinstance(model, _SupportsVisionType)

    return isinstance(model, SupportsVision)


@runtime_checkable
class SupportsLoRA(Protocol):
    """The interface required for all models that support LoRA."""

    supports_lora: ClassVar[Literal[True]] = True
    """
    A flag that indicates this model supports LoRA.

    Note:
        There is no need to redefine this flag if this class is in the
        MRO of your model class.
    """

    packed_modules_mapping: ClassVar[Dict[str, List[str]]]
    supported_lora_modules: ClassVar[List[str]]
    embedding_modules: ClassVar[Dict[str, str]]
    embedding_padding_modules: ClassVar[List[str]]

    # lora_config is None when LoRA is not enabled
    def __init__(self, *, lora_config: Optional[LoRAConfig] = None) -> None:
        ...


# We can't use runtime_checkable with ClassVar for issubclass checks
# so we need to treat the class as an instance and use isinstance instead
@runtime_checkable
class _SupportsLoRAType(Protocol):
    supports_lora: Literal[True]

    packed_modules_mapping: Dict[str, List[str]]
    supported_lora_modules: List[str]
    embedding_modules: Dict[str, str]
    embedding_padding_modules: List[str]

    def __call__(self, *, lora_config: Optional[LoRAConfig] = None) -> None:
        ...


@overload
def supports_lora(model: Type[object]) -> TypeGuard[Type[SupportsLoRA]]:
    ...


@overload
def supports_lora(model: object) -> TypeGuard[SupportsLoRA]:
    ...


def supports_lora(
    model: Union[Type[object], object],
) -> Union[TypeGuard[Type[SupportsLoRA]], TypeGuard[SupportsLoRA]]:
    result = _supports_lora(model)

    if not result:
        lora_attrs = (
            "packed_modules_mapping",
            "supported_lora_modules",
            "embedding_modules",
            "embedding_padding_modules",
        )
        missing_attrs = tuple(attr for attr in lora_attrs
                              if not hasattr(model, attr))

        if getattr(model, "supports_lora", False):
            if missing_attrs:
                logger.warning(
                    "The model (%s) sets `supports_lora=True`, "
                    "but is missing LoRA-specific attributes: %s",
                    model,
                    missing_attrs,
                )
        else:
            if not missing_attrs:
                logger.warning(
                    "The model (%s) contains all LoRA-specific attributes, "
                    "but does not set `supports_lora=True`.", model)

    return result


def _supports_lora(
    model: Union[Type[object], object],
) -> Union[TypeGuard[Type[SupportsLoRA]], TypeGuard[SupportsLoRA]]:
    if isinstance(model, type):
        return isinstance(model, _SupportsLoRAType)

    return isinstance(model, SupportsLoRA)


@runtime_checkable
class HasInnerState(Protocol):
    """The interface required for all models that has inner state."""

    has_inner_state: ClassVar[Literal[True]] = True
    """
    A flag that indicates this model has inner state.
    Models that has inner state usually need access to the scheduler_config
    for max_num_seqs ,etc... (Currently only used by Jamba)
    """

    def __init__(self,
                 *,
                 scheduler_config: Optional[SchedulerConfig] = None) -> None:
        ...


@runtime_checkable
class _HasInnerStateType(Protocol):
    has_inner_state: ClassVar[Literal[True]]

    def __init__(self,
                 *,
                 scheduler_config: Optional[SchedulerConfig] = None) -> None:
        ...


@overload
def has_inner_state(model: object) -> TypeGuard[HasInnerState]:
    ...


@overload
def has_inner_state(model: Type[object]) -> TypeGuard[Type[HasInnerState]]:
    ...


def has_inner_state(
    model: Union[Type[object], object]
) -> Union[TypeGuard[Type[HasInnerState]], TypeGuard[HasInnerState]]:
    if isinstance(model, type):
        return isinstance(model, _HasInnerStateType)

    return isinstance(model, HasInnerState)
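These helpers follow vLLM's optional-interface pattern: a capability is advertised by a class-level flag plus a runtime-checkable Protocol, and the paired overloads let the same guard work on classes and on instances. A small illustration of how the guards behave, assuming the interfaces module above is importable and vLLM is installed; DummyVLM is a made-up class, not part of the repo:

from interfaces import supports_lora, supports_vision  # the module added above

class DummyVLM:
    # The structural check only needs the flag; no inheritance from SupportsVision required.
    supports_vision = True

    def __init__(self, *, multimodal_config=None):
        self.multimodal_config = multimodal_config

print(supports_vision(DummyVLM))                          # True: class-level check via _SupportsVisionType
print(supports_vision(DummyVLM(multimodal_config=None)))  # True: instance check via SupportsVision
print(supports_lora(DummyVLM))                            # False: no supports_lora flag or LoRA attributes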
logger.py
CHANGED
@@ -13,7 +13,7 @@ from collections import defaultdict, deque
 import torch
 import torch.distributed as dist

-…
+import dist_utils


 class SmoothedValue(object):
mini_gpt4_llama_v2.py
CHANGED
@@ -16,9 +16,9 @@ import torch
 from torch.cuda.amp import autocast as autocast
 import torch.nn as nn

-from …
-from …
-from …
+from .registry import registry
+from .blip2 import Blip2Base, disabled_train
+from .conversation import Conversation, SeparatorStyle, StoppingCriteriaList, StoppingCriteriaSub
 from transformers import LlamaTokenizer
 from transformers import BitsAndBytesConfig
 from transformers import AutoConfig, AutoTokenizer

@@ -34,7 +34,7 @@ import numpy as np
 import os
 from transformers import PretrainedConfig
 from transformers import PreTrainedModel
-from …
+from .conversation import CONV_VISION
 import cv2
 def extract_audio(video_path, audio_path):
     video_clip = mp.VideoFileClip(video_path)

@@ -89,8 +89,10 @@ class MiniGPT4_Video(Blip2Base, PreTrainedModel):
     ):
         ## loop through the config minigpt4_video_config object and set the attributes
         # if isinstance(cfg, minigpt4_video_config):
-…
-…
+        try:
+            cfg = cfg.to_dict()
+        except:
+            pass
         for key, value in cfg.items():
             try:
                 setattr(self, key, value)

@@ -216,8 +218,12 @@ class MiniGPT4_Video(Blip2Base, PreTrainedModel):
         else :
             # calculate the total number of frames in the video using opencv
             total_num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-…
-…
+            if self.model_type == "Mistral":
+                max_images_length = 90
+                max_sub_len = 800
+            else:
+                max_images_length = 45
+                max_sub_len = 400
             images = []
             frame_count = 0
             sampling_interval = int(total_num_frames / max_images_length)

@@ -839,11 +845,11 @@ class MiniGPT4_Video(Blip2Base, PreTrainedModel):
         msg = model.load_state_dict(ckpt['model'], strict=False)
         # push the model to the hub with its metadata and config file
         model.to('cuda')
-        model.push_to_hub("Vision-CAIR/MiniGPT4-video-hf")
+        # model.push_to_hub("Vision-CAIR/MiniGPT4-video-mistral-hf")
         video_config = minigpt4_video_config(cfg)
         # video_config.save_pretrained("minigpt4_video_config")
         # print("Save Minigpt-4-LLM Config: minigpt4_video_config")
-        video_config.push_to_hub("MiniGPT4-video")
+        # video_config.push_to_hub("Vision-CAIR/MiniGPT4-video-mistral-hf")
         return model
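The new branch simply gives the Mistral variant a larger budget (90 frames and a subtitle-length cap of 800, versus 45 and 400 otherwise) before the existing uniform-sampling loop runs. A standalone sketch of what that sampling interval amounts to; the helper name and the guard against a zero interval are illustrative additions, not code from the repo:

import cv2

def sample_video_frames(video_path: str, max_images_length: int):
    # Keep roughly max_images_length frames, one every sampling_interval frames,
    # mirroring the interval computed in the diff above.
    cap = cv2.VideoCapture(video_path)
    total_num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sampling_interval = max(int(total_num_frames / max_images_length), 1)  # avoid a zero interval on short clips

    frames, frame_count = [], 0
    ok, frame = cap.read()
    while ok and len(frames) < max_images_length:
        if frame_count % sampling_interval == 0:
            frames.append(frame)
        frame_count += 1
        ok, frame = cap.read()
    cap.release()
    return frames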
utils.py
CHANGED
@@ -23,7 +23,7 @@ import pandas as pd
 import yaml
 from iopath.common.download import download
 from iopath.common.file_io import file_lock, g_pathmgr
-from …
+from .registry import registry
 from torch.utils.model_zoo import tqdm
 from torchvision.datasets.utils import (
     check_integrity,

@@ -422,3 +422,182 @@ def get_file_size(filename):
     """
     size_in_mb = os.path.getsize(filename) / float(1024**2)
     return size_in_mb
+
+from typing import Dict, List, Protocol, Tuple
+
+import torch
+from torch.func import functional_call
+
+from vllm.multimodal import BatchedTensors
+from vllm.utils import is_pin_memory_available
+
+
+def merge_vision_embeddings(input_ids: torch.Tensor,
+                            inputs_embeds: torch.Tensor,
+                            vision_embeddings: BatchedTensors,
+                            image_token_id: int) -> torch.Tensor:
+    """
+    Merge `vision_embeddings` into `inputs_embeds` by overwriting the positions
+    in `inputs_embeds` corresponding to placeholder image tokens in `input_ids`.
+
+    Note:
+        This updates `inputs_embeds` in place.
+    """
+    mask = (input_ids == image_token_id)
+    num_expected_tokens = mask.sum()
+
+    if isinstance(vision_embeddings, torch.Tensor):
+        batch_size, batch_tokens, *_, embed_dim = vision_embeddings.shape
+        total_tokens = batch_size * batch_tokens
+        if num_expected_tokens != total_tokens:
+            expr = f"{batch_size} x {batch_tokens}"
+            raise ValueError(
+                f"Attempted to assign {expr} = {total_tokens} "
+                f"image tokens to {num_expected_tokens} placeholders")
+
+        inputs_embeds[mask] = vision_embeddings.view(total_tokens, embed_dim)
+    else:
+        size_per_batch = [t.shape[0] for t in vision_embeddings]
+        total_tokens = sum(size_per_batch)
+        if num_expected_tokens != total_tokens:
+            expr = ' + '.join(map(str, size_per_batch))
+            raise ValueError(
+                f"Attempted to assign {expr} = {total_tokens} "
+                f"image tokens to {num_expected_tokens} placeholders")
+
+        inputs_embeds[mask] = torch.cat(vision_embeddings)
+
+    return inputs_embeds
+
+
+class LayerFn(Protocol):
+
+    def __call__(
+        self,
+        prefix="",
+    ) -> torch.nn.Module:
+        ...
+
+
+class PPMissingLayer(torch.nn.Identity):
+    """
+    A placeholder layer for missing layers in a pipeline parallel model.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+
+
+_CPU_OFFLOAD_BYTES = 0
+_CPU_OFFLOAD_MAX_BYTES = 0
+
+
+def set_cpu_offload_max_bytes(max_bytes: int) -> None:
+    global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
+    _CPU_OFFLOAD_BYTES = 0
+    _CPU_OFFLOAD_MAX_BYTES = max_bytes
+
+
+def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
+    device = next(module.parameters()).device
+
+    if device == torch.device("cpu"):
+        return module
+
+    global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
+    if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
+        return module
+
+    pin_memory = is_pin_memory_available()
+
+    # offload parameters to CPU
+    # use pin_memory if possible, which helps cudagraph capture speed
+    for p in module.parameters():
+        if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
+            # we use per-parameter offloading
+            # one module might have some parameters offloaded and some not
+            break
+
+        # `torch.empty_like` does not support `pin_memory` argument
+        cpu_data = torch.empty(size=p.data.size(),
+                               dtype=p.data.dtype,
+                               layout=p.data.layout,
+                               device='cpu',
+                               pin_memory=pin_memory)
+        cpu_data.copy_(p.data)
+        p.data = cpu_data
+        _CPU_OFFLOAD_BYTES += p.data.numel() * p.data.element_size()
+
+    state_dict: Dict[str, torch.Tensor] = module.state_dict()
+
+    original_forward = module.forward
+
+    def forward(*args, **kwargs):
+        module.forward = original_forward
+        device_state = {
+            # here we blindly call `to(device)`
+            # if the parameter is already on the device, it will be a no-op
+            k: v.to(device, non_blocking=True)
+            for k, v in state_dict.items()
+        }
+        output = functional_call(module,
+                                 device_state,
+                                 args=args,
+                                 kwargs=kwargs)
+        module.forward = forward
+        return output
+
+    module.forward = forward
+
+    return module
+
+
+def make_layers(
+    num_hidden_layers: int,
+    layer_fn: LayerFn,
+    prefix: str,
+) -> Tuple[int, int, torch.nn.ModuleList]:
+    """Make a list of layers with the given layer function, taking
+    pipeline parallelism into account.
+    """
+    from vllm.distributed.parallel_state import get_pp_group
+    from vllm.distributed.utils import get_pp_indices
+    start_layer, end_layer = get_pp_indices(num_hidden_layers,
+                                            get_pp_group().rank_in_group,
+                                            get_pp_group().world_size)
+    modules = torch.nn.ModuleList(
+        [PPMissingLayer() for _ in range(start_layer)] + [
+            maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
+            for idx in range(start_layer, end_layer)
+        ] + [PPMissingLayer() for _ in range(end_layer, num_hidden_layers)])
+    return start_layer, end_layer, modules
+
+
+# NOTE: don't use lru_cache here because it can prevent garbage collection
+_model_to_pp_missing_layer_names: Dict[int, List[str]] = {}
+
+
+def get_pp_missing_layer_names(model: torch.nn.Module) -> List[str]:
+    """Get the names of the missing layers in a pipeline parallel model."""
+    model_id = id(model)
+    if model_id in _model_to_pp_missing_layer_names:
+        return _model_to_pp_missing_layer_names[model_id]
+
+    missing_layer_names = []
+    for name, module in model.named_modules():
+        if isinstance(module, PPMissingLayer):
+            # NOTE: the trailing dot is used to match the prefix of the layer.
+            # without the dot, we could match a layer that is not missing,
+            # e.g., 'encoder.layer.1' would match 'encoder.layer.11'
+            missing_layer_names.append(name + '.')
+    _model_to_pp_missing_layer_names[model_id] = missing_layer_names
+
+    return missing_layer_names
+
+
+def is_pp_missing_parameter(name: str, model: torch.nn.Module) -> bool:
+    """Check if a parameter is missing in a pipeline parallel model."""
+    for missing_layer_name in get_pp_missing_layer_names(model):
+        if name.startswith(missing_layer_name):
+            return True
+    return False
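merge_vision_embeddings above scatters the vision encoder's outputs into the text embedding sequence wherever the image placeholder token appears, and raises if the token counts disagree. A toy shape-level check, assuming the utils module above is importable (its top-level imports require vLLM) and using made-up dimensions:

import torch
from utils import merge_vision_embeddings  # the function added above

input_ids = torch.tensor([1, 7, 7, 2, 7, 3])   # token id 7 stands in for the image placeholder
inputs_embeds = torch.zeros(6, 4)              # 6 tokens, hidden size 4
vision_embeddings = torch.ones(1, 3, 4)        # one image producing 3 vision tokens

out = merge_vision_embeddings(input_ids, inputs_embeds, vision_embeddings, image_token_id=7)
print(out[1])  # overwritten with vision features (ones)
print(out[0])  # untouched text position (zeros)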