Upload folder using huggingface_hub
- modeling_llama_v2.py +0 -1
- registry.py +16 -19
- utils.py +0 -133
modeling_llama_v2.py
CHANGED
@@ -9,7 +9,6 @@ from transformers.utils import add_start_docstrings_to_model_forward, replace_re
 from transformers.modeling_outputs import CausalLMOutputWithPast
 from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING, _CONFIG_FOR_DOC
 from transformers.models.llama.modeling_llama import LlamaForCausalLM as LlamaForCausalLMOrig
-# from minigpt4_video.models.transformers.src.transformers.models.llama.modeling_llama import LlamaForCausalLM as LlamaForCausalLMOrig
 
 class LlamaForCausalLM(LlamaForCausalLMOrig):
 
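The imports this hunk keeps are the ones a `forward` override typically needs. Below is a minimal sketch of that pattern, assuming the subclass only re-wraps `forward`; the actual body of `modeling_llama_v2.py` is outside this hunk and may differ.

```python
# Illustrative only: shows the pattern the retained imports support, not the
# actual contents of modeling_llama_v2.py.
from transformers.utils import add_start_docstrings_to_model_forward, replace_return_docstrings
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING, _CONFIG_FOR_DOC
from transformers.models.llama.modeling_llama import LlamaForCausalLM as LlamaForCausalLMOrig


class LlamaForCausalLM(LlamaForCausalLMOrig):

    @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
    def forward(self, *args, **kwargs) -> CausalLMOutputWithPast:
        r"""
        Delegates to the stock LLaMA forward; a real override would add its
        custom labels/loss handling here.

        Returns:
        """
        return super().forward(*args, **kwargs)
```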
registry.py
CHANGED
@@ -26,24 +26,21 @@ class Registry:
         Args:
             name: Key with which the task will be registered.
 
-        Usage:
-
-            from minigpt4.common.registry import registry
         """
 
         def wrap(model_cls):
+            from .base_model import BaseModel
 
+            assert issubclass(
+                model_cls, BaseModel
+            ), "All models must inherit BaseModel class"
 
+            if name in cls.mapping["model_name_mapping"]:
+                raise KeyError(
+                    "Name '{}' already registered for {}.".format(
+                        name, cls.mapping["model_name_mapping"][name]
+                    )
+                )
             cls.mapping["model_name_mapping"][name] = model_cls
             return model_cls
 
@@ -58,7 +55,7 @@ class Registry:
 
         Usage:
 
-            from minigpt4.common.registry import registry
+            from .registry import registry
         """
 
         def wrap(processor_cls):
@@ -87,7 +84,7 @@ class Registry:
 
         Usage:
 
-            from minigpt4.common.registry import registry
+            from .registry import registry
         """
 
         def wrap(lr_sched_cls):
@@ -111,7 +108,7 @@ class Registry:
 
         Usage:
 
-            from minigpt4.common.registry import registry
+            from .common.registry import registry
         """
 
         def wrap(runner_cls):
@@ -135,7 +132,7 @@ class Registry:
 
         Usage:
 
-            from minigpt4.common.registry import registry
+            from .registry import registry
         """
         assert isinstance(path, str), "All path must be str."
         if name in cls.mapping["paths"]:
@@ -151,7 +148,7 @@ class Registry:
 
         Usage::
 
-            from minigpt4.common.registry import registry
+            from .registry import registry
 
            registry.register("config", {})
        """
@@ -260,7 +257,7 @@ class Registry:
            name: Key which needs to be removed.
        Usage::
 
-            from minigpt4.common.registry import registry
+            from registry import registry
 
           config = registry.unregister("config")
       """
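As context for the new checks in `register_model`, here is a minimal usage sketch. It assumes the flat package layout the new relative imports imply (a `registry.py` and `base_model.py` side by side); the model name and class below are hypothetical and not taken from this commit.

```python
# Hypothetical usage of the registry after this change; must run inside the
# package, since the imports are now relative.
from .registry import registry
from .base_model import BaseModel


@registry.register_model("llama_v2_video")  # hypothetical key
class MyVideoLM(BaseModel):
    # Must inherit BaseModel: register_model now asserts
    # issubclass(model_cls, BaseModel) before adding it to the mapping.
    pass


# Registering a different class under the same key now fails loudly with
# KeyError: "Name 'llama_v2_video' already registered for <class '...MyVideoLM'>".
```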
utils.py
CHANGED
@@ -468,136 +468,3 @@ def merge_vision_embeddings(input_ids: torch.Tensor,
     inputs_embeds[mask] = torch.cat(vision_embeddings)
 
     return inputs_embeds
-
-
-class LayerFn(Protocol):
-
-    def __call__(
-        self,
-        prefix="",
-    ) -> torch.nn.Module:
-        ...
-
-
-class PPMissingLayer(torch.nn.Identity):
-    """
-    A placeholder layer for missing layers in a pipeline parallel model.
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__()
-
-
-_CPU_OFFLOAD_BYTES = 0
-_CPU_OFFLOAD_MAX_BYTES = 0
-
-
-def set_cpu_offload_max_bytes(max_bytes: int) -> None:
-    global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
-    _CPU_OFFLOAD_BYTES = 0
-    _CPU_OFFLOAD_MAX_BYTES = max_bytes
-
-
-def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
-    device = next(module.parameters()).device
-
-    if device == torch.device("cpu"):
-        return module
-
-    global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
-    if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
-        return module
-
-    pin_memory = is_pin_memory_available()
-
-    # offload parameters to CPU
-    # use pin_memory if possible, which helps cudagraph capture speed
-    for p in module.parameters():
-        if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
-            # we use per-parameter offloading
-            # one module might have some parameters offloaded and some not
-            break
-
-        # `torch.empty_like` does not support `pin_memory` argument
-        cpu_data = torch.empty(size=p.data.size(),
-                               dtype=p.data.dtype,
-                               layout=p.data.layout,
-                               device='cpu',
-                               pin_memory=pin_memory)
-        cpu_data.copy_(p.data)
-        p.data = cpu_data
-        _CPU_OFFLOAD_BYTES += p.data.numel() * p.data.element_size()
-
-    state_dict: Dict[str, torch.Tensor] = module.state_dict()
-
-    original_forward = module.forward
-
-    def forward(*args, **kwargs):
-        module.forward = original_forward
-        device_state = {
-            # here we blindly call `to(device)`
-            # if the parameter is already on the device, it will be a no-op
-            k: v.to(device, non_blocking=True)
-            for k, v in state_dict.items()
-        }
-        output = functional_call(module,
-                                 device_state,
-                                 args=args,
-                                 kwargs=kwargs)
-        module.forward = forward
-        return output
-
-    module.forward = forward
-
-    return module
-
-
-def make_layers(
-    num_hidden_layers: int,
-    layer_fn: LayerFn,
-    prefix: str,
-) -> Tuple[int, int, torch.nn.ModuleList]:
-    """Make a list of layers with the given layer function, taking
-    pipeline parallelism into account.
-    """
-    from vllm.distributed.parallel_state import get_pp_group
-    from vllm.distributed.utils import get_pp_indices
-    start_layer, end_layer = get_pp_indices(num_hidden_layers,
-                                            get_pp_group().rank_in_group,
-                                            get_pp_group().world_size)
-    modules = torch.nn.ModuleList(
-        [PPMissingLayer() for _ in range(start_layer)] + [
-            maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
-            for idx in range(start_layer, end_layer)
-        ] + [PPMissingLayer() for _ in range(end_layer, num_hidden_layers)])
-    return start_layer, end_layer, modules
-
-
-# NOTE: don't use lru_cache here because it can prevent garbage collection
-_model_to_pp_missing_layer_names: Dict[int, List[str]] = {}
-
-
-def get_pp_missing_layer_names(model: torch.nn.Module) -> List[str]:
-    """Get the names of the missing layers in a pipeline parallel model."""
-    model_id = id(model)
-    if model_id in _model_to_pp_missing_layer_names:
-        return _model_to_pp_missing_layer_names[model_id]
-
-    missing_layer_names = []
-    for name, module in model.named_modules():
-        if isinstance(module, PPMissingLayer):
-            # NOTE: the trailing dot is used to match the prefix of the layer.
-            # without the dot, we could match a layer that is not missing,
-            # e.g., 'encoder.layer.1' would match 'encoder.layer.11'
-            missing_layer_names.append(name + '.')
-    _model_to_pp_missing_layer_names[model_id] = missing_layer_names
-
-    return missing_layer_names
-
-
-def is_pp_missing_parameter(name: str, model: torch.nn.Module) -> bool:
-    """Check if a parameter is missing in a pipeline parallel model."""
-    for missing_layer_name in get_pp_missing_layer_names(model):
-        if name.startswith(missing_layer_name):
-            return True
-    return False
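The removed block is vLLM-style pipeline-parallel and CPU-offload helper code (`PPMissingLayer`, `make_layers`, `maybe_offload_to_cpu`, and friends). For reference, a minimal sketch of how the deleted offload helpers were meant to be combined; the `Linear` stand-in and the 1 GiB budget are illustrative assumptions, and the two helper names refer to the functions deleted above rather than to anything this commit adds.

```python
import torch

# Illustrative only: cap CPU offloading at ~1 GiB, then wrap layers as they are
# built. set_cpu_offload_max_bytes / maybe_offload_to_cpu are the helpers that
# this commit removes from utils.py.
set_cpu_offload_max_bytes(1 << 30)

block = torch.nn.Linear(4096, 4096).cuda()  # stand-in for a transformer block
block = maybe_offload_to_cpu(block)         # parameters move to (pinned) CPU memory

x = torch.randn(2, 4096, device="cuda")
y = block(x)  # the wrapped forward copies offloaded weights back to the GPU per call
```

The wrapper works by swapping `module.forward` for a closure that temporarily restores the original forward and runs it through `torch.func.functional_call` with a state dict copied back to the original device, so offloaded and resident parameters can coexist within one module.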