Spaces:

TencentARC
/

SEED-Story

Runtime error

App Files Community

Andyson committed on Aug 26, 2024

Commit

0447610

1 Parent(s): 46062ae

huggingface hub

Browse files

Files changed (13) hide show

app.py +1 -1
configs/clm_models/agent_7b_sft.yaml +1 -1
configs/detokenizer/detokenizer_sdxl_qwen_vit_adapted.yaml +2 -1
configs/tokenizer/clm_llama_tokenizer.yaml +2 -1
pretrained/cvlm_llama2_tokenizer/added_tokens.json +0 -68
pretrained/cvlm_llama2_tokenizer/special_tokens_map.json +0 -40
pretrained/cvlm_llama2_tokenizer/tokenizer.model +0 -3
pretrained/cvlm_llama2_tokenizer/tokenizer_config.json +0 -573
pretrained/detokenizer/detokenizer_george_adapted/checkpoint-4000/pytorch_model.bin +0 -3
pretrained/seed_story/george_sft/pytorch_model.bin +0 -3
src/models/qwen_visual.py +14 -6
src/models_clm/models.py +15 -3
src/models_ipa/adapter_modules.py +16 -5

app.py CHANGED Viewed

@@ -73,7 +73,7 @@ class Arguments:
         default='configs/detokenizer/detokenizer_sdxl_qwen_vit_adapted.yaml',
         metadata={"help": "config path of sd adapter"})
     agent: Optional[str] = field(default='configs/clm_models/agent_7b_sft.yaml',
-                                 metadata={"help": "config path of agent model"})
     diffusion_path: Optional[str] = field(default='stabilityai/stable-diffusion-xl-base-1.0',
                                           metadata={"help": "diffusion model path"})
     port: Optional[str] = field(default=80, metadata={"help": "network port"})

         default='configs/detokenizer/detokenizer_sdxl_qwen_vit_adapted.yaml',
         metadata={"help": "config path of sd adapter"})
     agent: Optional[str] = field(default='configs/clm_models/agent_7b_sft.yaml',
+                                 metadata={"help": "Hugging Face model path of agent model"})
     diffusion_path: Optional[str] = field(default='stabilityai/stable-diffusion-xl-base-1.0',
                                           metadata={"help": "diffusion model path"})
     port: Optional[str] = field(default=80, metadata={"help": "network port"})

configs/clm_models/agent_7b_sft.yaml CHANGED Viewed

@@ -15,4 +15,4 @@ output_resampler:
 lm_loss_scale: 1.0
 rec_loss_scale: 1.0
-pretrained_model_path: pretrained/seed_story/george_sft/pytorch_model.bin

 lm_loss_scale: 1.0
 rec_loss_scale: 1.0
+pretrained_model_path: TencentARC/SEED-Story

configs/detokenizer/detokenizer_sdxl_qwen_vit_adapted.yaml CHANGED Viewed

@@ -12,4 +12,5 @@ resampler:
   output2_dim: 1280
   ff_mult: 4
-pretrained_model_path: pretrained/detokenizer/detokenizer_george_adapted/checkpoint-4000/pytorch_model.bin

   output2_dim: 1280
   ff_mult: 4
+pretrained_model_path: TencentARC/SEED-Story
+subfolder: detokenizer/detokenizer_george_adapted/checkpoint-4000

configs/tokenizer/clm_llama_tokenizer.yaml CHANGED Viewed

@@ -1,2 +1,3 @@
 _target_: transformers.LlamaTokenizer.from_pretrained
-pretrained_model_name_or_path: pretrained/cvlm_llama2_tokenizer

 _target_: transformers.LlamaTokenizer.from_pretrained
+pretrained_model_name_or_path: TencentARC/SEED-Story
+subfolder: cvlm_llama2_tokenizer

pretrained/cvlm_llama2_tokenizer/added_tokens.json DELETED Viewed

@@ -1,68 +0,0 @@
-{
-  "</img>": 32065,
-  "<img>": 32064,
-  "<img_00000>": 32000,
-  "<img_00001>": 32001,
-  "<img_00002>": 32002,
-  "<img_00003>": 32003,
-  "<img_00004>": 32004,
-  "<img_00005>": 32005,
-  "<img_00006>": 32006,
-  "<img_00007>": 32007,
-  "<img_00008>": 32008,
-  "<img_00009>": 32009,
-  "<img_00010>": 32010,
-  "<img_00011>": 32011,
-  "<img_00012>": 32012,
-  "<img_00013>": 32013,
-  "<img_00014>": 32014,
-  "<img_00015>": 32015,
-  "<img_00016>": 32016,
-  "<img_00017>": 32017,
-  "<img_00018>": 32018,
-  "<img_00019>": 32019,
-  "<img_00020>": 32020,
-  "<img_00021>": 32021,
-  "<img_00022>": 32022,
-  "<img_00023>": 32023,
-  "<img_00024>": 32024,
-  "<img_00025>": 32025,
-  "<img_00026>": 32026,
-  "<img_00027>": 32027,
-  "<img_00028>": 32028,
-  "<img_00029>": 32029,
-  "<img_00030>": 32030,
-  "<img_00031>": 32031,
-  "<img_00032>": 32032,
-  "<img_00033>": 32033,
-  "<img_00034>": 32034,
-  "<img_00035>": 32035,
-  "<img_00036>": 32036,
-  "<img_00037>": 32037,
-  "<img_00038>": 32038,
-  "<img_00039>": 32039,
-  "<img_00040>": 32040,
-  "<img_00041>": 32041,
-  "<img_00042>": 32042,
-  "<img_00043>": 32043,
-  "<img_00044>": 32044,
-  "<img_00045>": 32045,
-  "<img_00046>": 32046,
-  "<img_00047>": 32047,
-  "<img_00048>": 32048,
-  "<img_00049>": 32049,
-  "<img_00050>": 32050,
-  "<img_00051>": 32051,
-  "<img_00052>": 32052,
-  "<img_00053>": 32053,
-  "<img_00054>": 32054,
-  "<img_00055>": 32055,
-  "<img_00056>": 32056,
-  "<img_00057>": 32057,
-  "<img_00058>": 32058,
-  "<img_00059>": 32059,
-  "<img_00060>": 32060,
-  "<img_00061>": 32061,
-  "<img_00062>": 32062,
-  "<img_00063>": 32063
-}

pretrained/cvlm_llama2_tokenizer/special_tokens_map.json DELETED Viewed

@@ -1,40 +0,0 @@
-{
-  "additional_special_tokens": [
-    {
-      "content": "<img>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false
-    },
-    {
-      "content": "</img>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false
-    }
-  ],
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<unk>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}

pretrained/cvlm_llama2_tokenizer/tokenizer.model DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
-size 499723

pretrained/cvlm_llama2_tokenizer/tokenizer_config.json DELETED Viewed

@@ -1,573 +0,0 @@
-{
-  "add_bos_token": true,
-  "add_eos_token": false,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32000": {
-      "content": "<img_00000>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32001": {
-      "content": "<img_00001>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32002": {
-      "content": "<img_00002>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32003": {
-      "content": "<img_00003>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32004": {
-      "content": "<img_00004>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32005": {
-      "content": "<img_00005>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32006": {
-      "content": "<img_00006>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32007": {
-      "content": "<img_00007>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32008": {
-      "content": "<img_00008>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32009": {
-      "content": "<img_00009>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32010": {
-      "content": "<img_00010>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32011": {
-      "content": "<img_00011>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32012": {
-      "content": "<img_00012>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32013": {
-      "content": "<img_00013>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32014": {
-      "content": "<img_00014>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32015": {
-      "content": "<img_00015>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32016": {
-      "content": "<img_00016>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32017": {
-      "content": "<img_00017>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32018": {
-      "content": "<img_00018>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32019": {
-      "content": "<img_00019>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32020": {
-      "content": "<img_00020>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32021": {
-      "content": "<img_00021>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32022": {
-      "content": "<img_00022>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32023": {
-      "content": "<img_00023>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32024": {
-      "content": "<img_00024>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32025": {
-      "content": "<img_00025>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32026": {
-      "content": "<img_00026>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32027": {
-      "content": "<img_00027>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32028": {
-      "content": "<img_00028>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32029": {
-      "content": "<img_00029>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32030": {
-      "content": "<img_00030>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32031": {
-      "content": "<img_00031>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32032": {
-      "content": "<img_00032>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32033": {
-      "content": "<img_00033>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32034": {
-      "content": "<img_00034>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32035": {
-      "content": "<img_00035>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32036": {
-      "content": "<img_00036>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32037": {
-      "content": "<img_00037>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32038": {
-      "content": "<img_00038>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32039": {
-      "content": "<img_00039>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32040": {
-      "content": "<img_00040>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32041": {
-      "content": "<img_00041>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32042": {
-      "content": "<img_00042>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32043": {
-      "content": "<img_00043>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32044": {
-      "content": "<img_00044>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32045": {
-      "content": "<img_00045>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32046": {
-      "content": "<img_00046>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32047": {
-      "content": "<img_00047>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32048": {
-      "content": "<img_00048>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32049": {
-      "content": "<img_00049>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32050": {
-      "content": "<img_00050>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32051": {
-      "content": "<img_00051>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32052": {
-      "content": "<img_00052>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32053": {
-      "content": "<img_00053>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32054": {
-      "content": "<img_00054>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32055": {
-      "content": "<img_00055>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32056": {
-      "content": "<img_00056>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32057": {
-      "content": "<img_00057>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32058": {
-      "content": "<img_00058>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32059": {
-      "content": "<img_00059>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32060": {
-      "content": "<img_00060>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32061": {
-      "content": "<img_00061>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32062": {
-      "content": "<img_00062>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32063": {
-      "content": "<img_00063>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "32064": {
-      "content": "<img>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32065": {
-      "content": "</img>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": [
-    "<img>",
-    "</img>"
-  ],
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
-  "legacy": false,
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<unk>",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false
-}

pretrained/detokenizer/detokenizer_george_adapted/checkpoint-4000/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:785d4e670ccfdce33b493d0aada60ee5c116918468098b2ed82ae2c28f31e423
-size 6471628187

pretrained/seed_story/george_sft/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c7e46794a2aab38f3f59484a4f4bb4c839217ef17c4329977b0a11839f462b94
-size 14709979626

src/models/qwen_visual.py CHANGED Viewed

@@ -411,13 +411,21 @@ class VisionTransformerWithAttnPool(nn.Module):
         return self(images)
     @classmethod
-    def from_pretrained(cls, pretrained_model_path=None, **kawrgs):
-        model = cls(**kawrgs)
         if pretrained_model_path is not None:
-            ckpt = torch.load(pretrained_model_path, map_location='cpu')
-            missing, unexpected = model.load_state_dict(ckpt, strict=False)
-            print('Load ckpt of qwen visual encoder')
-            print('missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
         return model

         return self(images)
     @classmethod
+    def from_pretrained(cls, pretrained_model_path=None, subfolder=None, **kwargs):
+        model = cls(**kwargs)
         if pretrained_model_path is not None:
+            # Load model from Hugging Face Hub with subfolder specification
+            if 'TencentARC/SEED-Story' in pretrained_model_path:
+                # Use `subfolder` to specify the location within the repository
+                ckpt = AutoModel.from_pretrained(pretrained_model_path, subfolder=subfolder)
+                missing, unexpected = model.load_state_dict(ckpt.state_dict(), strict=False)
+                print('Detokenizer model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
+            else:
+                # For local path loading
+                ckpt = torch.load(pretrained_model_path, map_location='cpu')
+                missing, unexpected = model.load_state_dict(ckpt, strict=False)
+                print('Detokenizer model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
         return model

src/models_clm/models.py CHANGED Viewed

@@ -2,6 +2,7 @@ import torch
 import torch.nn as nn
 from transformers import LlamaForCausalLM, LlamaConfig
 from transformers import LogitsProcessor, LogitsProcessorList
 from .generation import AutoImageTokenGenerationProcessor
 import torch.nn.functional as F
@@ -220,13 +221,24 @@ class ContinuousLVLM(nn.Module):
             'past_key_values': output_past_key_values
         }
     @classmethod
     def from_pretrained(cls, llm, input_resampler, output_resampler, pretrained_model_path=None, **kwargs):
         model = cls(llm=llm, input_resampler=input_resampler, output_resampler=output_resampler, **kwargs)
         if pretrained_model_path is not None:
-            ckpt = torch.load(pretrained_model_path, map_location='cpu')
-            missing, unexpected = model.load_state_dict(ckpt, strict=False)
-            print('agent model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
         return model

 import torch.nn as nn
 from transformers import LlamaForCausalLM, LlamaConfig
 from transformers import LogitsProcessor, LogitsProcessorList
+from transformers import AutoModel
 from .generation import AutoImageTokenGenerationProcessor
 import torch.nn.functional as F
             'past_key_values': output_past_key_values
         }
     @classmethod
     def from_pretrained(cls, llm, input_resampler, output_resampler, pretrained_model_path=None, **kwargs):
         model = cls(llm=llm, input_resampler=input_resampler, output_resampler=output_resampler, **kwargs)
         if pretrained_model_path is not None:
+            # Check if the path is intended for Hugging Face Hub
+            if 'TencentARC/SEED-Story' in pretrained_model_path:
+                # Load from a specific subfolder within the Hugging Face repository
+                ckpt = AutoModel.from_pretrained(pretrained_model_path, subfolder="seed_story/george_sft")
+                missing, unexpected = model.load_state_dict(ckpt.state_dict(), strict=False)
+                print('Agent model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
+            else:
+                # For local path loading
+                ckpt = torch.load(pretrained_model_path, map_location='cpu')
+                missing, unexpected = model.load_state_dict(ckpt, strict=False)
+                print('Agent model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
         return model

src/models_ipa/adapter_modules.py CHANGED Viewed

@@ -20,6 +20,7 @@ else:
 from diffusers.loaders import LoraLoaderMixin
 from diffusers.models.lora import LoRALinearLayer
 from diffusers.models.unet_2d_blocks import DownBlock2D
 # from .pipeline_stable_diffusion_xl_t2i_edit import StableDiffusionXLText2ImageAndEditPipeline
@@ -348,12 +349,22 @@ class SDXLAdapter(nn.Module):
         return image_embeds, pooled_image_embeds
     @classmethod
-    def from_pretrained(cls, unet, resampler, pretrained_model_path=None, **kwargs):
-        model = cls(unet=unet, resampler=resampler, **kwargs)
         if pretrained_model_path is not None:
-            ckpt = torch.load(pretrained_model_path, map_location='cpu')
-            missing, unexpected = model.load_state_dict(ckpt, strict=False)
-            print('missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
         return model
     def init_pipe(self,

 from diffusers.loaders import LoraLoaderMixin
 from diffusers.models.lora import LoRALinearLayer
 from diffusers.models.unet_2d_blocks import DownBlock2D
+from transformers import AutoModel
 # from .pipeline_stable_diffusion_xl_t2i_edit import StableDiffusionXLText2ImageAndEditPipeline
         return image_embeds, pooled_image_embeds
     @classmethod
+    def from_pretrained(cls, pretrained_model_path=None, subfolder=None, **kwargs):
+        model = cls(**kwargs)
         if pretrained_model_path is not None:
+            # Load model from Hugging Face Hub with subfolder specification
+            if 'TencentARC/SEED-Story' in pretrained_model_path:
+                # Use `subfolder` to specify the location within the repository
+                ckpt = AutoModel.from_pretrained(pretrained_model_path, subfolder=subfolder)
+                missing, unexpected = model.load_state_dict(ckpt.state_dict(), strict=False)
+                print('Detokenizer model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
+            else:
+                # For local path loading
+                ckpt = torch.load(pretrained_model_path, map_location='cpu')
+                missing, unexpected = model.load_state_dict(ckpt, strict=False)
+                print('Detokenizer model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
         return model
     def init_pipe(self,