Spaces:

blanchon
/

HiDream-ai-dev

Paused

App Files Files Community

blanchon committed on Apr 7

Commit

9af0e64

1 Parent(s): 8feabfd

update app.py and deps

Browse files

Files changed (2) hide show

app.py +60 -85
requirements.txt +2 -2

app.py CHANGED Viewed

@@ -5,101 +5,82 @@ from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
 from hi_diffusers.schedulers.flash_flow_match import (
     FlashFlowMatchEulerDiscreteScheduler,
 )
-from hi_diffusers.schedulers.fm_solvers_unipc import FlowUniPCMultistepScheduler
 from transformers import LlamaForCausalLM, PreTrainedTokenizerFast
 # Constants
 MODEL_PREFIX: str = "HiDream-ai"
 LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
-# Model configurations
 MODEL_CONFIGS: dict[str, dict] = {
-    "dev": {
-        "path": f"{MODEL_PREFIX}/HiDream-I1-Dev",
-        "guidance_scale": 0.0,
-        "num_inference_steps": 28,
-        "shift": 6.0,
-        "scheduler": FlashFlowMatchEulerDiscreteScheduler,
-    },
-    "full": {
-        "path": f"{MODEL_PREFIX}/HiDream-I1-Full",
-        "guidance_scale": 5.0,
-        "num_inference_steps": 50,
-        "shift": 3.0,
-        "scheduler": FlowUniPCMultistepScheduler,
-    },
-    "fast": {
-        "path": f"{MODEL_PREFIX}/HiDream-I1-Fast",
-        "guidance_scale": 0.0,
-        "num_inference_steps": 16,
-        "shift": 3.0,
-        "scheduler": FlashFlowMatchEulerDiscreteScheduler,
-    },
 }
 # Supported image sizes
 RESOLUTION_OPTIONS: list[str] = [
-    "1024 × 1024 (Square)",
-    "768 × 1360 (Portrait)",
-    "1360 × 768 (Landscape)",
-    "880 × 1168 (Portrait)",
-    "1168 × 880 (Landscape)",
-    "1248 × 832 (Landscape)",
-    "832 × 1248 (Portrait)",
 ]
-# Model cache
-loaded_models: dict[str, HiDreamImagePipeline] = {}
 def parse_resolution(res_str: str) -> tuple[int, int]:
-    """Parse resolution string like '1024 × 1024' into (1024, 1024)"""
-    return tuple(map(int, res_str.replace("×", "x").replace(" ", "").split("x")))
-def load_models(model_type: str) -> HiDreamImagePipeline:
-    """Load and initialize the HiDream model pipeline for a given model type."""
-    config = MODEL_CONFIGS[model_type]
-    pretrained_model = config["path"]
-    tokenizer = PreTrainedTokenizerFast.from_pretrained(
-        LLAMA_MODEL_NAME, use_fast=False
-    )
-    text_encoder = LlamaForCausalLM.from_pretrained(
-        LLAMA_MODEL_NAME,
-        output_hidden_states=True,
-        output_attentions=True,
-        torch_dtype=torch.bfloat16,
-    ).to("cuda")
-    transformer = HiDreamImageTransformer2DModel.from_pretrained(
-        pretrained_model,
-        subfolder="transformer",
-        torch_dtype=torch.bfloat16,
-    ).to("cuda")
-    scheduler = config["scheduler"](
-        num_train_timesteps=1000,
-        shift=config["shift"],
-        use_dynamic_shifting=False,
-    )
-    pipe = HiDreamImagePipeline.from_pretrained(
-        pretrained_model,
-        scheduler=scheduler,
-        tokenizer_4=tokenizer,
-        text_encoder_4=text_encoder,
-        torch_dtype=torch.bfloat16,
-    ).to("cuda", torch.bfloat16)
-    pipe.transformer = transformer
-    return pipe
-# Preload default model
-print("🔧 Preloading default model (full)...")
-loaded_models["full"] = load_models("full")
-print("✅ Model loaded.")
 @spaces.GPU(duration=90)
@@ -109,12 +90,6 @@ def generate_image(
     resolution: str,
     seed: int,
 ) -> tuple[object, int]:
-    """Generate image using HiDream pipeline."""
-    if model_type not in loaded_models:
-        print(f"📦 Lazy-loading model {model_type}...")
-        loaded_models[model_type] = load_models(model_type)
-    pipe: HiDreamImagePipeline = loaded_models[model_type]
     config = MODEL_CONFIGS[model_type]
     if seed == -1:

 from hi_diffusers.schedulers.flash_flow_match import (
     FlashFlowMatchEulerDiscreteScheduler,
 )
 from transformers import LlamaForCausalLM, PreTrainedTokenizerFast
 # Constants
 MODEL_PREFIX: str = "HiDream-ai"
 LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+MODEL_PATH = "HiDream-ai/HiDream-I1-Dev"
 MODEL_CONFIGS: dict[str, dict] = {
+    "guidance_scale": 0.0,
+    "num_inference_steps": 28,
+    "shift": 6.0,
+    "scheduler": FlashFlowMatchEulerDiscreteScheduler,
 }
+# Model configurations
+# MODEL_CONFIGS: dict[str, dict] = {
+#     "full": {
+#         "path": f"{MODEL_PREFIX}/HiDream-I1-Full",
+#         "guidance_scale": 5.0,
+#         "num_inference_steps": 50,
+#         "shift": 3.0,
+#         "scheduler": FlowUniPCMultistepScheduler,
+#     },
+#     "fast": {
+#         "path": f"{MODEL_PREFIX}/HiDream-I1-Fast",
+#         "guidance_scale": 0.0,
+#         "num_inference_steps": 16,
+#         "shift": 3.0,
+#         "scheduler": FlashFlowMatchEulerDiscreteScheduler,
+#     },
+# }
 # Supported image sizes
 RESOLUTION_OPTIONS: list[str] = [
+    "1024 x 1024 (Square)",
+    "768 x 1360 (Portrait)",
+    "1360 x 768 (Landscape)",
+    "880 x 1168 (Portrait)",
+    "1168 x 880 (Landscape)",
+    "1248 x 832 (Landscape)",
+    "832 x 1248 (Portrait)",
 ]
 def parse_resolution(res_str: str) -> tuple[int, int]:
+    return tuple(map(int, res_str.replace(" ", "").split("x")))
+tokenizer = PreTrainedTokenizerFast.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
+text_encoder = LlamaForCausalLM.from_pretrained(
+    LLAMA_MODEL_NAME,
+    output_hidden_states=True,
+    output_attentions=True,
+    torch_dtype=torch.bfloat16,
+).to("cuda")
+transformer = HiDreamImageTransformer2DModel.from_pretrained(
+    MODEL_PATH,
+    subfolder="transformer",
+    torch_dtype=torch.bfloat16,
+).to("cuda")
+scheduler = MODEL_CONFIGS["scheduler"](
+    num_train_timesteps=1000,
+    shift=MODEL_CONFIGS["shift"],
+    use_dynamic_shifting=False,
+)
+pipe = HiDreamImagePipeline.from_pretrained(
+    MODEL_PATH,
+    scheduler=scheduler,
+    tokenizer_4=tokenizer,
+    text_encoder_4=text_encoder,
+    torch_dtype=torch.bfloat16,
+).to("cuda", torch.bfloat16)
+pipe.transformer = transformer
 @spaces.GPU(duration=90)
     resolution: str,
     seed: int,
 ) -> tuple[object, int]:
     config = MODEL_CONFIGS[model_type]
     if seed == -1:

requirements.txt CHANGED Viewed

@@ -1,10 +1,10 @@
-torch>=2.5.1
 torchvision>=0.20.1
 diffusers>=0.32.1
 transformers>=4.47.1
 accelerate>=1.6.0
 xformers
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 einops>=0.7.0
 gradio>=5.23.3
 spaces>=0.34.1

+torch==2.6.0
 torchvision>=0.20.1
 diffusers>=0.32.1
 transformers>=4.47.1
 accelerate>=1.6.0
 xformers
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 einops>=0.7.0
 gradio>=5.23.3
 spaces>=0.34.1