blanchon committed on
Commit
9af0e64
·
1 Parent(s): 8feabfd

update app.py and deps

Browse files
Files changed (2) hide show
  1. app.py +60 -85
  2. requirements.txt +2 -2
app.py CHANGED
@@ -5,101 +5,82 @@ from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
5
  from hi_diffusers.schedulers.flash_flow_match import (
6
  FlashFlowMatchEulerDiscreteScheduler,
7
  )
8
- from hi_diffusers.schedulers.fm_solvers_unipc import FlowUniPCMultistepScheduler
9
  from transformers import LlamaForCausalLM, PreTrainedTokenizerFast
10
 
11
  # Constants
12
  MODEL_PREFIX: str = "HiDream-ai"
13
  LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
14
-
15
- # Model configurations
16
  MODEL_CONFIGS: dict[str, dict] = {
17
- "dev": {
18
- "path": f"{MODEL_PREFIX}/HiDream-I1-Dev",
19
- "guidance_scale": 0.0,
20
- "num_inference_steps": 28,
21
- "shift": 6.0,
22
- "scheduler": FlashFlowMatchEulerDiscreteScheduler,
23
- },
24
- "full": {
25
- "path": f"{MODEL_PREFIX}/HiDream-I1-Full",
26
- "guidance_scale": 5.0,
27
- "num_inference_steps": 50,
28
- "shift": 3.0,
29
- "scheduler": FlowUniPCMultistepScheduler,
30
- },
31
- "fast": {
32
- "path": f"{MODEL_PREFIX}/HiDream-I1-Fast",
33
- "guidance_scale": 0.0,
34
- "num_inference_steps": 16,
35
- "shift": 3.0,
36
- "scheduler": FlashFlowMatchEulerDiscreteScheduler,
37
- },
38
  }
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # Supported image sizes
41
  RESOLUTION_OPTIONS: list[str] = [
42
- "1024 × 1024 (Square)",
43
- "768 × 1360 (Portrait)",
44
- "1360 × 768 (Landscape)",
45
- "880 × 1168 (Portrait)",
46
- "1168 × 880 (Landscape)",
47
- "1248 × 832 (Landscape)",
48
- "832 × 1248 (Portrait)",
49
  ]
50
 
51
- # Model cache
52
- loaded_models: dict[str, HiDreamImagePipeline] = {}
53
-
54
 
55
  def parse_resolution(res_str: str) -> tuple[int, int]:
56
- """Parse resolution string like '1024 × 1024' into (1024, 1024)"""
57
- return tuple(map(int, res_str.replace("×", "x").replace(" ", "").split("x")))
58
-
59
-
60
- def load_models(model_type: str) -> HiDreamImagePipeline:
61
- """Load and initialize the HiDream model pipeline for a given model type."""
62
- config = MODEL_CONFIGS[model_type]
63
- pretrained_model = config["path"]
64
-
65
- tokenizer = PreTrainedTokenizerFast.from_pretrained(
66
- LLAMA_MODEL_NAME, use_fast=False
67
- )
68
- text_encoder = LlamaForCausalLM.from_pretrained(
69
- LLAMA_MODEL_NAME,
70
- output_hidden_states=True,
71
- output_attentions=True,
72
- torch_dtype=torch.bfloat16,
73
- ).to("cuda")
74
-
75
- transformer = HiDreamImageTransformer2DModel.from_pretrained(
76
- pretrained_model,
77
- subfolder="transformer",
78
- torch_dtype=torch.bfloat16,
79
- ).to("cuda")
80
-
81
- scheduler = config["scheduler"](
82
- num_train_timesteps=1000,
83
- shift=config["shift"],
84
- use_dynamic_shifting=False,
85
- )
86
-
87
- pipe = HiDreamImagePipeline.from_pretrained(
88
- pretrained_model,
89
- scheduler=scheduler,
90
- tokenizer_4=tokenizer,
91
- text_encoder_4=text_encoder,
92
- torch_dtype=torch.bfloat16,
93
- ).to("cuda", torch.bfloat16)
94
-
95
- pipe.transformer = transformer
96
- return pipe
97
 
 
 
 
 
 
 
 
98
 
99
- # Preload default model
100
- print("🔧 Preloading default model (full)...")
101
- loaded_models["full"] = load_models("full")
102
- print("✅ Model loaded.")
103
 
104
 
105
  @spaces.GPU(duration=90)
@@ -109,12 +90,6 @@ def generate_image(
109
  resolution: str,
110
  seed: int,
111
  ) -> tuple[object, int]:
112
- """Generate image using HiDream pipeline."""
113
- if model_type not in loaded_models:
114
- print(f"📦 Lazy-loading model {model_type}...")
115
- loaded_models[model_type] = load_models(model_type)
116
-
117
- pipe: HiDreamImagePipeline = loaded_models[model_type]
118
  config = MODEL_CONFIGS[model_type]
119
 
120
  if seed == -1:
 
5
  from hi_diffusers.schedulers.flash_flow_match import (
6
  FlashFlowMatchEulerDiscreteScheduler,
7
  )
 
8
  from transformers import LlamaForCausalLM, PreTrainedTokenizerFast
9
 
10
  # Constants
11
  MODEL_PREFIX: str = "HiDream-ai"
12
  LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
13
+ MODEL_PATH = "HiDream-ai/HiDream-I1-Dev"
 
14
  MODEL_CONFIGS: dict[str, dict] = {
15
+ "guidance_scale": 0.0,
16
+ "num_inference_steps": 28,
17
+ "shift": 6.0,
18
+ "scheduler": FlashFlowMatchEulerDiscreteScheduler,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
 
21
+ # Model configurations
22
+ # MODEL_CONFIGS: dict[str, dict] = {
23
+ # "full": {
24
+ # "path": f"{MODEL_PREFIX}/HiDream-I1-Full",
25
+ # "guidance_scale": 5.0,
26
+ # "num_inference_steps": 50,
27
+ # "shift": 3.0,
28
+ # "scheduler": FlowUniPCMultistepScheduler,
29
+ # },
30
+ # "fast": {
31
+ # "path": f"{MODEL_PREFIX}/HiDream-I1-Fast",
32
+ # "guidance_scale": 0.0,
33
+ # "num_inference_steps": 16,
34
+ # "shift": 3.0,
35
+ # "scheduler": FlashFlowMatchEulerDiscreteScheduler,
36
+ # },
37
+ # }
38
+
39
  # Supported image sizes
40
  RESOLUTION_OPTIONS: list[str] = [
41
+ "1024 x 1024 (Square)",
42
+ "768 x 1360 (Portrait)",
43
+ "1360 x 768 (Landscape)",
44
+ "880 x 1168 (Portrait)",
45
+ "1168 x 880 (Landscape)",
46
+ "1248 x 832 (Landscape)",
47
+ "832 x 1248 (Portrait)",
48
  ]
49
 
 
 
 
50
 
51
  def parse_resolution(res_str: str) -> tuple[int, int]:
52
+ return tuple(map(int, res_str.replace(" ", "").split("x")))
53
+
54
+
55
+ tokenizer = PreTrainedTokenizerFast.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
56
+ text_encoder = LlamaForCausalLM.from_pretrained(
57
+ LLAMA_MODEL_NAME,
58
+ output_hidden_states=True,
59
+ output_attentions=True,
60
+ torch_dtype=torch.bfloat16,
61
+ ).to("cuda")
62
+
63
+ transformer = HiDreamImageTransformer2DModel.from_pretrained(
64
+ MODEL_PATH,
65
+ subfolder="transformer",
66
+ torch_dtype=torch.bfloat16,
67
+ ).to("cuda")
68
+
69
+ scheduler = MODEL_CONFIGS["scheduler"](
70
+ num_train_timesteps=1000,
71
+ shift=MODEL_CONFIGS["shift"],
72
+ use_dynamic_shifting=False,
73
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ pipe = HiDreamImagePipeline.from_pretrained(
76
+ MODEL_PATH,
77
+ scheduler=scheduler,
78
+ tokenizer_4=tokenizer,
79
+ text_encoder_4=text_encoder,
80
+ torch_dtype=torch.bfloat16,
81
+ ).to("cuda", torch.bfloat16)
82
 
83
+ pipe.transformer = transformer
 
 
 
84
 
85
 
86
  @spaces.GPU(duration=90)
 
90
  resolution: str,
91
  seed: int,
92
  ) -> tuple[object, int]:
 
 
 
 
 
 
93
  config = MODEL_CONFIGS[model_type]
94
 
95
  if seed == -1:
requirements.txt CHANGED
@@ -1,10 +1,10 @@
1
- torch>=2.5.1
2
  torchvision>=0.20.1
3
  diffusers>=0.32.1
4
  transformers>=4.47.1
5
  accelerate>=1.6.0
6
  xformers
7
- https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
8
  einops>=0.7.0
9
  gradio>=5.23.3
10
  spaces>=0.34.1
 
1
+ torch==2.6.0
2
  torchvision>=0.20.1
3
  diffusers>=0.32.1
4
  transformers>=4.47.1
5
  accelerate>=1.6.0
6
  xformers
7
+ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
8
  einops>=0.7.0
9
  gradio>=5.23.3
10
  spaces>=0.34.1