Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ pipe = OmniGenPipeline.from_pretrained(
|
|
| 10 |
"Shitao/OmniGen-v1"
|
| 11 |
)
|
| 12 |
|
| 13 |
-
@spaces.GPU(duration=
|
| 14 |
def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer, offload_model,
|
| 15 |
use_input_image_size_as_output, max_input_image_size, randomize_seed):
|
| 16 |
input_images = [img1, img2, img3]
|
|
@@ -57,6 +57,7 @@ def get_example():
|
|
| 57 |
0,
|
| 58 |
1024,
|
| 59 |
False,
|
|
|
|
| 60 |
],
|
| 61 |
[
|
| 62 |
"The woman in <img><|image_1|></img> waves her hand happily in the crowd",
|
|
@@ -70,6 +71,7 @@ def get_example():
|
|
| 70 |
128,
|
| 71 |
1024,
|
| 72 |
False,
|
|
|
|
| 73 |
],
|
| 74 |
[
|
| 75 |
"A man in a black shirt is reading a book. The man is the right man in <img><|image_1|></img>.",
|
|
@@ -83,9 +85,10 @@ def get_example():
|
|
| 83 |
0,
|
| 84 |
1024,
|
| 85 |
False,
|
|
|
|
| 86 |
],
|
| 87 |
[
|
| 88 |
-
"Two woman are raising fried chicken legs in a bar. A woman is <img><|image_1|></img>.
|
| 89 |
"./imgs/test_cases/mckenna.jpg",
|
| 90 |
"./imgs/test_cases/Amanda.jpg",
|
| 91 |
None,
|
|
@@ -93,9 +96,10 @@ def get_example():
|
|
| 93 |
1024,
|
| 94 |
2.5,
|
| 95 |
1.8,
|
| 96 |
-
|
| 97 |
1024,
|
| 98 |
False,
|
|
|
|
| 99 |
],
|
| 100 |
[
|
| 101 |
"A man and a short-haired woman with a wrinkled face are standing in front of a bookshelf in a library. The man is the man in the middle of <img><|image_1|></img>, and the woman is oldest woman in <img><|image_2|></img>",
|
|
@@ -109,6 +113,7 @@ def get_example():
|
|
| 109 |
60,
|
| 110 |
1024,
|
| 111 |
False,
|
|
|
|
| 112 |
],
|
| 113 |
[
|
| 114 |
"A man and a woman are sitting at a classroom desk. The man is the man with yellow hair in <img><|image_1|></img>. The woman is the woman on the left of <img><|image_2|></img>",
|
|
@@ -122,9 +127,10 @@ def get_example():
|
|
| 122 |
66,
|
| 123 |
1024,
|
| 124 |
False,
|
|
|
|
| 125 |
],
|
| 126 |
[
|
| 127 |
-
"The flower <img><|image_1
|
| 128 |
"./imgs/test_cases/rose.jpg",
|
| 129 |
"./imgs/test_cases/vase.jpg",
|
| 130 |
None,
|
|
@@ -135,6 +141,7 @@ def get_example():
|
|
| 135 |
0,
|
| 136 |
1024,
|
| 137 |
False,
|
|
|
|
| 138 |
],
|
| 139 |
[
|
| 140 |
"<img><|image_1|><img>\n Remove the woman's earrings. Replace the mug with a clear glass filled with sparkling iced cola.",
|
|
@@ -148,71 +155,77 @@ def get_example():
|
|
| 148 |
222,
|
| 149 |
1024,
|
| 150 |
False,
|
|
|
|
| 151 |
],
|
| 152 |
[
|
| 153 |
"Detect the skeleton of human in this image: <img><|image_1|></img>.",
|
| 154 |
"./imgs/test_cases/control.jpg",
|
| 155 |
None,
|
| 156 |
None,
|
| 157 |
-
|
| 158 |
-
|
| 159 |
2.0,
|
| 160 |
1.6,
|
| 161 |
0,
|
| 162 |
1024,
|
| 163 |
False,
|
|
|
|
| 164 |
],
|
| 165 |
[
|
| 166 |
"Generate a new photo using the following picture and text as conditions: <img><|image_1|><img>\n A young boy is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
|
| 167 |
"./imgs/demo_cases/skeletal.png",
|
| 168 |
None,
|
| 169 |
None,
|
| 170 |
-
|
| 171 |
-
|
| 172 |
2,
|
| 173 |
1.6,
|
| 174 |
-
|
| 175 |
1024,
|
| 176 |
False,
|
|
|
|
| 177 |
],
|
| 178 |
[
|
| 179 |
"Following the pose of this image <img><|image_1|><img>, generate a new photo: A young boy is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
|
| 180 |
"./imgs/demo_cases/edit.png",
|
| 181 |
None,
|
| 182 |
None,
|
| 183 |
-
|
| 184 |
-
|
| 185 |
2.0,
|
| 186 |
1.6,
|
| 187 |
123,
|
| 188 |
1024,
|
| 189 |
False,
|
|
|
|
| 190 |
],
|
| 191 |
[
|
| 192 |
"Following the depth mapping of this image <img><|image_1|><img>, generate a new photo: A young girl is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
|
| 193 |
"./imgs/demo_cases/edit.png",
|
| 194 |
None,
|
| 195 |
None,
|
| 196 |
-
|
| 197 |
-
|
| 198 |
2.0,
|
| 199 |
1.6,
|
| 200 |
1,
|
| 201 |
1024,
|
| 202 |
False,
|
|
|
|
| 203 |
],
|
| 204 |
[
|
| 205 |
-
"<img><|image_1|><\/img> What item can be used to see the current time? Please
|
| 206 |
"./imgs/test_cases/watch.jpg",
|
| 207 |
None,
|
| 208 |
None,
|
| 209 |
-
|
| 210 |
-
|
| 211 |
2.5,
|
| 212 |
1.6,
|
| 213 |
-
|
| 214 |
1024,
|
| 215 |
False,
|
|
|
|
| 216 |
],
|
| 217 |
[
|
| 218 |
"According to the following examples, generate an output for the input.\nInput: <img><|image_1|></img>\nOutput: <img><|image_2|></img>\n\nInput: <img><|image_3|></img>\nOutput: ",
|
|
@@ -226,16 +239,16 @@ def get_example():
|
|
| 226 |
1,
|
| 227 |
768,
|
| 228 |
False,
|
|
|
|
| 229 |
],
|
| 230 |
]
|
| 231 |
return case
|
| 232 |
|
| 233 |
-
def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, seed, max_input_image_size, randomize_seed):
|
| 234 |
# Set default values inside the function
|
| 235 |
inference_steps = 50
|
| 236 |
separate_cfg_infer = True
|
| 237 |
offload_model = False
|
| 238 |
-
use_input_image_size_as_output = False
|
| 239 |
|
| 240 |
return generate_image(
|
| 241 |
text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale,
|
|
@@ -248,7 +261,6 @@ OmniGen is a unified image generation model that you can use to perform various
|
|
| 248 |
For multi-modal to image generation, you should pass a string as `prompt`, and a list of image paths as `input_images`. The placeholder in the prompt should be in the format of `<img><|image_*|></img>` (for the first image, the placeholder is <img><|image_1|></img>; for the second image, the placeholder is <img><|image_2|></img>).
|
| 249 |
For example, use an image of a woman to generate a new image:
|
| 250 |
prompt = "A woman holds a bouquet of flowers and faces the camera. The woman is \<img\>\<|image_1|\>\</img\>."
|
| 251 |
-
|
| 252 |
Tips:
|
| 253 |
- For image editing task and controlnet task, we recommend setting the height and width of output image as the same as input image. For example, if you want to edit a 512x512 image, you should set the height and width of output image as 512x512. You also can set the `use_input_image_size_as_output` to automatically set the height and width of output image as the same as input image.
|
| 254 |
- For out-of-memory or time cost, you can set `offload_model=True` or refer to [./docs/inference.md#requiremented-resources](https://github.com/VectorSpaceLab/OmniGen/blob/main/docs/inference.md#requiremented-resources) to select an appropriate setting.
|
|
@@ -258,10 +270,7 @@ Tips:
|
|
| 258 |
- Animate Style: If the generated images are in animate style, you can try to add `photo` to the prompt.
|
| 259 |
- Edit generated image. If you generate an image by omnigen and then want to edit it, you cannot use the same seed to edit this image. For example, use seed=0 to generate image, and should use seed=1 to edit this image.
|
| 260 |
- For image editing tasks, we recommend placing the image before the editing instruction. For example, use `<img><|image_1|></img> remove suit`, rather than `remove suit <img><|image_1|></img>`.
|
| 261 |
-
|
| 262 |
-
|
| 263 |
**HF Spaces often encounter errors due to quota limitations, so we recommend running it locally.**
|
| 264 |
-
|
| 265 |
"""
|
| 266 |
|
| 267 |
article = """
|
|
@@ -385,6 +394,7 @@ with gr.Blocks() as demo:
|
|
| 385 |
seed_input,
|
| 386 |
max_input_image_size,
|
| 387 |
randomize_seed,
|
|
|
|
| 388 |
],
|
| 389 |
outputs=output_image,
|
| 390 |
)
|
|
@@ -393,3 +403,4 @@ with gr.Blocks() as demo:
|
|
| 393 |
|
| 394 |
# launch
|
| 395 |
demo.launch()
|
|
|
|
|
|
| 10 |
"Shitao/OmniGen-v1"
|
| 11 |
)
|
| 12 |
|
| 13 |
+
@spaces.GPU(duration=180)
|
| 14 |
def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer, offload_model,
|
| 15 |
use_input_image_size_as_output, max_input_image_size, randomize_seed):
|
| 16 |
input_images = [img1, img2, img3]
|
|
|
|
| 57 |
0,
|
| 58 |
1024,
|
| 59 |
False,
|
| 60 |
+
False,
|
| 61 |
],
|
| 62 |
[
|
| 63 |
"The woman in <img><|image_1|></img> waves her hand happily in the crowd",
|
|
|
|
| 71 |
128,
|
| 72 |
1024,
|
| 73 |
False,
|
| 74 |
+
False,
|
| 75 |
],
|
| 76 |
[
|
| 77 |
"A man in a black shirt is reading a book. The man is the right man in <img><|image_1|></img>.",
|
|
|
|
| 85 |
0,
|
| 86 |
1024,
|
| 87 |
False,
|
| 88 |
+
False,
|
| 89 |
],
|
| 90 |
[
|
| 91 |
+
"Two woman are raising fried chicken legs in a bar. A woman is <img><|image_1|></img>. Another woman is <img><|image_2|></img>.",
|
| 92 |
"./imgs/test_cases/mckenna.jpg",
|
| 93 |
"./imgs/test_cases/Amanda.jpg",
|
| 94 |
None,
|
|
|
|
| 96 |
1024,
|
| 97 |
2.5,
|
| 98 |
1.8,
|
| 99 |
+
65,
|
| 100 |
1024,
|
| 101 |
False,
|
| 102 |
+
False,
|
| 103 |
],
|
| 104 |
[
|
| 105 |
"A man and a short-haired woman with a wrinkled face are standing in front of a bookshelf in a library. The man is the man in the middle of <img><|image_1|></img>, and the woman is oldest woman in <img><|image_2|></img>",
|
|
|
|
| 113 |
60,
|
| 114 |
1024,
|
| 115 |
False,
|
| 116 |
+
False,
|
| 117 |
],
|
| 118 |
[
|
| 119 |
"A man and a woman are sitting at a classroom desk. The man is the man with yellow hair in <img><|image_1|></img>. The woman is the woman on the left of <img><|image_2|></img>",
|
|
|
|
| 127 |
66,
|
| 128 |
1024,
|
| 129 |
False,
|
| 130 |
+
False,
|
| 131 |
],
|
| 132 |
[
|
| 133 |
+
"The flower <img><|image_1|></img> is placed in the vase which is in the middle of <img><|image_2|></img> on a wooden table of a living room",
|
| 134 |
"./imgs/test_cases/rose.jpg",
|
| 135 |
"./imgs/test_cases/vase.jpg",
|
| 136 |
None,
|
|
|
|
| 141 |
0,
|
| 142 |
1024,
|
| 143 |
False,
|
| 144 |
+
False,
|
| 145 |
],
|
| 146 |
[
|
| 147 |
"<img><|image_1|><img>\n Remove the woman's earrings. Replace the mug with a clear glass filled with sparkling iced cola.",
|
|
|
|
| 155 |
222,
|
| 156 |
1024,
|
| 157 |
False,
|
| 158 |
+
True,
|
| 159 |
],
|
| 160 |
[
|
| 161 |
"Detect the skeleton of human in this image: <img><|image_1|></img>.",
|
| 162 |
"./imgs/test_cases/control.jpg",
|
| 163 |
None,
|
| 164 |
None,
|
| 165 |
+
1024,
|
| 166 |
+
1024,
|
| 167 |
2.0,
|
| 168 |
1.6,
|
| 169 |
0,
|
| 170 |
1024,
|
| 171 |
False,
|
| 172 |
+
True,
|
| 173 |
],
|
| 174 |
[
|
| 175 |
"Generate a new photo using the following picture and text as conditions: <img><|image_1|><img>\n A young boy is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
|
| 176 |
"./imgs/demo_cases/skeletal.png",
|
| 177 |
None,
|
| 178 |
None,
|
| 179 |
+
1024,
|
| 180 |
+
1024,
|
| 181 |
2,
|
| 182 |
1.6,
|
| 183 |
+
999,
|
| 184 |
1024,
|
| 185 |
False,
|
| 186 |
+
True,
|
| 187 |
],
|
| 188 |
[
|
| 189 |
"Following the pose of this image <img><|image_1|><img>, generate a new photo: A young boy is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
|
| 190 |
"./imgs/demo_cases/edit.png",
|
| 191 |
None,
|
| 192 |
None,
|
| 193 |
+
1024,
|
| 194 |
+
1024,
|
| 195 |
2.0,
|
| 196 |
1.6,
|
| 197 |
123,
|
| 198 |
1024,
|
| 199 |
False,
|
| 200 |
+
True,
|
| 201 |
],
|
| 202 |
[
|
| 203 |
"Following the depth mapping of this image <img><|image_1|><img>, generate a new photo: A young girl is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
|
| 204 |
"./imgs/demo_cases/edit.png",
|
| 205 |
None,
|
| 206 |
None,
|
| 207 |
+
1024,
|
| 208 |
+
1024,
|
| 209 |
2.0,
|
| 210 |
1.6,
|
| 211 |
1,
|
| 212 |
1024,
|
| 213 |
False,
|
| 214 |
+
True,
|
| 215 |
],
|
| 216 |
[
|
| 217 |
+
"<img><|image_1|><\/img> What item can be used to see the current time? Please highlight it in blue.",
|
| 218 |
"./imgs/test_cases/watch.jpg",
|
| 219 |
None,
|
| 220 |
None,
|
| 221 |
+
1024,
|
| 222 |
+
1024,
|
| 223 |
2.5,
|
| 224 |
1.6,
|
| 225 |
+
666,
|
| 226 |
1024,
|
| 227 |
False,
|
| 228 |
+
True,
|
| 229 |
],
|
| 230 |
[
|
| 231 |
"According to the following examples, generate an output for the input.\nInput: <img><|image_1|></img>\nOutput: <img><|image_2|></img>\n\nInput: <img><|image_3|></img>\nOutput: ",
|
|
|
|
| 239 |
1,
|
| 240 |
768,
|
| 241 |
False,
|
| 242 |
+
False,
|
| 243 |
],
|
| 244 |
]
|
| 245 |
return case
|
| 246 |
|
| 247 |
+
def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, seed, max_input_image_size, randomize_seed, use_input_image_size_as_output):
|
| 248 |
# Set default values inside the function
|
| 249 |
inference_steps = 50
|
| 250 |
separate_cfg_infer = True
|
| 251 |
offload_model = False
|
|
|
|
| 252 |
|
| 253 |
return generate_image(
|
| 254 |
text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale,
|
|
|
|
| 261 |
For multi-modal to image generation, you should pass a string as `prompt`, and a list of image paths as `input_images`. The placeholder in the prompt should be in the format of `<img><|image_*|></img>` (for the first image, the placeholder is <img><|image_1|></img>; for the second image, the placeholder is <img><|image_2|></img>).
|
| 262 |
For example, use an image of a woman to generate a new image:
|
| 263 |
prompt = "A woman holds a bouquet of flowers and faces the camera. The woman is \<img\>\<|image_1|\>\</img\>."
|
|
|
|
| 264 |
Tips:
|
| 265 |
- For image editing task and controlnet task, we recommend setting the height and width of output image as the same as input image. For example, if you want to edit a 512x512 image, you should set the height and width of output image as 512x512. You also can set the `use_input_image_size_as_output` to automatically set the height and width of output image as the same as input image.
|
| 266 |
- For out-of-memory or time cost, you can set `offload_model=True` or refer to [./docs/inference.md#requiremented-resources](https://github.com/VectorSpaceLab/OmniGen/blob/main/docs/inference.md#requiremented-resources) to select an appropriate setting.
|
|
|
|
| 270 |
- Animate Style: If the generated images are in animate style, you can try to add `photo` to the prompt.
|
| 271 |
- Edit generated image. If you generate an image by omnigen and then want to edit it, you cannot use the same seed to edit this image. For example, use seed=0 to generate image, and should use seed=1 to edit this image.
|
| 272 |
- For image editing tasks, we recommend placing the image before the editing instruction. For example, use `<img><|image_1|></img> remove suit`, rather than `remove suit <img><|image_1|></img>`.
|
|
|
|
|
|
|
| 273 |
**HF Spaces often encounter errors due to quota limitations, so we recommend running it locally.**
|
|
|
|
| 274 |
"""
|
| 275 |
|
| 276 |
article = """
|
|
|
|
| 394 |
seed_input,
|
| 395 |
max_input_image_size,
|
| 396 |
randomize_seed,
|
| 397 |
+
use_input_image_size_as_output,
|
| 398 |
],
|
| 399 |
outputs=output_image,
|
| 400 |
)
|
|
|
|
| 403 |
|
| 404 |
# launch
|
| 405 |
demo.launch()
|
| 406 |
+
|