Remove BLIP captioning and make the flow more straightforward
app.py (changed)
@@ -48,51 +48,30 @@ pipe_edit = StableDiffusionXLInstructPix2PixPipeline.from_single_file( edit_file
 pipe_edit.scheduler = EDMEulerScheduler(sigma_min=0.002, sigma_max=120.0, sigma_data=1.0, prediction_type="v_prediction")
 pipe_edit.to("cuda")
 
-from transformers import BlipProcessor, BlipForConditionalGeneration
-
-processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
-model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap", torch_dtype=torch.float16).to("cuda")
-
 # Generator
 @spaces.GPU(duration=30, queue=False)
 def king(type ,
          input_image ,
          instruction: str ,
-         steps: int =
+         steps: int = 25,
          randomize_seed: bool = False,
          seed: int = 25,
-         text_cfg_scale: float = 7.3,
-         image_cfg_scale: float = 1.7,
          width: int = 1024,
          height: int = 1024,
-         guidance_scale: float =
+         guidance_scale: float = 7,
          use_resolution_binning: bool = True,
          progress=gr.Progress(track_tqdm=True),
 ):
     if type=="Image Editing" :
         raw_image = Image.open(input_image).convert('RGB')
-        inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
-        out = model.generate(**inputs, min_length=10, max_length=20)
-        caption = processor.decode(out[0], skip_special_tokens=True)
-        client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-        system_instructions1 = "<s>[SYSTEM] Your task is to modify prompt by USER with edit text, and create new prompt for image generation, reply with prompt only, Your task is to reply with final prompt only. [USER]"
-        formatted_prompt = f"{system_instructions1} {caption} [EDIT] {instruction} [FINAL_PROMPT]"
-        stream = client1.text_generation(formatted_prompt, max_new_tokens=50, stream=True, details=True, return_full_text=False)
-        instructions = "".join([response.token.text for response in stream if response.token.text != "</s>"])
-        print(instructions)
         if randomize_seed:
             seed = random.randint(0, 99999)
-        text_cfg_scale = text_cfg_scale
-        image_cfg_scale = image_cfg_scale
-        input_image = input_image
-        steps=steps
         generator = torch.manual_seed(seed)
         output_image = pipe_edit(
             instructions, image=raw_image,
-            guidance_scale=
+            guidance_scale=guidance_scale,
             num_inference_steps=steps, generator=generator, output_type="latent",
         ).images
-
         refine = refiner(
             prompt=instructions,
             guidance_scale=guidance_scale,
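For reference, the path this commit deletes captioned the input image with BLIP and then asked Mixtral to fold the edit instruction into that caption. A minimal standalone sketch of that removed flow, reassembled from the deleted lines (the helper name `rewrite_instruction` is mine, not from the file):

```python
# Sketch of the flow this commit removes, reconstructed from the deleted lines.
import torch
from PIL import Image
from huggingface_hub import InferenceClient
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
model = BlipForConditionalGeneration.from_pretrained(
    "unography/blip-long-cap", torch_dtype=torch.float16
).to("cuda")

def rewrite_instruction(image_path: str, instruction: str) -> str:
    # 1) Caption the input image with the long-caption BLIP checkpoint.
    raw_image = Image.open(image_path).convert("RGB")
    inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs, min_length=10, max_length=20)
    caption = processor.decode(out[0], skip_special_tokens=True)
    # 2) Ask Mixtral to merge the caption with the edit instruction.
    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
    system = ("<s>[SYSTEM] Your task is to modify prompt by USER with edit text, "
              "and create new prompt for image generation, reply with prompt only. [USER]")
    prompt = f"{system} {caption} [EDIT] {instruction} [FINAL_PROMPT]"
    stream = client.text_generation(prompt, max_new_tokens=50, stream=True,
                                    details=True, return_full_text=False)
    return "".join(r.token.text for r in stream if r.token.text != "</s>")
```

One side effect of the removal: the surviving call `pipe_edit(instructions, image=raw_image, ...)` still reads `instructions`, a name the deleted block used to define. Unless the new file assigns it elsewhere, the editing branch presumably means the raw `instruction` argument.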
@@ -193,6 +172,7 @@ with gr.Blocks(css=css) as demo:
     with gr.Row():
         with gr.Column(scale=1):
             type = gr.Dropdown(["Image Generation","Image Editing"], label="Task", value="Image Generation",interactive=True)
+            enhance_prompt = gr.Checkbox()
         with gr.Column(scale=1):
             generate_button = gr.Button("Generate")
 
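The new `enhance_prompt` control is added bare; a hedged sketch of how it would typically be declared, with the label and default as assumptions rather than values from this diff:

```python
import gradio as gr

# Hypothetical label and default; the commit itself adds only a bare checkbox
# (spelled gr.Chackbox in the committed file, which would raise AttributeError).
enhance_prompt = gr.Checkbox(label="Enhance prompt", value=False, interactive=True)
```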
@@ -200,10 +180,14 @@ with gr.Blocks(css=css) as demo:
     input_image = gr.Image(label="Image", type='filepath', interactive=True)
 
     with gr.Row():
-        text_cfg_scale = gr.Number(value=7.3, step=0.1, label="Text CFG", interactive=True)
-        image_cfg_scale = gr.Number(value=1.7, step=0.1,label="Image CFG", interactive=True)
         guidance_scale = gr.Number(value=6.0, step=0.1, label="Image Generation Guidance Scale", interactive=True)
         steps = gr.Number(value=25, step=1, label="Steps", interactive=True)
+
+    with gr.Row():
+        width = gr.Slider( label="Width", minimum=256, maximum=2048, step=64, value=1024)
+        height = gr.Slider( label="Height", minimum=256, maximum=2048, step=64, value=1024)
+
+    with gr.Row():
         randomize_seed = gr.Radio(
             ["Fix Seed", "Randomize Seed"],
             value="Randomize Seed",
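Rearranged, the parameter controls now span three rows: guidance and steps, width and height, then the seed controls. A self-contained sketch of the resulting layout, runnable outside the Space; note the committed hunk writes the last `with gr.Row()` without its colon, a SyntaxError and the likely cause of the Space failing to start:

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        guidance_scale = gr.Number(value=6.0, step=0.1,
                                   label="Image Generation Guidance Scale", interactive=True)
        steps = gr.Number(value=25, step=1, label="Steps", interactive=True)
    with gr.Row():
        width = gr.Slider(label="Width", minimum=256, maximum=2048, step=64, value=1024)
        height = gr.Slider(label="Height", minimum=256, maximum=2048, step=64, value=1024)
    with gr.Row():  # the committed file omits this colon
        randomize_seed = gr.Radio(["Fix Seed", "Randomize Seed"], value="Randomize Seed")
        seed = gr.Number(value=1371, step=1, label="Seed", interactive=True)

demo.launch()
```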
@@ -213,9 +197,7 @@ with gr.Blocks(css=css) as demo:
         )
         seed = gr.Number(value=1371, step=1, label="Seed", interactive=True)
 
-
-        width = gr.Slider( label="Width", minimum=256, maximum=2048, step=64, value=1024)
-        height = gr.Slider( label="Height", minimum=256, maximum=2048, step=64, value=1024)
+
 
     gr.Examples(
         examples=examples,
@@ -225,7 +207,7 @@ with gr.Blocks(css=css) as demo:
         cache_examples=False,
     )
 
-    gr.Markdown(help_text)
+    # gr.Markdown(help_text)
 
     instruction.change(fn=response, inputs=[instruction,input_image], outputs=type, queue=False)
 
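The `response` callback wired here flips the Task dropdown based on what the user has provided; its body is not part of this diff, so the following is purely a hypothetical reconstruction of that behavior:

```python
# Hypothetical: the actual body of response() is not shown in this commit.
def response(instruction, input_image):
    # Switch to editing once an input image accompanies the instruction.
    if input_image is not None and instruction:
        return "Image Editing"
    return "Image Generation"
```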
@@ -242,8 +224,6 @@ with gr.Blocks(css=css) as demo:
             steps,
             randomize_seed,
             seed,
-            text_cfg_scale,
-            image_cfg_scale,
             width,
             height,
             guidance_scale,
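With `text_cfg_scale` and `image_cfg_scale` dropped from this inputs list, the generate wiring reduces to the remaining parameters. A hedged sketch of the click hookup implied by the list (the leading three inputs follow `king`'s signature, and the `outputs` target is an assumption):

```python
# Assumed wiring; parameter names and order follow the diff, outputs is a guess.
generate_button.click(
    fn=king,
    inputs=[type, input_image, instruction,
            steps, randomize_seed, seed,
            width, height, guidance_scale],
    outputs=[input_image],  # assumption: result shown in the same image slot
)
```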