multimodalart HF staff committed on
Commit
666a605
·
1 Parent(s): c24dac7

Final features

Browse files
Files changed (1) hide show
  1. app.py +119 -82
app.py CHANGED
@@ -6,14 +6,14 @@ import shutil
6
  from train_dreambooth import run_training
7
  from convertosd import convert
8
  from PIL import Image
 
 
9
  import torch
10
-
11
  css = '''
12
  .instruction{position: absolute; top: 0;right: 0;margin-top: 0px !important}
13
  .arrow{position: absolute;top: 0;right: -8px;margin-top: -8px !important}
14
  #component-4, #component-3, #component-10{min-height: 0}
15
  '''
16
- shutil.unpack_archive("mix.zip", "mix")
17
  model_to_load = "multimodalart/sd-fine-tunable"
18
  maximum_concepts = 3
19
 
@@ -34,12 +34,13 @@ def swap_text(option):
34
 
35
  def count_files(*inputs):
36
  file_counter = 0
 
37
  for i, input in enumerate(inputs):
38
  if(i < maximum_concepts-1):
39
- if(input):
40
- files = inputs[i+(maximum_concepts*2)]
41
- for j, tile_temp in enumerate(files):
42
- file_counter+= 1
43
  uses_custom = inputs[-1]
44
  type_of_thing = inputs[-4]
45
  if(uses_custom):
@@ -49,9 +50,13 @@ def count_files(*inputs):
49
  Training_Steps = file_counter*200*2
50
  else:
51
  Training_Steps = file_counter*200
52
- return(gr.update(visible=True, value=f"You are going to train {file_counter} files for {Training_Steps} steps. This should take around {round(Training_Steps/1.5, 2)} seconds, or {round((Training_Steps/1.5)/3600, 2)}. The T4 GPU costs US$0.60 for 1h, so the estimated costs for this training run should be {round(((Training_Steps/1.5)/3600)*0.6, 2)}"))
53
- def train(*inputs):
54
 
 
 
 
 
 
55
  if os.path.exists("diffusers_model.zip"): os.remove("diffusers_model.zip")
56
  if os.path.exists("model.ckpt"): os.remove("model.ckpt")
57
  file_counter = 0
@@ -61,6 +66,8 @@ def train(*inputs):
61
  os.makedirs('instance_images',exist_ok=True)
62
  files = inputs[i+(maximum_concepts*2)]
63
  prompt = inputs[i+maximum_concepts]
 
 
64
  for j, file_temp in enumerate(files):
65
  file = Image.open(file_temp.name)
66
  width, height = file.size
@@ -84,64 +91,16 @@ def train(*inputs):
84
  Train_text_encoder_for = int(inputs[-2])
85
  else:
86
  Training_Steps = file_counter*200
87
- if(type_of_thing == "person"):
88
- class_data_dir = "mix"
89
- Train_text_encoder_for=100
90
- args_txt_encoder = argparse.Namespace(
91
- image_captions_filename = True,
92
- train_text_encoder = True,
93
- pretrained_model_name_or_path=model_to_load,
94
- instance_data_dir="instance_images",
95
- class_data_dir=class_data_dir,
96
- output_dir="output_model",
97
- with_prior_preservation=True,
98
- prior_loss_weight=1.0,
99
- instance_prompt="",
100
- seed=42,
101
- resolution=512,
102
- mixed_precision="fp16",
103
- train_batch_size=1,
104
- gradient_accumulation_steps=1,
105
- gradient_checkpointing=True,
106
- use_8bit_adam=True,
107
- learning_rate=2e-6,
108
- lr_scheduler="polynomial",
109
- lr_warmup_steps=0,
110
- max_train_steps=Training_Steps,
111
- num_class_images=200
112
- )
113
- args_unet = argparse.Namespace(
114
- image_captions_filename = True,
115
- train_only_unet=True,
116
- Session_dir="output_model",
117
- save_starting_step=0,
118
- save_n_steps=0,
119
- pretrained_model_name_or_path=model_to_load,
120
- instance_data_dir="instance_images",
121
- output_dir="output_model",
122
- instance_prompt="",
123
- seed=42,
124
- resolution=512,
125
- mixed_precision="fp16",
126
- train_batch_size=1,
127
- gradient_accumulation_steps=1,
128
- gradient_checkpointing=False,
129
- use_8bit_adam=True,
130
- learning_rate=2e-6,
131
- lr_scheduler="polynomial",
132
- lr_warmup_steps=0,
133
- max_train_steps=Training_Steps
134
- )
135
- run_training(args_txt_encoder)
136
- run_training(args_unet)
137
- elif(type_of_thing == "object" or type_of_thing == "style"):
138
- if(type_of_thing == "object"):
139
- Train_text_encoder_for=30
140
- elif(type_of_thing == "style"):
141
- Train_text_encoder_for=15
142
- class_data_dir = None
143
- stptxt = int((Training_Steps*Train_text_encoder_for)/100)
144
- args_general = argparse.Namespace(
145
  image_captions_filename = True,
146
  train_text_encoder = True,
147
  stop_text_encoder_training = stptxt,
@@ -161,11 +120,11 @@ def train(*inputs):
161
  lr_scheduler="polynomial",
162
  lr_warmup_steps = 0,
163
  max_train_steps=Training_Steps,
164
- )
165
- run_training(args_general)
166
  torch.cuda.empty_cache()
167
  #convert("output_model", "model.ckpt")
168
- shutil.rmtree('instance_images')
169
  shutil.make_archive("diffusers_model", 'zip', "output_model")
170
  torch.cuda.empty_cache()
171
  return [gr.update(visible=True, value=["diffusers_model.zip"]), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)]
@@ -178,8 +137,80 @@ def generate(prompt):
178
  image = pipe(prompt).images[0]
179
  return(image)
180
 
181
- def push(path):
182
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  def convert_to_ckpt():
185
  convert("output_model", "model.ckpt")
@@ -200,7 +231,7 @@ with gr.Blocks(css=css) as demo:
200
  gr.HTML('''
201
  <div class="gr-prose" style="max-width: 80%">
202
  <h2>You have successfully cloned the Dreambooth Training Space</h2>
203
- <p><a href="#">Now you can attribute a T4 GPU to it</a> (by going to the Settings tab) and run the training below. The GPU will be automatically unassigned after training is over. So you will be billed by the minute between when you activate the GPU and when it finishes training.</p>
204
  </div>
205
  ''')
206
  gr.Markdown("# Dreambooth training")
@@ -258,32 +289,38 @@ with gr.Blocks(css=css) as demo:
258
 
259
 
260
 
261
- with gr.Accordion("Advanced Settings", open=False):
262
- swap_auto_calculated = gr.Checkbox(label="Use these advanced setting")
263
- gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style`.")
264
  steps = gr.Number(label="How many steps", value=800)
265
  perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
266
 
267
  type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder], queue=False)
268
  training_summary = gr.Textbox("", visible=False, label="Training Summary")
 
 
269
  for file in file_collection:
270
- file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary])
271
  train_btn = gr.Button("Start Training")
272
  with gr.Box(visible=False) as try_your_model:
273
- gr.Markdown("Try your model")
274
  with gr.Row():
275
  prompt = gr.Textbox(label="Type your prompt")
276
- result = gr.Image()
277
  generate_button = gr.Button("Generate Image")
278
  with gr.Box(visible=False) as push_to_hub:
279
- gr.Markdown("Push to Hugging Face Hub")
280
- model_repo_tag = gr.Textbox(label="Model name or URL", placeholder="username/model_name")
 
 
 
281
  push_button = gr.Button("Push to the Hub")
282
  result = gr.File(label="Download the uploaded models in the diffusers format", visible=True)
 
283
  convert_button = gr.Button("Convert to CKPT", visible=False)
284
 
285
  train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button])
286
- generate_button.click(fn=generate, inputs=prompt, outputs=result)
287
- push_button.click(fn=push, inputs=model_repo_tag, outputs=[])
288
  convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result)
289
  demo.launch()
 
6
  from train_dreambooth import run_training
7
  from convertosd import convert
8
  from PIL import Image
9
+ from slugify import slugify
10
+ import requests
11
  import torch
 
12
  css = '''
13
  .instruction{position: absolute; top: 0;right: 0;margin-top: 0px !important}
14
  .arrow{position: absolute;top: 0;right: -8px;margin-top: -8px !important}
15
  #component-4, #component-3, #component-10{min-height: 0}
16
  '''
 
17
  model_to_load = "multimodalart/sd-fine-tunable"
18
  maximum_concepts = 3
19
 
 
34
 
35
  def count_files(*inputs):
36
  file_counter = 0
37
+ concept_counter = 0
38
  for i, input in enumerate(inputs):
39
  if(i < maximum_concepts-1):
40
+ files = inputs[i]
41
+ if(files):
42
+ concept_counter+=1
43
+ file_counter+=len(files)
44
  uses_custom = inputs[-1]
45
  type_of_thing = inputs[-4]
46
  if(uses_custom):
 
50
  Training_Steps = file_counter*200*2
51
  else:
52
  Training_Steps = file_counter*200
53
+ return(gr.update(visible=True, value=f"You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. This should take around {round(Training_Steps/1.5, 2)} seconds, or {round((Training_Steps/1.5)/3600, 2)} hours. As a reminder, the T4 GPU costs US$0.60 for 1h. Once training is over, don't forget to swap the hardware back to CPU."))
 
54
 
55
+ def train(*inputs):
56
+ if "IS_SHARED_UI" in os.environ:
57
+ raise gr.Error("This Space only works in duplicated instances")
58
+ if os.path.exists("output_model"): shutil.rmtree('output_model')
59
+ if os.path.exists("instance_images"): shutil.rmtree('instance_images')
60
  if os.path.exists("diffusers_model.zip"): os.remove("diffusers_model.zip")
61
  if os.path.exists("model.ckpt"): os.remove("model.ckpt")
62
  file_counter = 0
 
66
  os.makedirs('instance_images',exist_ok=True)
67
  files = inputs[i+(maximum_concepts*2)]
68
  prompt = inputs[i+maximum_concepts]
69
+ if(prompt == "" or prompt == None):
70
+ raise gr.Error("You forgot to define your concept prompt")
71
  for j, file_temp in enumerate(files):
72
  file = Image.open(file_temp.name)
73
  width, height = file.size
 
91
  Train_text_encoder_for = int(inputs[-2])
92
  else:
93
  Training_Steps = file_counter*200
94
+ if(type_of_thing == "object"):
95
+ Train_text_encoder_for=30
96
+ elif(type_of_thing == "person"):
97
+ Train_text_encoder_for=60
98
+ elif(type_of_thing == "style"):
99
+ Train_text_encoder_for=15
100
+
101
+ class_data_dir = None
102
+ stptxt = int((Training_Steps*Train_text_encoder_for)/100)
103
+ args_general = argparse.Namespace(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  image_captions_filename = True,
105
  train_text_encoder = True,
106
  stop_text_encoder_training = stptxt,
 
120
  lr_scheduler="polynomial",
121
  lr_warmup_steps = 0,
122
  max_train_steps=Training_Steps,
123
+ )
124
+ run_training(args_general)
125
  torch.cuda.empty_cache()
126
  #convert("output_model", "model.ckpt")
127
+ #shutil.rmtree('instance_images')
128
  shutil.make_archive("diffusers_model", 'zip', "output_model")
129
  torch.cuda.empty_cache()
130
  return [gr.update(visible=True, value=["diffusers_model.zip"]), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)]
 
137
  image = pipe(prompt).images[0]
138
  return(image)
139
 
140
def push(model_name, where_to_upload, hf_token):
    """Publish the trained model, its checkpoint and a model card to the Hub.

    Converts ``output_model`` to ``model.ckpt`` if that file does not exist
    yet, writes ``README.md`` and ``token_identifier.txt`` in the working
    directory, then uploads those files together with the ``output_model``
    folder and the ``instance_images`` folder (stored as ``concept_images``).

    Args:
        model_name: Human-readable model name; slugified to build the repo id.
        where_to_upload: ``"My personal profile"`` uploads under the token
            owner's namespace; any other value uploads to
            ``sd-dreambooth-library``.
        hf_token: Hugging Face token with write access.

    Returns:
        A list of two ``gr.update`` objects: the success message and the
        downloadable files (``diffusers_model.zip`` and ``model.ckpt``).

    Raises:
        gr.Error: If the model name slugifies to an empty string or no token
            was supplied.
    """
    from urllib.parse import quote

    from huggingface_hub import HfApi, CommitOperationAdd
    from huggingface_hub import create_repo

    # Validate before the (slow) checkpoint conversion and any network call.
    model_name_slug = slugify(model_name or "")
    if not model_name_slug:
        raise gr.Error("Please provide a name for your model")
    if not hf_token:
        raise gr.Error("Please provide a Hugging Face write token")

    if not os.path.exists("model.ckpt"):
        convert("output_model", "model.ckpt")

    # The client and the username are needed on every path (README, commit,
    # uploads), so they are created up front rather than inside one branch.
    api = HfApi()
    your_username = api.whoami(token=hf_token)["name"]

    if where_to_upload == "My personal profile":
        model_id = f"{your_username}/{model_name_slug}"
    else:
        model_id = f"sd-dreambooth-library/{model_name_slug}"
        # Standard bearer scheme is "Bearer <token>" (no colon).
        headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
        # NOTE(review): this endpoint looks like a placeholder and the response
        # is never inspected; presumably it belongs to the shared-library
        # branch only. Confirm the real URL before shipping, since this sends
        # the user's write token to that host.
        requests.post("https://example.com/get-my-account-detail", headers=headers)

    # Build the sample-image section: one heading per instance prompt followed
    # by its images. Sorting keeps files of the same prompt adjacent, since
    # os.listdir() returns entries in arbitrary order.
    image_lines = []
    instance_prompt_list = []
    previous_instance_prompt = ''
    for i, image in enumerate(sorted(os.listdir("instance_images"))):
        instance_prompt = image.split("_")[0]
        if instance_prompt != previous_instance_prompt:
            image_lines.append(instance_prompt)
            instance_prompt_list.append(instance_prompt)
            previous_instance_prompt = instance_prompt
        # Images are uploaded below under "concept_images" in the repo
        # `model_id`; the link must match that location. File names are
        # percent-encoded so spaces do not break the Markdown link.
        image_lines.append(
            f"![{instance_prompt} {i}](https://huggingface.co/{model_id}/resolve/main/concept_images/{quote(image)})"
        )
    image_string = "\n".join(image_lines)

    readme_text = f'''---
license: creativeml-openrail-m
tags:
- text-to-image
---
### {model_name} Dreambooth model trained by {your_username} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training)

You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb)

Sample pictures of this concept:
{image_string}
'''
    # Save the readme to a file
    with open("README.md", "w", encoding="utf-8") as readme_file:
        readme_file.write(readme_text)
    # Save the token identifier to a file
    with open("token_identifier.txt", "w", encoding="utf-8") as text_file:
        text_file.write(', '.join(instance_prompt_list))

    # The target repository must exist before committing to it; exist_ok
    # makes a repeated push to the same model name succeed.
    create_repo(model_id, token=hf_token, exist_ok=True)

    operations = [
        CommitOperationAdd(path_in_repo="token_identifier.txt", path_or_fileobj="token_identifier.txt"),
        CommitOperationAdd(path_in_repo="README.md", path_or_fileobj="README.md"),
        CommitOperationAdd(path_in_repo="model.ckpt", path_or_fileobj="model.ckpt"),
    ]
    api.create_commit(
        repo_id=model_id,
        operations=operations,
        commit_message=f"Upload the model {model_name}",
        token=hf_token
    )
    api.upload_folder(
        folder_path="output_model",
        repo_id=model_id,
        token=hf_token
    )
    api.upload_folder(
        folder_path="instance_images",
        path_in_repo="concept_images",
        repo_id=model_id,
        token=hf_token
    )
    return [gr.update(visible=True, value=f"Successfully uploaded your model. Access it [here](https://huggingface.co/{model_id})"), gr.update(visible=True, value=["diffusers_model.zip", "model.ckpt"])]
214
 
215
  def convert_to_ckpt():
216
  convert("output_model", "model.ckpt")
 
231
  gr.HTML('''
232
  <div class="gr-prose" style="max-width: 80%">
233
  <h2>You have successfully cloned the Dreambooth Training Space</h2>
234
+ <p>If you haven't already, attribute a T4 GPU to it (via the Settings tab) and run the training below. You will be billed by the minute between when you activate the GPU until when you turn it off.</p>
235
  </div>
236
  ''')
237
  gr.Markdown("# Dreambooth training")
 
289
 
290
 
291
 
292
+ with gr.Accordion("Custom Settings", open=False):
293
+ swap_auto_calculated = gr.Checkbox(label="Use custom settings")
294
+ gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by 20. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and is fully trained for persons.")
295
  steps = gr.Number(label="How many steps", value=800)
296
  perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
297
 
298
  type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder], queue=False)
299
  training_summary = gr.Textbox("", visible=False, label="Training Summary")
300
+ steps.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
301
+ perc_txt_encoder.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
302
  for file in file_collection:
303
+ file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
304
  train_btn = gr.Button("Start Training")
305
  with gr.Box(visible=False) as try_your_model:
306
+ gr.Markdown("## Try your model")
307
  with gr.Row():
308
  prompt = gr.Textbox(label="Type your prompt")
309
+ result_image = gr.Image()
310
  generate_button = gr.Button("Generate Image")
311
  with gr.Box(visible=False) as push_to_hub:
312
+ gr.Markdown("## Push to Hugging Face Hub")
313
+ model_name = gr.Textbox(label="Name of your model", placeholder="Tarsila do Amaral Style")
314
+ where_to_upload = gr.Dropdown(["My personal profile", "Public Library"], label="Upload to")
315
+ gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.")
316
+ hf_token = gr.Textbox(label="Hugging Face Write Token")
317
  push_button = gr.Button("Push to the Hub")
318
  result = gr.File(label="Download the uploaded models in the diffusers format", visible=True)
319
+ success_message_upload = gr.Markdown(visible=False)
320
  convert_button = gr.Button("Convert to CKPT", visible=False)
321
 
322
  train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button])
323
+ generate_button.click(fn=generate, inputs=prompt, outputs=result_image)
324
+ push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token], outputs=[success_message_upload, result])
325
  convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result)
326
  demo.launch()