multimodalart HF staff committed on
Commit
a9aaca0
·
1 Parent(s): 8f652eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -42
app.py CHANGED
@@ -31,11 +31,46 @@ processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
31
  model = Blip2ForConditionalGeneration.from_pretrained(
32
  "Salesforce/blip2-opt-2.7b", device_map={"": 0}, torch_dtype=torch.float16
33
  )
34
- #Run first captioning as apparently makes the other ones faster
35
- #pil_image = Image.new('RGB', (512, 512), 'black')
36
- #blip_inputs = processor(images=pil_image, return_tensors="pt").to(device, torch.float16)
37
- #generated_ids = model.generate(**blip_inputs)
38
- #generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  def load_captioning(uploaded_images, option):
41
  updates = []
@@ -77,46 +112,28 @@ def make_options_visible(option):
77
  gr.update(value=sentence, visible=True),
78
  gr.update(visible=True),
79
  )
 
80
  def change_defaults(option, images):
 
81
  num_images = len(images)
82
- max_train_steps = num_images * 150
83
- max_train_steps = 500 if max_train_steps < 500 else max_train_steps
 
 
 
 
 
 
 
84
  random_files = []
85
- with_prior_preservation = False
86
- class_prompt = ""
87
- if(num_images > 24):
88
- repeats = 1
89
- elif(num_images > 10):
90
- repeats = 2
91
- else:
92
- repeats = 3
93
- if(max_train_steps > 2400):
94
- max_train_steps = 2400
95
-
96
- if(option == "face"):
97
- rank = 64
98
- max_train_steps = num_images*100
99
- lr_scheduler = "constant"
100
- #Takes 150 random faces for the prior preservation loss
101
- directory = FACES_DATASET_PATH
102
- file_count = 150
103
- files = [os.path.join(directory, file) for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]
104
  random_files = random.sample(files, min(len(files), file_count))
105
- with_prior_preservation = True
106
- class_prompt = "a photo of a person"
107
- elif(option == "style"):
108
- rank = 16
109
- lr_scheduler = "polynomial"
110
- elif(option == "object"):
111
- rank = 8
112
- repeats = 1
113
- lr_scheduler = "constant"
114
- else:
115
- rank = 32
116
- lr_scheduler = "constant"
117
-
118
- return max_train_steps, repeats, lr_scheduler, rank, with_prior_preservation, class_prompt, random_files
119
 
 
 
120
  def create_dataset(*inputs):
121
  print("Creating dataset")
122
  images = inputs[0]
@@ -300,7 +317,7 @@ def calculate_price(iterations):
300
  cost = round(cost_per_second * total_seconds, 2)
301
  return f'''To train this LoRA, we will duplicate the space and hook an A10G GPU under the hood.
302
  ## Estimated to cost <b>< US$ {str(cost)}</b> with your current train settings <small>({int(iterations)} iterations at 3.50s/it in Spaces A10G at US$1.05/h)</small>
303
- #### Grab a <b>write</b> token [here](https://huggingface.co/settings/tokens), enter it below ↓'''
304
 
305
  def start_training_og(
306
  lora_name,
@@ -482,7 +499,7 @@ css = '''.gr-group{background-color: transparent}
482
 
483
  '''
484
  theme = gr.themes.Monochrome(
485
- text_size="lg",
486
  font=[gr.themes.GoogleFont('Source Sans Pro'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
487
  )
488
  with gr.Blocks(css=css, theme=theme) as demo:
 
31
  model = Blip2ForConditionalGeneration.from_pretrained(
32
  "Salesforce/blip2-opt-2.7b", device_map={"": 0}, torch_dtype=torch.float16
33
  )
34
+
35
+ training_option_settings = {
36
+ "face": {
37
+ "rank": 64,
38
+ "lr_scheduler": "constant",
39
+ "with_prior_preservation": True,
40
+ "class_prompt": "a photo of a person",
41
+ "train_steps_multiplier": 100,
42
+ "file_count": 150,
43
+ "dataset_path": FACES_DATASET_PATH
44
+ },
45
+ "style": {
46
+ "rank": 16,
47
+ "lr_scheduler": "polynomial",
48
+ "with_prior_preservation": False,
49
+ "class_prompt": "",
50
+ "train_steps_multiplier": 150
51
+ },
52
+ "object": {
53
+ "rank": 8,
54
+ "lr_scheduler": "constant",
55
+ "with_prior_preservation": False,
56
+ "class_prompt": "",
57
+ "train_steps_multiplier": 150
58
+ },
59
+ "custom": {
60
+ "rank": 32,
61
+ "lr_scheduler": "constant",
62
+ "with_prior_preservation": False,
63
+ "class_prompt": "",
64
+ "train_steps_multiplier": 150
65
+ }
66
+ }
67
+
68
+ num_images_settings = {
69
+ #>24 images: 1 repeat; 11-24 images: 2 repeats; 1-10 images: 3 repeats (each threshold is an exclusive lower bound)
70
+ "repeats": [(24, 1), (10, 2), (0, 3)],
71
+ "train_steps_min": 500,
72
+ "train_steps_max": 2400
73
+ }
74
 
75
  def load_captioning(uploaded_images, option):
76
  updates = []
 
112
  gr.update(value=sentence, visible=True),
113
  gr.update(visible=True),
114
  )
115
+
116
  def change_defaults(option, images):
117
+ settings = training_option_settings.get(option, training_option_settings["custom"])
118
  num_images = len(images)
119
+
120
+ # Calculate max_train_steps
121
+ train_steps_multiplier = settings["train_steps_multiplier"]
122
+ max_train_steps = max(num_images * train_steps_multiplier, num_images_settings["train_steps_min"])
123
+ max_train_steps = min(max_train_steps, num_images_settings["train_steps_max"])
124
+
125
+ # Determine repeats based on number of images
126
+ repeats = next(repeats for num, repeats in num_images_settings["repeats"] if num_images > num)
127
+
128
  random_files = []
129
+ if settings["with_prior_preservation"]:
130
+ directory = settings["dataset_path"]
131
+ file_count = settings["file_count"]
132
+ files = [os.path.join(directory, file) for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  random_files = random.sample(files, min(len(files), file_count))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
+ return max_train_steps, repeats, settings["lr_scheduler"], settings["rank"], settings["with_prior_preservation"], settings["class_prompt"], random_files
136
+
137
  def create_dataset(*inputs):
138
  print("Creating dataset")
139
  images = inputs[0]
 
317
  cost = round(cost_per_second * total_seconds, 2)
318
  return f'''To train this LoRA, we will duplicate the space and hook an A10G GPU under the hood.
319
  ## Estimated to cost <b>< US$ {str(cost)}</b> with your current train settings <small>({int(iterations)} iterations at 3.50s/it in Spaces A10G at US$1.05/h)</small>
320
+ #### To continue, grab you <b>write</b> token [here](https://huggingface.co/settings/tokens) and enter it below ↓'''
321
 
322
  def start_training_og(
323
  lora_name,
 
499
 
500
  '''
501
  theme = gr.themes.Monochrome(
502
+ text_size=gr.themes.Size(lg="18px", md="15px", sm="13px", xl="22px", xs="12px", xxl="24px", xxs="9px"),
503
  font=[gr.themes.GoogleFont('Source Sans Pro'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
504
  )
505
  with gr.Blocks(css=css, theme=theme) as demo: