tdurbor committed on
Commit
e587a3a
·
1 Parent(s): 23a56a2

Fix original image downsizing / Add image size check before upload

Browse files
image_processing_pipeline.py CHANGED
@@ -113,7 +113,7 @@ def main():
113
  downsize_processed_images(input_directory, output_directory, target_width)
114
 
115
  original_output_directory = os.path.join(args.output_dir, "web-original-images")
116
- downsize_processed_images(original_images_dir, original_output_directory, target_width)
117
 
118
  if args.dataset_name:
119
  upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset)
 
113
  downsize_processed_images(input_directory, output_directory, target_width)
114
 
115
  original_output_directory = os.path.join(args.output_dir, "web-original-images")
116
+ downsize_processed_images(input_resized_dir, original_output_directory, target_width)
117
 
118
  if args.dataset_name:
119
  upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset)
utils/resize_processed_images.py CHANGED
@@ -31,14 +31,13 @@ def resize_image(input_path, output_path, target_width):
31
  with Image.open(input_path) as img:
32
  # Correct orientation
33
  img = correct_orientation(img)
34
-
35
  # Calculate the new height to maintain the aspect ratio
36
  width_percent = target_width / img.width
37
  target_height = int(img.height * width_percent)
38
 
39
  # Resize the image
40
  img = img.resize((target_width, target_height), Image.LANCZOS)
41
-
42
  # Save the resized image in the same format as the input
43
  img.save(output_path, format=img.format)
44
 
 
31
  with Image.open(input_path) as img:
32
  # Correct orientation
33
  img = correct_orientation(img)
34
+
35
  # Calculate the new height to maintain the aspect ratio
36
  width_percent = target_width / img.width
37
  target_height = int(img.height * width_percent)
38
 
39
  # Resize the image
40
  img = img.resize((target_width, target_height), Image.LANCZOS)
 
41
  # Save the resized image in the same format as the input
42
  img.save(output_path, format=img.format)
43
 
utils/upload_to_dataset.py CHANGED
@@ -4,6 +4,8 @@ import os
4
  from collections import defaultdict
5
  import pandas as pd
6
  import argparse
 
 
7
 
8
  def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
9
  # Define the dataset features with dedicated columns for each model
@@ -53,8 +55,21 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, d
53
  "original_filename": []
54
  }
55
 
 
 
56
  for filename, entry in data.items():
57
  if "original_image" in entry:
 
 
 
 
 
 
 
 
 
 
 
58
  dataset_dict["original_image"].append(entry["original_image"])
59
  dataset_dict["clipdrop_image"].append(entry["clipdrop_image"])
60
  dataset_dict["bria_image"].append(entry["bria_image"])
@@ -62,6 +77,11 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, d
62
  dataset_dict["removebg_image"].append(entry["removebg_image"])
63
  dataset_dict["original_filename"].append(filename)
64
 
 
 
 
 
 
65
  # Save the data dictionary to a CSV file for inspection
66
  df = pd.DataFrame.from_dict(dataset_dict)
67
  df.to_csv("image_data.csv", index=False)
 
4
  from collections import defaultdict
5
  import pandas as pd
6
  import argparse
7
+ from PIL import Image as PILImage
8
+ import sys
9
 
10
  def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
11
  # Define the dataset features with dedicated columns for each model
 
55
  "original_filename": []
56
  }
57
 
58
+ errors = []
59
+
60
  for filename, entry in data.items():
61
  if "original_image" in entry:
62
+ # Check if all images have the same size
63
+ try:
64
+ original_size = PILImage.open(entry["original_image"]).size
65
+ for source in ["clipdrop_image", "bria_image", "photoroom_image", "removebg_image"]:
66
+ if entry[source] is not None:
67
+ processed_size = PILImage.open(entry[source]).size
68
+ if processed_size != original_size:
69
+ errors.append(f"Size mismatch for {filename}: {source} image size {processed_size} does not match original size {original_size}.")
70
+ except Exception as e:
71
+ errors.append(f"Error processing {filename}: {e}")
72
+
73
  dataset_dict["original_image"].append(entry["original_image"])
74
  dataset_dict["clipdrop_image"].append(entry["clipdrop_image"])
75
  dataset_dict["bria_image"].append(entry["bria_image"])
 
77
  dataset_dict["removebg_image"].append(entry["removebg_image"])
78
  dataset_dict["original_filename"].append(filename)
79
 
80
+ if errors:
81
+ for error in errors:
82
+ print(error)
83
+ sys.exit(1)
84
+
85
  # Save the data dictionary to a CSV file for inspection
86
  df = pd.DataFrame.from_dict(dataset_dict)
87
  df.to_csv("image_data.csv", index=False)