tdurbor committed on
Commit
fe62bf5
·
1 Parent(s): 892f774

Consolidate image preparation pipeline

Browse files
image_processing_pipeline.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import shutil
4
+ import sys
5
+ from dotenv import load_dotenv, find_dotenv
6
+
7
+ # Importing modules from the utils package
8
+ from utils.resize_images import main as resize_images_main
9
+ from utils.removebg import iterate_over_directory as removebg_iterate
10
+ from utils.photoroom import iterate_over_directory as photoroom_iterate
11
+ from utils.bria_rmbg20 import iterate_over_directory as bria_iterate
12
+ from utils.add_green_background import process_directory as add_green_background_process
13
+ from utils.upload_to_dataset import upload_to_dataset
14
+ from utils.resize_processed_images import process_images
15
+
16
def check_env_variables():
    """Verify that the API credentials required by the pipeline are set.

    Values from a ``.env`` file are loaded when one can be found. The file
    itself is optional: keys that are already exported in the process
    environment (CI jobs, containers) are accepted as well.

    Exits the process through ``sys.exit`` with an error message when any of
    the required keys is missing or empty.
    """
    dotenv_path = find_dotenv()
    if dotenv_path:
        load_dotenv(dotenv_path)

    required_keys = ['REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY', 'BRIA_API_TOKEN']
    missing_keys = [key for key in required_keys if not os.getenv(key)]

    if missing_keys:
        # Mention the absent .env file only as a hint; the missing keys are
        # the actual problem.
        hint = "" if dotenv_path else " (no .env file found)"
        sys.exit(f"Error: Missing environment variables: {', '.join(missing_keys)}{hint}")
28
+
29
def copy_images(source_dir, dest_dir):
    """Flatten the images found under ``source_dir`` into ``dest_dir``.

    Every ``.png``/``.jpg``/``.jpeg`` file (case-insensitive) is copied as
    ``<containing folder name>_<filename>``. A file whose destination name
    already exists is left untouched, so the function can be re-run.

    Args:
        source_dir: Root directory that is walked recursively.
        dest_dir: Directory receiving the copies; created if missing. It may
            live inside ``source_dir``; it is then excluded from the walk.
    """
    os.makedirs(dest_dir, exist_ok=True)
    valid_extensions = ('.png', '.jpg', '.jpeg')
    dest_real = os.path.realpath(dest_dir)

    for root, dirs, files in os.walk(source_dir):
        # Never descend into the destination: otherwise earlier copies would be
        # copied again under a new prefixed name on every run. Sorting keeps
        # the traversal (and thus which duplicate wins) reproducible.
        dirs[:] = sorted(
            d for d in dirs
            if os.path.realpath(os.path.join(root, d)) != dest_real
        )
        if os.path.realpath(root) == dest_real:
            continue

        # normpath drops a trailing separator so that "images/" still yields
        # the folder name "images" instead of an empty prefix.
        folder_name = os.path.basename(os.path.normpath(root))

        for filename in sorted(files):
            if not filename.lower().endswith(valid_extensions):
                continue

            source_file = os.path.join(root, filename)
            # Prefix with the folder name so same-named files from different
            # folders do not collide once flattened.
            new_filename = f"{folder_name}_{filename}"
            dest_file = os.path.join(dest_dir, new_filename)

            if os.path.isfile(source_file) and not os.path.exists(dest_file):
                shutil.copy2(source_file, dest_file)
                print(f"Copied: {new_filename}")
            else:
                print(f"Skipped: {filename} (already exists or not a file)")
51
+
52
def main():
    """Run the image preparation pipeline from the command line.

    Stages, in order:
      1. Copy the input images, flattened, into ``<work-dir>/merged-categories``.
      2. Resize them into ``<work-dir>/resized``.
      3. Run each background-removal provider into
         ``<work-dir>/background-removed/<provider>``.
      4. Add a green background into ``<work-dir>/green-background``.
      5. Resize processed and original images to a fixed width in ``--output-dir``.
      6. When ``--dataset-name`` is given, call ``upload_to_dataset`` with
         ``dry_run=True`` unless ``--push-dataset`` is passed.
    """
    parser = argparse.ArgumentParser(description="Image Processing Pipeline")
    parser.add_argument("--input-dir", type=str, default="original-images", help="Input directory for images")
    parser.add_argument("--work-dir", type=str, default="workdir", help="Working directory for intermediate images")
    parser.add_argument("--output-dir", type=str, default="final-images", help="Output directory for final images")
    parser.add_argument("--dataset-name", type=str, help="Name of the dataset to upload to Hugging Face Hub")
    parser.add_argument("--push-dataset", action="store_true", help="Push the dataset to the Hugging Face Hub")

    # Parse first so that --help and argument errors work without credentials.
    args = parser.parse_args()

    check_env_variables()

    # Define intermediate directories within the work directory
    input_resized_dir = os.path.join(args.work_dir, "resized")
    bg_removed_dir = os.path.join(args.work_dir, "background-removed")
    green_bg_dir = os.path.join(args.work_dir, "green-background")

    # Ensure all directories exist
    for directory in [input_resized_dir, bg_removed_dir, green_bg_dir]:
        os.makedirs(directory, exist_ok=True)

    # Step 1: Gather the input images into a single flat working directory
    print("Copying input images to working directory...")
    original_images_dir = os.path.join(args.work_dir, "merged-categories")
    copy_images(args.input_dir, original_images_dir)

    # Step 2: Resize images
    print("Resizing images...")
    resize_images_main(input_directory=original_images_dir, output_directory=input_resized_dir)

    # Step 3: Remove background with every provider
    print("Removing backgrounds...")
    bg_removal_dirs = {
        "removebg": os.path.join(bg_removed_dir, "removebg"),
        "photoroom": os.path.join(bg_removed_dir, "photoroom"),
        "bria": os.path.join(bg_removed_dir, "bria"),
    }

    for dir_path in bg_removal_dirs.values():
        os.makedirs(dir_path, exist_ok=True)

    removebg_iterate(input_resized_dir, bg_removal_dirs["removebg"])
    photoroom_iterate(input_resized_dir, bg_removal_dirs["photoroom"])
    bria_iterate(input_resized_dir, bg_removal_dirs["bria"])

    # Step 4: Add the green background to every provider's output
    print("Adding green background...")
    add_green_background_process(bg_removed_dir, green_bg_dir)

    # Step 5: Resize processed images to the final width
    print("Resizing processed images...")
    target_width = 800
    # NOTE(review): "clipdrop" is not produced by step 3 above; presumably its
    # results are placed in the green-background folder by other means -- confirm.
    subdirectories = ["bria", "photoroom", "clipdrop", "removebg"]
    os.makedirs(args.output_dir, exist_ok=True)
    for subdir in subdirectories:
        input_directory = os.path.join(green_bg_dir, subdir)
        output_directory = os.path.join(args.output_dir, subdir)
        process_images(input_directory, output_directory, target_width)

    original_output_directory = os.path.join(args.output_dir, "web-original-images")
    process_images(original_images_dir, original_output_directory, target_width)

    # Step 6: Build the dataset; only pushed when --push-dataset is given
    if args.dataset_name:
        upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset)
    else:
        print("Please provide a dataset name using --dataset-name")
116
+
117
+ if __name__ == "__main__":
118
+ main()
utils/add_green_background.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  from PIL import Image
 
3
 
4
  def add_green_background_to_image(image_path, output_path, background_color=(0, 255, 0)):
5
  """Add a green background to an image and save it as PNG."""
@@ -9,28 +10,37 @@ def add_green_background_to_image(image_path, output_path, background_color=(0,
9
  combined = Image.alpha_composite(background, img)
10
  combined.save(output_path, "PNG")
11
 
 
 
 
 
 
 
 
 
12
  def process_directory(input_dir, output_dir, background_color=(0, 255, 0)):
13
  """Recursively process a directory to add a green background to all images and convert them to PNG."""
14
  if not os.path.exists(output_dir):
15
  os.makedirs(output_dir)
16
 
17
- for root, _, files in os.walk(input_dir):
18
- for file in files:
19
- if file.lower().endswith(('.png', '.jpg', '.jpeg')):
20
- input_path = os.path.join(root, file)
21
- relative_path = os.path.relpath(input_path, input_dir)
22
- output_path = os.path.join(output_dir, os.path.splitext(relative_path)[0] + '.png')
23
-
24
- # Ensure the output directory exists
25
- os.makedirs(os.path.dirname(output_path), exist_ok=True)
26
-
27
- # Check if the output file already exists
28
- if not os.path.exists(output_path):
29
- # Add green background to the image and convert to PNG
30
- add_green_background_to_image(input_path, output_path, background_color)
31
- print(f"Processed: {input_path} -> {output_path}")
32
- else:
33
- print(f"Skipped: {output_path} already exists")
 
34
 
35
  # Example usage
36
  input_directory = "../../background-removal-arena-v0/train/data/resized"
 
1
  import os
2
  from PIL import Image
3
+ from concurrent.futures import ThreadPoolExecutor
4
 
5
  def add_green_background_to_image(image_path, output_path, background_color=(0, 255, 0)):
6
  """Add a green background to an image and save it as PNG."""
 
10
  combined = Image.alpha_composite(background, img)
11
  combined.save(output_path, "PNG")
12
 
13
def process_image_file(input_path, output_path, background_color):
    """Add the background to one image unless its output file already exists."""
    if os.path.exists(output_path):
        print(f"Skipped: {output_path} already exists")
        return

    add_green_background_to_image(input_path, output_path, background_color)
    print(f"Processed: {input_path} -> {output_path}")
20
+
21
def process_directory(input_dir, output_dir, background_color=(0, 255, 0)):
    """Walk ``input_dir`` and write a PNG with a background for every image.

    The relative folder layout of ``input_dir`` is reproduced under
    ``output_dir``; each output keeps the input's name with a ``.png``
    extension. Files are handled concurrently by a thread pool, and
    ``background_color`` is forwarded to ``process_image_file``.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    image_suffixes = ('.png', '.jpg', '.jpeg')
    pending = []
    with ThreadPoolExecutor() as pool:
        for current_dir, _subdirs, names in os.walk(input_dir):
            for name in names:
                if not name.lower().endswith(image_suffixes):
                    continue

                src = os.path.join(current_dir, name)
                stem = os.path.splitext(os.path.relpath(src, input_dir))[0]
                dst = os.path.join(output_dir, stem + '.png')

                # The mirrored sub-folder must exist before the worker saves.
                os.makedirs(os.path.dirname(dst), exist_ok=True)

                pending.append(pool.submit(process_image_file, src, dst, background_color))

        # Collect results so that an exception raised in a worker surfaces here.
        for future in pending:
            future.result()
44
 
45
  # Example usage
46
  input_directory = "../../background-removal-arena-v0/train/data/resized"
utils/bria_rmbg20.py CHANGED
@@ -51,7 +51,7 @@ def iterate_over_directory(directory_path, result_directory):
51
  file_path = os.path.join(root, file)
52
 
53
  result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
54
- result_file_directory = os.path.join(result_directory, os.path.basename(root))
55
 
56
  if not os.path.exists(result_file_directory):
57
  os.makedirs(result_file_directory)
 
51
  file_path = os.path.join(root, file)
52
 
53
  result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
54
+ result_file_directory = os.path.join(result_directory)
55
 
56
  if not os.path.exists(result_file_directory):
57
  os.makedirs(result_file_directory)
utils/photoroom.py CHANGED
@@ -41,8 +41,8 @@ def iterate_over_directory(directory_path, result_directory):
41
  file_path = os.path.join(root, file)
42
 
43
  result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
44
- result_file_directory = os.path.join(result_directory, os.path.basename(root))
45
-
46
  if not os.path.exists(result_file_directory):
47
  os.makedirs(result_file_directory)
48
 
 
41
  file_path = os.path.join(root, file)
42
 
43
  result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
44
+ result_file_directory = os.path.join(result_directory)
45
+
46
  if not os.path.exists(result_file_directory):
47
  os.makedirs(result_file_directory)
48
 
utils/remove_backgrounds.py DELETED
@@ -1,66 +0,0 @@
1
- import os
2
- from photoroom import process_image as photoroom_process
3
- from removebg import process_image as removebg_process
4
- #from clipdrop import process_image as clipdrop_process
5
- from bria_rmbg20 import process_image as bria_process
6
-
7
- def create_directory(path):
8
- if not os.path.exists(path):
9
- os.makedirs(path)
10
-
11
- def process_images(input_directory, output_directory, process_function, limit=None):
12
- count = 0
13
- for root, _, files in os.walk(input_directory):
14
- for file in files:
15
- if file.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.heic')):
16
- file_path = os.path.join(root, file)
17
- result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
18
- result_file_directory = os.path.join(output_directory)
19
-
20
- if not os.path.exists(result_file_directory):
21
- os.makedirs(result_file_directory)
22
-
23
- result_path = os.path.join(result_file_directory, result_file_name)
24
-
25
- if not os.path.exists(result_path): # Check if the image has already been processed
26
- print(file_path, result_path)
27
- process_function(file_path, result_path)
28
- count += 1
29
- if limit and count >= limit:
30
- return
31
-
32
- def main(dry_run=False):
33
- input_directory = "../data/resized-original-images"
34
- output_base_directory = "../data/processed"
35
-
36
- # Define output directories for each API
37
- output_directories = {
38
- "photoroom": os.path.join(output_base_directory, "photoroom"),
39
- "removebg": os.path.join(output_base_directory, "removebg"),
40
- #"clipdrop": os.path.join(output_base_directory, "clipdrop"),
41
- "bria": os.path.join(output_base_directory, "bria")
42
- }
43
-
44
- # Create output directories if they don't exist
45
- for directory in output_directories.values():
46
- create_directory(directory)
47
-
48
- if dry_run:
49
- print("Starting dry run...")
50
- k = 5
51
- process_images(input_directory, output_directories["photoroom"], photoroom_process, limit=k)
52
- process_images(input_directory, output_directories["removebg"], removebg_process, limit=k)
53
- #process_images(input_directory, output_directories["clipdrop"], clipdrop_process, limit=k)
54
- process_images(input_directory, output_directories["bria"], bria_process, limit=k)
55
- print("Dry run completed.")
56
- else:
57
- print("Starting full processing...")
58
- process_images(input_directory, output_directories["photoroom"], photoroom_process)
59
- process_images(input_directory, output_directories["removebg"], removebg_process)
60
- #process_images(input_directory, output_directories["clipdrop"], clipdrop_process)
61
- process_images(input_directory, output_directories["bria"], bria_process)
62
- print("Full processing completed.")
63
-
64
- if __name__ == "__main__":
65
- # Set dry_run to True for a dry run, or False for full processing
66
- main(dry_run=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/removebg.py CHANGED
@@ -41,7 +41,7 @@ def iterate_over_directory(directory_path, result_directory):
41
  file_path = os.path.join(root, file)
42
 
43
  result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
44
- result_file_directory = os.path.join(result_directory, os.path.basename(root))
45
 
46
  if not os.path.exists(result_file_directory):
47
  os.makedirs(result_file_directory)
 
41
  file_path = os.path.join(root, file)
42
 
43
  result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
44
+ result_file_directory = os.path.join(result_directory)
45
 
46
  if not os.path.exists(result_file_directory):
47
  os.makedirs(result_file_directory)
utils/resize_images.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
- from PIL import Image
3
  import concurrent.futures
4
 
5
  # Define the directories
@@ -11,6 +11,24 @@ os.makedirs(output_directory, exist_ok=True)
11
 
12
  def resize_image(input_path, output_path):
13
  with Image.open(input_path) as img:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Calculate the current megapixels
15
  current_megapixels = (img.width * img.height) / 1_000_000
16
  max_megapixels = 10
@@ -27,7 +45,7 @@ def resize_image(input_path, output_path):
27
  # If the image is smaller than 10 megapixels, save it as is
28
  img.save(output_path)
29
 
30
- def main():
31
  # Iterate over the input directory
32
  with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
33
  for filename in os.listdir(input_directory):
@@ -44,4 +62,4 @@ def main():
44
  print("All images have been resized and saved to the output directory.")
45
 
46
  if __name__ == "__main__":
47
- main()
 
1
  import os
2
+ from PIL import Image, ExifTags
3
  import concurrent.futures
4
 
5
  # Define the directories
 
11
 
12
  def resize_image(input_path, output_path):
13
  with Image.open(input_path) as img:
14
+ # Correct image orientation using EXIF data
15
+ try:
16
+ for orientation in ExifTags.TAGS.keys():
17
+ if ExifTags.TAGS[orientation] == 'Orientation':
18
+ break
19
+ exif = img._getexif()
20
+ if exif is not None:
21
+ orientation = exif.get(orientation, None)
22
+ if orientation == 3:
23
+ img = img.rotate(180, expand=True)
24
+ elif orientation == 6:
25
+ img = img.rotate(270, expand=True)
26
+ elif orientation == 8:
27
+ img = img.rotate(90, expand=True)
28
+ except (AttributeError, KeyError, IndexError):
29
+ # Cases: image don't have getexif
30
+ pass
31
+
32
  # Calculate the current megapixels
33
  current_megapixels = (img.width * img.height) / 1_000_000
34
  max_megapixels = 10
 
45
  # If the image is smaller than 10 megapixels, save it as is
46
  img.save(output_path)
47
 
48
+ def main(input_directory, output_directory):
49
  # Iterate over the input directory
50
  with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
51
  for filename in os.listdir(input_directory):
 
62
  print("All images have been resized and saved to the output directory.")
63
 
64
  if __name__ == "__main__":
65
+ main(input_directory, output_directory)
utils/resize_processed_images.py CHANGED
@@ -1,13 +1,37 @@
1
- from PIL import Image
2
  import os
 
3
 
4
  def create_directory(path):
5
  """Create a directory if it doesn't exist."""
6
  os.makedirs(path, exist_ok=True)
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def resize_image(input_path, output_path, target_width):
9
  """Resize an image to the target width while maintaining aspect ratio."""
10
  with Image.open(input_path) as img:
 
 
 
11
  # Calculate the new height to maintain the aspect ratio
12
  width_percent = target_width / img.width
13
  target_height = int(img.height * width_percent)
@@ -18,23 +42,26 @@ def resize_image(input_path, output_path, target_width):
18
  # Save the resized image in the same format as the input
19
  img.save(output_path, format=img.format)
20
 
 
 
 
 
 
 
 
 
21
  def process_images(input_directory, output_directory, target_width):
22
  """Process and resize images from the input directory to the output directory."""
23
  create_directory(output_directory)
24
 
25
- for root, _, files in os.walk(input_directory):
26
- for file in files:
27
- if file.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.heic')):
28
- file_path = os.path.join(root, file)
29
- result_file_name = os.path.splitext(file)[0] + os.path.splitext(file)[1]
30
- result_path = os.path.join(output_directory, result_file_name)
31
-
32
- # Check if the output file already exists
33
- if not os.path.exists(result_path):
34
- print(f"Resizing {file_path} to {result_path}")
35
- resize_image(file_path, result_path, target_width)
36
- else:
37
- print(f"Skipped {file_path}, already resized.")
38
 
39
  def main():
40
  """Main function to resize images in specified subdirectories."""
 
1
+ from PIL import Image, ExifTags
2
  import os
3
+ from concurrent.futures import ThreadPoolExecutor
4
 
5
  def create_directory(path):
6
  """Create a directory if it doesn't exist."""
7
  os.makedirs(path, exist_ok=True)
8
 
9
def correct_orientation(img):
    """Return ``img`` rotated upright according to its EXIF orientation tag.

    Only the pure-rotation orientations (3, 6 and 8) are handled. Images with
    any other orientation value, without EXIF data, or whose EXIF data cannot
    be read are returned unchanged.

    Args:
        img: An image object exposing ``getexif()`` (or the legacy
            ``_getexif()``) and ``rotate(angle, expand=...)``.

    Returns:
        The rotated image, or ``img`` itself when no rotation applies.
    """
    orientation_tag = 0x0112  # EXIF tag id of "Orientation"
    # Angle passed to rotate() for each supported orientation value.
    rotation_for = {3: 180, 6: 270, 8: 90}

    # Prefer the public accessor; fall back to the legacy private one.
    read_exif = getattr(img, "getexif", None) or getattr(img, "_getexif", None)
    if read_exif is None:
        return img

    try:
        exif = read_exif()
    except (AttributeError, KeyError, IndexError):
        # Best effort: unreadable metadata must not prevent further processing.
        return img

    if not exif:
        return img

    angle = rotation_for.get(exif.get(orientation_tag))
    if angle is None:
        return img
    return img.rotate(angle, expand=True)
28
+
29
  def resize_image(input_path, output_path, target_width):
30
  """Resize an image to the target width while maintaining aspect ratio."""
31
  with Image.open(input_path) as img:
32
+ # Correct orientation
33
+ img = correct_orientation(img)
34
+
35
  # Calculate the new height to maintain the aspect ratio
36
  width_percent = target_width / img.width
37
  target_height = int(img.height * width_percent)
 
42
  # Save the resized image in the same format as the input
43
  img.save(output_path, format=img.format)
44
 
45
def process_image_file(file_path, result_path, target_width):
    """Resize one image to ``target_width`` unless the result already exists."""
    if os.path.exists(result_path):
        print(f"Skipped {file_path}, already resized.")
        return

    print(f"Resizing {file_path} to {result_path}")
    resize_image(file_path, result_path, target_width)
52
+
53
def process_images(input_directory, output_directory, target_width):
    """Resize every image under ``input_directory`` into ``output_directory``.

    The input tree is walked recursively and each ``.png``/``.jpg``/``.jpeg``
    file is written, under its own file name, directly into
    ``output_directory``. Files are processed concurrently by a thread pool.

    A failure while processing one file is reported on stdout and does not
    stop the remaining files.

    Args:
        input_directory: Root directory containing the source images.
        output_directory: Directory receiving the resized images; created if
            missing.
        target_width: Width passed on to ``process_image_file``.
    """
    create_directory(output_directory)

    image_suffixes = ('.png', '.jpg', '.jpeg')
    submitted = []
    with ThreadPoolExecutor() as executor:
        for root, _, files in os.walk(input_directory):
            for file in files:
                if not file.lower().endswith(image_suffixes):
                    continue
                file_path = os.path.join(root, file)
                result_path = os.path.join(output_directory, file)
                future = executor.submit(process_image_file, file_path, result_path, target_width)
                submitted.append((file_path, future))

    # Leaving the ``with`` block waits for all workers, so every future is done.
    # Inspect them so that worker errors are reported instead of being dropped.
    for file_path, future in submitted:
        error = future.exception()
        if error is not None:
            print(f"Failed to resize {file_path}: {error!r}")
 
 
 
 
 
65
 
66
  def main():
67
  """Main function to resize images in specified subdirectories."""
utils/upload-to-dataset.py DELETED
@@ -1,84 +0,0 @@
1
- from datasets import Dataset, Features, Value, Image
2
- from huggingface_hub import HfApi
3
- import os
4
- from collections import defaultdict
5
- import pandas as pd
6
- import argparse
7
-
8
- def upload_to_dataset(image_dir, dataset_name):
9
- # Define the dataset features with dedicated columns for each model
10
- features = Features({
11
- "original_image": Image(), # Original image feature
12
- "clipdrop_image": Image(), # Clipdrop segmented image
13
- "bria_image": Image(), # Bria segmented image
14
- "photoroom_image": Image(), # Photoroom segmented image
15
- "removebg_image": Image(), # RemoveBG segmented image
16
- "original_filename": Value("string") # Original filename
17
- })
18
-
19
- # Load image paths and metadata
20
- data = defaultdict(lambda: {
21
- "clipdrop_image": None,
22
- "bria_image": None,
23
- "photoroom_image": None,
24
- "removebg_image": None
25
- })
26
-
27
- # Walk into the web-original-images folder
28
- web_original_images_dir = os.path.join(image_dir, "web-original-images")
29
- for root, _, files in os.walk(web_original_images_dir):
30
- for f in files:
31
- if f.endswith(('.png', '.jpg', '.jpeg')):
32
- original_image_path = os.path.join(root, f)
33
- data[f]["original_image"] = original_image_path
34
- data[f]["original_filename"] = f
35
-
36
- # Check for corresponding images in other directories
37
- for source in ["clipdrop", "bria", "photoroom", "removebg"]:
38
- # Check for processed images ending in .png or .jpg
39
- for ext in ['.png', '.jpg']:
40
- processed_image_filename = os.path.splitext(f)[0] + ext
41
- source_image_path = os.path.join(image_dir, source, processed_image_filename)
42
-
43
- if os.path.exists(source_image_path):
44
- data[f][f"{source}_image"] = source_image_path
45
- break # Stop checking other extensions if a file is found
46
-
47
- # Convert the data to a dictionary of lists
48
- dataset_dict = {
49
- "original_image": [],
50
- "clipdrop_image": [],
51
- "bria_image": [],
52
- "photoroom_image": [],
53
- "removebg_image": [],
54
- "original_filename": []
55
- }
56
-
57
- for filename, entry in data.items():
58
- if "original_image" in entry:
59
- dataset_dict["original_image"].append(entry["original_image"])
60
- dataset_dict["clipdrop_image"].append(entry["clipdrop_image"])
61
- dataset_dict["bria_image"].append(entry["bria_image"])
62
- dataset_dict["photoroom_image"].append(entry["photoroom_image"])
63
- dataset_dict["removebg_image"].append(entry["removebg_image"])
64
- dataset_dict["original_filename"].append(filename)
65
-
66
- # Save the data dictionary to a CSV file for inspection
67
- df = pd.DataFrame.from_dict(dataset_dict)
68
- df.to_csv("image_data.csv", index=False)
69
-
70
- # Create a Dataset
71
- dataset = Dataset.from_dict(dataset_dict, features=features)
72
-
73
- # Push the dataset to Hugging Face Hub
74
- api = HfApi()
75
- dataset.push_to_hub(dataset_name, token=api.token)
76
-
77
- if __name__ == "__main__":
78
- parser = argparse.ArgumentParser(description="Upload images to a Hugging Face dataset.")
79
- parser.add_argument("image_dir", type=str, help="Directory containing the images.")
80
- parser.add_argument("dataset_name", type=str, help="Name of the dataset to upload to Hugging Face Hub.")
81
-
82
- args = parser.parse_args()
83
-
84
- upload_to_dataset(args.image_dir, args.dataset_name)