xguman committed on
Commit
1454eef
·
1 Parent(s): 2537caf

randomly choose 500 pictures from food dataset

Browse files
Files changed (1) hide show
  1. download_dataset.py +9 -9
download_dataset.py CHANGED
@@ -1,25 +1,25 @@
1
  import os
2
  from datasets import load_dataset
3
  from PIL import Image
 
4
 
5
  def download_images(name="food101"):
6
- # Load the "food101" dataset
7
- dataset = load_dataset(name, split="train[:1%]") # Get a small percentage of the data
8
 
9
  # Create a directory to save the images if it doesn't exist
10
  output_dir = "data/pictures"
11
  os.makedirs(output_dir, exist_ok=True)
12
 
13
- # Limit to 200 images
14
- num_images = 200
15
  count = 0
16
 
17
- # Iterate over the dataset and save the images
18
- for example in dataset:
19
- if count >= num_images:
20
- break
21
  image = example['image']
22
  image.save(os.path.join(output_dir, f"image_{count}.jpg")) # Save as JPG
23
  count += 1
24
 
25
- print(f"Downloaded and saved {count} images to the folder '{output_dir}'")
 
1
  import os
2
  from datasets import load_dataset
3
  from PIL import Image
4
+ import random
5
 
def download_images(name="food101", num_images=500, output_dir="data/pictures", seed=None):
    """Save a random sample of images from a dataset's train split as JPEG files.

    The train split is loaded with ``load_dataset(name, split="train")``,
    ``num_images`` distinct indices are drawn without replacement, and each
    selected example's ``'image'`` entry is written to
    ``<output_dir>/image_<i>.jpg`` with ``i`` counting up from 0.

    Args:
        name: Dataset identifier passed to ``load_dataset``.
        num_images: Maximum number of images to save. When the split holds
            fewer examples, every example is saved instead.
        output_dir: Directory that receives the files; created if missing.
        seed: Optional seed for a reproducible selection. When ``None`` the
            module-level ``random`` generator is used, so a prior
            ``random.seed(...)`` call by the caller still takes effect.

    Raises:
        ValueError: If ``num_images`` is negative.
    """
    if num_images < 0:
        raise ValueError("num_images must be non-negative")

    dataset = load_dataset(name, split="train")

    # Create a directory to save the images if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the request at the size of the split.
    total = len(dataset)
    sample_size = min(num_images, total)

    rng = random if seed is None else random.Random(seed)
    random_indices = rng.sample(range(total), sample_size)

    # Randomly selected examples are saved under sequential file names.
    for count, index in enumerate(random_indices):
        # Assumes each example exposes a PIL image under 'image' -- TODO confirm
        # for datasets other than the default.
        image = dataset[index]['image']
        # JPEG cannot store alpha or palette modes; convert those so save()
        # does not raise OSError. Modes JPEG already supports are left alone.
        if image.mode not in ("L", "RGB", "CMYK"):
            image = image.convert("RGB")
        image.save(os.path.join(output_dir, f"image_{count}.jpg"))  # Save as JPG

    # Report the number actually written, which may be below num_images.
    print(f"Downloaded and saved {len(random_indices)} images to the folder '{output_dir}'")