Spaces:

devfire
/

ScanSmartAI

Sleeping

App Files Files Community

ScanSmartAI / data_check.py

devfire

Upload 6 files

f054618 verified 5 months ago

raw

history blame

2.33 kB

	import os
	from PIL import Image

	# Dataset root, plus one sub-directory per split (training and validation).
	base_dir = 'data/chest_xray'
	train_dir, val_dir = (
	    os.path.join(base_dir, split_name) for split_name in ('train', 'val')
	)


	# Function to count images in a specific category (e.g., NORMAL, PNEUMONIA)
	def count_images(directory, category):
	    """Count the image files found under ``directory/category``.

	    The category directory is walked recursively, so images in nested
	    sub-directories are included. A file counts as an image when its name
	    ends in ``.jpg``, ``.jpeg`` or ``.png``; the comparison ignores case so
	    that files such as ``scan.JPG`` are not silently left out.

	    Args:
	        directory: Path of the split directory (e.g. the training set).
	        category: Name of the class sub-directory (e.g. ``'NORMAL'``).

	    Returns:
	        The number of matching files. A missing category directory yields
	        ``0`` because ``os.walk`` produces nothing for a nonexistent path.
	    """
	    image_suffixes = ('.jpg', '.jpeg', '.png')
	    category_dir = os.path.join(directory, category)
	    return sum(
	        1
	        for _root, _dirs, files in os.walk(category_dir)
	        for file_name in files
	        # Lower-case first: extensions are matched case-insensitively.
	        if file_name.lower().endswith(image_suffixes)
	    )


	# Function to check for corrupted images in a specific category
	def check_corrupted_images(directory, category):
	    """Return the paths of image files under ``directory/category`` that fail verification.

	    The category directory is walked recursively. Every file whose name ends
	    in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive) is opened with
	    ``Image.open`` and checked with ``verify()``. A file is reported as
	    corrupted when either step raises ``IOError`` or ``SyntaxError``.

	    Args:
	        directory: Path of the split directory (e.g. the training set).
	        category: Name of the class sub-directory (e.g. ``'NORMAL'``).

	    Returns:
	        A list with the full path of every file that failed, in the order
	        ``os.walk`` visited them. The list is empty when nothing failed or
	        when the category directory does not exist.
	    """
	    image_suffixes = ('.jpg', '.jpeg', '.png')
	    category_dir = os.path.join(directory, category)
	    corrupted_files = []
	    for root, _dirs, files in os.walk(category_dir):
	        for file_name in files:
	            # Lower-case first so upper-case extensions are checked too.
	            if not file_name.lower().endswith(image_suffixes):
	                continue
	            image_path = os.path.join(root, file_name)
	            try:
	                # The context manager releases the underlying file handle
	                # once verification finishes, so scanning a large dataset
	                # does not accumulate open files.
	                with Image.open(image_path) as img:
	                    img.verify()
	            except (IOError, SyntaxError):
	                corrupted_files.append(image_path)
	    return corrupted_files


	# Tally the images of each class, first for the training split and then for
	# the validation split.
	train_normal_count, train_pneumonia_count = (
	    count_images(train_dir, label) for label in ('NORMAL', 'PNEUMONIA')
	)
	val_normal_count, val_pneumonia_count = (
	    count_images(val_dir, label) for label in ('NORMAL', 'PNEUMONIA')
	)

	# Collect the paths of unreadable images, visiting the splits and classes in
	# the same order as the counts above.
	train_normal_corrupted, train_pneumonia_corrupted = (
	    check_corrupted_images(train_dir, label) for label in ('NORMAL', 'PNEUMONIA')
	)
	val_normal_corrupted, val_pneumonia_corrupted = (
	    check_corrupted_images(val_dir, label) for label in ('NORMAL', 'PNEUMONIA')
	)

	# Report the per-class counts, one line each.
	print(
	    f"Training NORMAL images: {train_normal_count}",
	    f"Training PNEUMONIA images: {train_pneumonia_count}",
	    f"Validation NORMAL images: {val_normal_count}",
	    f"Validation PNEUMONIA images: {val_pneumonia_count}",
	    sep="\n",
	)

	# Report the corrupted-file lists, one line each.
	print(
	    f"Corrupted images in training NORMAL: {train_normal_corrupted}",
	    f"Corrupted images in training PNEUMONIA: {train_pneumonia_corrupted}",
	    f"Corrupted images in validation NORMAL: {val_normal_corrupted}",
	    f"Corrupted images in validation PNEUMONIA: {val_pneumonia_corrupted}",
	    sep="\n",
	)