import os
import base64

import cv2
import numpy as np
import torch
from PIL import Image
from pathlib import Path
from ultralytics import YOLO

from trainer import Trainer
from utils.tools import get_config
from iopaint.single_processing import batch_inpaint_cv2

# Keep model downloads inside the project instead of the user-level cache.
os.environ["TORCH_HOME"] = "./pretrained-model"
os.environ["HUGGINGFACE_HUB_CACHE"] = "./pretrained-model"
					
						
def resize_image(input_image_path, width=640, height=640):
    """Letterbox-resizes the image at the given path to width x height,
    preserving aspect ratio and padding the remainder with gray."""
    try:
        img = cv2.imread(input_image_path, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"could not read image: {input_image_path}")

        shape = img.shape[:2]  # current shape as (height, width)
        new_shape = (height, width)

        # Scale ratio (new / old): take the tighter fit so the resized
        # image never overflows the target canvas.
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (w, h)

        im = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # Split the leftover space evenly and pad with YOLO's gray border.
        color = (114, 114, 114)
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
        dw /= 2
        dh /= 2

        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        return im

    except Exception as e:
        print(f"Error resizing image: {e}")
        return None
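
# Worked example of the letterbox math above: a 1280x720 frame on the
# default 640x640 canvas gives r = min(640/720, 640/1280) = 0.5, so the
# frame is resized to 640x360 and padded with 140 px of gray above and
# below (dh = 280 / 2).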
					
						
def load_weights(path, device):
    """Loads a checkpoint's tensors onto the given device."""
    # map_location keeps CUDA-saved checkpoints loadable on CPU-only hosts.
    model_weights = torch.load(path, map_location=device)
    return {k: v.to(device) for k, v in model_weights.items()}
					
						
def convert_image_to_base64(image):
    """Encodes an OpenCV image (ndarray) as a base64 PNG string."""
    _, buffer = cv2.imencode('.png', image)
    return base64.b64encode(buffer).decode('utf-8')


def convert_to_base64(image):
    """Encodes the raw bytes of a file-like object as a base64 string."""
    image_data = image.read()
    return base64.b64encode(image_data).decode('utf-8')


def convert_to_base64_file(image):
    """Encodes an OpenCV image (ndarray) as a base64 PNG string."""
    image_data = cv2.imencode('.png', image)[1].tobytes()
    return base64.b64encode(image_data).decode('utf-8')
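
# Example round trip for the helpers above ("photo.png" is a hypothetical
# file name): an ndarray goes out as a base64 PNG string and decodes back
# to PNG bytes with the stdlib.
#   b64 = convert_image_to_base64(cv2.imread("photo.png"))
#   png_bytes = base64.b64decode(b64)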
					
						
def process_images(input_image, append_image, default_class="chair"):
    config_path = Path('configs/config.yaml')
    model_path = Path('pretrained-model/torch_model.p')

    # Letterbox the input to the 640x640 size the segmentation model expects.
    img = resize_image(input_image)
    if img is None:
        return {'error': 'Failed to decode resized image'}, 419

    H, W, _ = img.shape
    x_point = 0
    y_point = 0
    width = 1
    height = 1

    # Run YOLOv8 instance segmentation on the resized image.
    model = YOLO('pretrained-model/yolov8m-seg.pt')
    results = model(img, imgsz=(H, W), conf=0.5)  # imgsz takes (height, width)
    names = model.names

    class_found = False
    for result in results:
        for i, label in enumerate(result.boxes.cls):
            if names[int(label)] == default_class:
                class_found = True

                # Dilate the instance mask so inpainting also covers the
                # object's soft edges.
                chair_mask_np = result.masks.data[i].cpu().numpy()
                kernel = np.ones((5, 5), np.uint8)
                chair_mask_np = cv2.dilate(chair_mask_np, kernel, iterations=2)

                # Bounding box of the largest mask region: this is where
                # the replacement object will be pasted later.
                contours, _ = cv2.findContours((chair_mask_np > 0).astype(np.uint8),
                                               cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                if contours:
                    largest = max(contours, key=cv2.contourArea)
                    x_point, y_point, width, height = cv2.boundingRect(largest)

                # Scale the dilated mask up to the image resolution for inpainting.
                mask = result.masks.data[i].cpu().numpy() * 255
                dilated_mask = cv2.dilate(mask, kernel, iterations=2)
                resized_mask = cv2.resize(dilated_mask, (img.shape[1], img.shape[0]))

                output_image = repainting_and_merge(
                    append_image, str(model_path), str(config_path),
                    width, height, x_point, y_point, img, resized_mask)
                return output_image

    if not class_found:
        return {'message': f'{default_class} object not found in the image'}, 200
					
						
def repainting_and_merge(append_image_path, model_path, config_path,
                         width, height, xposition, yposition, input_base, mask_base):
    config = get_config(config_path)
    device = torch.device("cpu")
    trainer = Trainer(config)
    trainer.load_state_dict(load_weights(model_path, device), strict=False)
    trainer.eval()

    # Erase the detected object with lama inpainting.
    print("lama inpainting start")
    inpaint_result_np = batch_inpaint_cv2('lama', 'cpu', input_base, mask_base)
    print("lama inpainting end")

    final_image = Image.fromarray(inpaint_result_np)

    print("merge start")
    # Load the replacement object; IMREAD_UNCHANGED keeps the alpha channel,
    # which the BGRA->RGBA conversion below assumes is present.
    append_image = cv2.imread(append_image_path, cv2.IMREAD_UNCHANGED)
    cv2.imwrite('appended-image.png', append_image)

    # Fit the replacement into the detected bounding box.
    resized_image = cv2.resize(append_image, (width, height), interpolation=cv2.INTER_AREA)
    resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGRA2RGBA)
    append_image_pil = Image.fromarray(resized_image)

    # Paste using the replacement's own alpha channel as the mask.
    final_image.paste(append_image_pil, (xposition, yposition), append_image_pil)
    print("merge end")

    return np.array(final_image)
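

# Minimal usage sketch, not part of the original module: "room.png" and
# "new-chair.png" are hypothetical example paths. process_images returns
# either the composited ndarray on success or a (dict, status_code) tuple
# on failure.
if __name__ == "__main__":
    result = process_images("room.png", "new-chair.png", default_class="chair")
    if isinstance(result, np.ndarray):
        # The composite comes back in RGB (PIL channel order), so convert
        # to BGR before handing it to OpenCV.
        bgr = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
        cv2.imwrite("output.png", bgr)
        print("base64 payload length:", len(convert_image_to_base64(bgr)))
    else:
        print(result)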
					
						