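# Gradio app: the user uploads an image and can optionally paint a mask over
# it. GPT-4o describes the image in one sentence, and that description is used
# as the prompt for a Replicate RealVisXL multi-ControlNet run that re-renders
# the image (inpainting the masked region when one was drawn).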
import os
import base64
import io

import numpy as np
import requests
import replicate
import gradio as gr
import openai
from PIL import Image
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
# Load API keys from the nearest .env file; the OpenAI and Replicate SDKs
# also pick these variables up from the environment automatically.
load_dotenv(find_dotenv())
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
REPLICATE_API_TOKEN = os.getenv('REPLICATE_API_TOKEN')

client = OpenAI()
def main(img):
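    """Full pipeline for one submission. `img` is the gr.ImageEditor value:
    a dict with a 'background' array (the uploaded image), a list of RGBA
    'layers' (the painted strokes), and a 'composite' (unused here)."""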
    mask = img['layers'][0]
    base_image = Image.fromarray(img['background'].astype('uint8'))
    img_base_64 = img_to_base64(base_image)
    if is_transparent(mask):
        # No strokes drawn: skip the mask and fall back to plain img2img.
        mask_base_64 = None
    else:
        mask_img = create_mask_image(mask)
        mask_base_64 = img_to_base64(mask_img)
    prompt = call_openai(img_base_64)
    output_urls = generate_image(prompt, img_base_64, mask_base_64)
    # Skip the first three URLs (the ControlNet preprocessor previews) so
    # only the four generated images remain.
    output_images = [download_image(url) for url in output_urls[3:]]
    return output_images
def generate_image(prompt, img, mask):
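    """Run the Replicate RealVisXL multi-ControlNet model. The same base
    image drives both the img2img input and all three ControlNets (canny
    edges, MiDaS depth, lineart); the return value is consumed by the
    caller as a list of output image URLs."""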
    input_data = {
        "image": img,
        "prompt": prompt + ", expensive",  # style hint appended to the GPT-4o description
        "refine": "no_refiner",
        "scheduler": "K_EULER",
        "lora_scale": 0.8,
        "num_outputs": 4,
        "controlnet_1": "edge_canny",
        "controlnet_2": "depth_midas",
        "controlnet_3": "lineart",
        "guidance_scale": 7.5,
        "apply_watermark": False,
        "negative_prompt": "worst quality, low quality, illustration, 2d, painting, cartoons, sketch",
        "prompt_strength": 0.75,
        "sizing_strategy": "controlnet_1_image",
        "controlnet_1_end": 1,
        "controlnet_2_end": 1,
        "controlnet_3_end": 1,
        "controlnet_1_image": img,
        "controlnet_1_start": 0,
        "controlnet_2_image": img,
        "controlnet_2_start": 0,
        "controlnet_3_image": img,
        "controlnet_3_start": 0,
        "num_inference_steps": 30,
        "controlnet_1_conditioning_scale": 0.8,
        "controlnet_2_conditioning_scale": 0.8,
        "controlnet_3_conditioning_scale": 0.75
    }
    if mask is not None:
        input_data["mask"] = mask
    else:
        # Without a mask, lower the prompt strength so the output stays
        # closer to the original image.
        input_data["prompt_strength"] = 0.6
    output = replicate.run(
        "fofr/realvisxl-v3-multi-controlnet-lora:90a4a3604cd637cb9f1a2bdae1cfa9ed869362ca028814cdce310a78e27daade",
        input=input_data
    )
    return output
def download_image(url):
    response = requests.get(url)
    img = Image.open(io.BytesIO(response.content))
    return img
def create_mask_image(mask_array):
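    """Convert the RGBA brush layer into the black-and-white mask format
    inpainting models conventionally expect: white where the user painted
    (the region to regenerate), black everywhere else (the region to keep)."""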
    # Convert the mask to a numpy array if it's not already
    if not isinstance(mask_array, np.ndarray):
        mask_array = np.array(mask_array)
    # Start from an all-black RGB canvas; transparent (undrawn) pixels stay black
    processed_mask = np.zeros((mask_array.shape[0], mask_array.shape[1], 3), dtype=np.uint8)
    # Painted strokes (black brush: RGB=0,0,0 with alpha=255) become white
    black_mask = (mask_array[:, :, :3] == [0, 0, 0]).all(axis=2) & (mask_array[:, :, 3] == 255)
    processed_mask[black_mask] = [255, 255, 255]
    return Image.fromarray(processed_mask)
def is_transparent(mask_array):
    # True when the user drew nothing: every pixel's alpha channel is 0.
    return np.all(mask_array[:, :, 3] == 0)
def img_to_base64(img):
    # Use the image's own format if PIL knows it; images built with
    # Image.fromarray have format=None, so fall back to PNG.
    img_format = img.format if img.format else "PNG"
    # Encode the image bytes as a base64 data URI
    buffered = io.BytesIO()
    img.save(buffered, format=img_format)
    img_base_64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
    return f"data:image/{img_format.lower()};base64," + img_base_64
def call_openai(image_data):
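    """Ask GPT-4o for a one-sentence description of the image (passed as a
    base64 data URI); the description is used verbatim as the generation
    prompt."""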
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Please describe this image in one sentence, with a focus on the material and specific color (Pantone-level specificity) and details of the main object in the scene. Mention the type of lighting as well."},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image_data,
                            },
                        },
                    ],
                }
            ],
            max_tokens=300,
        )
        return response.choices[0].message.content
    except openai.BadRequestError as e:
        print(e)
        raise gr.Error("You uploaded an unsupported image. Please make sure your image is below 20 MB in size and is one of the following formats: ['png', 'jpeg', 'gif', 'webp']")
    except Exception as e:
        print(e)
        raise gr.Error("Unknown error")
# Define the brush with only black color
black_brush = gr.Brush(colors=["#000000"], default_color="#000000", color_mode="fixed")

# Use the ImageEditor component so the user can draw on the image with the
# single fixed-color brush defined above
demo = gr.Interface(
    fn=main,
    inputs=gr.ImageEditor(brush=black_brush),
    outputs=[gr.Image(type="pil"), gr.Image(type="pil"), gr.Image(type="pil"), gr.Image(type="pil")]
)

demo.launch(share=False)