File size: 6,864 Bytes
a0c2904
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553ce54
a0c2904
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import torch
from diffusers import StableDiffusionXLPipeline, AutoencoderTiny
import os
import json
import google.generativeai as genai
import gradio as gr
import random

# Choices offered by the "Art Style" dropdown.
art_styles = [
    "Impressionism", "Surrealism", "Cubism",
    "Abstract", "Realism", "Expressionism",
    "Pop Art", "Futurism", "Minimalism",
    "Conceptual Art", "Baroque", "Renaissance",
    "Gothic", "Neoclassicism", "Romanticism",
    "Art Nouveau", "Art Deco", "Post-Impressionism",
]

# Choices offered by the "Artist" dropdown.
artists = [
    "Pablo Picasso", "Vincent van Gogh", "Leonardo da Vinci", "Claude Monet",
    "Salvador Dalí", "Frida Kahlo", "Rembrandt", "Michelangelo",
    "Andy Warhol", "Jackson Pollock", "Georgia O'Keeffe", "Edvard Munch",
    "Henri Matisse", "Paul Cézanne", "Gustav Klimt", "Caravaggio",
    "Jean-Michel Basquiat", "Raphael", "Auguste Rodin", "Yayoi Kusama",
]

# Choices offered by the "Light Effect" dropdown.
light_effects = [
    "soft lighting", "dramatic lighting",
    "rim lighting", "ambient lighting",
    "studio lighting", "natural light",
    "backlighting", "side lighting",
    "low-key lighting", "high-key lighting",
]

# Choices offered by the "Depth Effect" dropdown.
depth_effects = [
    "shallow depth of field", "deep depth of field",
    "foreground focus", "background blur",
    "bokeh effect", "layered depth",
    "aerial perspective", "overlapping elements",
    "linear perspective", "vanishing point",
]

# Function to save the Gemini API key
def save_gemini_api_key(api_key):
    """Write the Gemini API key to ``gemini_api_key.json`` in the working directory.

    The file holds a secret, so it is created with owner-only permissions
    (``0o600``) rather than the process default. The mode only takes effect
    when the file is created; an already existing file keeps its current
    permissions and is simply truncated and rewritten.

    Args:
        api_key: The key to store under the ``"api_key"`` JSON field.

    Raises:
        OSError: If the file cannot be created or written.
    """
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    fd = os.open("gemini_api_key.json", flags, 0o600)
    # os.fdopen takes ownership of the descriptor and closes it on exit.
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        json.dump({"api_key": api_key}, f)

# Function to load the Gemini API key
def load_gemini_api_key():
    """Return the API key stored in ``gemini_api_key.json``, or ``""`` if unavailable.

    This runs at module import time, so a damaged key file must not take the
    whole app down: a missing file, an empty or malformed JSON document, a
    payload that is not a JSON object, or a non-string ``"api_key"`` value
    all yield the empty string.

    Returns:
        The stored key, or ``""`` when no usable key can be read.
    """
    try:
        with open("gemini_api_key.json", "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return ""
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Ignoring unreadable Gemini API key file: {e}")
        return ""

    if not isinstance(data, dict):
        return ""
    key = data.get("api_key", "")
    return key if isinstance(key, str) else ""

# Function to delete the Gemini API key
def delete_gemini_api_key():
    """Remove ``gemini_api_key.json`` from the working directory, if present.

    Deletion is best-effort: a missing file is not an error, and any other
    OS-level failure (permissions, the path being a directory, ...) is
    reported on stdout instead of being raised.
    """
    try:
        # Attempt the removal directly instead of checking existence first;
        # the file may disappear between a check and the delete.
        os.remove("gemini_api_key.json")
    except FileNotFoundError:
        pass  # Nothing to delete.
    except OSError as e:
        print(f"Error deleting Gemini API key: {e}")

# Function to enhance prompt using Gemini's API with specific instructions
def enhance_prompt_with_gemini(prompt, api_key):
    """Ask Gemini to rewrite ``prompt`` into a richer Stable Diffusion prompt.

    Enhancement is optional and best-effort; the original prompt is returned
    unchanged whenever no key is supplied, the API call fails, or Gemini
    answers with empty/whitespace-only text.

    Args:
        prompt: The prompt text to enhance.
        api_key: Gemini API key; any falsy value disables enhancement.

    Returns:
        The stripped text of Gemini's response, or ``prompt`` as a fallback.
    """
    if not api_key:
        return prompt

    try:
        genai.configure(api_key=api_key)

        # Construct the specific instruction prompt
        instruction = f"You are Professor of creating a prompt for Stable Diffusion to generate an image. Write the phrases for art styles, art movements, artists names, light and depth effects for this {prompt}. Make this prompt the best ever, only response with prompt itself in one paragraph."

        # Generate content using Gemini API with the instruction prompt
        model = genai.GenerativeModel('gemini-1.5-pro')
        response = model.generate_content(instruction)
        enhanced_prompt = response.text.strip()
    except Exception as e:
        # Deliberately broad: enhancement must never block image generation.
        print(f"Failed to enhance prompt with Gemini API: {e}")
        return prompt  # Fallback to original prompt on error

    # An empty reply would otherwise discard the user's prompt entirely.
    return enhanced_prompt or prompt

# Lazily-built diffusion pipeline, shared by every call to generate_image.
_PIPELINE = None


def _get_pipeline():
    """Build the SDXL pipeline on first use and return the cached instance afterwards."""
    global _PIPELINE
    if _PIPELINE is None:
        vae = AutoencoderTiny.from_pretrained(
            'madebyollin/taesdxl',
            use_safetensors=True,
            torch_dtype=torch.float16,
        ).to('cpu')

        _PIPELINE = StableDiffusionXLPipeline.from_pretrained(
            'ABDALLALSWAITI/DAVINCI-DIFF',
            torch_dtype=torch.float16,
            use_safetensors=True,
            vae=vae,
        ).to('cpu')
    return _PIPELINE


# Function to generate images based on user input
def generate_image(prompt, negative_prompt, cfg_scale, steps, width, height, seed, art_style, artist, light_effect, depth_effect, api_key):
    """Generate one image from the prompt and the selected style modifiers.

    The style, artist, light and depth selections are appended to ``prompt``,
    the combined text is passed through ``enhance_prompt_with_gemini``, and
    the result is rendered with the Stable Diffusion XL pipeline.

    Args:
        prompt: Base text prompt.
        negative_prompt: Text describing what the image should avoid.
        cfg_scale: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        width: Output width in pixels.
        height: Output height in pixels.
        seed: RNG seed; ``None`` or ``""`` selects a random 32-bit seed.
        art_style: Art style phrase appended to the prompt.
        artist: Artist name appended to the prompt.
        light_effect: Lighting phrase appended to the prompt.
        depth_effect: Depth phrase appended to the prompt.
        api_key: Gemini API key used for prompt enhancement (may be empty).

    Returns:
        The first image of the pipeline output, requested as a PIL image.
    """
    additional_prompts = f"{art_style}, {artist}, {light_effect}, {depth_effect}"
    full_prompt = f"{prompt}, {additional_prompts}"
    enhanced_prompt = enhance_prompt_with_gemini(full_prompt, api_key)

    # Generate a random seed if not provided
    if seed is None or seed == "":
        seed = random.randint(0, 2**32 - 1)

    # Form number fields may deliver floats (e.g. 50.0); step counts, image
    # dimensions and seeds are integral, so normalise them once up front.
    seed = int(seed)
    steps = int(steps)
    width = int(width)
    height = int(height)

    print("\nStarting image generation with the following parameters:")
    print(f"Enhanced Prompt: {enhanced_prompt}")
    print(f"Negative Prompt: {negative_prompt}")
    print(f"CFG Scale: {cfg_scale}")
    print(f"Steps: {steps}")
    print(f"Width: {width}")
    print(f"Height: {height}")
    print(f"Seed: {seed}")

    # Loading the models is by far the most expensive step; reuse them
    # instead of re-reading the weights on every request.
    pipe = _get_pipeline()

    generator = torch.manual_seed(seed)

    image = pipe(
        prompt=enhanced_prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_inference_steps=steps,
        guidance_scale=cfg_scale,
        generator=generator,
        output_type="pil",
    ).images[0]

    return image

# Gradio Blocks interface
# Key persisted on disk, if any. It gets its own name because `api_key` is
# re-bound to the Textbox component when the UI is built below, which would
# otherwise discard the loaded value before anything reads it.
stored_api_key = load_gemini_api_key()

def on_submit(prompt, negative_prompt, cfg_scale, steps, width, height, seed, art_style, artist, light_effect, depth_effect, api_key):
    """Click handler for the generate button: forward the form values to ``generate_image``.

    When the API key field is left empty, the key loaded from disk at startup
    (if any) is used instead. The stored key is only used on the server side
    and is never placed into a UI component.

    Returns:
        Whatever ``generate_image`` returns for the given inputs.
    """
    effective_key = api_key or stored_api_key
    return generate_image(prompt, negative_prompt, cfg_scale, steps, width, height, seed, art_style, artist, light_effect, depth_effect, effective_key)

# Text pre-filled into the prompt box when the page loads.
DEFAULT_PROMPT = "3/4 shot, candid photograph of a beautiful 30 year old redhead woman with messy dark hair, peacefully sleeping in her bed, night, dark, light from window, dark shadows, masterpiece, uhd, moody"

with gr.Blocks() as interface:
    gr.Markdown("# Image Generation with Stable Diffusion")

    # Prompt and negative prompt side by side.
    with gr.Row():
        prompt = gr.Textbox(label="Prompt", value=DEFAULT_PROMPT)
        negative_prompt = gr.Textbox(label="Negative Prompt", value="")

    # Sampler settings.
    with gr.Row():
        cfg_scale = gr.Number(label="CFG Scale", value=7.5)
        steps = gr.Number(label="Steps", value=50)

    # Output resolution.
    with gr.Row():
        width = gr.Number(label="Image Width", value=1024)
        height = gr.Number(label="Image Height", value=1024)

    # No default value: an empty field makes generate_image pick a random seed.
    seed = gr.Number(label="Seed (leave empty for random seed)", value=None)

    # Style modifiers that are appended to the prompt.
    with gr.Row():
        art_style = gr.Dropdown(label="Art Style", choices=art_styles, value="Impressionism")
        artist = gr.Dropdown(label="Artist", choices=artists, value="Vincent van Gogh")

    with gr.Row():
        light_effect = gr.Dropdown(label="Light Effect", choices=light_effects, value="soft lighting")
        depth_effect = gr.Dropdown(label="Depth Effect", choices=depth_effects, value="shallow depth of field")

    api_key = gr.Textbox(label="Gemini API Key", type="password", placeholder="Enter Gemini API Key (optional)")

    generate_btn = gr.Button("Generate Image")
    output_image = gr.Image(label="Generated Image")

    # Order must match the positional parameters of on_submit.
    input_components = [
        prompt,
        negative_prompt,
        cfg_scale,
        steps,
        width,
        height,
        seed,
        art_style,
        artist,
        light_effect,
        depth_effect,
        api_key,
    ]
    generate_btn.click(fn=on_submit, inputs=input_components, outputs=output_image)

interface.launch(share=True)

def on_session_end(session):
    """Delete the stored Gemini API key file.

    ``session`` is accepted but not used.

    NOTE(review): nothing in the visible code registers this function as a
    callback, and it is defined after ``interface.launch(...)`` - confirm how
    (or whether) it is meant to be invoked.
    """
    _ = session  # intentionally unused
    return delete_gemini_api_key()