Severian committed on
Commit
daee0bb
·
verified ·
1 Parent(s): d2c00ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -15
app.py CHANGED
@@ -31,9 +31,9 @@ CAPTION_TYPE_MAP = {
31
  ("rng-tags", "formal", False, True): ["Write a list of Booru tags for this image within {word_count} words."],
32
  ("rng-tags", "formal", True, False): ["Write a {length} list of Booru tags for this image."],
33
 
34
- ("style_prompt", "formal", False, False): ["Generate a detailed style prompt for this image, including lens type, film stock, composition notes, and lighting aspects."],
35
- ("style_prompt", "formal", False, True): ["Generate a detailed style prompt for this image within {word_count} words, including lens type, film stock, composition notes, and lighting aspects."],
36
- ("style_prompt", "formal", True, False): ["Generate a {length} detailed style prompt for this image, including lens type, film stock, composition notes, and lighting aspects."],
37
  }
38
 
39
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
@@ -186,7 +186,7 @@ def generate_caption(text_model, tokenizer, image_features, prompt_str: str, max
186
 
187
  @spaces.GPU()
188
  @torch.no_grad()
189
- def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, lens_type: str = "", film_stock: str = "", composition_style: str = "") -> str:
190
  """
191
  Generate a caption or style prompt based on the input image and parameters.
192
  """
@@ -209,7 +209,12 @@ def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str,
209
  prompt_str = CAPTION_TYPE_MAP[prompt_key][0].format(length=length, word_count=length)
210
 
211
  if caption_type == "style_prompt":
212
- prompt_str += f" Lens type: {lens_type}. Film stock: {film_stock}. Composition style: {composition_style}."
 
 
 
 
 
213
 
214
  print(f"Prompt: {prompt_str}")
215
 
@@ -243,12 +248,116 @@ ul, ol {
243
  }
244
  """
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  # Gradio interface
247
  with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
248
  with gr.Tab("Welcome"):
249
  gr.Markdown(
250
  """
251
- <img src="https://path-to-yamamoto-logo.png" alt="Yamamoto Logo" class="centered-image">
252
 
253
  # 🎨 Yamamoto JoyCaption: AI-Powered Art Inspiration
254
 
@@ -331,46 +440,73 @@ with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
331
  )
332
 
333
  lens_type = gr.Dropdown(
334
- choices=["Wide-angle", "Standard", "Telephoto", "Macro", "Fish-eye"],
335
  label="Lens Type",
336
  visible=False,
 
337
  )
338
 
339
  film_stock = gr.Dropdown(
340
- choices=["Kodak Portra", "Fujifilm Velvia", "Ilford Delta", "Kodak Tri-X", "Fujifilm Provia"],
341
  label="Film Stock",
342
  visible=False,
 
343
  )
344
 
345
  composition_style = gr.Dropdown(
346
- choices=["Rule of Thirds", "Golden Ratio", "Symmetry", "Leading Lines", "Framing"],
347
  label="Composition Style",
348
  visible=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  )
350
 
351
  gr.Markdown("**Note:** Caption tone doesn't affect `rng-tags`, `training_prompt`, and `style_prompt`.")
352
 
 
 
353
  run_button = gr.Button("Make My Caption!")
354
 
355
  with gr.Column():
356
  output_caption = gr.Textbox(label="Generated Caption")
357
  copy_button = gr.Button("Copy to Clipboard")
 
358
 
359
  def update_style_options(caption_type):
360
  return {
361
  lens_type: gr.update(visible=caption_type == "style_prompt"),
362
  film_stock: gr.update(visible=caption_type == "style_prompt"),
363
  composition_style: gr.update(visible=caption_type == "style_prompt"),
 
 
 
364
  }
365
 
366
- caption_type.change(update_style_options, inputs=[caption_type], outputs=[lens_type, film_stock, composition_style])
367
 
368
- run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, lens_type, film_stock, composition_style], outputs=[output_caption])
369
-
370
- def copy_to_clipboard():
371
- return None
372
 
373
- copy_button.click(fn=copy_to_clipboard, inputs=[], outputs=[])
374
 
375
  if __name__ == "__main__":
376
  demo.launch()
 
31
  ("rng-tags", "formal", False, True): ["Write a list of Booru tags for this image within {word_count} words."],
32
  ("rng-tags", "formal", True, False): ["Write a {length} list of Booru tags for this image."],
33
 
34
+ ("style_prompt", "formal", False, False): ["Generate a detailed style prompt for this image, including lens type, film stock, composition notes, lighting aspects, and any special photographic techniques."],
35
+ ("style_prompt", "formal", False, True): ["Generate a detailed style prompt for this image within {word_count} words, including lens type, film stock, composition notes, lighting aspects, and any special photographic techniques."],
36
+ ("style_prompt", "formal", True, False): ["Generate a {length} detailed style prompt for this image, including lens type, film stock, composition notes, lighting aspects, and any special photographic techniques."],
37
  }
38
 
39
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
186
 
187
  @spaces.GPU()
188
  @torch.no_grad()
189
+ def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, lens_type: str = "", film_stock: str = "", composition_style: str = "", lighting_aspect: str = "", special_technique: str = "", color_effect: str = "") -> str:
190
  """
191
  Generate a caption or style prompt based on the input image and parameters.
192
  """
 
209
  prompt_str = CAPTION_TYPE_MAP[prompt_key][0].format(length=length, word_count=length)
210
 
211
  if caption_type == "style_prompt":
212
+ prompt_str += f" Lens type: {lens_type} ({lens_types_info[lens_type]}). "
213
+ prompt_str += f"Film stock: {film_stock} ({film_stocks_info[film_stock]}). "
214
+ prompt_str += f"Composition style: {composition_style} ({composition_styles_info[composition_style]}). "
215
+ prompt_str += f"Lighting aspect: {lighting_aspect} ({lighting_aspects_info[lighting_aspect]}). "
216
+ prompt_str += f"Special technique: {special_technique} ({special_techniques_info[special_technique]}). "
217
+ prompt_str += f"Color effect: {color_effect} ({color_effects_info[color_effect]})."
218
 
219
  print(f"Prompt: {prompt_str}")
220
 
 
248
  }
249
  """
250
 
251
+ js = """
252
+ function copyToClipboard() {
253
+ var copyText = document.querySelector('.output-text textarea');
254
+ copyText.select();
255
+ document.execCommand('copy');
256
+ }
257
+ """
258
+
259
+ # Add detailed descriptions for each option
260
+ lens_types_info = {
261
+ "Standard": "A versatile lens with a field of view similar to human vision.",
262
+ "Wide-angle": "Captures a wider field of view, great for landscapes and architecture. Applies moderate to strong lens effect with image warp.",
263
+ "Telephoto": "Used for distant subjects, gives an 'award-winning' or 'National Geographic' look. Creates interesting effects when prompted.",
264
+ "Macro": "For extreme close-up photography, revealing tiny details.",
265
+ "Fish-eye": "Ultra-wide-angle lens that creates a strong bubble-like distortion. Generates panoramic photos with the entire image warping into a bubble.",
266
+ "Tilt-shift": "Allows adjusting the plane of focus, creating a 'miniature' effect. Known for the 'diorama miniature look'.",
267
+ "Zoom lens": "Variable focal length lens. Often zooms in on the subject, perfect for creating a base for inpainting. Interesting effect on landscapes with motion blur.",
268
+ "GoPro": "Wide-angle lens with clean digital look. Excludes film grain and most filter effects, resulting in natural colors and regular saturation.",
269
+ "Pinhole camera": "Creates a unique, foggy, low-detail, historic photograph look. Used since the 1850s, with peak popularity in the 1930s."
270
+ }
271
+
272
+ film_stocks_info = {
273
+ "Kodak Portra": "Professional color negative film known for its natural skin tones and low contrast.",
274
+ "Fujifilm Velvia": "Slide film known for vibrant colors and high saturation, popular among landscape photographers.",
275
+ "Ilford Delta": "Black and white film known for its fine grain and high sharpness.",
276
+ "Kodak Tri-X": "Classic high-speed black and white film, known for its distinctive grain and wide exposure latitude.",
277
+ "Fujifilm Provia": "Color reversal film known for its natural color reproduction and fine grain.",
278
+ "Cinestill": "Color photos with fine/low grain and higher than average resolution. Colors are slightly oversaturated or slightly desaturated.",
279
+ "Ektachrome": "Color photos with fine/low to moderate grain. Colors on the colder part of spectrum or regular, with normal or slightly higher saturation.",
280
+ "Ektar": "Modern Kodak film. Color photos with little to no grain. Results look like regular modern photography with artistic angles.",
281
+ "Film Washi": "Mostly black and white photos with fine/low to moderate grain. Occasionally gives colored photos with low saturation. Distinct style with high black contrast and soft camera lens effect.",
282
+ "Fomapan": "Black and white photos with fine/low to moderate grain, highly artistic exposure and angles. Adds very soft lens effect without distortion, dark photo vignette.",
283
+ "Fujicolor": "Color photos with fine/low to moderate grain. Colors are slightly or notably desaturated, with entire color hue shifted in a very distinct manner.",
284
+ "Holga": "Color photos with moderate to fine/low grain. Similar to Lomography in style, but with less grain. Good chance of black and white photography depending on subject.",
285
+ "Instax": "Instant color photos similar to Polaroid but clearer. Near perfect colors, regular saturation, fine/low to medium grain.",
286
+ "Lomography": "Color photos with high grain. Colors are either very oversaturated or slightly desaturated. Distinct contrast of black. Often applies photographic vignette.",
287
+ "Kodachrome": "Color photos with moderate grain. Colors on either colder part of spectrum or regular, with normal or slightly higher saturation.",
288
+ "Rollei": "Mostly black and white photos, sometimes color with fine/low grain. Can be sepia colored or have unusual hues and desaturation. Great for landscapes."
289
+ }
290
+
291
+ composition_styles_info = {
292
+ "Rule of Thirds": "Divides the frame into a 3x3 grid, placing key elements along the lines or at their intersections.",
293
+ "Golden Ratio": "Uses a spiral based on the golden ratio to create a balanced and aesthetically pleasing composition.",
294
+ "Symmetry": "Creates a mirror-like balance in the image, often used for architectural or nature photography.",
295
+ "Leading Lines": "Uses lines within the frame to draw the viewer's eye to the main subject or through the image.",
296
+ "Framing": "Uses elements within the scene to create a frame around the main subject.",
297
+ "Minimalism": "Simplifies the composition to its essential elements, often with a lot of negative space.",
298
+ "Fill the Frame": "The main subject dominates the entire frame, leaving little to no background.",
299
+ "Negative Space": "Uses empty space around the subject to create a sense of simplicity or isolation.",
300
+ "Centered Composition": "Places the main subject in the center of the frame, creating a sense of stability or importance.",
301
+ "Diagonal Lines": "Uses diagonal elements to create a sense of movement or dynamic tension in the image.",
302
+ "Triangular Composition": "Arranges elements in the frame to form a triangle, creating a sense of stability and harmony.",
303
+ "Radial Balance": "Arranges elements in a circular pattern around a central point, creating a sense of movement or completeness."
304
+ }
305
+
306
+ lighting_aspects_info = {
307
+ "Natural light": "Uses available light from the sun or sky, often creating soft, even illumination.",
308
+ "Studio lighting": "Controlled artificial lighting setup, allowing for precise manipulation of light and shadow.",
309
+ "Back light": "Light source behind the subject, creating silhouettes or rim lighting effects.",
310
+ "Split light": "Strong light source at 90-degree angle, lighting one half of the subject while leaving the other in shadow.",
311
+ "Broad light": "Light source at an angle to the subject, producing well-lit photographs with soft to moderate shadows.",
312
+ "Dim light": "Weak or distant light source, creating lower than average brightness and often dramatic images.",
313
+ "Flash photography": "Uses a brief, intense burst of light. Can be fill flash (even lighting) or harsh flash (strong contrasts).",
314
+ "Sunlight": "Direct light from the sun, often creating strong contrasts and warm tones.",
315
+ "Moonlight": "Soft, cool light from the moon, often creating a mysterious or romantic atmosphere.",
316
+ "Spotlight": "Focused beam of light illuminating a specific area, creating high contrast between light and shadow.",
317
+ "High-key lighting": "Bright, even lighting with minimal shadows, creating a light and airy feel.",
318
+ "Low-key lighting": "Predominantly dark tones with selective lighting, creating a moody or dramatic atmosphere.",
319
+ "Rembrandt lighting": "Classic portrait lighting technique creating a triangle of light on the cheek of the subject."
320
+ }
321
+
322
+ special_techniques_info = {
323
+ "Double exposure": "Superimposes two exposures to create a single image, often resulting in a dreamy or surreal effect.",
324
+ "Long exposure": "Uses a long shutter speed to capture motion over time, often creating smooth, blurred effects for moving elements.",
325
+ "Multiple exposure": "Superimposes multiple exposures, multiplying the subject or its key elements across the image.",
326
+ "HDR": "High Dynamic Range imaging, combining multiple exposures to capture a wider range of light and dark tones.",
327
+ "Bokeh effect": "Creates a soft, out-of-focus background, often with circular highlights.",
328
+ "Silhouette": "Captures the outline of a subject against a brighter background, creating a dramatic contrast.",
329
+ "Panning": "Follows a moving subject with the camera, creating a sharp subject with a blurred background.",
330
+ "Light painting": "Uses long exposure and moving light sources to 'paint' with light in the image.",
331
+ "Infrared photography": "Captures light in the infrared spectrum, often resulting in surreal, otherworldly images.",
332
+ "Ultraviolet photography": "Captures light in the ultraviolet spectrum, often revealing hidden patterns or creating a strong violet glow.",
333
+ "Kirlian photography": "High-voltage photographic technique that captures corona discharges around objects, creating a glowing effect.",
334
+ "Thermography": "Captures infrared radiation to create images based on temperature differences, resulting in false-color heat maps.",
335
+ "Astrophotography": "Specialized technique for capturing astronomical objects and celestial events, often resulting in stunning starry backgrounds.",
336
+ "Underwater photography": "Captures images beneath the surface of water, often in pools, seas, or aquariums.",
337
+ "Aerial photography": "Captures images from an elevated position, such as from drones, helicopters, or planes.",
338
+ "Macro photography": "Extreme close-up photography, revealing tiny details not visible to the naked eye."
339
+ }
340
+
341
+ color_effects_info = {
342
+ "Black and white": "Removes all color, leaving only shades of gray.",
343
+ "Sepia": "Reddish-brown monochrome effect, often associated with vintage photography.",
344
+ "Monochrome": "Uses variations of a single color.",
345
+ "Vintage color": "Muted or faded color palette reminiscent of old photographs.",
346
+ "Cross-processed": "Deliberate processing of film in the wrong chemicals, creating unusual color shifts.",
347
+ "Desaturated": "Reduces the intensity of all colors in the image.",
348
+ "Vivid colors": "Increases the saturation and intensity of colors.",
349
+ "Pastel colors": "Soft, pale colors with a light and airy feel.",
350
+ "High contrast": "Emphasizes the difference between light and dark areas in the image.",
351
+ "Low contrast": "Reduces the difference between light and dark areas, creating a softer look.",
352
+ "Color splash": "Converts most of the image to black and white while leaving one or more elements in color."
353
+ }
354
+
355
  # Gradio interface
356
  with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
357
  with gr.Tab("Welcome"):
358
  gr.Markdown(
359
  """
360
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/64740cf7485a7c8e1bd51ac9/EqvglEeWdTpCqWAcuP6-9.png">
361
 
362
  # 🎨 Yamamoto JoyCaption: AI-Powered Art Inspiration
363
 
 
440
  )
441
 
442
  lens_type = gr.Dropdown(
443
+ choices=list(lens_types_info.keys()),
444
  label="Lens Type",
445
  visible=False,
446
+ info="Select a lens type to define the perspective and field of view of the image."
447
  )
448
 
449
  film_stock = gr.Dropdown(
450
+ choices=list(film_stocks_info.keys()),
451
  label="Film Stock",
452
  visible=False,
453
+ info="Choose a film stock to determine the color, grain, and overall look of the image."
454
  )
455
 
456
  composition_style = gr.Dropdown(
457
+ choices=list(composition_styles_info.keys()),
458
  label="Composition Style",
459
  visible=False,
460
+ info="Select a composition style to guide the arrangement of elements in the image."
461
+ )
462
+
463
+ lighting_aspect = gr.Dropdown(
464
+ choices=list(lighting_aspects_info.keys()),
465
+ label="Lighting Aspect",
466
+ visible=False,
467
+ info="Choose a lighting style to define the mood and atmosphere of the image."
468
+ )
469
+
470
+ special_technique = gr.Dropdown(
471
+ choices=list(special_techniques_info.keys()),
472
+ label="Special Technique",
473
+ visible=False,
474
+ info="Select a special photographic technique to add unique effects to the image."
475
+ )
476
+
477
+ color_effect = gr.Dropdown(
478
+ choices=list(color_effects_info.keys()),
479
+ label="Color Effect",
480
+ visible=False,
481
+ info="Choose a color effect to alter the overall color palette of the image."
482
  )
483
 
484
  gr.Markdown("**Note:** Caption tone doesn't affect `rng-tags`, `training_prompt`, and `style_prompt`.")
485
 
486
+ gr.Markdown("![image/png](https://cdn-uploads.huggingface.co/production/uploads/64740cf7485a7c8e1bd51ac9/-tNeUZTAGCOFiNLG_VCMb.png)")
487
+
488
  run_button = gr.Button("Make My Caption!")
489
 
490
  with gr.Column():
491
  output_caption = gr.Textbox(label="Generated Caption")
492
  copy_button = gr.Button("Copy to Clipboard")
493
+ gr.HTML("<script>" + js + "</script>")
494
 
495
  def update_style_options(caption_type):
496
  return {
497
  lens_type: gr.update(visible=caption_type == "style_prompt"),
498
  film_stock: gr.update(visible=caption_type == "style_prompt"),
499
  composition_style: gr.update(visible=caption_type == "style_prompt"),
500
+ lighting_aspect: gr.update(visible=caption_type == "style_prompt"),
501
+ special_technique: gr.update(visible=caption_type == "style_prompt"),
502
+ color_effect: gr.update(visible=caption_type == "style_prompt"),
503
  }
504
 
505
+ caption_type.change(update_style_options, inputs=[caption_type], outputs=[lens_type, film_stock, composition_style, lighting_aspect, special_technique, color_effect])
506
 
507
+ run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, lens_type, film_stock, composition_style, lighting_aspect, special_technique, color_effect], outputs=[output_caption])
 
 
 
508
 
509
+ copy_button.click(None, None, None, _js="copyToClipboard()")
510
 
511
  if __name__ == "__main__":
512
  demo.launch()