Test-Caption-Captain

Sleeping

App Files Files Community

Severian committed on Sep 26, 2024

Commit

daee0bb

verified ·

1 Parent(s): d2c00ac

Update app.py

Browse files

Files changed (1) hide show

app.py +151 -15

app.py CHANGED Viewed

@@ -31,9 +31,9 @@ CAPTION_TYPE_MAP = {
 	("rng-tags", "formal", False, True): ["Write a list of Booru tags for this image within {word_count} words."],
 	("rng-tags", "formal", True, False): ["Write a {length} list of Booru tags for this image."],
-	("style_prompt", "formal", False, False): ["Generate a detailed style prompt for this image, including lens type, film stock, composition notes, and lighting aspects."],
-	("style_prompt", "formal", False, True): ["Generate a detailed style prompt for this image within {word_count} words, including lens type, film stock, composition notes, and lighting aspects."],
-	("style_prompt", "formal", True, False): ["Generate a {length} detailed style prompt for this image, including lens type, film stock, composition notes, and lighting aspects."],
 }
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
@@ -186,7 +186,7 @@ def generate_caption(text_model, tokenizer, image_features, prompt_str: str, max
 @spaces.GPU()
 @torch.no_grad()
-def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, lens_type: str = "", film_stock: str = "", composition_style: str = "") -> str:
     """
     Generate a caption or style prompt based on the input image and parameters.
     """
@@ -209,7 +209,12 @@ def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str,
     prompt_str = CAPTION_TYPE_MAP[prompt_key][0].format(length=length, word_count=length)
     if caption_type == "style_prompt":
-        prompt_str += f" Lens type: {lens_type}. Film stock: {film_stock}. Composition style: {composition_style}."
     print(f"Prompt: {prompt_str}")
@@ -243,12 +248,116 @@ ul, ol {
 }
 """
 # Gradio interface
 with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
     with gr.Tab("Welcome"):
         gr.Markdown(
             """
-            <img src="https://path-to-yamamoto-logo.png" alt="Yamamoto Logo" class="centered-image">
             # 🎨 Yamamoto JoyCaption: AI-Powered Art Inspiration
@@ -331,46 +440,73 @@ with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
                 )
                 lens_type = gr.Dropdown(
-                    choices=["Wide-angle", "Standard", "Telephoto", "Macro", "Fish-eye"],
                     label="Lens Type",
                     visible=False,
                 )
                 film_stock = gr.Dropdown(
-                    choices=["Kodak Portra", "Fujifilm Velvia", "Ilford Delta", "Kodak Tri-X", "Fujifilm Provia"],
                     label="Film Stock",
                     visible=False,
                 )
                 composition_style = gr.Dropdown(
-                    choices=["Rule of Thirds", "Golden Ratio", "Symmetry", "Leading Lines", "Framing"],
                     label="Composition Style",
                     visible=False,
                 )
                 gr.Markdown("**Note:** Caption tone doesn't affect `rng-tags`, `training_prompt`, and `style_prompt`.")
                 run_button = gr.Button("Make My Caption!")
             with gr.Column():
                 output_caption = gr.Textbox(label="Generated Caption")
                 copy_button = gr.Button("Copy to Clipboard")
     def update_style_options(caption_type):
         return {
             lens_type: gr.update(visible=caption_type == "style_prompt"),
             film_stock: gr.update(visible=caption_type == "style_prompt"),
             composition_style: gr.update(visible=caption_type == "style_prompt"),
         }
-    caption_type.change(update_style_options, inputs=[caption_type], outputs=[lens_type, film_stock, composition_style])
-    run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, lens_type, film_stock, composition_style], outputs=[output_caption])
-    def copy_to_clipboard():
-        return None
-    copy_button.click(fn=copy_to_clipboard, inputs=[], outputs=[])
 if __name__ == "__main__":
     demo.launch()

 	("rng-tags", "formal", False, True): ["Write a list of Booru tags for this image within {word_count} words."],
 	("rng-tags", "formal", True, False): ["Write a {length} list of Booru tags for this image."],
+	("style_prompt", "formal", False, False): ["Generate a detailed style prompt for this image, including lens type, film stock, composition notes, lighting aspects, and any special photographic techniques."],
+	("style_prompt", "formal", False, True): ["Generate a detailed style prompt for this image within {word_count} words, including lens type, film stock, composition notes, lighting aspects, and any special photographic techniques."],
+	("style_prompt", "formal", True, False): ["Generate a {length} detailed style prompt for this image, including lens type, film stock, composition notes, lighting aspects, and any special photographic techniques."],
 }
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 @spaces.GPU()
 @torch.no_grad()
+def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, lens_type: str = "", film_stock: str = "", composition_style: str = "", lighting_aspect: str = "", special_technique: str = "", color_effect: str = "") -> str:
     """
     Generate a caption or style prompt based on the input image and parameters.
     """
     prompt_str = CAPTION_TYPE_MAP[prompt_key][0].format(length=length, word_count=length)
     if caption_type == "style_prompt":
+        prompt_str += f" Lens type: {lens_type} ({lens_types_info[lens_type]}). "
+        prompt_str += f"Film stock: {film_stock} ({film_stocks_info[film_stock]}). "
+        prompt_str += f"Composition style: {composition_style} ({composition_styles_info[composition_style]}). "
+        prompt_str += f"Lighting aspect: {lighting_aspect} ({lighting_aspects_info[lighting_aspect]}). "
+        prompt_str += f"Special technique: {special_technique} ({special_techniques_info[special_technique]}). "
+        prompt_str += f"Color effect: {color_effect} ({color_effects_info[color_effect]})."
     print(f"Prompt: {prompt_str}")
 }
 """
+js = """
+function copyToClipboard() {
+    var copyText = document.querySelector('.output-text textarea');
+    copyText.select();
+    document.execCommand('copy');
+}
+"""
+# Add detailed descriptions for each option
+lens_types_info = {
+    "Standard": "A versatile lens with a field of view similar to human vision.",
+    "Wide-angle": "Captures a wider field of view, great for landscapes and architecture. Applies moderate to strong lens effect with image warp.",
+    "Telephoto": "Used for distant subjects, gives an 'award-winning' or 'National Geographic' look. Creates interesting effects when prompted.",
+    "Macro": "For extreme close-up photography, revealing tiny details.",
+    "Fish-eye": "Ultra-wide-angle lens that creates a strong bubble-like distortion. Generates panoramic photos with the entire image warping into a bubble.",
+    "Tilt-shift": "Allows adjusting the plane of focus, creating a 'miniature' effect. Known for the 'diorama miniature look'.",
+    "Zoom lens": "Variable focal length lens. Often zooms in on the subject, perfect for creating a base for inpainting. Interesting effect on landscapes with motion blur.",
+    "GoPro": "Wide-angle lens with clean digital look. Excludes film grain and most filter effects, resulting in natural colors and regular saturation.",
+    "Pinhole camera": "Creates a unique, foggy, low-detail, historic photograph look. Used since the 1850s, with peak popularity in the 1930s."
+}
+film_stocks_info = {
+    "Kodak Portra": "Professional color negative film known for its natural skin tones and low contrast.",
+    "Fujifilm Velvia": "Slide film known for vibrant colors and high saturation, popular among landscape photographers.",
+    "Ilford Delta": "Black and white film known for its fine grain and high sharpness.",
+    "Kodak Tri-X": "Classic high-speed black and white film, known for its distinctive grain and wide exposure latitude.",
+    "Fujifilm Provia": "Color reversal film known for its natural color reproduction and fine grain.",
+    "Cinestill": "Color photos with fine/low grain and higher than average resolution. Colors are slightly oversaturated or slightly desaturated.",
+    "Ektachrome": "Color photos with fine/low to moderate grain. Colors on the colder part of spectrum or regular, with normal or slightly higher saturation.",
+    "Ektar": "Modern Kodak film. Color photos with little to no grain. Results look like regular modern photography with artistic angles.",
+    "Film Washi": "Mostly black and white photos with fine/low to moderate grain. Occasionally gives colored photos with low saturation. Distinct style with high black contrast and soft camera lens effect.",
+    "Fomapan": "Black and white photos with fine/low to moderate grain, highly artistic exposure and angles. Adds very soft lens effect without distortion, dark photo vignette.",
+    "Fujicolor": "Color photos with fine/low to moderate grain. Colors are slightly or notably desaturated, with entire color hue shifted in a very distinct manner.",
+    "Holga": "Color photos with moderate to fine/low grain. Similar to Lomography in style, but with less grain. Good chance of black and white photography depending on subject.",
+    "Instax": "Instant color photos similar to Polaroid but clearer. Near perfect colors, regular saturation, fine/low to medium grain.",
+    "Lomography": "Color photos with high grain. Colors are either very oversaturated or slightly desaturated. Distinct contrast of black. Often applies photographic vignette.",
+    "Kodachrome": "Color photos with moderate grain. Colors on either colder part of spectrum or regular, with normal or slightly higher saturation.",
+    "Rollei": "Mostly black and white photos, sometimes color with fine/low grain. Can be sepia colored or have unusual hues and desaturation. Great for landscapes."
+}
+composition_styles_info = {
+    "Rule of Thirds": "Divides the frame into a 3x3 grid, placing key elements along the lines or at their intersections.",
+    "Golden Ratio": "Uses a spiral based on the golden ratio to create a balanced and aesthetically pleasing composition.",
+    "Symmetry": "Creates a mirror-like balance in the image, often used for architectural or nature photography.",
+    "Leading Lines": "Uses lines within the frame to draw the viewer's eye to the main subject or through the image.",
+    "Framing": "Uses elements within the scene to create a frame around the main subject.",
+    "Minimalism": "Simplifies the composition to its essential elements, often with a lot of negative space.",
+    "Fill the Frame": "The main subject dominates the entire frame, leaving little to no background.",
+    "Negative Space": "Uses empty space around the subject to create a sense of simplicity or isolation.",
+    "Centered Composition": "Places the main subject in the center of the frame, creating a sense of stability or importance.",
+    "Diagonal Lines": "Uses diagonal elements to create a sense of movement or dynamic tension in the image.",
+    "Triangular Composition": "Arranges elements in the frame to form a triangle, creating a sense of stability and harmony.",
+    "Radial Balance": "Arranges elements in a circular pattern around a central point, creating a sense of movement or completeness."
+}
+lighting_aspects_info = {
+    "Natural light": "Uses available light from the sun or sky, often creating soft, even illumination.",
+    "Studio lighting": "Controlled artificial lighting setup, allowing for precise manipulation of light and shadow.",
+    "Back light": "Light source behind the subject, creating silhouettes or rim lighting effects.",
+    "Split light": "Strong light source at 90-degree angle, lighting one half of the subject while leaving the other in shadow.",
+    "Broad light": "Light source at an angle to the subject, producing well-lit photographs with soft to moderate shadows.",
+    "Dim light": "Weak or distant light source, creating lower than average brightness and often dramatic images.",
+    "Flash photography": "Uses a brief, intense burst of light. Can be fill flash (even lighting) or harsh flash (strong contrasts).",
+    "Sunlight": "Direct light from the sun, often creating strong contrasts and warm tones.",
+    "Moonlight": "Soft, cool light from the moon, often creating a mysterious or romantic atmosphere.",
+    "Spotlight": "Focused beam of light illuminating a specific area, creating high contrast between light and shadow.",
+    "High-key lighting": "Bright, even lighting with minimal shadows, creating a light and airy feel.",
+    "Low-key lighting": "Predominantly dark tones with selective lighting, creating a moody or dramatic atmosphere.",
+    "Rembrandt lighting": "Classic portrait lighting technique creating a triangle of light on the cheek of the subject."
+}
+special_techniques_info = {
+    "Double exposure": "Superimposes two exposures to create a single image, often resulting in a dreamy or surreal effect.",
+    "Long exposure": "Uses a long shutter speed to capture motion over time, often creating smooth, blurred effects for moving elements.",
+    "Multiple exposure": "Superimposes multiple exposures, multiplying the subject or its key elements across the image.",
+    "HDR": "High Dynamic Range imaging, combining multiple exposures to capture a wider range of light and dark tones.",
+    "Bokeh effect": "Creates a soft, out-of-focus background, often with circular highlights.",
+    "Silhouette": "Captures the outline of a subject against a brighter background, creating a dramatic contrast.",
+    "Panning": "Follows a moving subject with the camera, creating a sharp subject with a blurred background.",
+    "Light painting": "Uses long exposure and moving light sources to 'paint' with light in the image.",
+    "Infrared photography": "Captures light in the infrared spectrum, often resulting in surreal, otherworldly images.",
+    "Ultraviolet photography": "Captures light in the ultraviolet spectrum, often revealing hidden patterns or creating a strong violet glow.",
+    "Kirlian photography": "High-voltage photographic technique that captures corona discharges around objects, creating a glowing effect.",
+    "Thermography": "Captures infrared radiation to create images based on temperature differences, resulting in false-color heat maps.",
+    "Astrophotography": "Specialized technique for capturing astronomical objects and celestial events, often resulting in stunning starry backgrounds.",
+    "Underwater photography": "Captures images beneath the surface of water, often in pools, seas, or aquariums.",
+    "Aerial photography": "Captures images from an elevated position, such as from drones, helicopters, or planes.",
+    "Macro photography": "Extreme close-up photography, revealing tiny details not visible to the naked eye."
+}
+color_effects_info = {
+    "Black and white": "Removes all color, leaving only shades of gray.",
+    "Sepia": "Reddish-brown monochrome effect, often associated with vintage photography.",
+    "Monochrome": "Uses variations of a single color.",
+    "Vintage color": "Muted or faded color palette reminiscent of old photographs.",
+    "Cross-processed": "Deliberate processing of film in the wrong chemicals, creating unusual color shifts.",
+    "Desaturated": "Reduces the intensity of all colors in the image.",
+    "Vivid colors": "Increases the saturation and intensity of colors.",
+    "Pastel colors": "Soft, pale colors with a light and airy feel.",
+    "High contrast": "Emphasizes the difference between light and dark areas in the image.",
+    "Low contrast": "Reduces the difference between light and dark areas, creating a softer look.",
+    "Color splash": "Converts most of the image to black and white while leaving one or more elements in color."
+}
 # Gradio interface
 with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
     with gr.Tab("Welcome"):
         gr.Markdown(
             """
+            <img src="https://cdn-uploads.huggingface.co/production/uploads/64740cf7485a7c8e1bd51ac9/EqvglEeWdTpCqWAcuP6-9.png">
             # 🎨 Yamamoto JoyCaption: AI-Powered Art Inspiration
                 )
                 lens_type = gr.Dropdown(
+                    choices=list(lens_types_info.keys()),
                     label="Lens Type",
                     visible=False,
+                    info="Select a lens type to define the perspective and field of view of the image."
                 )
                 film_stock = gr.Dropdown(
+                    choices=list(film_stocks_info.keys()),
                     label="Film Stock",
                     visible=False,
+                    info="Choose a film stock to determine the color, grain, and overall look of the image."
                 )
                 composition_style = gr.Dropdown(
+                    choices=list(composition_styles_info.keys()),
                     label="Composition Style",
                     visible=False,
+                    info="Select a composition style to guide the arrangement of elements in the image."
+                )
+                lighting_aspect = gr.Dropdown(
+                    choices=list(lighting_aspects_info.keys()),
+                    label="Lighting Aspect",
+                    visible=False,
+                    info="Choose a lighting style to define the mood and atmosphere of the image."
+                )
+                special_technique = gr.Dropdown(
+                    choices=list(special_techniques_info.keys()),
+                    label="Special Technique",
+                    visible=False,
+                    info="Select a special photographic technique to add unique effects to the image."
+                )
+                color_effect = gr.Dropdown(
+                    choices=list(color_effects_info.keys()),
+                    label="Color Effect",
+                    visible=False,
+                    info="Choose a color effect to alter the overall color palette of the image."
                 )
                 gr.Markdown("**Note:** Caption tone doesn't affect `rng-tags`, `training_prompt`, and `style_prompt`.")
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/64740cf7485a7c8e1bd51ac9/-tNeUZTAGCOFiNLG_VCMb.png)
                 run_button = gr.Button("Make My Caption!")
             with gr.Column():
                 output_caption = gr.Textbox(label="Generated Caption")
                 copy_button = gr.Button("Copy to Clipboard")
+                gr.HTML("<script>" + js + "</script>")
     def update_style_options(caption_type):
         return {
             lens_type: gr.update(visible=caption_type == "style_prompt"),
             film_stock: gr.update(visible=caption_type == "style_prompt"),
             composition_style: gr.update(visible=caption_type == "style_prompt"),
+            lighting_aspect: gr.update(visible=caption_type == "style_prompt"),
+            special_technique: gr.update(visible=caption_type == "style_prompt"),
+            color_effect: gr.update(visible=caption_type == "style_prompt"),
         }
+    caption_type.change(update_style_options, inputs=[caption_type], outputs=[lens_type, film_stock, composition_style, lighting_aspect, special_technique, color_effect])
+    run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, lens_type, film_stock, composition_style, lighting_aspect, special_technique, color_effect], outputs=[output_caption])
+    copy_button.click(None, None, None, _js="copyToClipboard()")
 if __name__ == "__main__":
     demo.launch()