Test-Caption-Captain

Sleeping

App Files Files Community

Severian committed on Sep 25, 2024

Commit

57b6904

verified ·

1 Parent(s): 9bc81e0

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -59

app.py CHANGED Viewed

@@ -288,17 +288,20 @@ if tokenizer.bos_token_id is None or tokenizer.eos_token_id is None:
 def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, art_style: str) -> str:
     torch.cuda.empty_cache()
-    # 'any' means no length specified
-    length = None if caption_length == "any" else caption_length
-    if isinstance(length, str):
-        try:
-            length = int(length)
-        except ValueError:
-            pass
     # 'rng-tags' and 'training_prompt' don't have formal/informal tones
-    if caption_type == "rng-tags" or caption_type == "training_prompt":
         caption_tone = "formal"
     # Build prompt
@@ -465,66 +468,109 @@ with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
             """
         )
-    with gr.Tab("JoyCaption"):
-        with gr.Row():
-            with gr.Column():
-                input_image = gr.Image(type="pil", label="Input Image")
-                caption_type = gr.Dropdown(
-                    choices=[
-                        "descriptive",
-                        "training_prompt",
-                        "rng-tags",
-                        "thematic_analysis",
-                        "stylistic_comparison",
-                        "narrative_suggestion",
-                        "contextual_storytelling",
-                        "style_prompt"  # Add this new option
-                    ],
-                    label="Caption Type",
-                    value="descriptive",
-                )
-                caption_tone = gr.Dropdown(
-                    choices=["formal", "informal"],
-                    label="Caption Tone",
-                    value="formal",
-                )
-                caption_length = gr.Dropdown(
-                    choices=["any", "very short", "short", "medium-length", "long", "very long"] +
-                            [str(i) for i in range(20, 261, 10)],
-                    label="Caption Length",
-                    value="any",
-                )
-                # Add this new dropdown for art styles
-                art_style = gr.Dropdown(
-                    choices=ART_STYLES,
-                    label="Art Style",
-                    value="Impressionism",
-                    visible=False  # Initially hidden
-                )
-                gr.Markdown("""
-                **Note:**
-                - Caption tone doesn't affect `rng-tags` and `training_prompt`.
-                - When 'style_prompt' is selected, choose an art style to analyze the uploaded image in that context.
-                - The art style option helps guide the caption generation by comparing the uploaded image to characteristics of the selected style.
-                """)
-                run_button = gr.Button("Caption")
             with gr.Column():
                 output_caption = gr.Textbox(label="Caption")
-        # Add this JavaScript to show/hide the art style dropdown based on caption type
         caption_type.change(
             fn=lambda x: gr.update(visible=(x == "style_prompt")),
             inputs=[caption_type],
             outputs=[art_style]
         )
         run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, art_style], outputs=[output_caption])

 def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, art_style: str) -> str:
     torch.cuda.empty_cache()
+    # Handle caption_length
+    length = None
+    if caption_length != "any":
+        if isinstance(caption_length, int):
+            length = caption_length
+        elif isinstance(caption_length, str):
+            try:
+                length = int(caption_length)
+            except ValueError:
+                # If it's not a number, treat it as a descriptive length
+                length = caption_length
     # 'rng-tags' and 'training_prompt' don't have formal/informal tones
+    if caption_type in ["rng-tags", "training_prompt"]:
         caption_tone = "formal"
     # Build prompt
             """
         )
+        with gr.Tab("JoyCaption"):
+            gr.Markdown("""
+            # JoyCaption: AI-Powered Image Analysis Tool
+            This tool helps you generate various types of text based on an uploaded image. Here's how to use it:
+            1. Upload an image
+            2. Choose your desired output type
+            3. Adjust settings as needed
+            4. Click 'Generate Caption' to get your result
+            """)
+            with gr.Row():
+                with gr.Column(scale=1):
+                    input_image = gr.Image(type="pil", label="Upload Your Image")
+                    caption_type = gr.Dropdown(
+                        choices=[
+                            "descriptive",
+                            "training_prompt",
+                            "rng-tags",
+                            "thematic_analysis",
+                            "stylistic_comparison",
+                            "narrative_suggestion",
+                            "contextual_storytelling",
+                            "style_prompt"
+                        ],
+                        label="Output Type",
+                        value="descriptive",
+                    )
+                    gr.Markdown("""
+                    ### Output Types Explained:
+                    - **Descriptive**: A general description of the image
+                    - **Training Prompt**: A prompt for AI image generation
+                    - **RNG-Tags**: Tags for categorizing the image
+                    - **Thematic Analysis**: Exploration of themes in the image
+                    - **Stylistic Comparison**: Compares the image to art styles
+                    - **Narrative Suggestion**: A story idea based on the image
+                    - **Contextual Storytelling**: A background story for the image
+                    - **Style Prompt**: Analyzes the image in context of a specific art style
+                    """)
+                    caption_tone = gr.Dropdown(
+                        choices=["formal", "informal"],
+                        label="Tone",
+                        value="formal",
+                    )
+                    gr.Markdown("Choose between a formal (professional) or informal (casual) tone for the output.")
+                    caption_length = gr.Dropdown(
+                        choices=["any", "very short", "short", "medium-length", "long", "very long"] +
+                                [str(i) for i in range(20, 261, 10)],
+                        label="Length",
+                        value="any",
+                    )
+                    gr.Markdown("""
+                    Select the desired length of the output:
+                    - 'any': No specific length
+                    - Descriptive options: very short to very long
+                    - Numeric options: Specify exact word count (20 to 260 words)
+                    """)
+                    art_style = gr.Dropdown(
+                        choices=ART_STYLES,
+                        label="Art Style (for Style Prompt)",
+                        value="Impressionism",
+                        visible=False
+                    )
+                    gr.Markdown("Select an art style to analyze the image in that context. Only applicable for 'Style Prompt' output type.")
+                with gr.Column(scale=1):
+                    output_caption = gr.Textbox(label="Generated Output", lines=10)
+                    generate_button = gr.Button("Generate Caption")
+            gr.Markdown("""
+            ### Additional Notes:
+            - The 'Tone' setting doesn't affect 'RNG-Tags' and 'Training Prompt' outputs.
+            - 'Art Style' is only used when 'Style Prompt' is selected as the output type.
+            - The AI model analyzes the image and generates text based on your selections.
+            """)
+            run_button = gr.Button("Caption")
             with gr.Column():
                 output_caption = gr.Textbox(label="Caption")
         caption_type.change(
             fn=lambda x: gr.update(visible=(x == "style_prompt")),
             inputs=[caption_type],
             outputs=[art_style]
         )
+        generate_button.click(
+            fn=stream_chat,
+            inputs=[input_image, caption_type, caption_tone, caption_length, art_style],
+            outputs=[output_caption]
+        )
         run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, art_style], outputs=[output_caption])