Update app.py
Browse files
app.py
CHANGED
@@ -288,17 +288,20 @@ if tokenizer.bos_token_id is None or tokenizer.eos_token_id is None:
|
|
288 |
def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, art_style: str) -> str:
|
289 |
torch.cuda.empty_cache()
|
290 |
|
291 |
-
#
|
292 |
-
length = None
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
|
|
|
|
|
|
299 |
|
300 |
# 'rng-tags' and 'training_prompt' don't have formal/informal tones
|
301 |
-
if caption_type
|
302 |
caption_tone = "formal"
|
303 |
|
304 |
# Build prompt
|
@@ -465,66 +468,109 @@ with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
|
|
465 |
"""
|
466 |
)
|
467 |
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
517 |
|
518 |
with gr.Column():
|
519 |
output_caption = gr.Textbox(label="Caption")
|
520 |
|
521 |
-
|
522 |
caption_type.change(
|
523 |
fn=lambda x: gr.update(visible=(x == "style_prompt")),
|
524 |
inputs=[caption_type],
|
525 |
outputs=[art_style]
|
526 |
)
|
527 |
|
|
|
|
|
|
|
|
|
|
|
|
|
528 |
run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, art_style], outputs=[output_caption])
|
529 |
|
530 |
|
|
|
288 |
def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, art_style: str) -> str:
|
289 |
torch.cuda.empty_cache()
|
290 |
|
291 |
+
# Handle caption_length
|
292 |
+
length = None
|
293 |
+
if caption_length != "any":
|
294 |
+
if isinstance(caption_length, int):
|
295 |
+
length = caption_length
|
296 |
+
elif isinstance(caption_length, str):
|
297 |
+
try:
|
298 |
+
length = int(caption_length)
|
299 |
+
except ValueError:
|
300 |
+
# If it's not a number, treat it as a descriptive length
|
301 |
+
length = caption_length
|
302 |
|
303 |
# 'rng-tags' and 'training_prompt' don't have formal/informal tones
|
304 |
+
if caption_type in ["rng-tags", "training_prompt"]:
|
305 |
caption_tone = "formal"
|
306 |
|
307 |
# Build prompt
|
|
|
468 |
"""
|
469 |
)
|
470 |
|
471 |
+
with gr.Tab("JoyCaption"):
|
472 |
+
gr.Markdown("""
|
473 |
+
# JoyCaption: AI-Powered Image Analysis Tool
|
474 |
+
|
475 |
+
This tool helps you generate various types of text based on an uploaded image. Here's how to use it:
|
476 |
+
|
477 |
+
1. Upload an image
|
478 |
+
2. Choose your desired output type
|
479 |
+
3. Adjust settings as needed
|
480 |
+
4. Click 'Generate Caption' to get your result
|
481 |
+
""")
|
482 |
+
|
483 |
+
with gr.Row():
|
484 |
+
with gr.Column(scale=1):
|
485 |
+
input_image = gr.Image(type="pil", label="Upload Your Image")
|
486 |
+
|
487 |
+
caption_type = gr.Dropdown(
|
488 |
+
choices=[
|
489 |
+
"descriptive",
|
490 |
+
"training_prompt",
|
491 |
+
"rng-tags",
|
492 |
+
"thematic_analysis",
|
493 |
+
"stylistic_comparison",
|
494 |
+
"narrative_suggestion",
|
495 |
+
"contextual_storytelling",
|
496 |
+
"style_prompt"
|
497 |
+
],
|
498 |
+
label="Output Type",
|
499 |
+
value="descriptive",
|
500 |
+
)
|
501 |
+
|
502 |
+
gr.Markdown("""
|
503 |
+
### Output Types Explained:
|
504 |
+
- **Descriptive**: A general description of the image
|
505 |
+
- **Training Prompt**: A prompt for AI image generation
|
506 |
+
- **RNG-Tags**: Tags for categorizing the image
|
507 |
+
- **Thematic Analysis**: Exploration of themes in the image
|
508 |
+
- **Stylistic Comparison**: Compares the image to art styles
|
509 |
+
- **Narrative Suggestion**: A story idea based on the image
|
510 |
+
- **Contextual Storytelling**: A background story for the image
|
511 |
+
- **Style Prompt**: Analyzes the image in context of a specific art style
|
512 |
+
""")
|
513 |
+
|
514 |
+
caption_tone = gr.Dropdown(
|
515 |
+
choices=["formal", "informal"],
|
516 |
+
label="Tone",
|
517 |
+
value="formal",
|
518 |
+
)
|
519 |
+
|
520 |
+
gr.Markdown("Choose between a formal (professional) or informal (casual) tone for the output.")
|
521 |
+
|
522 |
+
caption_length = gr.Dropdown(
|
523 |
+
choices=["any", "very short", "short", "medium-length", "long", "very long"] +
|
524 |
+
[str(i) for i in range(20, 261, 10)],
|
525 |
+
label="Length",
|
526 |
+
value="any",
|
527 |
+
)
|
528 |
+
|
529 |
+
gr.Markdown("""
|
530 |
+
Select the desired length of the output:
|
531 |
+
- 'any': No specific length
|
532 |
+
- Descriptive options: very short to very long
|
533 |
+
- Numeric options: Specify exact word count (20 to 260 words)
|
534 |
+
""")
|
535 |
+
|
536 |
+
art_style = gr.Dropdown(
|
537 |
+
choices=ART_STYLES,
|
538 |
+
label="Art Style (for Style Prompt)",
|
539 |
+
value="Impressionism",
|
540 |
+
visible=False
|
541 |
+
)
|
542 |
+
|
543 |
+
gr.Markdown("Select an art style to analyze the image in that context. Only applicable for 'Style Prompt' output type.")
|
544 |
+
|
545 |
+
with gr.Column(scale=1):
|
546 |
+
output_caption = gr.Textbox(label="Generated Output", lines=10)
|
547 |
+
generate_button = gr.Button("Generate Caption")
|
548 |
+
|
549 |
+
gr.Markdown("""
|
550 |
+
### Additional Notes:
|
551 |
+
- The 'Tone' setting doesn't affect 'RNG-Tags' and 'Training Prompt' outputs.
|
552 |
+
- 'Art Style' is only used when 'Style Prompt' is selected as the output type.
|
553 |
+
- The AI model analyzes the image and generates text based on your selections.
|
554 |
+
""")
|
555 |
+
|
556 |
+
run_button = gr.Button("Caption")
|
557 |
|
558 |
with gr.Column():
|
559 |
output_caption = gr.Textbox(label="Caption")
|
560 |
|
561 |
+
|
562 |
caption_type.change(
|
563 |
fn=lambda x: gr.update(visible=(x == "style_prompt")),
|
564 |
inputs=[caption_type],
|
565 |
outputs=[art_style]
|
566 |
)
|
567 |
|
568 |
+
generate_button.click(
|
569 |
+
fn=stream_chat,
|
570 |
+
inputs=[input_image, caption_type, caption_tone, caption_length, art_style],
|
571 |
+
outputs=[output_caption]
|
572 |
+
)
|
573 |
+
|
574 |
run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, art_style], outputs=[output_caption])
|
575 |
|
576 |
|