Severian committed on
Commit
57b6904
·
verified ·
1 Parent(s): 9bc81e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -59
app.py CHANGED
@@ -288,17 +288,20 @@ if tokenizer.bos_token_id is None or tokenizer.eos_token_id is None:
288
  def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, art_style: str) -> str:
289
  torch.cuda.empty_cache()
290
 
291
- # 'any' means no length specified
292
- length = None if caption_length == "any" else caption_length
293
-
294
- if isinstance(length, str):
295
- try:
296
- length = int(length)
297
- except ValueError:
298
- pass
 
 
 
299
 
300
  # 'rng-tags' and 'training_prompt' don't have formal/informal tones
301
- if caption_type == "rng-tags" or caption_type == "training_prompt":
302
  caption_tone = "formal"
303
 
304
  # Build prompt
@@ -465,66 +468,109 @@ with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
465
  """
466
  )
467
 
468
- with gr.Tab("JoyCaption"):
469
- with gr.Row():
470
- with gr.Column():
471
- input_image = gr.Image(type="pil", label="Input Image")
472
-
473
- caption_type = gr.Dropdown(
474
- choices=[
475
- "descriptive",
476
- "training_prompt",
477
- "rng-tags",
478
- "thematic_analysis",
479
- "stylistic_comparison",
480
- "narrative_suggestion",
481
- "contextual_storytelling",
482
- "style_prompt" # Add this new option
483
- ],
484
- label="Caption Type",
485
- value="descriptive",
486
- )
487
-
488
- caption_tone = gr.Dropdown(
489
- choices=["formal", "informal"],
490
- label="Caption Tone",
491
- value="formal",
492
- )
493
-
494
- caption_length = gr.Dropdown(
495
- choices=["any", "very short", "short", "medium-length", "long", "very long"] +
496
- [str(i) for i in range(20, 261, 10)],
497
- label="Caption Length",
498
- value="any",
499
- )
500
-
501
- # Add this new dropdown for art styles
502
- art_style = gr.Dropdown(
503
- choices=ART_STYLES,
504
- label="Art Style",
505
- value="Impressionism",
506
- visible=False # Initially hidden
507
- )
508
-
509
- gr.Markdown("""
510
- **Note:**
511
- - Caption tone doesn't affect `rng-tags` and `training_prompt`.
512
- - When 'style_prompt' is selected, choose an art style to analyze the uploaded image in that context.
513
- - The art style option helps guide the caption generation by comparing the uploaded image to characteristics of the selected style.
514
- """)
515
-
516
- run_button = gr.Button("Caption")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
 
518
  with gr.Column():
519
  output_caption = gr.Textbox(label="Caption")
520
 
521
- # Add this JavaScript to show/hide the art style dropdown based on caption type
522
  caption_type.change(
523
  fn=lambda x: gr.update(visible=(x == "style_prompt")),
524
  inputs=[caption_type],
525
  outputs=[art_style]
526
  )
527
 
 
 
 
 
 
 
528
  run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, art_style], outputs=[output_caption])
529
 
530
 
 
288
  def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, art_style: str) -> str:
289
  torch.cuda.empty_cache()
290
 
291
+ # Handle caption_length
292
+ length = None
293
+ if caption_length != "any":
294
+ if isinstance(caption_length, int):
295
+ length = caption_length
296
+ elif isinstance(caption_length, str):
297
+ try:
298
+ length = int(caption_length)
299
+ except ValueError:
300
+ # If it's not a number, treat it as a descriptive length
301
+ length = caption_length
302
 
303
  # 'rng-tags' and 'training_prompt' don't have formal/informal tones
304
+ if caption_type in ["rng-tags", "training_prompt"]:
305
  caption_tone = "formal"
306
 
307
  # Build prompt
 
468
  """
469
  )
470
 
471
+ with gr.Tab("JoyCaption"):
472
+ gr.Markdown("""
473
+ # JoyCaption: AI-Powered Image Analysis Tool
474
+
475
+ This tool helps you generate various types of text based on an uploaded image. Here's how to use it:
476
+
477
+ 1. Upload an image
478
+ 2. Choose your desired output type
479
+ 3. Adjust settings as needed
480
+ 4. Click 'Generate Caption' to get your result
481
+ """)
482
+
483
+ with gr.Row():
484
+ with gr.Column(scale=1):
485
+ input_image = gr.Image(type="pil", label="Upload Your Image")
486
+
487
+ caption_type = gr.Dropdown(
488
+ choices=[
489
+ "descriptive",
490
+ "training_prompt",
491
+ "rng-tags",
492
+ "thematic_analysis",
493
+ "stylistic_comparison",
494
+ "narrative_suggestion",
495
+ "contextual_storytelling",
496
+ "style_prompt"
497
+ ],
498
+ label="Output Type",
499
+ value="descriptive",
500
+ )
501
+
502
+ gr.Markdown("""
503
+ ### Output Types Explained:
504
+ - **Descriptive**: A general description of the image
505
+ - **Training Prompt**: A prompt for AI image generation
506
+ - **RNG-Tags**: Tags for categorizing the image
507
+ - **Thematic Analysis**: Exploration of themes in the image
508
+ - **Stylistic Comparison**: Compares the image to art styles
509
+ - **Narrative Suggestion**: A story idea based on the image
510
+ - **Contextual Storytelling**: A background story for the image
511
+ - **Style Prompt**: Analyzes the image in context of a specific art style
512
+ """)
513
+
514
+ caption_tone = gr.Dropdown(
515
+ choices=["formal", "informal"],
516
+ label="Tone",
517
+ value="formal",
518
+ )
519
+
520
+ gr.Markdown("Choose between a formal (professional) or informal (casual) tone for the output.")
521
+
522
+ caption_length = gr.Dropdown(
523
+ choices=["any", "very short", "short", "medium-length", "long", "very long"] +
524
+ [str(i) for i in range(20, 261, 10)],
525
+ label="Length",
526
+ value="any",
527
+ )
528
+
529
+ gr.Markdown("""
530
+ Select the desired length of the output:
531
+ - 'any': No specific length
532
+ - Descriptive options: very short to very long
533
+ - Numeric options: Specify exact word count (20 to 260 words)
534
+ """)
535
+
536
+ art_style = gr.Dropdown(
537
+ choices=ART_STYLES,
538
+ label="Art Style (for Style Prompt)",
539
+ value="Impressionism",
540
+ visible=False
541
+ )
542
+
543
+ gr.Markdown("Select an art style to analyze the image in that context. Only applicable for 'Style Prompt' output type.")
544
+
545
+ with gr.Column(scale=1):
546
+ output_caption = gr.Textbox(label="Generated Output", lines=10)
547
+ generate_button = gr.Button("Generate Caption")
548
+
549
+ gr.Markdown("""
550
+ ### Additional Notes:
551
+ - The 'Tone' setting doesn't affect 'RNG-Tags' and 'Training Prompt' outputs.
552
+ - 'Art Style' is only used when 'Style Prompt' is selected as the output type.
553
+ - The AI model analyzes the image and generates text based on your selections.
554
+ """)
555
+
556
+ run_button = gr.Button("Caption")
557
 
558
  with gr.Column():
559
  output_caption = gr.Textbox(label="Caption")
560
 
561
+
562
  caption_type.change(
563
  fn=lambda x: gr.update(visible=(x == "style_prompt")),
564
  inputs=[caption_type],
565
  outputs=[art_style]
566
  )
567
 
568
+ generate_button.click(
569
+ fn=stream_chat,
570
+ inputs=[input_image, caption_type, caption_tone, caption_length, art_style],
571
+ outputs=[output_caption]
572
+ )
573
+
574
  run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, art_style], outputs=[output_caption])
575
 
576