Update app.py
app.py CHANGED
@@ -14,7 +14,6 @@ import torchvision.transforms.functional as TVF
 CLIP_PATH = "google/siglip-so400m-patch14-384"
 MODEL_PATH = "meta-llama/Meta-Llama-3.1-8B"
 CHECKPOINT_PATH = Path("9em124t2-499968")
-TITLE = "<h1><center>JoyCaption Alpha One (2024-09-20a)</center></h1>"
 CAPTION_TYPE_MAP = {
     ("descriptive", "formal", False, False): ["Write a descriptive caption for this image in a formal tone."],
     ("descriptive", "formal", False, True): ["Write a descriptive caption for this image in a formal tone within {word_count} words."],
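
The two `CAPTION_TYPE_MAP` entries visible in this hunk differ only in their final boolean key element, which selects the `{word_count}`-templated variant of the prompt. Below is a minimal sketch of that lookup convention; the key shape `(caption_type, caption_tone, flag, wants_word_count)` and the `build_prompt` helper are inferred from these two entries, not taken from the resolution code in `stream_chat`, which is not part of this diff:

```python
# Hypothetical sketch: key order and flag meanings are inferred from the two
# entries shown above, not from app.py's actual resolution logic.
CAPTION_TYPE_MAP = {
    ("descriptive", "formal", False, False): ["Write a descriptive caption for this image in a formal tone."],
    ("descriptive", "formal", False, True): ["Write a descriptive caption for this image in a formal tone within {word_count} words."],
}

def build_prompt(caption_type: str, caption_tone: str, caption_length: str) -> str:
    # A numeric dropdown value such as "40" selects the word-count template.
    wants_word_count = caption_length.isdigit()
    template = CAPTION_TYPE_MAP[(caption_type, caption_tone, False, wants_word_count)][0]
    return template.format(word_count=caption_length) if wants_word_count else template

print(build_prompt("descriptive", "formal", "40"))
# Write a descriptive caption for this image in a formal tone within 40 words.
```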
@@ -217,41 +216,79 @@ def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str,
 
     return caption.strip()
 
-
-with gr.Blocks() as demo:
-    …
-    input_image = gr.Image(type="pil", label="Input Image")
-
-    caption_type = gr.Dropdown(
-        choices=["descriptive", "training_prompt", "rng-tags"],
-        label="Caption Type",
-        value="descriptive",
-    )
-
-    caption_tone = gr.Dropdown(
-        choices=["formal", "informal"],
-        label="Caption Tone",
-        value="formal",
-    )
-
-    caption_length = gr.Dropdown(
-        choices=["any", "very short", "short", "medium-length", "long", "very long"] +
-        [str(i) for i in range(20, 261, 10)],
-        label="Caption Length",
-        value="any",
-    )
-
-    gr.Markdown("**Note:** Caption tone doesn't affect `rng-tags` and `training_prompt`.")
-
-    run_button = gr.Button("Caption")
-
-    output_caption = gr.Textbox(label="Caption")
-
-    run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length], outputs=[output_caption])
+css = """
+h1, h2, h3, h4, h5, h6, p, li, ul, ol, a, .centered-image {
+    text-align: center;
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+}
+ul, ol {
+    margin-left: auto;
+    margin-right: auto;
+    display: table;
+}
+.centered-image {
+    max-width: 100%;
+    height: auto;
+}
+"""
 
+with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
+    with gr.Tab("Welcome"):
+        gr.Markdown(
+            """
+            <img src="https://path-to-yamamoto-logo.png" alt="Yamamoto Logo" class="centered-image">
+
+            # 🎨 Yamamoto JoyCaption: AI-Powered Art Inspiration
+
+            ## Accelerate Your Creative Workflow with Intelligent Image Analysis
+
+            This innovative tool empowers Yamamoto's artists to quickly generate descriptive captions,<br>
+            training prompts, and tags from existing artwork, fueling the creative process for GenAI models.
+
+            ## 🚀 How It Works:
+            1. **Upload Your Inspiration**: Drop in an image (e.g., a charcoal horse picture) that embodies your desired style.
+            2. **Choose Your Output**: Select from descriptive captions, training prompts, or tags.
+            3. **Customize the Results**: Adjust tone, length, and other parameters to fine-tune the output.
+            4. **Generate and Iterate**: Click 'Caption' to analyze your image and use the results to inspire new creations.
+
+            <h6><center>JoyCaption Alpha One</center></h6>
+            """
+        )
+
+    with gr.Tab("JoyCaption"):
+        with gr.Row():
+            with gr.Column():
+                input_image = gr.Image(type="pil", label="Input Image")
+
+                caption_type = gr.Dropdown(
+                    choices=["descriptive", "training_prompt", "rng-tags"],
+                    label="Caption Type",
+                    value="descriptive",
+                )
+
+                caption_tone = gr.Dropdown(
+                    choices=["formal", "informal"],
+                    label="Caption Tone",
+                    value="formal",
+                )
+
+                caption_length = gr.Dropdown(
+                    choices=["any", "very short", "short", "medium-length", "long", "very long"] +
+                    [str(i) for i in range(20, 261, 10)],
+                    label="Caption Length",
+                    value="any",
+                )
+
+                gr.Markdown("**Note:** Caption tone doesn't affect `rng-tags` and `training_prompt`.")
+
+                run_button = gr.Button("Caption")
+
+            with gr.Column():
+                output_caption = gr.Textbox(label="Caption")
+
+        run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length], outputs=[output_caption])
 
 
 if __name__ == "__main__":
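
For anyone who wants to try the new layout in isolation, here is a minimal, self-contained sketch of the pattern the rewritten UI relies on: a `gr.Blocks` context with custom `css=` and a Hub-hosted `theme=` string, two tabs, and a button wired to a callback. The `caption_stub` function is a stand-in for `stream_chat`, not the app's model code; everything else mirrors the diff above.

```python
import gradio as gr

# Same centering idea as the app's stylesheet, trimmed to one rule.
# (The app's `display: table` on ul/ol is the non-obvious part: auto margins
# only center an element with a shrink-to-fit width, which display: table gives.)
css = """
h1 { text-align: center; display: block; }
"""

def caption_stub(image, caption_type):
    # Stand-in for stream_chat(); just echoes the selected option.
    return f"(would write a {caption_type} caption for the uploaded image)"

with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
    with gr.Tab("Welcome"):
        gr.Markdown("# JoyCaption demo")
    with gr.Tab("JoyCaption"):
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="pil", label="Input Image")
                caption_type = gr.Dropdown(
                    choices=["descriptive", "training_prompt", "rng-tags"],
                    label="Caption Type",
                    value="descriptive",
                )
                run_button = gr.Button("Caption")
            with gr.Column():
                output_caption = gr.Textbox(label="Caption")
        run_button.click(fn=caption_stub,
                         inputs=[input_image, caption_type],
                         outputs=[output_caption])

if __name__ == "__main__":
    demo.launch()
```

Passing a Hub identifier like `"Hev832/Applio"` to `theme=` makes Gradio download that shared theme at startup; any theme published to the Hugging Face Hub can be referenced the same way.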