Update main.py
main.py
CHANGED
@@ -43,6 +43,7 @@ def generate_caption(processor, model, image, tokenizer=None):
 
     return generated_caption
 
+
 def generate_captions(image):
     caption_git_base = generate_caption(git_processor_base, git_model_base, image)
 
@@ -56,56 +57,21 @@ def generate_captions(image):
 
     return caption_git_base, caption_git_large, caption_blip_base, caption_blip_large, caption_vitgpt
 
-
-
+
+#examples = [["cats.jpg"], ["stop_sign.png"], ["astronaut.jpg"]]
+outputs = [gr.outputs.Textbox(label="Caption generated by GIT-base"), gr.outputs.Textbox(label="Caption generated by GIT-large"), gr.outputs.Textbox(label="Caption generated by BLIP-base"), gr.outputs.Textbox(label="Caption generated by BLIP-large"), gr.outputs.Textbox(label="Caption generated by ViT+GPT-2")]
 
 title = "Interactive demo: comparing image captioning models"
 description = "Gradio Demo to compare GIT, BLIP and ViT+GPT2, 3 state-of-the-art vision+language models. To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://huggingface.co/docs/transformers/main/model_doc/blip' target='_blank'>BLIP docs</a> | <a href='https://huggingface.co/docs/transformers/main/model_doc/git' target='_blank'>GIT docs</a></p>"
 
-css = """
-body {
-    background-color: #f2f2f2;
-    font-family: Arial, sans-serif;
-}
-
-.title {
-    color: #333333;
-    font-size: 24px;
-    font-weight: bold;
-    margin-bottom: 20px;
-}
-
-.description {
-    color: #666666;
-    font-size: 16px;
-    margin-bottom: 20px;
-}
-
-.article {
-    color: #666666;
-    font-size: 14px;
-    margin-bottom: 20px;
-    text-align: center;
-}
-
-.input {
-    margin-bottom: 20px;
-}
-
-.output {
-    margin-bottom: 20px;
-}
-"""
-
-iface = gr.Interface(fn=generate_captions,
+iface = gr.Interface(fn=generate_captions,
                      inputs=gr.inputs.Image(type="pil"),
                      outputs=outputs,
-                     examples=examples,
+                     examples=examples,
                      title=title,
                      description=description,
-                     article=article,
-                     css=css,
+                     article=article,
                      enable_queue=True)
 iface.launch(server_name="0.0.0.0", server_port=7860)
 
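Aside (not part of this commit): the updated main.py still uses the legacy gr.inputs / gr.outputs namespaces and the enable_queue argument, which newer Gradio releases no longer accept. A minimal sketch of the same Interface against the current component API could look roughly like the code below; generate_captions, title, description and article are assumed to be defined earlier in main.py as in the diff, and examples is dropped because this change comments out the #examples list.

import gradio as gr

# Sketch only, assuming a recent Gradio release and that generate_captions,
# title, description and article are defined earlier in main.py as shown above.
outputs = [
    gr.Textbox(label="Caption generated by GIT-base"),
    gr.Textbox(label="Caption generated by GIT-large"),
    gr.Textbox(label="Caption generated by BLIP-base"),
    gr.Textbox(label="Caption generated by BLIP-large"),
    gr.Textbox(label="Caption generated by ViT+GPT-2"),
]

iface = gr.Interface(
    fn=generate_captions,
    inputs=gr.Image(type="pil"),  # replaces gr.inputs.Image
    outputs=outputs,              # plain gr.Textbox instead of gr.outputs.Textbox
    title=title,
    description=description,
    article=article,
)

iface.queue()  # replaces the enable_queue=True keyword
iface.launch(server_name="0.0.0.0", server_port=7860)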