Spaces:
Running
Running
Move alpha definition in slider
Browse files
app.py
CHANGED
@@ -32,8 +32,7 @@ unconstrained semantic space by multimodal data from large vision-language datab
|
|
32 |
retrieve the semantically most similar captions from a database, from which we extract a set of
|
33 |
candidate categories by applying text parsing and filtering techniques. We further score the
|
34 |
candidates using the multimodal aligned representation of the large pre-trained VLM, *i.e.* CLIP,
|
35 |
-
to obtain the best-matching category
|
36 |
-
between the visual and textual similarity.
|
37 |
"""
|
38 |
PAPER_URL = "https://arxiv.org/abs/2306.00917"
|
39 |
|
@@ -67,7 +66,13 @@ demo = gr.Interface(
|
|
67 |
fn=vic,
|
68 |
inputs=[
|
69 |
gr.Image(type="filepath", label="input"),
|
70 |
-
gr.Slider(
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
],
|
72 |
outputs=[gr.Label(num_top_classes=5, label="output")],
|
73 |
title=PAPER_TITLE,
|
|
|
32 |
retrieve the semantically most similar captions from a database, from which we extract a set of
|
33 |
candidate categories by applying text parsing and filtering techniques. We further score the
|
34 |
candidates using the multimodal aligned representation of the large pre-trained VLM, *i.e.* CLIP,
|
35 |
+
to obtain the best-matching category.
|
|
|
36 |
"""
|
37 |
PAPER_URL = "https://arxiv.org/abs/2306.00917"
|
38 |
|
|
|
66 |
fn=vic,
|
67 |
inputs=[
|
68 |
gr.Image(type="filepath", label="input"),
|
69 |
+
gr.Slider(
|
70 |
+
0.0,
|
71 |
+
1.0,
|
72 |
+
value=0.5,
|
73 |
+
label="alpha",
|
74 |
+
info="trade-off between the text (left) and image (right) modality",
|
75 |
+
),
|
76 |
],
|
77 |
outputs=[gr.Label(num_top_classes=5, label="output")],
|
78 |
title=PAPER_TITLE,
|