paul hilders committed · 66dfac7
Parent(s): c81ac67

Change descriptions and titles
app.py
CHANGED
@@ -60,11 +60,18 @@ inputs = [input_img, input_txt]
 outputs = [gr.inputs.Image(type='pil', label="Output Image"), "highlight"]
 
 
+description = """A demonstration based on the Generic Attention-model Explainability method for Interpreting Bi-Modal
+                 Transformers by Chefer et al. (2021): https://github.com/hila-chefer/Transformer-MM-Explainability. \n \n
+                 This demo shows attribution scores on both the image and the text input when CLIP is presented with a
+                 <text, image> pair. Attributions are computed as Gradient-weighted Attention Rollout (Chefer et al.,
+                 2021), and can be thought of as an estimate of the effective attention CLIP pays to its input when
+                 computing a multimodal representation."""
+
 iface = gr.Interface(fn=run_demo,
                      inputs=inputs,
                      outputs=outputs,
                      title="CLIP Grounding Explainability",
-                     description=
+                     description=description,
                      examples=[["example_images/London.png", "London Eye"],
                                ["example_images/London.png", "Big Ben"],
                                ["example_images/harrypotter.png", "Harry"],
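The description added in this hunk names Gradient-weighted Attention Rollout (Chefer et al., 2021) as the attribution method. For orientation, here is a minimal sketch of that update rule, not code from this repository: attn_maps and attn_grads are hypothetical per-layer tensors, assumed to have been captured beforehand with forward/backward hooks on the model's attention blocks.

import torch

def gradient_weighted_attention_rollout(attn_maps, attn_grads):
    # attn_maps / attn_grads: hypothetical lists of (heads, tokens, tokens)
    # tensors, one pair per transformer layer (see note above).
    num_tokens = attn_maps[0].shape[-1]
    # Relevance starts as the identity: each token is relevant to itself.
    relevance = torch.eye(num_tokens)
    for attn, grad in zip(attn_maps, attn_grads):
        # Weight each attention map by its gradient, keep only positive
        # contributions, and average over heads.
        cam = (grad * attn).clamp(min=0).mean(dim=0)
        # Rollout step: propagate the accumulated relevance through this layer.
        relevance = relevance + cam @ relevance
    return relevance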
@@ -121,6 +128,7 @@ outputs_NER = ["highlight", gr.Gallery(type='pil', label="NER Entity explanation
 iface_NER = gr.Interface(fn=NER_demo,
                          inputs=inputs_NER,
                          outputs=outputs_NER,
+                         title="Named Entity Grounding explainability using CLIP",
                          examples=[["example_images/London.png", "In this image we see Big Ben and the London Eye, on both sides of the river Thames."]],
                          cache_examples=False)
 
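With each demo now carrying its own title (and the first also a description), a common way to serve both from a single Space is Gradio's TabbedInterface. The snippet below is a sketch of that pattern, not code from this commit; the tab labels are invented, and it assumes a Gradio version that ships gr.TabbedInterface.

import gradio as gr

# Sketch only, not from the commit: serve both interfaces in one app.
# The tab labels below are illustrative.
demo = gr.TabbedInterface([iface, iface_NER],
                          ["CLIP Grounding", "NER Grounding"])
demo.launch()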