Spaces:
Runtime error
Runtime error
Martijn van Beers
committed on
Commit
•
7751ada
1
Parent(s):
330a2ff
Use noun_chunks instead of entities
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import sys
|
2 |
import gradio as gr
|
3 |
|
@@ -25,6 +26,28 @@ clip.clip._MODELS = {
|
|
25 |
"ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
|
26 |
}
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
29 |
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
|
30 |
|
@@ -53,11 +76,16 @@ def run_demo(image, text):
|
|
53 |
|
54 |
|
55 |
# Default demo:
|
56 |
-
input_img = gr.inputs.Image(type='pil', label="Original Image")
|
57 |
-
input_txt = "text"
|
58 |
-
inputs = [input_img, input_txt]
|
59 |
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
|
63 |
description = """This demo is a copy of the demo CLIPGroundingExlainability built by Paul Hilders, Danilo de Goede and Piyush Bagad, as part of the course Interpretability and Explainability in AI (MSc AI, UvA, June 2022).
|
@@ -70,8 +98,8 @@ description = """This demo is a copy of the demo CLIPGroundingExlainability buil
|
|
70 |
of the model."""
|
71 |
|
72 |
iface = gr.Interface(fn=run_demo,
|
73 |
-
inputs=
|
74 |
-
outputs=
|
75 |
title="CLIP Grounding Explainability",
|
76 |
description=description,
|
77 |
examples=[["example_images/London.png", "London Eye"],
|
@@ -91,40 +119,58 @@ def add_label_to_img(img, label, add_entity_label=True):
|
|
91 |
img = ImageOps.expand(img, border=45, fill=(255,255,255))
|
92 |
draw = ImageDraw.Draw(img)
|
93 |
font = ImageFont.truetype("arial.ttf", 24)
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
else:
|
97 |
-
draw.text((5,5),
|
98 |
|
99 |
return img
|
100 |
|
101 |
def NER_demo(image, text):
|
102 |
-
# Apply NER to extract named entities, and run the explainability method
|
103 |
-
# for each named entity.
|
104 |
-
highlighed_entities = []
|
105 |
-
for ent in nlp(text).ents:
|
106 |
-
ent_text = ent.text
|
107 |
-
ent_label = ent.label_
|
108 |
-
highlighed_entities.append((ent_text, ent_label))
|
109 |
-
|
110 |
# As the default image, we run the default demo on the input image and text:
|
111 |
overlapped, highlighted_text = run_demo(image, text)
|
112 |
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
description_NER = """Automatically generated CLIP grounding explanations for
|
130 |
named entities, retrieved from the spacy NER model. <span style="color:red">Warning:</span> Note
|
@@ -136,7 +182,10 @@ iface_NER = gr.Interface(fn=NER_demo,
|
|
136 |
outputs=outputs_NER,
|
137 |
title="Named Entity Grounding explainability using CLIP",
|
138 |
description=description_NER,
|
139 |
-
examples=[
|
|
|
|
|
|
|
140 |
cache_examples=False)
|
141 |
|
142 |
demo_tabs = gr.TabbedInterface([iface, iface_NER], ["Default", "NER"])
|
|
|
1 |
+
import re
|
2 |
import sys
|
3 |
import gradio as gr
|
4 |
|
|
|
26 |
"ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
|
27 |
}
|
28 |
|
29 |
+
# Hex colour for every label a noun chunk can receive in NER_demo.
# "N" is the fallback label NER_demo assigns to chunks in which no token has
# an entity type; the remaining keys are looked up by a token's `ent_type_`.
# The map is passed as `color_map` to the HighlightedText output and is also
# read by add_label_to_img, which converts the hex string to an RGB tuple for
# the caption colour.
colour_map = {
    "N": "#f77189",
    "CARDINAL": "#f7764a",
    "DATE": "#d98a32",
    "EVENT": "#bf9632",
    "FAC": "#a99e31",
    "GPE": "#90a531",
    "LANGUAGE": "#68ad31",
    "LAW": "#32b25e",
    "LOC": "#34af86",
    "MONEY": "#35ae9c",
    "NORP": "#36acac",
    "ORDINAL": "#37aabd",
    "ORG": "#39a7d4",
    "PERCENT": "#539ff4",
    "PERSON": "#9890f4",
    "PRODUCT": "#c47ef4",
    "QUANTITY": "#ef5ff4",
    "TIME": "#f565d0",
    "WORK_OF_ART": "#f66baf",
}
|
50 |
+
|
51 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
52 |
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
|
53 |
|
|
|
76 |
|
77 |
|
78 |
# Default demo:
|
|
|
|
|
|
|
79 |
|
80 |
+
# Input components of the default interface (`iface`), whose callback is
# run_demo(image, text): a PIL image and a free-text description.
default_inputs = [
    gr.components.Image(type='pil', label="Original Image"),
    gr.components.Textbox(label="Image description"),
]

# Output components of the default interface, one per value returned by
# run_demo: an image and a highlighted-text view.
default_outputs = [
    gr.components.Image(type='pil', label="Output Image"),
    gr.components.HighlightedText(label="Text importance"),
]
|
89 |
|
90 |
|
91 |
description = """This demo is a copy of the demo CLIPGroundingExlainability built by Paul Hilders, Danilo de Goede and Piyush Bagad, as part of the course Interpretability and Explainability in AI (MSc AI, UvA, June 2022).
|
|
|
98 |
of the model."""
|
99 |
|
100 |
iface = gr.Interface(fn=run_demo,
|
101 |
+
inputs=default_inputs,
|
102 |
+
outputs=default_outputs,
|
103 |
title="CLIP Grounding Explainability",
|
104 |
description=description,
|
105 |
examples=[["example_images/London.png", "London Eye"],
|
|
|
119 |
img = ImageOps.expand(img, border=45, fill=(255,255,255))
|
120 |
draw = ImageDraw.Draw(img)
|
121 |
font = ImageFont.truetype("arial.ttf", 24)
|
122 |
+
m = re.match(r".*\((\w+)\)", label)
|
123 |
+
if add_entity_label and m is not None:
|
124 |
+
cat = m.group(1)
|
125 |
+
colours = tuple(map(lambda l: int(''.join(l),16), zip(*[iter(colour_map[cat][1:])]*2)))
|
126 |
+
|
127 |
+
draw.text((5,5), label , align="center", fill=colours, font=font)
|
128 |
else:
|
129 |
+
draw.text((5,5), label, align="center", fill=(0, 0, 0), font=font)
|
130 |
|
131 |
return img
|
132 |
|
133 |
def NER_demo(image, text):
    """Run the grounding demo on the full text and on each of its noun chunks.

    ``run_demo`` is applied to the complete ``text`` first; its image opens
    the gallery with the caption "Complete sentence". It is then applied to
    every noun chunk spaCy finds in ``text``, except chunks consisting of a
    single pronoun. A chunk is labelled with the entity type of its first
    token that has one, or with "N" when no token has an entity type.

    Args:
        image: Input image, forwarded unchanged to ``run_demo``.
        text: Descriptive text to split into noun chunks.

    Returns:
        A ``(labeled_text, gallery_images)`` tuple. ``labeled_text`` is a
        dict holding the input under ``'text'`` and, under ``'entities'``,
        one ``{'entity', 'start', 'end'}`` record (character offsets) per
        processed chunk. ``gallery_images`` is the list of captioned images.
    """
    # The whole-sentence explanation always comes first in the gallery.
    sentence_overlay, _ = run_demo(image, text)
    gallery_images = [
        add_label_to_img(sentence_overlay, "Complete sentence", add_entity_label=False)
    ]
    labeled_text = {'text': text, 'entities': []}

    # Then one explanation per noun chunk.
    for chunk in nlp(text).noun_chunks:
        is_lone_pronoun = len(chunk) == 1 and chunk[0].pos_ == "PRON"
        if is_lone_pronoun:
            continue

        # First entity type found inside the chunk; plain nouns fall back to "N".
        chunk_label = next(
            (token.ent_type_ for token in chunk if token.ent_type_ != ''),
            "N",
        )

        labeled_text['entities'].append({
            'entity': chunk_label,
            'start': chunk.start_char,
            'end': chunk.end_char,
        })

        chunk_overlay, _ = run_demo(image, chunk.text)
        gallery_images.append(
            add_label_to_img(chunk_overlay, f"{chunk.text} ({chunk_label})")
        )

    return labeled_text, gallery_images
|
163 |
+
|
164 |
+
# Input components for the noun-chunk interface: a PIL image and the text to
# analyse. Presumably passed as `inputs=` to iface_NER, matching
# NER_demo(image, text) -- that argument is not visible here; confirm.
inputs_NER = [
    gr.Image(type='pil', label="Original Image"),
    gr.components.Textbox(label="Descriptive text"),
]

#colours = highlighter._style["color_map"]
# Output components for iface_NER, in the order NER_demo returns its values:
# the labelled text (coloured via colour_map, with a legend) followed by the
# gallery of captioned images.
outputs_NER = [
    gr.components.HighlightedText(show_legend=True, color_map=colour_map, label="Noun chunks"),
    gr.components.Gallery(type='pil', label="NER Entity explanations")
]
|
174 |
|
175 |
description_NER = """Automatically generated CLIP grounding explanations for
|
176 |
named entities, retrieved from the spacy NER model. <span style="color:red">Warning:</span> Note
|
|
|
182 |
outputs=outputs_NER,
|
183 |
title="Named Entity Grounding explainability using CLIP",
|
184 |
description=description_NER,
|
185 |
+
examples=[
|
186 |
+
["example_images/London.png", "In this image we see Big Ben and the London Eye, on both sides of the river Thames."],
|
187 |
+
["example_images/harrypotter.png", "Hermione, Harry and Ron in their school uniform"],
|
188 |
+
],
|
189 |
cache_examples=False)
|
190 |
|
191 |
demo_tabs = gr.TabbedInterface([iface, iface_NER], ["Default", "NER"])
|