Spaces:
Runtime error
Runtime error
Martijn van Beers
committed on
Commit
·
7751ada
1
Parent(s):
330a2ff
Use noun_chunks instead of entities
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import gradio as gr
|
| 3 |
|
|
@@ -25,6 +26,28 @@ clip.clip._MODELS = {
|
|
| 25 |
"ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
|
| 26 |
}
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 29 |
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
|
| 30 |
|
|
@@ -53,11 +76,16 @@ def run_demo(image, text):
|
|
| 53 |
|
| 54 |
|
| 55 |
# Default demo:
|
| 56 |
-
input_img = gr.inputs.Image(type='pil', label="Original Image")
|
| 57 |
-
input_txt = "text"
|
| 58 |
-
inputs = [input_img, input_txt]
|
| 59 |
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
description = """This demo is a copy of the demo CLIPGroundingExlainability built by Paul Hilders, Danilo de Goede and Piyush Bagad, as part of the course Interpretability and Explainability in AI (MSc AI, UvA, June 2022).
|
|
@@ -70,8 +98,8 @@ description = """This demo is a copy of the demo CLIPGroundingExlainability buil
|
|
| 70 |
of the model."""
|
| 71 |
|
| 72 |
iface = gr.Interface(fn=run_demo,
|
| 73 |
-
inputs=
|
| 74 |
-
outputs=
|
| 75 |
title="CLIP Grounding Explainability",
|
| 76 |
description=description,
|
| 77 |
examples=[["example_images/London.png", "London Eye"],
|
|
@@ -91,40 +119,58 @@ def add_label_to_img(img, label, add_entity_label=True):
|
|
| 91 |
img = ImageOps.expand(img, border=45, fill=(255,255,255))
|
| 92 |
draw = ImageDraw.Draw(img)
|
| 93 |
font = ImageFont.truetype("arial.ttf", 24)
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
else:
|
| 97 |
-
draw.text((5,5),
|
| 98 |
|
| 99 |
return img
|
| 100 |
|
| 101 |
def NER_demo(image, text):
|
| 102 |
-
# Apply NER to extract named entities, and run the explainability method
|
| 103 |
-
# for each named entity.
|
| 104 |
-
highlighed_entities = []
|
| 105 |
-
for ent in nlp(text).ents:
|
| 106 |
-
ent_text = ent.text
|
| 107 |
-
ent_label = ent.label_
|
| 108 |
-
highlighed_entities.append((ent_text, ent_label))
|
| 109 |
-
|
| 110 |
# As the default image, we run the default demo on the input image and text:
|
| 111 |
overlapped, highlighted_text = run_demo(image, text)
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
description_NER = """Automatically generated CLIP grounding explanations for
|
| 130 |
named entities, retrieved from the spacy NER model. <span style="color:red">Warning:</span> Note
|
|
@@ -136,7 +182,10 @@ iface_NER = gr.Interface(fn=NER_demo,
|
|
| 136 |
outputs=outputs_NER,
|
| 137 |
title="Named Entity Grounding explainability using CLIP",
|
| 138 |
description=description_NER,
|
| 139 |
-
examples=[
|
|
|
|
|
|
|
|
|
|
| 140 |
cache_examples=False)
|
| 141 |
|
| 142 |
demo_tabs = gr.TabbedInterface([iface, iface_NER], ["Default", "NER"])
|
|
|
|
| 1 |
+
import re
|
| 2 |
import sys
|
| 3 |
import gradio as gr
|
| 4 |
|
|
|
|
| 26 |
"ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
|
| 27 |
}
|
| 28 |
|
| 29 |
+
# Hex colour assigned to every label a noun chunk can receive: "N" is the
# fallback used by NER_demo for chunks without an entity type; the remaining
# keys are entity-type names. The same mapping is passed as `color_map` to the
# HighlightedText output and used to colour the text drawn onto gallery images.
colour_map = dict(
    N="#f77189",
    CARDINAL="#f7764a",
    DATE="#d98a32",
    EVENT="#bf9632",
    FAC="#a99e31",
    GPE="#90a531",
    LANGUAGE="#68ad31",
    LAW="#32b25e",
    LOC="#34af86",
    MONEY="#35ae9c",
    NORP="#36acac",
    ORDINAL="#37aabd",
    ORG="#39a7d4",
    PERCENT="#539ff4",
    PERSON="#9890f4",
    PRODUCT="#c47ef4",
    QUANTITY="#ef5ff4",
    TIME="#f565d0",
    WORK_OF_ART="#f66baf",
)
|
| 50 |
+
|
| 51 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 52 |
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
|
| 53 |
|
|
|
|
| 76 |
|
| 77 |
|
| 78 |
# Default demo:
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
+
# Input components of the default tab (handed to gr.Interface as `inputs`):
# a PIL image and a free-text description of it.
default_inputs = [
    gr.components.Image(label="Original Image", type="pil"),
    gr.components.Textbox(label="Image description"),
]

# Output components of the default tab (handed to gr.Interface as `outputs`):
# a PIL image and a HighlightedText view labelled "Text importance".
default_outputs = [
    gr.components.Image(label="Output Image", type="pil"),
    gr.components.HighlightedText(label="Text importance"),
]
|
| 89 |
|
| 90 |
|
| 91 |
description = """This demo is a copy of the demo CLIPGroundingExlainability built by Paul Hilders, Danilo de Goede and Piyush Bagad, as part of the course Interpretability and Explainability in AI (MSc AI, UvA, June 2022).
|
|
|
|
| 98 |
of the model."""
|
| 99 |
|
| 100 |
iface = gr.Interface(fn=run_demo,
|
| 101 |
+
inputs=default_inputs,
|
| 102 |
+
outputs=default_outputs,
|
| 103 |
title="CLIP Grounding Explainability",
|
| 104 |
description=description,
|
| 105 |
examples=[["example_images/London.png", "London Eye"],
|
|
|
|
| 119 |
img = ImageOps.expand(img, border=45, fill=(255,255,255))
|
| 120 |
draw = ImageDraw.Draw(img)
|
| 121 |
font = ImageFont.truetype("arial.ttf", 24)
|
| 122 |
+
m = re.match(r".*\((\w+)\)", label)
|
| 123 |
+
if add_entity_label and m is not None:
|
| 124 |
+
cat = m.group(1)
|
| 125 |
+
colours = tuple(map(lambda l: int(''.join(l),16), zip(*[iter(colour_map[cat][1:])]*2)))
|
| 126 |
+
|
| 127 |
+
draw.text((5,5), label , align="center", fill=colours, font=font)
|
| 128 |
else:
|
| 129 |
+
draw.text((5,5), label, align="center", fill=(0, 0, 0), font=font)
|
| 130 |
|
| 131 |
return img
|
| 132 |
|
| 133 |
def NER_demo(image, text):
    """Explain the whole text and each of its noun chunks against *image*.

    ``run_demo`` is called once for the complete text and once for every noun
    chunk produced by ``nlp(text)`` (presumably the spaCy pipeline loaded
    elsewhere in this file -- confirm). Chunks consisting of a single pronoun
    are skipped. Every remaining chunk is tagged with the entity type of its
    first token that has one, or ``"N"`` when no token carries an entity type.

    Returns:
        A ``(labeled_text, gallery_images)`` tuple. ``labeled_text`` is a dict
        holding the original ``text`` and a list of ``entities`` (each with
        ``entity``, ``start`` and ``end`` character offsets). ``gallery_images``
        lists the labelled images, starting with the one for the full text.
    """
    # The first gallery entry is the explanation for the complete input text;
    # it is labelled without entity colouring.
    sentence_overlay, _ = run_demo(image, text)
    gallery_images = [
        add_label_to_img(sentence_overlay, "Complete sentence", add_entity_label=False)
    ]

    labeled_text = {"text": text, "entities": []}

    # Every noun chunk then gets its own explanation image.
    for chunk in nlp(text).noun_chunks:
        is_lone_pronoun = len(chunk) == 1 and chunk[0].pos_ == "PRON"
        if is_lone_pronoun:
            continue

        # Label with the first entity type found inside the chunk, else "N".
        chunk_label = next(
            (tok.ent_type_ for tok in chunk if tok.ent_type_ != ''),
            "N",
        )

        labeled_text["entities"].append(
            {"entity": chunk_label, "start": chunk.start_char, "end": chunk.end_char}
        )

        phrase = chunk.text
        chunk_overlay, _ = run_demo(image, phrase)
        gallery_images.append(
            add_label_to_img(chunk_overlay, f"{phrase} ({chunk_label})")
        )

    return labeled_text, gallery_images
|
| 163 |
+
|
| 164 |
+
# Input components of the NER tab: a PIL image and the text describing it.
inputs_NER = [
    gr.Image(label="Original Image", type="pil"),
    gr.components.Textbox(label="Descriptive text"),
]

# Output components of the NER tab, in the order NER_demo returns its values:
# the labelled text (coloured via colour_map, with a legend) and the gallery
# of explanation images.
outputs_NER = [
    gr.components.HighlightedText(
        label="Noun chunks",
        color_map=colour_map,
        show_legend=True,
    ),
    gr.components.Gallery(label="NER Entity explanations", type="pil"),
]
|
| 174 |
|
| 175 |
description_NER = """Automatically generated CLIP grounding explanations for
|
| 176 |
named entities, retrieved from the spacy NER model. <span style="color:red">Warning:</span> Note
|
|
|
|
| 182 |
outputs=outputs_NER,
|
| 183 |
title="Named Entity Grounding explainability using CLIP",
|
| 184 |
description=description_NER,
|
| 185 |
+
examples=[
|
| 186 |
+
["example_images/London.png", "In this image we see Big Ben and the London Eye, on both sides of the river Thames."],
|
| 187 |
+
["example_images/harrypotter.png", "Hermione, Harry and Ron in their school uniform"],
|
| 188 |
+
],
|
| 189 |
cache_examples=False)
|
| 190 |
|
| 191 |
demo_tabs = gr.TabbedInterface([iface, iface_NER], ["Default", "NER"])
|