Spaces:
Runtime error
Runtime error
File size: 7,608 Bytes
7751ada 0241217 adf3a47 0241217 d80767e c3ca2bd d80767e 0241217 0c9f8df 0241217 adf3a47 7751ada 0241217 ae6e057 d80767e 0241217 dc15657 f992b4c dc15657 f992b4c 1b49495 dc15657 f992b4c 1fd86da dc15657 1fd86da 0241217 dc15657 0241217 8f3d1af 0241217 8f3d1af 0241217 929c841 adf3a47 dc15657 adf3a47 dc15657 adf3a47 dc15657 929c841 39c7251 201e3f5 c3ca2bd 39c7251 7751ada 39c7251 7751ada c3ca2bd 8c7ba46 6d91375 8c7ba46 cca85c2 7751ada 8c7ba46 7751ada 0241217 adf3a47 dc15657 adf3a47 dc15657 c081dbe dc15657 adf3a47 dc15657 c081dbe b43e284 dc15657 b43e284 adf3a47 330a2ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import re
import sys
import pathlib
import csv
import gradio as gr
sys.path.append("CLIP_explainability/Transformer-MM-Explainability/")
import torch
import CLIP.clip as clip
import spacy
from PIL import Image, ImageFont, ImageDraw, ImageOps
from clip_grounding.utils.image import pad_to_square
from clip_grounding.datasets.png import (
overlay_relevance_map_on_image,
)
from CLIP_explainability.utils import interpret, show_img_heatmap, show_heatmap_on_text
# Replace the model table on ``clip.clip`` with the four ViT checkpoints listed
# below. Every URL has the shape <root>/<digest>/<filename>.
# NOTE(review): presumably clip.load() resolves model names through this table;
# confirm against the vendored CLIP package.
_CLIP_WEIGHTS_ROOT = "https://openaipublic.azureedge.net/clip/models"
clip.clip._MODELS = {
    model_name: f"{_CLIP_WEIGHTS_ROOT}/{digest}/{filename}"
    for model_name, digest, filename in (
        (
            "ViT-B/32",
            "40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af",
            "ViT-B-32.pt",
        ),
        (
            "ViT-B/16",
            "5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f",
            "ViT-B-16.pt",
        ),
        (
            "ViT-L/14",
            "b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836",
            "ViT-L-14.pt",
        ),
        (
            "ViT-L/14@336px",
            "3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02",
            "ViT-L-14-336px.pt",
        ),
    )
}
def iter_file(filename):
    """Yield the lines of a text file, skipping its first (header) line.

    Args:
        filename: Path of the file to read (anything ``pathlib.Path`` accepts).

    Yields:
        Each line after the first, exactly as read (line endings included).
        An empty file yields nothing.
    """
    with pathlib.Path(filename).open("r") as fh:
        # Discard the header. The ``None`` default matters: a bare ``next(fh)``
        # on an empty file raises StopIteration inside this generator, which
        # Python turns into a RuntimeError (PEP 479).
        next(fh, None)
        yield from fh
# Hex display colour for each chunk label. "N" is the label given to noun
# chunks that carry no entity type; the remaining keys look like spaCy
# named-entity labels (TODO confirm against the loaded pipeline).
colour_map = dict(
    (
        ("N", "#f77189"),
        ("CARDINAL", "#f7764a"),
        ("DATE", "#d98a32"),
        ("EVENT", "#bf9632"),
        ("FAC", "#a99e31"),
        ("GPE", "#90a531"),
        ("LANGUAGE", "#68ad31"),
        ("LAW", "#32b25e"),
        ("LOC", "#34af86"),
        ("MONEY", "#35ae9c"),
        ("NORP", "#36acac"),
        ("ORDINAL", "#37aabd"),
        ("ORG", "#39a7d4"),
        ("PERCENT", "#539ff4"),
        ("PERSON", "#9890f4"),
        ("PRODUCT", "#c47ef4"),
        ("QUANTITY", "#ef5ff4"),
        ("TIME", "#f565d0"),
        ("WORK_OF_ART", "#f66baf"),
    )
)
# Run on the GPU when torch reports CUDA support, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The spaCy pipeline is loaded through its installed package module.
# Alternative left by the original author: nlp = spacy.load("en_core_web_sm")
import en_core_web_sm

nlp = en_core_web_sm.load()
# Gradio Section:
def update_slider(model):
    """Return a Gradio update that resets the layer slider for ``model``.

    The slider's maximum and current value are both set to 23 when ``model``
    is "ViT-L/14" and to 11 for every other model name.
    """
    top_layer = 23 if model == "ViT-L/14" else 11
    return gr.update(maximum=top_layer, value=top_layer)
def run_demo(*args):
    """Compute image and text relevance for an (image, text) pair with CLIP.

    Accepted positional forms:
        run_demo(image, text)
            Uses model "ViT-B/32" and start layer 11.
        run_demo(image, text, model_name)
            Uses start layer 23 for "ViT-L/14" and 11 for any other model
            (the same limits the layer slider uses).
        run_demo(image, text, model_name, vision_layer)
            Uses the given model and start layer.

    Args:
        image: Input image (the UI supplies a PIL image).
        text: Description to ground in the image.
        model_name: Name passed to ``clip.load``.
        vision_layer: Start layer passed to ``interpret``; coerced to ``int``.

    Returns:
        A tuple ``(overlapped, highlighted_text)``: the result of
        ``overlay_relevance_map_on_image`` and a list of
        ``(token, score)`` pairs with ``str`` tokens and ``float`` scores.

    Raises:
        ValueError: If called with fewer than two or more than four arguments.
    """
    if len(args) == 4:
        image, text, model_name, vision_layer = args
    elif len(args) == 3:
        image, text, model_name = args
        vision_layer = 23 if model_name == "ViT-L/14" else 11
    elif len(args) == 2:
        image, text = args
        model_name = "ViT-B/32"
        vision_layer = 11
    else:
        raise ValueError("Unexpected number of parameters")

    # Sliders may deliver the layer as a float; the layer index must be an int.
    vision_layer = int(vision_layer)

    model, preprocess = clip.load(model_name, device=device, jit=False)
    orig_image = pad_to_square(image)
    img = preprocess(orig_image).unsqueeze(0).to(device)
    text_input = clip.tokenize([text]).to(device)

    R_text, R_image = interpret(model=model, image=img, texts=text_input, device=device, start_layer=vision_layer)

    image_relevance = show_img_heatmap(R_image[0], img, orig_image=orig_image, device=device)
    # NOTE(review): the overlay uses the unpadded `image`, while the relevance
    # map was produced with the padded `orig_image` -- confirm this is intended.
    overlapped = overlay_relevance_map_on_image(image, image_relevance)

    text_scores, text_tokens_decoded = show_heatmap_on_text(text, text_input, R_text[0])

    highlighted_text = [
        (str(token), float(text_scores[i]))
        for i, token in enumerate(text_tokens_decoded)
    ]

    return overlapped, highlighted_text
# Default demo:
# Example rows come from examples.csv with its header line skipped.
examples = list(csv.reader(iter_file("examples.csv")))

with gr.Blocks(title="CLIP Grounding Explainability") as iface_default:
    # read_text must be *called*: passing the bound method hands Gradio a
    # callable instead of the markdown source.
    gr.Markdown(pathlib.Path("description.md").read_text())
    with gr.Row():
        with gr.Column() as inputs:
            orig = gr.components.Image(type='pil', label="Original Image")
            description = gr.components.Textbox(label="Image description")
            default_model = gr.Dropdown(label="CLIP Model", choices=['ViT-B/16', 'ViT-B/32', 'ViT-L/14'], value="ViT-B/32")
            default_layer = gr.Slider(label="Vision start layer", minimum=0, maximum=11, step=1, value=11)
            submit = gr.Button("Submit")
        with gr.Column() as outputs:
            image = gr.components.Image(type='pil', label="Output Image")
            text = gr.components.HighlightedText(label="Text importance")
    gr.Examples(examples=examples, inputs=[orig, description])

    # Keep the layer slider's range in step with the selected model.
    default_model.change(update_slider, inputs=default_model, outputs=default_layer)
    submit.click(run_demo, inputs=[orig, description, default_model, default_layer], outputs=[image, text])
# NER demo:
def add_label_to_img(img, label, add_entity_label=True):
    """Return ``img`` with a white border and ``label`` drawn at its top left.

    Args:
        img: PIL image to annotate; it is not modified, a bordered copy is
            returned.
        label: Text to draw. When it contains a parenthesised tag such as
            "a dog (N)", the tag selects the text colour from ``colour_map``.
        add_entity_label: When False, the label is always drawn in black.

    Returns:
        The bordered image with the label drawn on it.
    """
    img = ImageOps.expand(img, border=45, fill=(255, 255, 255))
    draw = ImageDraw.Draw(img)

    try:
        font = ImageFont.truetype("arial.ttf", 24)
    except OSError:
        # Arial is not installed on many Linux hosts; a plain label is better
        # than failing the whole request.
        font = ImageFont.load_default()

    text_colour = (0, 0, 0)
    if add_entity_label:
        m = re.match(r".*\((\w+)\)", label)
        # Tags without a colour entry fall back to black instead of KeyError.
        hex_colour = colour_map.get(m.group(1)) if m is not None else None
        if hex_colour is not None:
            # "#rrggbb" -> (r, g, b)
            text_colour = tuple(int(hex_colour[i:i + 2], 16) for i in (1, 3, 5))

    draw.text((5, 5), label, align="center", fill=text_colour, font=font)
    return img
def NER_demo(image, text, model_name, vision_layer=None):
    """Run the grounding demo on the full text and on each of its noun chunks.

    Args:
        image: Input image (the UI supplies a PIL image).
        text: Descriptive text; it is parsed with the module-level ``nlp``.
        model_name: CLIP model name forwarded to ``run_demo``.
        vision_layer: Vision start layer forwarded to ``run_demo``. When
            omitted, 23 is used for "ViT-L/14" and 11 for any other model.

    Returns:
        A tuple ``(labeled_text, gallery_images)``. ``labeled_text`` is a dict
        with the original ``text`` and an ``entities`` list holding one
        ``{'entity', 'start', 'end'}`` record per processed noun chunk.
        ``gallery_images`` is a list of labelled overlay images: the complete
        sentence first, then one per processed noun chunk.
    """
    if vision_layer is None:
        vision_layer = 23 if model_name == "ViT-L/14" else 11

    # As the default image, we run the default demo on the input image and text.
    # run_demo is always called with an explicit layer so that the model and
    # layer choice stay in sync.
    overlapped, _ = run_demo(image, text, model_name, vision_layer)

    gallery_images = [add_label_to_img(overlapped, "Complete sentence", add_entity_label=False)]

    labeled_text = dict(
        text=text,
        entities=[],
    )

    # Then, we run the demo for each of the noun chunks in the text:
    for chunk in nlp(text).noun_chunks:
        # Skip chunks that are a single pronoun.
        if len(chunk) == 1 and chunk[0].pos_ == "PRON":
            continue

        chunk_text = chunk.text
        # Label the chunk with the first entity type found on its tokens,
        # or "N" when none of them carries one.
        chunk_label = next((t.ent_type_ for t in chunk if t.ent_type_ != ''), "N")

        labeled_text['entities'].append({'entity': chunk_label, 'start': chunk.start_char, 'end': chunk.end_char})
        overlapped, _ = run_demo(image, chunk_text, model_name, vision_layer)
        overlapped_labelled = add_label_to_img(overlapped, f"{chunk_text} ({chunk_label})")
        gallery_images.append(overlapped_labelled)

    return labeled_text, gallery_images


# Example rows come from entity_examples.csv with its header line skipped.
entity_examples = list(csv.reader(iter_file("entity_examples.csv")))

with gr.Blocks(title="Entity Grounding explainability using CLIP") as iface_NER:
    # read_text must be *called*: passing the bound method hands Gradio a
    # callable instead of the markdown source.
    gr.Markdown(pathlib.Path("entity_description.md").read_text())
    with gr.Row():
        with gr.Column() as inputs:
            img = gr.Image(type='pil', label="Original Image")
            intext = gr.components.Textbox(label="Descriptive text")
            ner_model = gr.Dropdown(label="CLIP Model", choices=['ViT-B/16', 'ViT-B/32', 'ViT-L/14'], value="ViT-B/32")
            ner_layer = gr.Slider(label="Vision start layer", minimum=0, maximum=11, step=1, value=11)
            submit = gr.Button("Submit")
        with gr.Column() as outputs:
            text = gr.components.HighlightedText(show_legend=True, color_map=colour_map, label="Noun chunks")
            gallery = gr.components.Gallery(type='pil', label="NER Entity explanations")
    # Examples fill the *input* textbox, not the highlighted-text output.
    gr.Examples(examples=entity_examples, inputs=[img, intext])

    # Keep the layer slider's range in step with the selected model.
    ner_model.change(update_slider, inputs=ner_model, outputs=ner_layer)
    # NER_demo (not run_demo) returns the (labelled text, gallery) pair that
    # these two output components expect.
    submit.click(NER_demo, inputs=[img, intext, ner_model, ner_layer], outputs=[text, gallery])
# Combine both demos into one tabbed app, add the shared footer, and start it.
demo_tabs = gr.TabbedInterface([iface_default, iface_NER], ["Default", "Entities"])
with demo_tabs:
    # read_text must be *called*: passing the bound method hands Gradio a
    # callable instead of the markdown source.
    gr.Markdown(pathlib.Path("footer.md").read_text())
demo_tabs.launch(show_error=True)
|