Spaces:
Sleeping
Sleeping
Muhusystem
committed on
Commit
·
d044829
1
Parent(s):
79378f3
Add opencv-python to requirements
Browse files
app.py
CHANGED
@@ -2,12 +2,10 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
from transformers import GPT2Model, ViTModel, GPT2Tokenizer, ViTImageProcessor
|
4 |
from captum.attr import IntegratedGradients
|
|
|
5 |
from PIL import Image
|
6 |
import numpy as np
|
7 |
import cv2
|
8 |
-
import matplotlib.pyplot as plt
|
9 |
-
import io
|
10 |
-
import base64
|
11 |
|
12 |
# 定义多模态模型
|
13 |
class MultiModalModel(torch.nn.Module):
|
@@ -39,57 +37,30 @@ def load_model():
|
|
39 |
model.eval()
|
40 |
return model
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
# 初始化模型和加载器
|
43 |
model = load_model()
|
44 |
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
45 |
tokenizer.pad_token = tokenizer.eos_token
|
46 |
feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
|
47 |
|
48 |
-
#
|
49 |
-
integrated_gradients = IntegratedGradients(model)
|
50 |
-
|
51 |
-
# 转换为 PIL 图像
|
52 |
-
def convert_to_pil(image_array):
    """Convert a channel-first image tensor or array into a PIL image.

    Args:
        image_array: A ``torch.Tensor`` or NumPy array whose axes are
            transposed with ``(1, 2, 0)``, i.e. it is expected to be laid
            out as (channels, height, width).

    Returns:
        The ``PIL.Image.Image`` produced by ``Image.fromarray`` from the
        channel-last uint8 array.
    """
    if isinstance(image_array, torch.Tensor):
        # A bare .numpy() raises for tensors that require grad or that live
        # on a non-CPU device, so detach and move to host memory first.
        image_array = image_array.detach().cpu().numpy()
    image_array = np.transpose(image_array, (1, 2, 0))
    # Arrays whose maximum is <= 1.0 are treated as unit-range and scaled
    # up to the 0-255 range.
    if image_array.max() <= 1.0:
        image_array = image_array * 255
    if image_array.dtype != np.uint8:
        # Clip before casting so negative or >255 values saturate instead
        # of wrapping, and so float input is never handed to PIL as-is.
        image_array = np.clip(image_array, 0, 255).astype(np.uint8)
    return Image.fromarray(image_array)
|
59 |
-
|
60 |
-
# 可视化归因结果
|
61 |
-
def visualize_attributions(attributions, pixel_values):
    """Render the input image beside its contoured attribution map.

    Args:
        attributions: Attribution tensor. It is squeezed and passed to
            ``convert_to_pil``, so it is presumably (1, 3, H, W) -- TODO
            confirm against the caller.
        pixel_values: Image tensor whose leading dimension is removed with
            ``squeeze(0)`` before being passed to ``convert_to_pil``.

    Returns:
        A ``data:image/png;base64,...`` URI string holding the rendered
        two-panel figure.
    """
    attribution_image = attributions.squeeze().detach().cpu().numpy()

    # Min-max normalise to [0, 1]; a constant map has zero span and would
    # otherwise divide by zero and yield NaNs.
    lowest = attribution_image.min()
    span = attribution_image.max() - lowest
    if span > 0:
        attribution_image = (attribution_image - lowest) / span
    else:
        attribution_image = np.zeros_like(attribution_image)
    attribution_image = np.uint8(255 * attribution_image)
    attribution_image_pil = convert_to_pil(attribution_image)

    # Convert to grayscale, binarise at 128 and find the outer contours.
    attribution_gray = cv2.cvtColor(np.array(attribution_image_pil), cv2.COLOR_RGB2GRAY)
    _, binary_mask = cv2.threshold(attribution_gray, 128, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Draw every contour (index -1) onto a copy of the attribution image.
    contour_image = np.array(attribution_image_pil)
    cv2.drawContours(contour_image, contours, -1, (255, 0, 0), 2)

    # Build the side-by-side figure and serialise it to PNG in memory.
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    try:
        axes[0].imshow(convert_to_pil(pixel_values.squeeze(0).detach().cpu().numpy()))
        axes[0].axis('off')
        axes[0].set_title("Original Image")

        axes[1].imshow(contour_image)
        axes[1].axis('off')
        axes[1].set_title("Attribution with Contours")

        buf = io.BytesIO()
        # Save this specific figure rather than whatever pyplot considers
        # the current one.
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)

    img_str = base64.b64encode(buf.getvalue()).decode('utf-8')
    return f"data:image/png;base64,{img_str}"
|
91 |
-
|
92 |
-
# 推理并进行归因分析
|
93 |
def predict(image, text):
|
94 |
# 处理图像
|
95 |
image = Image.fromarray(image)
|
@@ -104,8 +75,8 @@ def predict(image, text):
|
|
104 |
padding="max_length"
|
105 |
)
|
106 |
|
107 |
-
input_ids = inputs["input_ids"]
|
108 |
-
attention_mask = inputs["attention_mask"]
|
109 |
pixel_values = image_features["pixel_values"]
|
110 |
|
111 |
# 推理
|
@@ -114,18 +85,29 @@ def predict(image, text):
|
|
114 |
prediction = torch.argmax(logits, dim=1).item()
|
115 |
label = "yes" if prediction == 1 else "no"
|
116 |
|
117 |
-
#
|
118 |
attributions, _ = integrated_gradients.attribute(
|
119 |
inputs=pixel_values,
|
120 |
target=prediction,
|
121 |
additional_forward_args=(input_ids, attention_mask),
|
122 |
n_steps=1,
|
123 |
-
return_convergence_delta=
|
124 |
)
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
# 创建 Gradio 界面
|
131 |
iface = gr.Interface(
|
|
|
2 |
import torch
|
3 |
from transformers import GPT2Model, ViTModel, GPT2Tokenizer, ViTImageProcessor
|
4 |
from captum.attr import IntegratedGradients
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
from PIL import Image
|
7 |
import numpy as np
|
8 |
import cv2
|
|
|
|
|
|
|
9 |
|
10 |
# 定义多模态模型
|
11 |
class MultiModalModel(torch.nn.Module):
|
|
|
37 |
model.eval()
|
38 |
return model
|
39 |
|
40 |
+
# 转换张量为 PIL 图像
|
41 |
+
def convert_tensor_to_pil(tensor_image):
    """Convert a channel-first image tensor or array into a PIL image.

    Args:
        tensor_image: A ``torch.Tensor`` or NumPy array whose axes are
            transposed with ``(1, 2, 0)``, i.e. it is expected to be laid
            out as (channels, height, width).

    Returns:
        The ``PIL.Image.Image`` produced by ``Image.fromarray`` from the
        channel-last uint8 array.
    """
    if isinstance(tensor_image, torch.Tensor):
        # A bare .numpy() raises for tensors that require grad or that live
        # on a non-CPU device, so detach and move to host memory first.
        tensor_image = tensor_image.detach().cpu().numpy()
    image_np = np.transpose(tensor_image, (1, 2, 0))
    # Arrays whose maximum is <= 1.0 are treated as unit-range and scaled
    # up to the 0-255 range.
    if image_np.max() <= 1.0:
        image_np = image_np * 255
    if image_np.dtype != np.uint8:
        # Clip before casting so negative or >255 values saturate instead
        # of wrapping, and so float input is never handed to PIL as-is.
        image_np = np.clip(image_np, 0, 255).astype(np.uint8)
    return Image.fromarray(image_np)
|
48 |
+
|
49 |
+
# 自定义前向函数用于集成梯度
|
50 |
+
def custom_forward(pixel_values, input_ids, attention_mask):
    """Run the model with ``pixel_values`` as the leading argument.

    This is the forward function handed to ``IntegratedGradients``. It
    forwards all three arguments to the module-level ``model`` by keyword;
    ``model`` is looked up when the function is called, not when it is
    defined.

    Args:
        pixel_values: Image input forwarded as ``pixel_values``.
        input_ids: Token ids forwarded as ``input_ids``.
        attention_mask: Mask forwarded as ``attention_mask``.

    Returns:
        Whatever ``model(...)`` returns (used as logits by the caller).
    """
    return model(
        pixel_values=pixel_values,
        input_ids=input_ids,
        attention_mask=attention_mask,
    )
|
53 |
+
|
54 |
+
# 初始化集成梯度
|
55 |
+
integrated_gradients = IntegratedGradients(custom_forward)
|
56 |
+
|
57 |
# 初始化模型和加载器
|
58 |
model = load_model()
|
59 |
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
60 |
tokenizer.pad_token = tokenizer.eos_token
|
61 |
feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
|
62 |
|
63 |
+
# 定义推理和归因分析函数
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def predict(image, text):
|
65 |
# 处理图像
|
66 |
image = Image.fromarray(image)
|
|
|
75 |
padding="max_length"
|
76 |
)
|
77 |
|
78 |
+
input_ids = inputs["input_ids"].long()
|
79 |
+
attention_mask = inputs["attention_mask"].long()
|
80 |
pixel_values = image_features["pixel_values"]
|
81 |
|
82 |
# 推理
|
|
|
85 |
prediction = torch.argmax(logits, dim=1).item()
|
86 |
label = "yes" if prediction == 1 else "no"
|
87 |
|
88 |
+
# 归因分析
|
89 |
attributions, _ = integrated_gradients.attribute(
|
90 |
inputs=pixel_values,
|
91 |
target=prediction,
|
92 |
additional_forward_args=(input_ids, attention_mask),
|
93 |
n_steps=1,
|
94 |
+
return_convergence_delta=True
|
95 |
)
|
96 |
|
97 |
+
# 可视化归因结果
|
98 |
+
attribution_image = attributions.squeeze().cpu().numpy()
|
99 |
+
attribution_image = (attribution_image - attribution_image.min()) / (attribution_image.max() - attribution_image.min())
|
100 |
+
attribution_image = np.uint8(255 * attribution_image)
|
101 |
+
attribution_image_real = convert_tensor_to_pil(attribution_image)
|
102 |
+
|
103 |
+
# 轮廓检测
|
104 |
+
attribution_gray = cv2.cvtColor(np.array(attribution_image_real), cv2.COLOR_RGB2GRAY)
|
105 |
+
_, binary_mask = cv2.threshold(attribution_gray, 128, 255, cv2.THRESH_BINARY)
|
106 |
+
contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
107 |
+
contour_image = np.array(attribution_image_real)
|
108 |
+
cv2.drawContours(contour_image, contours, -1, (255, 0, 0), 2)
|
109 |
+
|
110 |
+
return label, Image.fromarray(contour_image)
|
111 |
|
112 |
# 创建 Gradio 界面
|
113 |
iface = gr.Interface(
|