Muhusystem committed on
Commit
d044829
·
1 Parent(s): 79378f3

Add opencv-python to requirements

Browse files
Files changed (1) hide show
  1. app.py +37 -55
app.py CHANGED
@@ -2,12 +2,10 @@ import gradio as gr
2
  import torch
3
  from transformers import GPT2Model, ViTModel, GPT2Tokenizer, ViTImageProcessor
4
  from captum.attr import IntegratedGradients
 
5
  from PIL import Image
6
  import numpy as np
7
  import cv2
8
- import matplotlib.pyplot as plt
9
- import io
10
- import base64
11
 
12
  # 定义多模态模型
13
  class MultiModalModel(torch.nn.Module):
@@ -39,57 +37,30 @@ def load_model():
39
  model.eval()
40
  return model
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # 初始化模型和加载器
43
  model = load_model()
44
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
45
  tokenizer.pad_token = tokenizer.eos_token
46
  feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
47
 
48
- # 定义集成梯度解释性分析
49
- integrated_gradients = IntegratedGradients(model)
50
-
51
# Convert a channel-first image into a PIL image
def convert_to_pil(image_array):
    """Convert a channel-first image into a ``PIL.Image``.

    Args:
        image_array: A ``torch.Tensor`` or NumPy array whose axes are
            reordered with ``np.transpose(..., (1, 2, 0))``, i.e. the first
            axis is moved to the end before the image is built.

    Returns:
        The ``PIL.Image`` built from the reordered uint8 data.
    """
    if isinstance(image_array, torch.Tensor):
        # detach()/cpu() so tensors that require grad or live on an
        # accelerator can be converted; a bare .numpy() raises for both.
        image_array = image_array.detach().cpu().numpy()
    image_array = np.transpose(image_array, (1, 2, 0))
    if image_array.max() <= 1.0:
        # Data in the unit range is rescaled to 0-255. Clipping keeps
        # negative values from wrapping around in the uint8 cast.
        image_array = np.clip(image_array * 255, 0, 255).astype(np.uint8)
    elif image_array.dtype != np.uint8:
        # Image.fromarray cannot build a colour image from float data, so
        # values already on the 0-255 scale are clipped and cast as well.
        image_array = np.clip(image_array, 0, 255).astype(np.uint8)
    return Image.fromarray(image_array)
59
-
60
# Visualize the attribution result
def visualize_attributions(attributions, pixel_values):
    """Render the input image beside its attribution map as a PNG data URI.

    The attribution values are min-max normalized to 0-255, thresholded at
    128, and the external contours of the resulting mask are drawn on top of
    the attribution image. Both panels are saved into one matplotlib figure.

    Args:
        attributions: Attribution tensor; it is squeezed before conversion
            to NumPy and then passed to ``convert_to_pil``.
        pixel_values: Input image tensor; its leading axis is squeezed away
            before it is passed to ``convert_to_pil``.

    Returns:
        A ``data:image/png;base64,...`` string containing the rendered figure.
    """
    attribution_image = attributions.squeeze().detach().cpu().numpy()
    lowest = attribution_image.min()
    value_range = attribution_image.max() - lowest
    if value_range > 0:
        attribution_image = (attribution_image - lowest) / value_range
    else:
        # A constant map would turn the normalization into 0/0 (NaN);
        # render it as an all-zero image instead.
        attribution_image = np.zeros_like(attribution_image)
    attribution_image = np.uint8(255 * attribution_image)
    attribution_image_pil = convert_to_pil(attribution_image)

    # Convert to grayscale and detect contours of the high-attribution mask
    attribution_gray = cv2.cvtColor(np.array(attribution_image_pil), cv2.COLOR_RGB2GRAY)
    _, binary_mask = cv2.threshold(attribution_gray, 128, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    contour_image = np.array(attribution_image_pil)
    cv2.drawContours(contour_image, contours, -1, (255, 0, 0), 2)

    # Build the two-panel figure
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    try:
        axes[0].imshow(convert_to_pil(pixel_values.squeeze(0).detach().cpu().numpy()))
        axes[0].axis('off')
        axes[0].set_title("Original Image")

        axes[1].imshow(contour_image)
        axes[1].axis('off')
        axes[1].set_title("Attribution with Contours")

        buf = io.BytesIO()
        # Save this specific figure rather than whatever pyplot currently
        # considers the "current" one.
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even if rendering fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)

    img_str = base64.b64encode(buf.getvalue()).decode('utf-8')
    return f"data:image/png;base64,{img_str}"
91
-
92
- # 推理并进行归因分析
93
  def predict(image, text):
94
  # 处理图像
95
  image = Image.fromarray(image)
@@ -104,8 +75,8 @@ def predict(image, text):
104
  padding="max_length"
105
  )
106
 
107
- input_ids = inputs["input_ids"]
108
- attention_mask = inputs["attention_mask"]
109
  pixel_values = image_features["pixel_values"]
110
 
111
  # 推理
@@ -114,18 +85,29 @@ def predict(image, text):
114
  prediction = torch.argmax(logits, dim=1).item()
115
  label = "yes" if prediction == 1 else "no"
116
 
117
- # 集成梯度归因
118
  attributions, _ = integrated_gradients.attribute(
119
  inputs=pixel_values,
120
  target=prediction,
121
  additional_forward_args=(input_ids, attention_mask),
122
  n_steps=1,
123
- return_convergence_delta=False
124
  )
125
 
126
- attribution_visual = visualize_attributions(attributions, pixel_values)
127
-
128
- return label, attribution_visual
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  # 创建 Gradio 界面
131
  iface = gr.Interface(
 
2
  import torch
3
  from transformers import GPT2Model, ViTModel, GPT2Tokenizer, ViTImageProcessor
4
  from captum.attr import IntegratedGradients
5
+ import matplotlib.pyplot as plt
6
  from PIL import Image
7
  import numpy as np
8
  import cv2
 
 
 
9
 
10
  # 定义多模态模型
11
  class MultiModalModel(torch.nn.Module):
 
37
  model.eval()
38
  return model
39
 
40
# Convert a channel-first tensor into a PIL image
def convert_tensor_to_pil(tensor_image):
    """Convert a channel-first image into a ``PIL.Image``.

    Args:
        tensor_image: A ``torch.Tensor`` or NumPy array whose axes are
            reordered with ``np.transpose(..., (1, 2, 0))``, i.e. the first
            axis is moved to the end before the image is built.

    Returns:
        The ``PIL.Image`` built from the reordered uint8 data.
    """
    if isinstance(tensor_image, torch.Tensor):
        # detach()/cpu() so tensors that require grad or live on an
        # accelerator can be converted; a bare .numpy() raises for both.
        tensor_image = tensor_image.detach().cpu().numpy()
    image_np = np.transpose(tensor_image, (1, 2, 0))
    if image_np.max() <= 1.0:
        # Data in the unit range is rescaled to 0-255. Clipping keeps
        # negative values from wrapping around in the uint8 cast.
        image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
    elif image_np.dtype != np.uint8:
        # Image.fromarray cannot build a colour image from float data, so
        # values already on the 0-255 scale are clipped and cast as well.
        image_np = np.clip(image_np, 0, 255).astype(np.uint8)
    return Image.fromarray(image_np)
48
+
49
# Forward wrapper handed to Integrated Gradients
def custom_forward(pixel_values, input_ids, attention_mask):
    """Call the module-level ``model`` with the image tensor listed first.

    The attribution call in this file passes ``pixel_values`` as ``inputs``
    and ``(input_ids, attention_mask)`` as ``additional_forward_args``, so
    this wrapper takes the image tensor as its first positional parameter
    and forwards everything to the model by keyword.

    Args:
        pixel_values: Image input forwarded as ``pixel_values``.
        input_ids: Text input forwarded as ``input_ids``.
        attention_mask: Text mask forwarded as ``attention_mask``.

    Returns:
        Whatever ``model(...)`` returns (bound to ``logits`` in the caller).
    """
    return model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        pixel_values=pixel_values,
    )
53
+
54
+ # 初始化集成梯度
55
+ integrated_gradients = IntegratedGradients(custom_forward)
56
+
57
  # 初始化模型和加载器
58
  model = load_model()
59
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
60
  tokenizer.pad_token = tokenizer.eos_token
61
  feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
62
 
63
+ # 定义推理和归因分析函数
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  def predict(image, text):
65
  # 处理图像
66
  image = Image.fromarray(image)
 
75
  padding="max_length"
76
  )
77
 
78
+ input_ids = inputs["input_ids"].long()
79
+ attention_mask = inputs["attention_mask"].long()
80
  pixel_values = image_features["pixel_values"]
81
 
82
  # 推理
 
85
  prediction = torch.argmax(logits, dim=1).item()
86
  label = "yes" if prediction == 1 else "no"
87
 
88
+ # 归因分析
89
  attributions, _ = integrated_gradients.attribute(
90
  inputs=pixel_values,
91
  target=prediction,
92
  additional_forward_args=(input_ids, attention_mask),
93
  n_steps=1,
94
+ return_convergence_delta=True
95
  )
96
 
97
+ # 可视化归因结果
98
+ attribution_image = attributions.squeeze().cpu().numpy()
99
+ attribution_image = (attribution_image - attribution_image.min()) / (attribution_image.max() - attribution_image.min())
100
+ attribution_image = np.uint8(255 * attribution_image)
101
+ attribution_image_real = convert_tensor_to_pil(attribution_image)
102
+
103
+ # 轮廓检测
104
+ attribution_gray = cv2.cvtColor(np.array(attribution_image_real), cv2.COLOR_RGB2GRAY)
105
+ _, binary_mask = cv2.threshold(attribution_gray, 128, 255, cv2.THRESH_BINARY)
106
+ contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
107
+ contour_image = np.array(attribution_image_real)
108
+ cv2.drawContours(contour_image, contours, -1, (255, 0, 0), 2)
109
+
110
+ return label, Image.fromarray(contour_image)
111
 
112
  # 创建 Gradio 界面
113
  iface = gr.Interface(