Muhusystem committed a5ee181 (parent: a0803c3)

Split text prediction and attribution analysis into separate buttons
app.py
CHANGED
@@ -61,11 +61,9 @@ feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-2
 
 # Define the inference function
 def predict_text(image, text):
-    # Process the image
     image = Image.fromarray(image)
     image_features = feature_extractor(images=image, return_tensors="pt")
 
-    # Process the text
     inputs = tokenizer.encode_plus(
         f"Question: {text} Answer:",
         return_tensors="pt",
@@ -78,7 +76,6 @@ def predict_text(image, text):
     attention_mask = inputs["attention_mask"].long()
     pixel_values = image_features["pixel_values"]
 
-    # Run inference
     with torch.no_grad():
         logits = model(input_ids, attention_mask, pixel_values)
         prediction = torch.argmax(logits, dim=1).item()
@@ -87,11 +84,9 @@ def predict_text(image, text):
 
 # Define the attribution analysis function
 def generate_attribution(image, text):
-    # Process the image
     image = Image.fromarray(image)
     image_features = feature_extractor(images=image, return_tensors="pt")
 
-    # Process the text
     inputs = tokenizer.encode_plus(
         f"Question: {text} Answer:",
         return_tensors="pt",
@@ -104,7 +99,6 @@ def generate_attribution(image, text):
     attention_mask = inputs["attention_mask"].long()
     pixel_values = image_features["pixel_values"]
 
-    # Inference and attribution analysis
     with torch.no_grad():
         logits = model(input_ids, attention_mask, pixel_values)
         prediction = torch.argmax(logits, dim=1).item()
@@ -117,36 +111,39 @@ def generate_attribution(image, text):
         return_convergence_delta=True
     )
 
-    # Process the attribution image
     attribution_image = attributions.squeeze().cpu().numpy()
     attribution_image = (attribution_image - attribution_image.min()) / (attribution_image.max() - attribution_image.min())
     attribution_image = np.uint8(255 * attribution_image)
     attribution_image_real = convert_tensor_to_pil(attribution_image)
 
-    # Contour detection
     attribution_gray = cv2.cvtColor(np.array(attribution_image_real), cv2.COLOR_RGB2GRAY)
     _, binary_mask = cv2.threshold(attribution_gray, 128, 255, cv2.THRESH_BINARY)
     contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    contour_image = np.array(attribution_image_real)
-    cv2.drawContours(contour_image, contours, -1, (255, 0, 0), 2)
 
-…
+    original_image = convert_tensor_to_pil(pixel_values.squeeze(0).numpy())
+    original_image_np = np.array(original_image)
+    cv2.drawContours(original_image_np, contours, -1, (255, 0, 0), 2)
+
+    return attribution_image_real, Image.fromarray(original_image_np)
 
 # Create the Gradio interface
-…
-)
-…
-)
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(label="Input Image", interactive=True)
+            question_input = gr.Textbox(label="Question", lines=2, interactive=True)
+            clear_button = gr.Button("Clear")
+        with gr.Column():
+            predict_button = gr.Button("Answer")
+            prediction_output = gr.Textbox(label="Answer", lines=2, interactive=False)
+            attribution_button = gr.Button("Generate Attribution")
+    with gr.Row():
+        attribution_image_1 = gr.Image(label="Attribution Image", interactive=False)
+        attribution_image_2 = gr.Image(label="Attribution with Contours", interactive=False)
+
+    # Bind button events
+    predict_button.click(predict_text, inputs=[input_image, question_input], outputs=prediction_output)
+    attribution_button.click(generate_attribution, inputs=[input_image, question_input], outputs=[attribution_image_1, attribution_image_2])
+    clear_button.click(lambda: (None, "", ""), outputs=[input_image, question_input, prediction_output])
+
 demo.launch()
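Note: the attribution call itself sits just above the last hunk's context window, so only its closing argument (return_convergence_delta=True) is visible. That keyword is characteristic of Captum's attribution API; below is a minimal runnable sketch of such a call, using IntegratedGradients and a toy forward function. The method choice and every name except return_convergence_delta are assumptions for illustration, not the Space's actual code.

import torch
from captum.attr import IntegratedGradients

# Toy stand-in for the real VQA forward pass (assumption): per-channel
# means serve as three "class" scores so the example runs on its own.
def forward_func(pixel_values):
    return pixel_values.mean(dim=(2, 3))

pixel_values = torch.rand(1, 3, 224, 224)
ig = IntegratedGradients(forward_func)
# With return_convergence_delta=True, attribute() also returns the
# integral-approximation error, matching the tail visible in the diff.
attributions, delta = ig.attribute(
    pixel_values,
    target=0,  # index of the predicted class
    return_convergence_delta=True,
)
print(attributions.shape, delta.item())

The attributions tensor has the same shape as pixel_values, which is why the diff can squeeze() it and post-process it as an image.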
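Both functions also depend on convert_tensor_to_pil, which is defined earlier in app.py and lies outside this diff. A minimal sketch of what such a helper could look like, assuming it must handle both the uint8 attribution array and the ViT-normalized CHW float pixel_values; this body is an illustration, not the file's real implementation.

import numpy as np
from PIL import Image

def convert_tensor_to_pil(array):
    # Hypothetical helper: accepts CHW or HWC, float or uint8 arrays.
    arr = np.asarray(array)
    if arr.ndim == 3 and arr.shape[0] in (1, 3):  # CHW -> HWC
        arr = arr.transpose(1, 2, 0)
    if arr.dtype != np.uint8:
        # Rescale arbitrary float ranges (e.g. ViT-normalized values) to 0..255.
        arr = arr - arr.min()
        arr = arr / max(float(arr.max()), 1e-8)
        arr = np.uint8(255 * arr)
    return Image.fromarray(arr.squeeze())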