Spaces:

Muhusjf
/

XAI-Medical

Sleeping

App Files Community

Muhusystem committed on Oct 27, 2024

Commit

a0803c3

1 Parent(s): fc7ef08

Split text prediction and attribution analysis into separate buttons

Browse files

Files changed (1) hide show

app.py +27 -26

app.py CHANGED Viewed

@@ -61,9 +61,11 @@ feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-2
 # 定义推理函数
 def predict_text(image, text):
     image = Image.fromarray(image)
     image_features = feature_extractor(images=image, return_tensors="pt")
     inputs = tokenizer.encode_plus(
         f"Question: {text} Answer:",
         return_tensors="pt",
@@ -76,6 +78,7 @@ def predict_text(image, text):
     attention_mask = inputs["attention_mask"].long()
     pixel_values = image_features["pixel_values"]
     with torch.no_grad():
         logits = model(input_ids, attention_mask, pixel_values)
         prediction = torch.argmax(logits, dim=1).item()
@@ -84,9 +87,11 @@ def predict_text(image, text):
 # 定义归因分析函数
 def generate_attribution(image, text):
     image = Image.fromarray(image)
     image_features = feature_extractor(images=image, return_tensors="pt")
     inputs = tokenizer.encode_plus(
         f"Question: {text} Answer:",
         return_tensors="pt",
@@ -99,6 +104,7 @@ def generate_attribution(image, text):
     attention_mask = inputs["attention_mask"].long()
     pixel_values = image_features["pixel_values"]
     with torch.no_grad():
         logits = model(input_ids, attention_mask, pixel_values)
         prediction = torch.argmax(logits, dim=1).item()
@@ -111,41 +117,36 @@ def generate_attribution(image, text):
         return_convergence_delta=True
     )
     attribution_image = attributions.squeeze().cpu().numpy()
     attribution_image = (attribution_image - attribution_image.min()) / (attribution_image.max() - attribution_image.min())
     attribution_image = np.uint8(255 * attribution_image)
     attribution_image_real = convert_tensor_to_pil(attribution_image)
     attribution_gray = cv2.cvtColor(np.array(attribution_image_real), cv2.COLOR_RGB2GRAY)
     _, binary_mask = cv2.threshold(attribution_gray, 128, 255, cv2.THRESH_BINARY)
     contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    original_image = convert_tensor_to_pil(pixel_values.squeeze(0).numpy())
-    original_image_np = np.array(original_image)
-    cv2.drawContours(original_image_np, contours, -1, (255, 0, 0), 2)
-    return attribution_image_real, Image.fromarray(original_image_np)
 # 创建 Gradio 界面
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(label="Input Image", type="pil", height=400)
-            question_input = gr.Textbox(label="Question", lines=3, max_lines=3)
-            clear_button = gr.Button("Clear")
-        with gr.Column():
-            with gr.Row():
-                predict_button = gr.Button("Answer")
-            prediction_output = gr.Textbox(label="Answer", lines=3, max_lines=3)
-            with gr.Row():
-                attribution_button = gr.Button("Generate Attribution")
-            with gr.Row():
-                attribution_image_1 = gr.Image(label="Attribution Image", height=400)
-                attribution_image_2 = gr.Image(label="Attribution with Contours", height=400)
-    predict_button.click(predict_text, inputs=[input_image, question_input], outputs=prediction_output)
-    attribution_button.click(generate_attribution, inputs=[input_image, question_input], outputs=[attribution_image_1, attribution_image_2])
-    clear_button.click(lambda: (None, "", ""), outputs=[input_image, question_input, prediction_output])
-# 启动 Gradio 界面
 demo.launch()

 # 定义推理函数
 def predict_text(image, text):
+    # 处理图像
     image = Image.fromarray(image)
     image_features = feature_extractor(images=image, return_tensors="pt")
+    # 处理文本
     inputs = tokenizer.encode_plus(
         f"Question: {text} Answer:",
         return_tensors="pt",
     attention_mask = inputs["attention_mask"].long()
     pixel_values = image_features["pixel_values"]
+    # 推理
     with torch.no_grad():
         logits = model(input_ids, attention_mask, pixel_values)
         prediction = torch.argmax(logits, dim=1).item()
 # 定义归因分析函数
 def generate_attribution(image, text):
+    # 处理图像
     image = Image.fromarray(image)
     image_features = feature_extractor(images=image, return_tensors="pt")
+    # 处理文本
     inputs = tokenizer.encode_plus(
         f"Question: {text} Answer:",
         return_tensors="pt",
     attention_mask = inputs["attention_mask"].long()
     pixel_values = image_features["pixel_values"]
+    # 推理和归因分析
     with torch.no_grad():
         logits = model(input_ids, attention_mask, pixel_values)
         prediction = torch.argmax(logits, dim=1).item()
         return_convergence_delta=True
     )
+    # 归因图像处理
     attribution_image = attributions.squeeze().cpu().numpy()
     attribution_image = (attribution_image - attribution_image.min()) / (attribution_image.max() - attribution_image.min())
     attribution_image = np.uint8(255 * attribution_image)
     attribution_image_real = convert_tensor_to_pil(attribution_image)
+    # 轮廓检测
     attribution_gray = cv2.cvtColor(np.array(attribution_image_real), cv2.COLOR_RGB2GRAY)
     _, binary_mask = cv2.threshold(attribution_gray, 128, 255, cv2.THRESH_BINARY)
     contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contour_image = np.array(attribution_image_real)
+    cv2.drawContours(contour_image, contours, -1, (255, 0, 0), 2)
+    # 原始图像、热图和带轮廓的图像
+    return Image.fromarray(image_features['pixel_values'][0].byte().permute(1, 2, 0).numpy()), Image.fromarray(contour_image)
 # 创建 Gradio 界面
+text_button = gr.Interface(
+    fn=predict_text,
+    inputs=["image", "text"],
+    outputs="text",
+    title="Multi-modal Inference: Text Prediction"
+)
+attribution_button = gr.Interface(
+    fn=generate_attribution,
+    inputs=["image", "text"],
+    outputs=[gr.Image(), gr.Image()],
+    title="Multi-modal Inference: Attribution Analysis"
+)
+demo = gr.TabbedInterface([text_button, attribution_button], ["Text Prediction", "Attribution Analysis"])
 demo.launch()