Spaces:

Sleepyriizi
/

Combined_Model

Running

App Files Files Community

Sleepyriizi committed on Jun 2

Commit

868e837

1 Parent(s): 85d85cc

init

Browse files

Files changed (5) hide show

README.md +0 -12
SuSy.pt +3 -0
app.py +272 -4
requirements.txt +31 -0
resnet_inception.pth +3 -0

README.md DELETED Viewed

@@ -1,12 +0,0 @@
----
-title: Combined Model
-emoji: 🏃
-colorFrom: pink
-colorTo: red
-sdk: gradio
-sdk_version: 5.32.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

SuSy.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa10fae300ee2742c7a373b6c3332c2595b461954b8f5616d2d382ef2751020e
+size 50810392

app.py CHANGED Viewed

@@ -1,7 +1,275 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+"""
+Unified AI-Image & Deepfake Detector
+===================================
+• Combines a generic AI-image detector (Swin-V2 + SuSy) *and*
+  a deepfake-specialist face detector (Inception-ResNet V1).
+• Always runs both experts → fuses their calibrated scores.
+• Works on images **and** short videos (≤ 30 s).
+Add/keep in requirements.txt (pin the versions there):
+torch torchvision facenet-pytorch transformers grad-cam torchcam captum timm
+mediapipe opencv-python-headless pillow scikit-image matplotlib
+gradio fpdf pandas numpy absl-py ttach
+"""
+# ───────────────────── bootstrap for extra wheels ──────────────────────
+import os, uuid, warnings, math, tempfile
+from pathlib import Path
+from typing import List, Tuple
+warnings.filterwarnings("ignore")
+def _ensure_deps():
+    try:
+        import mediapipe, fpdf  # noqa: F401
+    except ImportError:
+        os.system("pip install --quiet --upgrade mediapipe fpdf")
+_ensure_deps()
+# ─────────────────────────────── imports ───────────────────────────────
+import cv2
 import gradio as gr
+import numpy as np
+import torch
+import torch.nn.functional as F
+from PIL import Image
+from fpdf import FPDF
+import mediapipe as mp
+from facenet_pytorch import InceptionResnetV1, MTCNN
+from pytorch_grad_cam import GradCAM
+from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+from torchvision import transforms
+from transformers import AutoImageProcessor, AutoModelForImageClassification
+from torchcam.methods import GradCAM as TCGradCAM
+from captum.attr import Saliency
+from skimage.feature import graycomatrix, graycoprops
+import matplotlib.pyplot as plt
+import pandas as pd
+# ───────────────────────── runtime / models ────────────────────────────
+# Only let matplotlib log at ERROR level.
+plt.set_loglevel("ERROR")
+# One device shared by every model in this file: CUDA when available, else CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Deep-fake specialist
+# Face detector used to crop a face before classification.
+# NOTE(review): post_process=False presumably leaves crops in the 0-255 range,
+# which the division by 255 in _predict_image relies on — confirm.
+_face_det = MTCNN(select_largest=False, post_process=False, device=device).eval().to(device)
+# Single-output classifier; the weights come from the "model_state_dict" entry of
+# the local checkpoint, loaded on CPU first and moved to `device` afterwards.
+_df_model = InceptionResnetV1(pretrained="vggface2", classify=True, num_classes=1, device=device)
+# NOTE(review): torch.load unpickles the file — only ship checkpoints from a trusted source.
+_df_model.load_state_dict(torch.load("resnet_inception.pth", map_location="cpu")["model_state_dict"])
+_df_model.to(device).eval()
+# Grad-CAM explainer hooked on the last module of block8.branch1.
+# NOTE(review): the `use_cuda` keyword ties this call to the grad-cam release in
+# requirements.txt — re-check the signature before upgrading.
+_df_cam = GradCAM(_df_model, target_layers=[_df_model.block8.branch1[-1]],
+                  use_cuda=device.type == "cuda")
+# Helper: robust layer fetch
+def _get_layer(model, name: str):
+    mods = dict(model.named_modules())
+    return mods.get(name) or next(m for n, m in mods.items() if n.endswith(name))
+# Binary AI-image detector (Swin-V2)
+# Hub repository id of the binary (two-logit) AI-image classifier.
+BIN_ID = "haywoodsloan/ai-image-detector-deploy"
+_bin_proc = AutoImageProcessor.from_pretrained(BIN_ID)
+_bin_mod  = AutoModelForImageClassification.from_pretrained(BIN_ID).to(device).eval()
+# Layer whose activations feed the torchcam Grad-CAM extractor; resolved by
+# exact or suffix match through _get_layer.
+_CAM_LAYER_BIN = "encoder.layers.3.blocks.1.layernorm_after"
+_bin_cam = TCGradCAM(_bin_mod, target_layer=_get_layer(_bin_mod, _CAM_LAYER_BIN))
+# Generator classifier (SuSy — ScriptModule → Captum only)
+_susy_mod = torch.jit.load("SuSy.pt").to(device).eval()
+# Display names zipped with SuSy output columns 1..5 in _susy_predict (column 0 is dropped).
+# NOTE(review): the order must match the model's own class order — verify against the SuSy labels.
+_GEN_CLASSES = ["Stable Diffusion 1.x", "DALL·E 3",
+                "MJ V5/V6", "Stable Diffusion XL", "MJ V1/V2"]
+# _PATCH: edge length in pixels of the square patches cut in _susy_predict.
+# _TOP:   number of highest-contrast patches that are sent to SuSy.
+_PATCH, _TOP = 224, 5
+_to_tensor = transforms.ToTensor()
+# PIL image → tensor → single-channel tensor, used for the GLCM contrast ranking.
+_to_gray   = transforms.Compose([transforms.PILToTensor(), transforms.Grayscale()])
+# ─────────────── calibration placeholders (optional tune) ──────────────
+_calib_df_slope, _calib_df_inter = 1.0, 0.0
+_calib_ai_slope, _calib_ai_inter = 1.0, 0.0
+def _calibrate_df(p: float) -> float:
+    return 1 / (1 + math.exp(-(_calib_df_slope * (p + _calib_df_inter))))
+def _calibrate_ai(p: float) -> float:
+    return 1 / (1 + math.exp(-(_calib_ai_slope * (p + _calib_ai_inter))))
+# ───────────────────────────── misc helpers ────────────────────────────
+# Half-width of the band around 0.5 inside which _verdict answers "uncertain".
+UNCERTAIN_GAP = 0.10
+# MIN_FRAMES:  despite the name, the cap on preview frames _predict_video keeps for its gallery.
+# MAX_SAMPLES: upper bound on the number of frame indices _sample_idx returns.
+MIN_FRAMES, MAX_SAMPLES = 4, 20
+def _extract_landmarks(rgb: np.ndarray) -> Tuple[np.ndarray, np.ndarray | None]:
+    mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)
+    res  = mesh.process(rgb); mesh.close()
+    if not res.multi_face_landmarks:
+        return rgb, None
+    h, w, _ = rgb.shape
+    out = rgb.copy()
+    for lm in res.multi_face_landmarks[0].landmark:
+        cx, cy = int(lm.x * w), int(lm.y * h)
+        cv2.circle(out, (cx, cy), 1, (0, 255, 0), -1)
+    return out, None
+def _overlay_cam(cam: np.ndarray, base: np.ndarray) -> Image.Image:
+    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)
+    heat = Image.fromarray((plt.cm.jet(cam)[:, :, :3] * 255).astype(np.uint8))\
+                 .resize((base.shape[1], base.shape[0]), Image.BICUBIC)
+    return Image.blend(Image.fromarray(base).convert("RGBA"), heat.convert("RGBA"), alpha=0.45)
+def _render_pdf(title: str, verdict: str, conf: dict, pages: List[Image.Image]) -> str:
+    out = Path(f"/tmp/report_{uuid.uuid4().hex}.pdf")
+    pdf = FPDF(); pdf.set_auto_page_break(True, 15); pdf.add_page()
+    pdf.set_font("Helvetica", size=14); pdf.cell(0, 10, title, ln=True, align="C")
+    pdf.ln(4); pdf.set_font("Helvetica", size=12)
+    pdf.multi_cell(0, 6, f"Verdict: {verdict}\n"
+                         f"Confidence → Real {conf['real']:.3f}  Fake {conf['fake']:.3f}")
+    for idx, img in enumerate(pages):
+        pdf.ln(4); pdf.set_font("Helvetica", size=11)
+        pdf.cell(0, 6, f"Figure {idx+1}", ln=True)
+        tmp = Path(tempfile.mktemp(suffix=".jpg")); img.save(tmp)
+        pdf.image(str(tmp), x=10, w=90); tmp.unlink(missing_ok=True)
+    pdf.output(out)
+    return str(out)
+# ────────────────────────── SuSy helpers (saliency) ────────────────────
+def _susy_cam(tensor: torch.Tensor, class_idx: int) -> np.ndarray:
+    sal = Saliency(_susy_mod)
+    grad = sal.attribute(tensor, target=class_idx).abs().mean(1, keepdim=True)
+    return grad.squeeze().detach().cpu().numpy()
+def _susy_predict(img: Image.Image):
+    w, h = img.size
+    npx, npy = max(1, w // _PATCH), max(1, h // _PATCH)
+    patches  = np.zeros((npx * npy, _PATCH, _PATCH, 3), dtype=np.uint8)
+    for i in range(npx):
+        for j in range(npy):
+            x, y = i * _PATCH, j * _PATCH
+            patches[i*npy + j] = np.array(img.crop((x, y, x+_PATCH, y+_PATCH))
+                                              .resize((_PATCH, _PATCH)))
+    contrasts = []
+    for p in patches:
+        g = _to_gray(Image.fromarray(p)).squeeze(0).numpy()
+        glcm = graycomatrix(g, [5], [0], 256, symmetric=True, normed=True)
+        contrasts.append(graycoprops(glcm, "contrast")[0, 0])
+    idx   = np.argsort(contrasts)[::-1][:_TOP]
+    tens  = torch.from_numpy(patches[idx].transpose(0, 3, 1, 2)).float() / 255.0
+    with torch.no_grad():
+        probs = _susy_mod(tens.to(device)).softmax(-1).mean(0).cpu().numpy()[1:]
+    return dict(zip(_GEN_CLASSES, probs))
+# ───────────────────────────── fusion math ─────────────────────────────
+def _fuse(p_ai: float, p_df: float) -> float:
+    """Combine two fake-probabilities as ``1 - (1 - p_ai) * (1 - p_df)``.
+
+    For inputs in [0, 1] the result is never below the larger input, and an
+    input of 0 leaves the other one unchanged.
+    """
+    return 1 - (1 - p_ai) * (1 - p_df)
+def _verdict(p: float) -> str:
+    return "uncertain" if abs(p - 0.5) <= UNCERTAIN_GAP else ("fake" if p > 0.5 else "real")
+# ─────────────────────────── IMAGE PIPELINE ────────────────────────────
+def _predict_image(pil: Image.Image):
+    gallery: List[Image.Image] = []
+    # Deep-fake path
+    try:
+        face = _face_det(pil)
+    except Exception:
+        face = None
+    if face is not None:
+        ft = F.interpolate(face.unsqueeze(0), (256, 256), mode="bilinear",
+                           align_corners=False).float() / 255.0
+        p_df_raw = torch.sigmoid(_df_model(ft.to(device))).item()
+        p_df = _calibrate_df(p_df_raw)
+        crop_np = (ft.squeeze(0).permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
+        cam_df  = _df_cam(ft, [ClassifierOutputTarget(0)])[0]
+        gallery.append(_overlay_cam(cam_df, crop_np))
+        gallery.append(Image.fromarray(_extract_landmarks(
+            cv2.cvtColor(np.array(pil), cv2.COLOR_BGR2RGB))[0]))
+    else:
+        p_df = 0.5
+    # Binary AI model
+    inp_bin = _bin_proc(images=pil, return_tensors="pt").to(device)
+    logits  = _bin_mod(**inp_bin).logits.softmax(-1)[0]
+    p_ai_raw = logits[0].item()
+    p_ai = _calibrate_ai(p_ai_raw)
+    winner_idx = 0 if p_ai_raw >= logits[1].item() else 1
+    inp_bin_h = {k: v.clone().detach().requires_grad_(True) for k, v in inp_bin.items()}
+    cam_bin = _bin_cam(winner_idx, scores=_bin_mod(**inp_bin_h).logits)[0]
+    gallery.append(_overlay_cam(cam_bin, np.array(pil)))
+    # Generator breakdown (SuSy) if AI
+    bar_plot = gr.update(visible=False)
+    if p_ai_raw > logits[1].item():
+        gen_probs = _susy_predict(pil)
+        bar_plot  = gr.update(value=pd.DataFrame(gen_probs.items(), columns=["class", "prob"]),
+                              visible=True)
+        susy_in   = _to_tensor(pil.resize((224, 224))).unsqueeze(0).to(device)
+        g_idx     = _susy_mod(susy_in)[0, 1:].argmax().item() + 1
+        cam_susy  = _susy_cam(susy_in, g_idx)
+        gallery.append(_overlay_cam(cam_susy, np.array(pil)))
+    # Fusion
+    p_final = _fuse(p_ai, p_df)
+    verdict = _verdict(p_final)
+    conf    = {"real": round(1-p_final, 4), "fake": round(p_final, 4)}
+    pdf     = _render_pdf("Unified Detector", verdict, conf, gallery[:3])
+    return verdict, conf, gallery, bar_plot, pdf
+# ─────────────────────────── VIDEO PIPELINE ────────────────────────────
+def _sample_idx(n):  # max 20 evenly spaced
+    return list(range(n)) if n <= MAX_SAMPLES else np.linspace(0, n-1, MAX_SAMPLES, dtype=int)
+def _predict_video(path: str):
+    cap = cv2.VideoCapture(path); total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 1
+    probs, frames = [], []
+    for i in _sample_idx(total):
+        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
+        ok, frm = cap.read()
+        if not ok:
+            continue
+        pil = Image.fromarray(cv2.cvtColor(frm, cv2.COLOR_BGR2RGB))
+        verdict, conf, _, _, _ = _predict_image(pil)
+        probs.append(conf["fake"])
+        if len(frames) < MIN_FRAMES:
+            frames.append(Image.fromarray(frm))
+    cap.release()
+    if not probs:
+        blank = Image.new("RGB", (256, 256))
+        return "No frames analysed", {"real": 0, "fake": 0}, [blank]
+    p_final = float(np.mean(probs))
+    return _verdict(p_final), {"real": round(1-p_final, 4), "fake": round(p_final, 4)}, frames
+# ───────────────────────────────── UI ──────────────────────────────────
+# CSS overrides: hide the footer and any element with class or id "logo".
+_css = "footer{visibility:hidden!important}.logo,#logo{display:none!important}"
+# Two-tab layout: "Image" drives _predict_image, "Video" drives _predict_video.
+with gr.Blocks(css=_css, title="Unified AI-Fake & Deepfake Detector") as demo:
+    gr.Markdown("""
+    ## Unified AI-Fake & Deepfake Detector
+    Upload an **image** or a short **video**.
+    The app fuses two complementary models, then shows heat-maps & a PDF report.
+    """)
+    with gr.Tab("Image"):
+        with gr.Row():
+            with gr.Column(scale=1):
+                # type="pil" — the handler receives a PIL image.
+                img_in = gr.Image(label="Upload image", type="pil")
+                btn_i  = gr.Button("Analyze")
+            with gr.Column(scale=2):
+                txt_v  = gr.Textbox(label="Verdict", interactive=False)
+                lbl_c  = gr.Label(label="Confidence")
+        gal   = gr.Gallery(label="Explanations", columns=3, height=320)
+        # Hidden by default; _predict_image returns a gr.update that may reveal it.
+        bar   = gr.BarPlot(x="class", y="prob", title="Likely generator",
+                           y_label="probability", visible=False)
+        pdf_f = gr.File(label="Download PDF report")
+        # Output order must match the 5-tuple returned by _predict_image.
+        btn_i.click(_predict_image, img_in, [txt_v, lbl_c, gal, bar, pdf_f])
+    with gr.Tab("Video"):
+        with gr.Row():
+            with gr.Column(scale=1):
+                vid_in = gr.Video(label="Upload MP4/AVI", format="mp4")
+                btn_v  = gr.Button("Analyze")
+            with gr.Column(scale=2):
+                txt_vv = gr.Textbox(label="Verdict", interactive=False)
+                lbl_cv = gr.Label(label="Confidence")
+        gal_v = gr.Gallery(label="Sample frames", columns=4, height=240)
+        # Output order must match the 3-tuple returned by _predict_video.
+        btn_v.click(_predict_video, vid_in, [txt_vv, lbl_cv, gal_v])
+# Runs at import time (no __main__ guard).
+# NOTE(review): share=True asks for a public share link — confirm it is wanted
+# where this app is hosted.
+demo.launch(share=True, show_api=False)

requirements.txt ADDED Viewed

	@@ -0,0 +1,31 @@

+torch==2.1.2
+torchvision==0.16.2
+torchaudio==2.1.2          # optional but same CUDA tag
+# vision / CAM libs
+facenet-pytorch==2.5.2
+grad-cam==1.4.6
+torchcam==0.4.0
+captum==0.8.0
+ttach==0.0.3               # grad-cam helper
+# AI-detector deps
+transformers==4.52.4
+timm==1.0.15
+huggingface_hub>=0.22
+# utils
+opencv-python-headless==4.7.0.72
+mediapipe==0.10.21
+Pillow>=10.1               # <── drop the old pin
+scikit-image==0.25.2       # requires Pillow ≥ 10.1
+scikit-learn==1.6.1
+matplotlib>=3.8
+numpy>=1.26
+pandas
+absl-py==2.3.0             # mediapipe dep
+# UI
+gradio==5.23.2
+pydantic==2.10.6
+wheel

resnet_inception.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:794ebe83c6a7d7959c30c175030b4885e2b9fa175f1cc3e582236595d119f52b
+size 282395989