Sleepyriizi commited on
Commit
868e837
·
1 Parent(s): 85d85cc
Files changed (5) hide show
  1. README.md +0 -12
  2. SuSy.pt +3 -0
  3. app.py +272 -4
  4. requirements.txt +31 -0
  5. resnet_inception.pth +3 -0
README.md DELETED
@@ -1,12 +0,0 @@
1
- ---
2
- title: Combined Model
3
- emoji: πŸƒ
4
- colorFrom: pink
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.32.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
SuSy.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa10fae300ee2742c7a373b6c3332c2595b461954b8f5616d2d382ef2751020e
3
+ size 50810392
app.py CHANGED
@@ -1,7 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ """
2
+ Unified AI-Image & Deepfake Detector
3
+ ===================================
4
+ • Combines a generic AI-image detector (Swin-V2 + SuSy) *and*
5
+ a deepfake-specialist face detector (Inception-ResNet V1).
6
+ • Always runs both experts → fuses their calibrated scores.
7
+ • Works on images **and** short videos (≤ 30 s).
8
+
9
+ Add/keep in requirements.txt (versions pinned earlier):
10
+ torch torchvision facenet-pytorch transformers torchcam captum timm
11
+ mediapipe opencv-python-headless pillow scikit-image matplotlib
12
+ gradio fpdf pandas numpy absl-py ttach
13
+ """
14
+
15
+ # ───────────────────── bootstrap for extra wheels ──────────────────────
16
+ import os, uuid, warnings, math, tempfile
17
+ from pathlib import Path
18
+ from typing import List, Tuple
19
+
20
+ warnings.filterwarnings("ignore")
21
+
22
def _ensure_deps():
    """Install ``mediapipe`` and ``fpdf`` on the fly when either is missing.

    Best effort only: a failed installation is reported on stderr and the
    function returns normally, so the real ``ImportError`` surfaces at the
    module-level imports that follow.
    """
    import importlib
    import subprocess
    import sys

    try:
        import mediapipe, fpdf  # noqa: F401
    except ImportError:
        # Invoke pip through the running interpreter so the wheels land in
        # the environment that will import them (a bare ``pip`` on PATH may
        # belong to a different Python). A list argv avoids the shell.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install",
             "--quiet", "--upgrade", "mediapipe", "fpdf"],
            check=False,
        )
        if result.returncode != 0:
            print(
                f"[bootstrap] pip exited with status {result.returncode}; "
                "mediapipe/fpdf may be unavailable",
                file=sys.stderr,
            )
        # Let the import system notice packages installed after start-up.
        importlib.invalidate_caches()
27
+
28
+ _ensure_deps()
29
+
30
+ # ─────────────────────────────── imports ───────────────────────────────
31
+ import cv2
32
  import gradio as gr
33
+ import numpy as np
34
+ import torch
35
+ import torch.nn.functional as F
36
+ from PIL import Image
37
+ from fpdf import FPDF
38
+ import mediapipe as mp
39
+ from facenet_pytorch import InceptionResnetV1, MTCNN
40
+ from pytorch_grad_cam import GradCAM
41
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
42
+ from torchvision import transforms
43
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
44
+ from torchcam.methods import GradCAM as TCGradCAM
45
+ from captum.attr import Saliency
46
+ from skimage.feature import graycomatrix, graycoprops
47
+ import matplotlib.pyplot as plt
48
+ import pandas as pd
49
+
50
+ # ───────────────────────── runtime / models ────────────────────────────
51
# Only let matplotlib log messages at ERROR level or above.
plt.set_loglevel("ERROR")
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deep-fake specialist
# Face detector: post_process=False, select_largest=False.
_face_det = MTCNN(
    select_largest=False,
    post_process=False,
    device=device,
).eval().to(device)
# Single-logit Inception-ResNet V1 classifier; weights come from the local
# checkpoint's "model_state_dict" entry.
_df_model = InceptionResnetV1(
    pretrained="vggface2",
    classify=True,
    num_classes=1,
    device=device,
)
_df_model.load_state_dict(
    torch.load("resnet_inception.pth", map_location="cpu")["model_state_dict"]
)
_df_model.to(device).eval()
# Grad-CAM explainer hooked on the last module of block8.branch1.
_df_cam = GradCAM(
    _df_model,
    target_layers=[_df_model.block8.branch1[-1]],
    use_cuda=device.type == "cuda",
)
61
+
62
+ # Helper: robust layer fetch
63
def _get_layer(model, name: str):
    """Return the sub-module of *model* registered under *name*.

    An exact match on the qualified name wins; otherwise the first module
    (in ``named_modules()`` order) whose qualified name ends with *name* is
    returned.

    Raises:
        KeyError: if no module name equals or ends with *name*.
    """
    mods = dict(model.named_modules())
    # Test membership explicitly: some containers (e.g. an empty
    # nn.Sequential) are falsy, so ``mods.get(name) or ...`` could skip a
    # perfectly valid exact match.
    if name in mods:
        return mods[name]
    for qualified, module in mods.items():
        if qualified.endswith(name):
            return module
    # A bare StopIteration from next() would be opaque to the caller.
    raise KeyError(f"no sub-module named or ending with {name!r}")
66
+
67
+ # Binary AI-image detector (Swin-V2)
68
# Binary AI-image detector (Swin-V2): processor + classifier from the Hub.
BIN_ID = "haywoodsloan/ai-image-detector-deploy"
_bin_proc = AutoImageProcessor.from_pretrained(BIN_ID)
_bin_mod = AutoModelForImageClassification.from_pretrained(BIN_ID).to(device).eval()
# Layer whose activations feed the torchcam Grad-CAM extractor.
_CAM_LAYER_BIN = "encoder.layers.3.blocks.1.layernorm_after"
_bin_cam = TCGradCAM(_bin_mod, target_layer=_get_layer(_bin_mod, _CAM_LAYER_BIN))

# Generator classifier (SuSy — ScriptModule → Captum only)
_susy_mod = torch.jit.load("SuSy.pt").to(device).eval()
# Display names for SuSy outputs 1..5 (output 0 is dropped by the callers).
# NOTE(review): the order must match the class order SuSy was trained with —
# confirm against the SuSy model card.
_GEN_CLASSES = ["Stable Diffusion 1.x", "DALL·E 3",
                "MJ V5/V6", "Stable Diffusion XL", "MJ V1/V2"]
# Patch edge length in pixels / number of highest-contrast patches scored.
_PATCH, _TOP = 224, 5
_to_tensor = transforms.ToTensor()
_to_gray = transforms.Compose([transforms.PILToTensor(), transforms.Grayscale()])
81
+
82
+ # ─────────────── calibration placeholders (optional tune) ──────────────
83
# Platt-scaling parameters, applied in logit space:
#     calibrated = sigmoid(slope * logit(p) + intercept)
# The defaults (slope 1, intercept 0) are an exact identity, i.e. "not tuned".
_calib_df_slope, _calib_df_inter = 1.0, 0.0
_calib_ai_slope, _calib_ai_inter = 1.0, 0.0

# Keeps logit() finite for p == 0 or p == 1.
_CALIB_EPS = 1e-7


def _platt(p: float, slope: float, inter: float) -> float:
    """Map probability *p* through ``sigmoid(slope * logit(p) + inter)``."""
    p = min(max(p, _CALIB_EPS), 1.0 - _CALIB_EPS)
    z = slope * math.log(p / (1.0 - p)) + inter
    # Numerically stable sigmoid: never exponentiates a large positive value.
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    e = math.exp(z)
    return e / (1.0 + e)


def _calibrate_df(p: float) -> float:
    """Calibrate the deep-fake model's probability *p* (in [0, 1]).

    The sigmoid is applied to the *logit* of ``p``. Feeding the probability
    itself into a sigmoid would squash every score into roughly
    [0.5, 0.73], so an untuned calibrator could never report "real".
    """
    return _platt(p, _calib_df_slope, _calib_df_inter)


def _calibrate_ai(p: float) -> float:
    """Calibrate the AI-image model's probability *p* (in [0, 1]).

    Same logit-space Platt scaling as ``_calibrate_df``, with its own
    slope and intercept.
    """
    return _platt(p, _calib_ai_slope, _calib_ai_inter)
91
+
92
+ # ───────────────────────────── misc helpers ────────────────────────────
93
# Half-width of the band around 0.5 in which the verdict is "uncertain".
UNCERTAIN_GAP = 0.10
# Number of preview frames kept per video.
MIN_FRAMES = 4
# Upper bound on the number of frames sampled from a video.
MAX_SAMPLES = 20
95
+
96
def _extract_landmarks(rgb: np.ndarray) -> Tuple[np.ndarray, np.ndarray | None]:
    """Draw MediaPipe face-mesh landmarks of the first detected face on *rgb*.

    Args:
        rgb: H x W x 3 image array (unpacked as ``h, w, _``); it is passed to
            FaceMesh unchanged.

    Returns:
        ``(annotated, points)``. With no face detected this is the input
        array itself and ``None``. Otherwise ``annotated`` is a copy of the
        input with one green dot per landmark and ``points`` is an
        ``(N, 2)`` int32 array of ``(x, y)`` pixel coordinates.
    """
    # The context manager closes the graph even when process() raises.
    with mp.solutions.face_mesh.FaceMesh(static_image_mode=True,
                                         max_num_faces=1) as mesh:
        res = mesh.process(rgb)
    if not res.multi_face_landmarks:
        return rgb, None

    h, w, _ = rgb.shape
    out = rgb.copy()
    # Landmarks are normalised to [0, 1]; scale them to pixel coordinates.
    pts = np.array(
        [(int(lm.x * w), int(lm.y * h))
         for lm in res.multi_face_landmarks[0].landmark],
        dtype=np.int32,
    )
    for cx, cy in pts:
        cv2.circle(out, (int(cx), int(cy)), 1, (0, 255, 0), -1)
    return out, pts
107
+
108
def _overlay_cam(cam: np.ndarray, base: np.ndarray) -> Image.Image:
    """Blend a class-activation map over an image.

    Args:
        cam: 2-D activation map. A torch tensor is accepted too, and leading
            singleton dimensions (e.g. a batch axis of size 1) are dropped.
        base: image array the heat-map is resized to and blended with.

    Returns:
        RGBA image: *base* blended with the jet-coloured map at alpha 0.45.

    Raises:
        ValueError: if *cam* is not 2-D after dropping leading singleton axes.
    """
    # Explainers in this file hand back either NumPy arrays or tensors.
    if hasattr(cam, "detach"):
        cam = cam.detach().cpu().numpy()
    cam = np.asarray(cam)
    while cam.ndim > 2 and cam.shape[0] == 1:
        cam = cam[0]
    if cam.ndim != 2:
        raise ValueError(f"expected a 2-D activation map, got shape {cam.shape}")

    # Min-max normalise; the epsilon guards against a constant map.
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)
    heat = Image.fromarray((plt.cm.jet(cam)[:, :, :3] * 255).astype(np.uint8))\
        .resize((base.shape[1], base.shape[0]), Image.BICUBIC)
    return Image.blend(Image.fromarray(base).convert("RGBA"),
                       heat.convert("RGBA"), alpha=0.45)
113
+
114
def _render_pdf(title: str, verdict: str, conf: dict, pages: List[Image.Image]) -> str:
    """Write a one-off PDF report and return its path.

    Args:
        title: centred heading at the top of the report.
        verdict: verdict string printed below the heading.
        conf: mapping with numeric ``"real"`` and ``"fake"`` entries.
        pages: images embedded as "Figure 1", "Figure 2", ...

    Returns:
        Path (as ``str``) of the PDF created in the system temp directory.
    """
    out = Path(tempfile.gettempdir()) / f"report_{uuid.uuid4().hex}.pdf"
    pdf = FPDF()
    pdf.set_auto_page_break(True, 15)
    pdf.add_page()
    pdf.set_font("Helvetica", size=14)
    pdf.cell(0, 10, title, ln=True, align="C")
    pdf.ln(4)
    pdf.set_font("Helvetica", size=12)
    # ASCII arrow: the built-in Helvetica font is Latin-1 only and cannot
    # encode a Unicode arrow.
    pdf.multi_cell(0, 6, f"Verdict: {verdict}\n"
                         f"Confidence -> Real {conf['real']:.3f} Fake {conf['fake']:.3f}")
    for idx, img in enumerate(pages):
        pdf.ln(4)
        pdf.set_font("Helvetica", size=11)
        pdf.cell(0, 6, f"Figure {idx+1}", ln=True)
        # mkstemp creates the file atomically (mktemp only returns a name
        # and is race-prone).
        fd, tmp_name = tempfile.mkstemp(suffix=".jpg")
        os.close(fd)
        tmp = Path(tmp_name)
        try:
            # JPEG has no alpha channel; the overlays in this file are RGBA.
            img.convert("RGB").save(tmp)
            pdf.image(str(tmp), x=10, w=90)
        finally:
            tmp.unlink(missing_ok=True)
    pdf.output(str(out))
    return str(out)
128
+
129
+ # ────────────────────────── SuSy helpers (saliency) ────────────────────
130
def _susy_cam(tensor: torch.Tensor, class_idx: int) -> np.ndarray:
    """Return a Captum saliency map of the SuSy model for one class.

    Args:
        tensor: model input the gradients are taken with respect to.
        class_idx: index of the output whose gradient is attributed.

    Returns:
        NumPy array: absolute gradients averaged over dim 1, with all
        singleton dimensions squeezed out.
    """
    explainer = Saliency(_susy_mod)
    attributions = explainer.attribute(tensor, target=class_idx)
    # Collapse dim 1 into a single magnitude per position.
    heat = attributions.abs().mean(1, keepdim=True)
    return heat.squeeze().detach().cpu().numpy()
134
+
135
def _susy_predict(img: Image.Image):
    """Score the likely generator of *img* with the SuSy patch classifier.

    The image is tiled into ``_PATCH`` x ``_PATCH`` patches, the ``_TOP``
    patches with the highest GLCM contrast are run through SuSy, and the
    softmax outputs are averaged over patches.

    Args:
        img: input picture; it is converted to RGB first, so any PIL mode
            is accepted.

    Returns:
        Dict mapping each name in ``_GEN_CLASSES`` to a float probability
        (SuSy output 0 is dropped).
    """
    # The patch buffer is (N, _PATCH, _PATCH, 3); RGBA / L / P input would
    # not fit without this conversion.
    img = img.convert("RGB")
    w, h = img.size
    npx, npy = max(1, w // _PATCH), max(1, h // _PATCH)
    patches = np.zeros((npx * npy, _PATCH, _PATCH, 3), dtype=np.uint8)
    for i in range(npx):
        for j in range(npy):
            x, y = i * _PATCH, j * _PATCH
            # A crop box of exactly _PATCH x _PATCH always yields that size
            # (PIL pads past the border), so no resize is needed.
            patches[i * npy + j] = np.array(img.crop((x, y, x + _PATCH, y + _PATCH)))

    contrasts = []
    for p in patches:
        g = _to_gray(Image.fromarray(p)).squeeze(0).numpy()
        glcm = graycomatrix(g, [5], [0], 256, symmetric=True, normed=True)
        contrasts.append(graycoprops(glcm, "contrast")[0, 0])

    # Highest-contrast patches first, capped at _TOP.
    idx = np.argsort(contrasts)[::-1][:_TOP]
    tens = torch.from_numpy(patches[idx].transpose(0, 3, 1, 2)).float() / 255.0
    with torch.no_grad():
        probs = _susy_mod(tens.to(device)).softmax(-1).mean(0).cpu().numpy()[1:]
    # Plain Python floats serialise cleanly.
    return dict(zip(_GEN_CLASSES, probs.tolist()))
154
+
155
+ # ───────────────────────────── fusion math ─────────────────────────────
156
def _fuse(p_ai: float, p_df: float) -> float:
    """Combine two fake-probabilities with a noisy-OR.

    The result is one minus the product of the two complements ``1 - p``.
    """
    miss_ai = 1 - p_ai
    miss_df = 1 - p_df
    return 1 - miss_ai * miss_df
158
+
159
def _verdict(p: float) -> str:
    """Turn a fake-probability into "fake", "real" or "uncertain".

    Scores within ``UNCERTAIN_GAP`` of 0.5 (inclusive) are "uncertain".
    """
    if abs(p - 0.5) <= UNCERTAIN_GAP:
        return "uncertain"
    if p > 0.5:
        return "fake"
    return "real"
161
+
162
+ # ─────────────────────────── IMAGE PIPELINE ────────────────────────────
163
def _predict_image(pil: Image.Image):
    """Run both detectors on one image and assemble the UI outputs.

    Args:
        pil: uploaded picture; converted to RGB before any model sees it.

    Returns:
        ``(verdict, conf, gallery, bar_plot, pdf)``: the verdict string, a
        ``{"real", "fake"}`` confidence dict, the explanation images, a
        ``gr.update`` for the generator bar plot, and the PDF report path.

    Raises:
        gr.Error: if no image was supplied.
    """
    if pil is None:
        raise gr.Error("Please upload an image first.")
    # Uploads may be RGBA, greyscale or palettised.
    pil = pil.convert("RGB")
    rgb = np.array(pil)
    gallery: List[Image.Image] = []

    # Deep-fake path
    try:
        face = _face_det(pil)
    except Exception:
        # Best effort: a detector failure is handled like "no face found".
        face = None
    if face is not None:
        ft = F.interpolate(face.unsqueeze(0), (256, 256), mode="bilinear",
                           align_corners=False).float() / 255.0
        with torch.no_grad():
            p_df_raw = torch.sigmoid(_df_model(ft.to(device))).item()
        p_df = _calibrate_df(p_df_raw)
        crop_np = (ft.squeeze(0).permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
        cam_df = _df_cam(ft, [ClassifierOutputTarget(0)])[0]
        gallery.append(_overlay_cam(cam_df, crop_np))
        # ``rgb`` is already RGB; a BGR->RGB swap here would hand FaceMesh
        # swapped channels and tint the landmark picture.
        gallery.append(Image.fromarray(_extract_landmarks(rgb)[0]))
    else:
        p_df = None

    # Binary AI model
    inp_bin = _bin_proc(images=pil, return_tensors="pt").to(device)
    probs_bin = _bin_mod(**inp_bin).logits.softmax(-1)[0]
    # NOTE(review): assumes output 0 is the "AI-generated" class — confirm
    # against the checkpoint's id2label.
    p_ai_raw = probs_bin[0].item()
    p_other_raw = probs_bin[1].item()
    p_ai = _calibrate_ai(p_ai_raw)
    winner_idx = 0 if p_ai_raw >= p_other_raw else 1
    # Second forward pass on grad-enabled copies so the CAM can back-propagate.
    inp_bin_h = {k: v.clone().detach().requires_grad_(True) for k, v in inp_bin.items()}
    cam_bin = _bin_cam(winner_idx, scores=_bin_mod(**inp_bin_h).logits)[0]
    gallery.append(_overlay_cam(cam_bin, rgb))

    # Generator breakdown (SuSy) if AI
    bar_plot = gr.update(visible=False)
    if p_ai_raw > p_other_raw:
        gen_probs = _susy_predict(pil)
        bar_plot = gr.update(
            value=pd.DataFrame(list(gen_probs.items()), columns=["class", "prob"]),
            visible=True,
        )
        susy_in = _to_tensor(pil.resize((224, 224))).unsqueeze(0).to(device)
        # +1 because output 0 is excluded from the argmax.
        g_idx = _susy_mod(susy_in)[0, 1:].argmax().item() + 1
        cam_susy = _susy_cam(susy_in, g_idx)
        gallery.append(_overlay_cam(cam_susy, rgb))

    # Fusion: without a face only the AI-image score counts. A "neutral" 0.5
    # under noisy-OR would push every fused score to >= 0.5, so "real" could
    # never be reported.
    p_final = p_ai if p_df is None else _fuse(p_ai, p_df)
    verdict = _verdict(p_final)
    conf = {"real": round(1 - p_final, 4), "fake": round(p_final, 4)}
    # The report writer saves JPEGs, which cannot hold the overlays' alpha.
    pdf = _render_pdf("Unified Detector", verdict, conf,
                      [g.convert("RGB") for g in gallery[:3]])

    return verdict, conf, gallery, bar_plot, pdf
212
+
213
+ # ─────────────────────────── VIDEO PIPELINE ────────────────────────────
214
def _sample_idx(n):  # max MAX_SAMPLES evenly spaced
    """Return the frame indices to analyse for a clip of *n* frames.

    Every index is returned when ``n <= MAX_SAMPLES``; otherwise
    ``MAX_SAMPLES`` evenly spaced indices from ``0`` to ``n - 1`` inclusive.

    Returns:
        ``list`` of plain Python ``int`` in both cases (empty for
        ``n <= 0``), so callers never receive NumPy scalars from one
        branch and ints from the other.
    """
    if n <= MAX_SAMPLES:
        return list(range(n))
    return np.linspace(0, n - 1, MAX_SAMPLES, dtype=int).tolist()
216
+
217
def _predict_video(path: str):
    """Score a video by averaging per-frame fake probabilities.

    Up to ``MAX_SAMPLES`` frames are analysed with ``_predict_image``; the
    first ``MIN_FRAMES`` successfully read frames are returned as previews.

    Args:
        path: location of the video file (may be empty/None when nothing
            was uploaded).

    Returns:
        ``(verdict, conf, frames)``. When no frame could be read the verdict
        is "No frames analysed", both confidences are 0 and the gallery
        holds a single blank image.
    """
    probs, frames = [], []
    if path:
        cap = cv2.VideoCapture(path)
        try:
            # Some containers report 0 or a negative count; still try frame 0.
            total = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 1)
            for i in _sample_idx(total):
                cap.set(cv2.CAP_PROP_POS_FRAMES, i)
                ok, frm = cap.read()
                if not ok:
                    continue
                pil = Image.fromarray(cv2.cvtColor(frm, cv2.COLOR_BGR2RGB))
                _, conf, _, _, _ = _predict_image(pil)
                probs.append(conf["fake"])
                if len(frames) < MIN_FRAMES:
                    # Keep the RGB image: the raw OpenCV frame is BGR and
                    # would show swapped colours.
                    frames.append(pil)
        finally:
            # Release the capture even if a frame fails to process.
            cap.release()

    if not probs:
        blank = Image.new("RGB", (256, 256))
        return "No frames analysed", {"real": 0, "fake": 0}, [blank]

    p_final = float(np.mean(probs))
    return _verdict(p_final), {"real": round(1 - p_final, 4), "fake": round(p_final, 4)}, frames
237
+
238
+ # ───────────────────────────────── UI ──────────────────────────────────
239
# Hide the Gradio footer and logo.
_css = "footer{visibility:hidden!important}.logo,#logo{display:none!important}"

with gr.Blocks(css=_css, title="Unified AI-Fake & Deepfake Detector") as demo:
    gr.Markdown("""
    ## Unified AI-Fake & Deepfake Detector
    Upload an **image** or a short **video**.
    The app fuses two complementary models, then shows heat-maps & a PDF report.
    """)

    # Image tab: upload on the left, results on the right.
    with gr.Tab("Image"):
        with gr.Row():
            with gr.Column(scale=1):
                img_in = gr.Image(label="Upload image", type="pil")
                btn_i = gr.Button("Analyze")
            with gr.Column(scale=2):
                txt_v = gr.Textbox(label="Verdict", interactive=False)
                lbl_c = gr.Label(label="Confidence")
                gal = gr.Gallery(label="Explanations", columns=3, height=320)
                bar = gr.BarPlot(x="class", y="prob", title="Likely generator",
                                 y_label="probability", visible=False)
                pdf_f = gr.File(label="Download PDF report")

        # Output order mirrors _predict_image's return tuple.
        btn_i.click(_predict_image, img_in, [txt_v, lbl_c, gal, bar, pdf_f])

    # Video tab: same layout, with a gallery of sampled frames.
    with gr.Tab("Video"):
        with gr.Row():
            with gr.Column(scale=1):
                vid_in = gr.Video(label="Upload MP4/AVI", format="mp4")
                btn_v = gr.Button("Analyze")
            with gr.Column(scale=2):
                txt_vv = gr.Textbox(label="Verdict", interactive=False)
                lbl_cv = gr.Label(label="Confidence")
                gal_v = gr.Gallery(label="Sample frames", columns=4, height=240)

        # Output order mirrors _predict_video's return tuple.
        btn_v.click(_predict_video, vid_in, [txt_vv, lbl_cv, gal_v])

demo.launch(share=True, show_api=False)
 
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch==2.1.2
2
+ torchvision==0.16.2
3
+ torchaudio==2.1.2 # optional but same CUDA tag
4
+
5
+ # vision / CAM libs
6
+ facenet-pytorch==2.5.2
7
+ grad-cam==1.4.6
8
+ torchcam==0.4.0
9
+ captum==0.8.0
10
+ ttach==0.0.3 # grad-cam helper
11
+
12
+ # AI-detector deps
13
+ transformers==4.52.4
14
+ timm==1.0.15
15
+ huggingface_hub>=0.22
16
+
17
+ # utils
18
+ opencv-python-headless==4.7.0.72
19
+ mediapipe==0.10.21
20
+ Pillow>=10.1 # <── drop the old pin
21
+ scikit-image==0.25.2 # requires Pillow ≥ 10.1
22
+ scikit-learn==1.6.1
23
+ matplotlib>=3.8
24
+ numpy>=1.26
25
+ pandas
26
+ absl-py==2.3.0 # mediapipe dep
27
+
28
+ # UI
29
+ gradio==5.23.2
30
+ pydantic==2.10.6
31
+ wheel
resnet_inception.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:794ebe83c6a7d7959c30c175030b4885e2b9fa175f1cc3e582236595d119f52b
3
+ size 282395989