fancyfeast committed on
Commit
c6034c4
·
1 Parent(s): 1c7fa88

Initial commit

Browse files
Files changed (4) hide show
  1. app.py +138 -0
  2. far5y1y5-8000.pt +3 -0
  3. requirements.txt +2 -0
  4. yolo11x-train28-best.pt +3 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ from ultralytics import YOLO
4
+ import torchvision.transforms.functional as TVF
5
+ from transformers import Owlv2VisionModel
6
+ from torch import nn
7
+ import torch
8
+ import torch.nn.functional as F
9
+
10
+
11
+ # OWLv2 classification head
12
class DetectorModelOwl(nn.Module):
    """Two-class image classifier: frozen OWLv2 vision encoder + small MLP head.

    The encoder's per-token features are passed through
    dropout -> LayerNorm -> Linear -> GELU, max-pooled over the token axis,
    normalised again and projected to two logits.
    """

    owl: Owlv2VisionModel

    def __init__(self, model_path: str, dropout: float, n_hidden: int = 768):
        """Build the model.

        Args:
            model_path: Identifier or path handed to
                ``Owlv2VisionModel.from_pretrained``.
            dropout: Dropout probability used by both dropout layers of the head.
            n_hidden: Feature width expected from the encoder (input width of
                the head).
        """
        super().__init__()

        owl = Owlv2VisionModel.from_pretrained(model_path)
        assert isinstance(owl, Owlv2VisionModel)
        self.owl = owl
        # The encoder is frozen; only the head below has trainable parameters.
        self.owl.requires_grad_(False)
        self.transforms = None

        # NOTE: attribute names are state-dict keys - do not rename them.
        self.dropout1 = nn.Dropout(dropout)
        self.ln1 = nn.LayerNorm(n_hidden, eps=1e-5)
        self.linear1 = nn.Linear(n_hidden, n_hidden * 2)
        self.act1 = nn.GELU()
        self.dropout2 = nn.Dropout(dropout)
        self.ln2 = nn.LayerNorm(n_hidden * 2, eps=1e-5)
        self.linear2 = nn.Linear(n_hidden * 2, 2)

    def forward(self, pixel_values: torch.Tensor, labels: torch.Tensor | None = None):
        """Compute class logits, and the cross-entropy loss when labels are given.

        Args:
            pixel_values: Batch of preprocessed images fed to the encoder.
            labels: Optional class targets for ``F.cross_entropy``.

        Returns:
            ``(logits,)`` when ``labels`` is None, otherwise ``(logits, loss)``.
        """
        with torch.autocast("cuda", dtype=torch.bfloat16):
            # Embed the image. Only the final hidden state is consumed, so the
            # per-layer hidden states are not requested (avoids keeping every
            # layer's activations alive).
            outputs = self.owl(pixel_values=pixel_values)
            x = outputs.last_hidden_state  # B, N, C

            # Per-token projection
            x = self.dropout1(x)
            x = self.ln1(x)
            x = self.linear1(x)
            x = self.act1(x)

            # Max-pool over the token axis, then normalise
            x = self.dropout2(x)
            x, _ = x.max(dim=1)
            x = self.ln2(x)

            # Classification logits
            x = self.linear2(x)

        # NOTE(review): the source's indentation was lost; the loss is assumed
        # to be computed after leaving the autocast block - confirm.
        if labels is not None:
            loss = F.cross_entropy(x, labels)
            return (x, loss)

        return (x,)
58
+
59
+
60
def owl_predict(image: Image.Image) -> bool:
    """Classify *image* with the module-level OWLv2 ``model``.

    The image is pasted at the top-left of a grey square canvas, resized to
    960x960, scaled to [0, 1] and normalised per channel before a single-image
    batch is run on the GPU.

    Args:
        image: The PIL image to classify.

    Returns:
        True when the predicted class index is 1, False otherwise.
    """
    # Pad to square (image anchored at the top-left, grey fill elsewhere)
    big_side = max(image.size)
    new_image = Image.new("RGB", (big_side, big_side), (128, 128, 128))
    new_image.paste(image, (0, 0))

    # Resize to 960x960
    preped = new_image.resize((960, 960), Image.BICUBIC)  # Bicubic performed best in my tests (even compared to Lanczos)

    # Convert to tensor and normalize
    preped = TVF.pil_to_tensor(preped)
    preped = preped / 255.0
    input_image = TVF.normalize(preped, [0.48145466, 0.4578275, 0.40821073], [0.26862954, 0.26130258, 0.27577711])

    # Run. Inference only: disable autograd so no graph is recorded through
    # the (trainable) classification head on every request.
    with torch.no_grad():
        logits, = model(input_image.to('cuda').unsqueeze(0), None)
        probs = F.softmax(logits, dim=1)
        prediction = torch.argmax(probs.cpu(), dim=1)

    return prediction.item() == 1
82
+
83
+
84
def yolo_predict(image: Image.Image, conf_threshold: float | None = None) -> Image.Image:
    """Run the module-level ``yolo_model`` on *image* and return the annotated image.

    Args:
        image: The PIL image to run detection on.
        conf_threshold: Optional detection confidence threshold forwarded to
            YOLO as ``conf``. When None the argument is omitted, so the
            library's own default applies (the previous behaviour).

    Returns:
        A PIL image with the detections drawn on it.
    """
    # Only pass ``conf`` when the caller supplied one, so existing callers
    # keep the library default.
    extra_args = {} if conf_threshold is None else {"conf": conf_threshold}
    results = yolo_model(image, imgsz=1024, augment=True, iou=0.5, **extra_args)
    assert len(results) == 1
    result = results[0]
    im_array = result.plot()
    # Reverse the channel axis of the plotted array before building the PIL image.
    im = Image.fromarray(im_array[..., ::-1])

    return im


def predict(image: Image.Image, conf_threshold: float):
    """Run both detectors on *image* for the Gradio UI.

    Args:
        image: The uploaded PIL image.
        conf_threshold: Confidence threshold from the UI slider, applied to
            the YOLO detections.

    Returns:
        A tuple of (YOLO-annotated image, OWLv2 verdict text).

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Submitting with no upload hands us None; surface a readable message
        # instead of an AttributeError from deep inside preprocessing.
        raise gr.Error("Please provide an image first.")

    # OWLv2
    owl_prediction = owl_predict(image)
    label_owl = "Watermarked" if owl_prediction else "Not Watermarked"

    # YOLO (the slider value was previously accepted but never used)
    yolo_image = yolo_predict(image, conf_threshold)

    return yolo_image, f"OWLv2 Prediction: {label_owl}"
103
+
104
+
105
# OWLv2 classifier: build the architecture, restore the trained weights from the
# local checkpoint (deserialised on CPU), then switch to eval mode on the GPU.
model = DetectorModelOwl("google/owlv2-base-patch16-ensemble", dropout=0.0)
model.load_state_dict(
    torch.load("far5y1y5-8000.pt", map_location="cpu"),
)
model = model.eval().cuda()

# YOLO detector loaded from the local weights file.
yolo_model = YOLO("yolo11x-train28-best.pt")
113
+
114
+
115
# NOTE: ``gradio_app`` is not used below; it is retained only so the module
# keeps exposing the same names as before.
gradio_app = gr.Blocks()

# UI: input image + confidence slider on the left, YOLO overlay and OWLv2
# verdict on the right.
with gr.Blocks() as app:
    gr.HTML(
        """
        <h1>Watermark Detection</h1>
        """
    )

    with gr.Row():
        with gr.Column():
            image = gr.Image(type="pil", label="Image")
            # Slider and Button take their initial value / caption via ``value``.
            conf_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Confidence Threshold")
            btn_submit = gr.Button(value="Detect Watermarks")

        with gr.Column():
            image_yolo = gr.Image(type="pil", label="YOLO Detections")
            label_owl = gr.Label(label="OWLv2 Prediction: N/A")

    # Event listeners have to be registered inside the Blocks context.
    btn_submit.click(fn=predict, inputs=[image, conf_threshold], outputs=[image_yolo, label_owl])


if __name__ == "__main__":
    app.launch()
far5y1y5-8000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f600472e0bf8d8051046541230dd736bc862d5c964ff591b719abf7d80ed1835
3
+ size 358478062
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ultralytics==8.3.55
2
+ transformers==4.45.2
yolo11x-train28-best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80b022e94239a65a3b50173096523f202ca39608b9063dd391d043323a51326e
3
+ size 114512018