Update README.md
Browse files
README.md
CHANGED
@@ -4,4 +4,323 @@ license: gpl-3.0
|
|
4 |
pipeline_tag: object-detection
|
5 |
---
|
6 |
|
7 |
-
https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet with ONNX weights to be compatible with Transformers.js.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
pipeline_tag: object-detection
|
5 |
---
|
6 |
|
7 |
+
https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet with ONNX weights to be compatible with Transformers.js.
|
8 |
+
|
9 |
+
## Usage (Transformers.js)
|
10 |
+
|
11 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
|
12 |
+
```bash
|
13 |
+
npm i @xenova/transformers
|
14 |
+
```
|
15 |
+
|
16 |
+
**Example:** Perform object-detection with `Oblix/yolov8x-doclaynet_ONNX`.
|
17 |
+
|
18 |
+
```js
|
19 |
+
import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';

// Load the model (with `quantized: false`) and its matching processor.
const model = await AutoModel.from_pretrained(
    "Oblix/yolov8x-doclaynet_ONNX",
    {
        quantized: false,
    }
);
const processor = await AutoProcessor.from_pretrained("Oblix/yolov8x-doclaynet_ONNX");

// Fetch the sample page, preprocess it and run the model.
const url = 'https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/sample1.png';
const rawImage = await RawImage.fromURL(url);
const { pixel_values } = await processor(rawImage);
const output = await model({ images: pixel_values });

// Post-process:
const permuted = output.output0[0].transpose(1, 0);
// `permuted` is a Tensor of shape [ 8400, 15 ]:
// - 8400 potential bounding boxes
// - 15 parameters for each box:
//   - first 4 are coordinates for the bounding boxes (x-center, y-center, width, height)
//   - the remaining 11 are the probabilities for each class

// Example code to format it nicely:
const result = [];
const threshold = 0.5;
const [scaledHeight, scaledWidth] = pixel_values.dims.slice(-2);
for (const [xc, yc, w, h, ...scores] of permuted.tolist()) {

    // Get pixel values, taking into account the original image size
    const x1 = (xc - w/2) / scaledWidth * rawImage.width;
    const y1 = (yc - h/2) / scaledHeight * rawImage.height;
    const x2 = (xc + w/2) / scaledWidth * rawImage.width;
    const y2 = (yc + h/2) / scaledHeight * rawImage.height;

    // Get best class
    const argmax = scores.reduce((maxIndex, currentVal, currentIndex, arr) => currentVal > arr[maxIndex] ? currentIndex : maxIndex, 0);
    const score = scores[argmax];
    if (score < threshold) continue; // Not confident enough

    const label = model.config.id2label[argmax];
    result.push({
        x1, y1, x2, y2, score, label, index: argmax,
    });
}

// Drop boxes that overlap a better-scoring box too much.
const iouThreshold = 0.5; // Adjust the threshold as needed
const filteredResults = removeDuplicates(result, iouThreshold);
console.log(filteredResults);
|
68 |
+
|
69 |
+
/**
 * Remove overlapping detections using greedy non-maximum suppression.
 *
 * Detections are visited from highest to lowest `score`. A detection is kept
 * only if its IoU with every detection kept so far is at most `iouThreshold`.
 * Visiting best-first makes the outcome independent of the input order and
 * guarantees that no two returned detections overlap by more than the threshold.
 *
 * Suppression does not look at `label`, so boxes of different classes can
 * suppress each other.
 *
 * @param {Array<{x1: number, y1: number, x2: number, y2: number, score: number}>} detections
 *   Candidate boxes. The array is not modified.
 * @param {number} iouThreshold
 *   Two boxes count as duplicates when their IoU is strictly greater than this value.
 * @returns {Array} The surviving detections, in the same relative order as in `detections`.
 */
function removeDuplicates(detections, iouThreshold) {
    // Indices ordered by descending score (the sort is stable, so ties keep input order).
    const bestFirst = detections
        .map((_, index) => index)
        .sort((a, b) => detections[b].score - detections[a].score);

    const keptIndices = [];
    for (const index of bestFirst) {
        const candidate = detections[index];
        const overlapsKept = keptIndices.some(
            (keptIndex) => calculateIoU(candidate, detections[keptIndex]) > iouThreshold,
        );
        if (!overlapsKept) {
            keptIndices.push(index);
        }
    }

    // Report survivors in their original relative order rather than by score.
    return keptIndices
        .sort((a, b) => a - b)
        .map((index) => detections[index]);
}
|
100 |
+
|
101 |
+
/**
 * Compute the intersection-over-union (IoU) of two axis-aligned boxes.
 *
 * Each box is read through its `x1`, `y1`, `x2`, `y2` properties; the formula
 * assumes `x1 <= x2` and `y1 <= y2`.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection1
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection2
 * @returns {number} Overlap area divided by union area, or 0 when the union
 *   area is not positive (for example when both boxes have zero area).
 */
function calculateIoU(detection1, detection2) {
    // Clamp at 0 so boxes that do not intersect contribute no overlap.
    const xOverlap = Math.max(0, Math.min(detection1.x2, detection2.x2) - Math.max(detection1.x1, detection2.x1));
    const yOverlap = Math.max(0, Math.min(detection1.y2, detection2.y2) - Math.max(detection1.y1, detection2.y1));
    const overlapArea = xOverlap * yOverlap;

    const area1 = (detection1.x2 - detection1.x1) * (detection1.y2 - detection1.y1);
    const area2 = (detection2.x2 - detection2.x1) * (detection2.y2 - detection2.y1);
    const unionArea = area1 + area2 - overlapArea;

    // Avoid 0 / 0 = NaN for degenerate boxes; treat them as not overlapping.
    return unionArea > 0 ? overlapArea / unionArea : 0;
}
|
112 |
+
```
|
113 |
+
|
114 |
+
**Result**
|
115 |
+
```
|
116 |
+
[
|
117 |
+
{
|
118 |
+
"x1": 54.53195288479328,
|
119 |
+
"y1": 170.06781649589539,
|
120 |
+
"x2": 95.52642979323865,
|
121 |
+
"y2": 186.62115139961244,
|
122 |
+
"score": 0.8901662826538086,
|
123 |
+
"label": "Text",
|
124 |
+
"index": 9
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"x1": 53.96503926515579,
|
128 |
+
"y1": 195.67131299972536,
|
129 |
+
"x2": 221.8717828631401,
|
130 |
+
"y2": 212.6188931465149,
|
131 |
+
"score": 0.8967247605323792,
|
132 |
+
"label": "Text",
|
133 |
+
"index": 9
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"x1": 54.53195288479328,
|
137 |
+
"y1": 221.1506155014038,
|
138 |
+
"x2": 98.4759178608656,
|
139 |
+
"y2": 238.44384784698488,
|
140 |
+
"score": 0.8795284032821655,
|
141 |
+
"label": "Text",
|
142 |
+
"index": 9
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"x1": 55.731045877933504,
|
146 |
+
"y1": 338.1506155014038,
|
147 |
+
"x2": 103.58089088201523,
|
148 |
+
"y2": 355.22782917022704,
|
149 |
+
"score": 0.9104153513908386,
|
150 |
+
"label": "Section-header",
|
151 |
+
"index": 7
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"x1": 54.501348263025285,
|
155 |
+
"y1": 452.59601612091063,
|
156 |
+
"x2": 144.76493505835532,
|
157 |
+
"y2": 469.1547849655152,
|
158 |
+
"score": 0.9181555509567261,
|
159 |
+
"label": "Section-header",
|
160 |
+
"index": 7
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"x1": 54.37510642111301,
|
164 |
+
"y1": 568.1918724060059,
|
165 |
+
"x2": 73.67877252995967,
|
166 |
+
"y2": 584.1619010925293,
|
167 |
+
"score": 0.899300754070282,
|
168 |
+
"label": "Section-header",
|
169 |
+
"index": 7
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"x1": 54.27563991844654,
|
173 |
+
"y1": 840.2569072723389,
|
174 |
+
"x2": 70.35437833964825,
|
175 |
+
"y2": 859.4512378692626,
|
176 |
+
"score": 0.6805046796798706,
|
177 |
+
"label": "Section-header",
|
178 |
+
"index": 7
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"x1": 309.2861147403717,
|
182 |
+
"y1": 908.7717830657958,
|
183 |
+
"x2": 373.8879840373993,
|
184 |
+
"y2": 922.6841892242431,
|
185 |
+
"score": 0.8969672918319702,
|
186 |
+
"label": "Page-footer",
|
187 |
+
"index": 4
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"x1": 311.53335428237915,
|
191 |
+
"y1": 10.31740515232086,
|
192 |
+
"x2": 607.2475433349609,
|
193 |
+
"y2": 33.85392036437988,
|
194 |
+
"score": 0.9498511552810669,
|
195 |
+
"label": "Page-header",
|
196 |
+
"index": 5
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"x1": 56.66784882545471,
|
200 |
+
"y1": 289.38916368484496,
|
201 |
+
"x2": 416.7734823703766,
|
202 |
+
"y2": 306.94164075851444,
|
203 |
+
"score": 0.856067419052124,
|
204 |
+
"label": "Text",
|
205 |
+
"index": 9
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"x1": 56.03344459533691,
|
209 |
+
"y1": 309.5055012702942,
|
210 |
+
"x2": 317.7232768535614,
|
211 |
+
"y2": 325.49175367355343,
|
212 |
+
"score": 0.8314194083213806,
|
213 |
+
"label": "Text",
|
214 |
+
"index": 9
|
215 |
+
},
|
216 |
+
{
|
217 |
+
"x1": 53.00637502670288,
|
218 |
+
"y1": 429.9619674682617,
|
219 |
+
"x2": 414.61163306236267,
|
220 |
+
"y2": 445.95904312133786,
|
221 |
+
"score": 0.8927980661392212,
|
222 |
+
"label": "Text",
|
223 |
+
"index": 9
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"x1": 55.619012689590456,
|
227 |
+
"y1": 638.6609138488769,
|
228 |
+
"x2": 384.32462439537045,
|
229 |
+
"y2": 656.8182655334473,
|
230 |
+
"score": 0.9029342532157898,
|
231 |
+
"label": "List-item",
|
232 |
+
"index": 3
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"x1": 58.06927928924561,
|
236 |
+
"y1": 794.932172012329,
|
237 |
+
"x2": 520.523375415802,
|
238 |
+
"y2": 811.1884700775146,
|
239 |
+
"score": 0.9037705063819885,
|
240 |
+
"label": "List-item",
|
241 |
+
"index": 3
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"x1": 54.25830144882202,
|
245 |
+
"y1": 76.01902542114259,
|
246 |
+
"x2": 552.8331304550171,
|
247 |
+
"y2": 158.67227897644042,
|
248 |
+
"score": 0.9725438356399536,
|
249 |
+
"label": "Title",
|
250 |
+
"index": 10
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"x1": 53.636448097229,
|
254 |
+
"y1": 244.93504171371458,
|
255 |
+
"x2": 610.1452471733094,
|
256 |
+
"y2": 274.8768593788147,
|
257 |
+
"score": 0.8954038619995117,
|
258 |
+
"label": "Text",
|
259 |
+
"index": 9
|
260 |
+
},
|
261 |
+
{
|
262 |
+
"x1": 54.76330833435059,
|
263 |
+
"y1": 364.74734601974484,
|
264 |
+
"x2": 625.0439935684204,
|
265 |
+
"y2": 405.74994478225705,
|
266 |
+
"score": 0.7930819988250732,
|
267 |
+
"label": "Text",
|
268 |
+
"index": 9
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"x1": 55.78299608230591,
|
272 |
+
"y1": 480.10940895080563,
|
273 |
+
"x2": 623.4623931884765,
|
274 |
+
"y2": 556.692225265503,
|
275 |
+
"score": 0.9482676982879639,
|
276 |
+
"label": "Text",
|
277 |
+
"index": 9
|
278 |
+
},
|
279 |
+
{
|
280 |
+
"x1": 52.160629177093504,
|
281 |
+
"y1": 593.5841983795166,
|
282 |
+
"x2": 609.7405840873719,
|
283 |
+
"y2": 635.7749668121338,
|
284 |
+
"score": 0.9440742135047913,
|
285 |
+
"label": "Text",
|
286 |
+
"index": 9
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"x1": 53.12467575073242,
|
290 |
+
"y1": 654.1885282516479,
|
291 |
+
"x2": 615.2034725189209,
|
292 |
+
"y2": 697.286619758606,
|
293 |
+
"score": 0.9134702086448669,
|
294 |
+
"label": "List-item",
|
295 |
+
"index": 3
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"x1": 52.52786092758179,
|
299 |
+
"y1": 712.9350305557251,
|
300 |
+
"x2": 622.7321027755737,
|
301 |
+
"y2": 754.2832815170287,
|
302 |
+
"score": 0.9259238243103027,
|
303 |
+
"label": "Text",
|
304 |
+
"index": 9
|
305 |
+
},
|
306 |
+
{
|
307 |
+
"x1": 56.837522792816166,
|
308 |
+
"y1": 758.6981185913086,
|
309 |
+
"x2": 607.179635810852,
|
310 |
+
"y2": 787.9486541748047,
|
311 |
+
"score": 0.9015638828277588,
|
312 |
+
"label": "List-item",
|
313 |
+
"index": 3
|
314 |
+
},
|
315 |
+
{
|
316 |
+
"x1": 56.57186779975891,
|
317 |
+
"y1": 810.8556049346925,
|
318 |
+
"x2": 446.48612236976624,
|
319 |
+
"y2": 828.0084697723388,
|
320 |
+
"score": 0.8806689977645874,
|
321 |
+
"label": "List-item",
|
322 |
+
"index": 3
|
323 |
+
}
|
324 |
+
]
|
325 |
+
```
|
326 |
+
![image/png](https://cdn-uploads.huggingface.co/production/uploads/64bad74f94c0e3be4aa7cd76/MIja7FCRhuXsjpv25_A8u.png)
|