Oblix committed on
Commit
f58ee24
1 Parent(s): cbc113a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +320 -1
README.md CHANGED
@@ -4,4 +4,323 @@ license: gpl-3.0
4
  pipeline_tag: object-detection
5
  ---
6
 
7
- https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet with ONNX weights to be compatible with Transformers.js.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  pipeline_tag: object-detection
5
  ---
6
 
7
+ https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet with ONNX weights to be compatible with Transformers.js.
8
+
9
+ ## Usage (Transformers.js)
10
+
11
+ If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
12
+ ```bash
13
+ npm i @xenova/transformers
14
+ ```
15
+
16
+ **Example:** Perform object-detection with `Oblix/yolov8x-doclaynet_ONNX`.
17
+
18
+ ```js
19
+ import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';
20
+
21
+ const model = await AutoModel.from_pretrained(
22
+ "Oblix/yolov8x-doclaynet_ONNX",
23
+ {
24
+ quantized: false,
25
+ }
26
+ );
27
+ const processor = await AutoProcessor.from_pretrained("Oblix/yolov8x-doclaynet_ONNX");
28
+
29
+ const url = 'https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/sample1.png';
30
+ const rawImage = await RawImage.fromURL(url);
31
+ const { pixel_values } = await processor(rawImage);
32
+ const output = await model({ images: pixel_values });
33
+
34
+ // Post-process:
35
+ const permuted = output.output0[0].transpose(1, 0);
36
+ // `permuted` is a Tensor of shape [ 8400, 15 ]:
37
+ // - 8400 potential bounding boxes
38
+ // - 15 parameters for each box:
39
+ // - first 4 are coordinates for the bounding boxes (x-center, y-center, width, height)
40
+ // - the remaining 11 are the probabilities for each class
41
+
42
+ // Example code to format it nicely:
43
+ const result = [];
44
+ const threshold = 0.5;
45
+ const [scaledHeight, scaledWidth] = pixel_values.dims.slice(-2);
46
+ for (const [xc, yc, w, h, ...scores] of permuted.tolist()) {
47
+
48
+ // Get pixel values, taking into account the original image size
49
+ const x1 = (xc - w/2) / scaledWidth * rawImage.width;
50
+ const y1 = (yc - h/2) / scaledHeight * rawImage.height;
51
+ const x2 = (xc + w/2) / scaledWidth * rawImage.width;
52
+ const y2 = (yc + h/2) / scaledHeight * rawImage.height;
53
+
54
+ // Get best class
55
+ const argmax = scores.reduce((maxIndex, currentVal, currentIndex, arr) => currentVal > arr[maxIndex] ? currentIndex : maxIndex, 0);
56
+ const score = scores[argmax];
57
+ if (score < threshold) continue; // Not confident enough
58
+
59
+ const label = model.config.id2label[argmax];
60
+ result.push({
61
+ x1, y1, x2, y2, score, label, index: argmax,
62
+ });
63
+ }
64
+
65
+ const iouThreshold = 0.5; // Adjust the threshold as needed
66
+ const filteredResults = removeDuplicates(result, iouThreshold);
67
+ console.log(filteredResults);
68
+
69
+ function removeDuplicates(detections, iouThreshold) {
70
+ const filteredDetections = [];
71
+
72
+ for (const detection of detections) {
73
+ let isDuplicate = false;
74
+ let duplicateIndex = -1;
75
+ let maxIoU = 0;
76
+
77
+ for (let i = 0; i < filteredDetections.length; i++) {
78
+ const filteredDetection = filteredDetections[i];
79
+ const iou = calculateIoU(detection, filteredDetection);
80
+ if (iou > iouThreshold) {
81
+ isDuplicate = true;
82
+ if (iou > maxIoU) {
83
+ maxIoU = iou;
84
+ duplicateIndex = i;
85
+ }
86
+ }
87
+ }
88
+
89
+ if (!isDuplicate) {
90
+ filteredDetections.push(detection);
91
+ } else if (duplicateIndex !== -1) {
92
+ if (detection.score > filteredDetections[duplicateIndex].score) {
93
+ filteredDetections[duplicateIndex] = detection;
94
+ }
95
+ }
96
+ }
97
+
98
+ return filteredDetections;
99
+ }
100
+
101
+ function calculateIoU(detection1, detection2) {
102
+ const xOverlap = Math.max(0, Math.min(detection1.x2, detection2.x2) - Math.max(detection1.x1, detection2.x1));
103
+ const yOverlap = Math.max(0, Math.min(detection1.y2, detection2.y2) - Math.max(detection1.y1, detection2.y1));
104
+ const overlapArea = xOverlap * yOverlap;
105
+
106
+ const area1 = (detection1.x2 - detection1.x1) * (detection1.y2 - detection1.y1);
107
+ const area2 = (detection2.x2 - detection2.x1) * (detection2.y2 - detection2.y1);
108
+ const unionArea = area1 + area2 - overlapArea;
109
+
110
+ return overlapArea / unionArea;
111
+ }
112
+ ```
113
+
114
+ **Result**
115
+ ```
116
+ [
117
+ {
118
+ "x1": 54.53195288479328,
119
+ "y1": 170.06781649589539,
120
+ "x2": 95.52642979323865,
121
+ "y2": 186.62115139961244,
122
+ "score": 0.8901662826538086,
123
+ "label": "Text",
124
+ "index": 9
125
+ },
126
+ {
127
+ "x1": 53.96503926515579,
128
+ "y1": 195.67131299972536,
129
+ "x2": 221.8717828631401,
130
+ "y2": 212.6188931465149,
131
+ "score": 0.8967247605323792,
132
+ "label": "Text",
133
+ "index": 9
134
+ },
135
+ {
136
+ "x1": 54.53195288479328,
137
+ "y1": 221.1506155014038,
138
+ "x2": 98.4759178608656,
139
+ "y2": 238.44384784698488,
140
+ "score": 0.8795284032821655,
141
+ "label": "Text",
142
+ "index": 9
143
+ },
144
+ {
145
+ "x1": 55.731045877933504,
146
+ "y1": 338.1506155014038,
147
+ "x2": 103.58089088201523,
148
+ "y2": 355.22782917022704,
149
+ "score": 0.9104153513908386,
150
+ "label": "Section-header",
151
+ "index": 7
152
+ },
153
+ {
154
+ "x1": 54.501348263025285,
155
+ "y1": 452.59601612091063,
156
+ "x2": 144.76493505835532,
157
+ "y2": 469.1547849655152,
158
+ "score": 0.9181555509567261,
159
+ "label": "Section-header",
160
+ "index": 7
161
+ },
162
+ {
163
+ "x1": 54.37510642111301,
164
+ "y1": 568.1918724060059,
165
+ "x2": 73.67877252995967,
166
+ "y2": 584.1619010925293,
167
+ "score": 0.899300754070282,
168
+ "label": "Section-header",
169
+ "index": 7
170
+ },
171
+ {
172
+ "x1": 54.27563991844654,
173
+ "y1": 840.2569072723389,
174
+ "x2": 70.35437833964825,
175
+ "y2": 859.4512378692626,
176
+ "score": 0.6805046796798706,
177
+ "label": "Section-header",
178
+ "index": 7
179
+ },
180
+ {
181
+ "x1": 309.2861147403717,
182
+ "y1": 908.7717830657958,
183
+ "x2": 373.8879840373993,
184
+ "y2": 922.6841892242431,
185
+ "score": 0.8969672918319702,
186
+ "label": "Page-footer",
187
+ "index": 4
188
+ },
189
+ {
190
+ "x1": 311.53335428237915,
191
+ "y1": 10.31740515232086,
192
+ "x2": 607.2475433349609,
193
+ "y2": 33.85392036437988,
194
+ "score": 0.9498511552810669,
195
+ "label": "Page-header",
196
+ "index": 5
197
+ },
198
+ {
199
+ "x1": 56.66784882545471,
200
+ "y1": 289.38916368484496,
201
+ "x2": 416.7734823703766,
202
+ "y2": 306.94164075851444,
203
+ "score": 0.856067419052124,
204
+ "label": "Text",
205
+ "index": 9
206
+ },
207
+ {
208
+ "x1": 56.03344459533691,
209
+ "y1": 309.5055012702942,
210
+ "x2": 317.7232768535614,
211
+ "y2": 325.49175367355343,
212
+ "score": 0.8314194083213806,
213
+ "label": "Text",
214
+ "index": 9
215
+ },
216
+ {
217
+ "x1": 53.00637502670288,
218
+ "y1": 429.9619674682617,
219
+ "x2": 414.61163306236267,
220
+ "y2": 445.95904312133786,
221
+ "score": 0.8927980661392212,
222
+ "label": "Text",
223
+ "index": 9
224
+ },
225
+ {
226
+ "x1": 55.619012689590456,
227
+ "y1": 638.6609138488769,
228
+ "x2": 384.32462439537045,
229
+ "y2": 656.8182655334473,
230
+ "score": 0.9029342532157898,
231
+ "label": "List-item",
232
+ "index": 3
233
+ },
234
+ {
235
+ "x1": 58.06927928924561,
236
+ "y1": 794.932172012329,
237
+ "x2": 520.523375415802,
238
+ "y2": 811.1884700775146,
239
+ "score": 0.9037705063819885,
240
+ "label": "List-item",
241
+ "index": 3
242
+ },
243
+ {
244
+ "x1": 54.25830144882202,
245
+ "y1": 76.01902542114259,
246
+ "x2": 552.8331304550171,
247
+ "y2": 158.67227897644042,
248
+ "score": 0.9725438356399536,
249
+ "label": "Title",
250
+ "index": 10
251
+ },
252
+ {
253
+ "x1": 53.636448097229,
254
+ "y1": 244.93504171371458,
255
+ "x2": 610.1452471733094,
256
+ "y2": 274.8768593788147,
257
+ "score": 0.8954038619995117,
258
+ "label": "Text",
259
+ "index": 9
260
+ },
261
+ {
262
+ "x1": 54.76330833435059,
263
+ "y1": 364.74734601974484,
264
+ "x2": 625.0439935684204,
265
+ "y2": 405.74994478225705,
266
+ "score": 0.7930819988250732,
267
+ "label": "Text",
268
+ "index": 9
269
+ },
270
+ {
271
+ "x1": 55.78299608230591,
272
+ "y1": 480.10940895080563,
273
+ "x2": 623.4623931884765,
274
+ "y2": 556.692225265503,
275
+ "score": 0.9482676982879639,
276
+ "label": "Text",
277
+ "index": 9
278
+ },
279
+ {
280
+ "x1": 52.160629177093504,
281
+ "y1": 593.5841983795166,
282
+ "x2": 609.7405840873719,
283
+ "y2": 635.7749668121338,
284
+ "score": 0.9440742135047913,
285
+ "label": "Text",
286
+ "index": 9
287
+ },
288
+ {
289
+ "x1": 53.12467575073242,
290
+ "y1": 654.1885282516479,
291
+ "x2": 615.2034725189209,
292
+ "y2": 697.286619758606,
293
+ "score": 0.9134702086448669,
294
+ "label": "List-item",
295
+ "index": 3
296
+ },
297
+ {
298
+ "x1": 52.52786092758179,
299
+ "y1": 712.9350305557251,
300
+ "x2": 622.7321027755737,
301
+ "y2": 754.2832815170287,
302
+ "score": 0.9259238243103027,
303
+ "label": "Text",
304
+ "index": 9
305
+ },
306
+ {
307
+ "x1": 56.837522792816166,
308
+ "y1": 758.6981185913086,
309
+ "x2": 607.179635810852,
310
+ "y2": 787.9486541748047,
311
+ "score": 0.9015638828277588,
312
+ "label": "List-item",
313
+ "index": 3
314
+ },
315
+ {
316
+ "x1": 56.57186779975891,
317
+ "y1": 810.8556049346925,
318
+ "x2": 446.48612236976624,
319
+ "y2": 828.0084697723388,
320
+ "score": 0.8806689977645874,
321
+ "label": "List-item",
322
+ "index": 3
323
+ }
324
+ ]
325
+ ```
326
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64bad74f94c0e3be4aa7cd76/MIja7FCRhuXsjpv25_A8u.png)