---
library_name: transformers.js
license: gpl-3.0
pipeline_tag: object-detection
---

https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet with ONNX weights to be compatible with Transformers.js.

## Usage (Transformers.js)

If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
```bash
npm i @xenova/transformers
```

**Example:** Perform object-detection with `Oblix/yolov8x-doclaynet_ONNX`.
```js
import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';

const model = await AutoModel.from_pretrained(
  "Oblix/yolov8x-doclaynet_ONNX",
  {
    quantized: false,
  }
);

const processor = await AutoProcessor.from_pretrained("Oblix/yolov8x-doclaynet_ONNX");

const url = 'https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/sample1.png';
const rawImage = await RawImage.fromURL(url);
const { pixel_values } = await processor(rawImage);

const output = await model({ images: pixel_values });

// Post-process:
const permuted = output.output0[0].transpose(1, 0);
// `permuted` is a Tensor of shape [ 8400, 15 ]:
// - 8400 potential bounding boxes
// - 15 parameters for each box:
//   - first 4 are coordinates for the bounding boxes (x-center, y-center, width, height)
//   - the remaining 11 are the probabilities for each class

// Example code to format it nicely:
const results = [];
const threshold = 0.5; // Adjust the threshold as needed
const [scaledHeight, scaledWidth] = pixel_values.dims.slice(-2);
for (const [xc, yc, w, h, ...scores] of permuted.tolist()) {
  // Convert the (center, size) box to corner coordinates, rescaled from the
  // processed image size to the original image size
  const x1 = (xc - w / 2) / scaledWidth * rawImage.width;
  const y1 = (yc - h / 2) / scaledHeight * rawImage.height;
  const x2 = (xc + w / 2) / scaledWidth * rawImage.width;
  const y2 = (yc + h / 2) / scaledHeight * rawImage.height;

  // Get best class
  const argmax = scores.reduce(
    (maxIndex, currentVal, currentIndex, arr) => (currentVal > arr[maxIndex] ? currentIndex : maxIndex),
    0,
  );
  const score = scores[argmax];
  if (score < threshold) continue; // Not confident enough

  const label = model.config.id2label[argmax];
  results.push({
    x1,
    y1,
    x2,
    y2,
    score,
    label,
    index: argmax,
  });
}

const iouThreshold = 0.5; // Adjust the threshold as needed
const filteredResults = removeDuplicates(results, iouThreshold);

console.log(filteredResults);

/**
 * Collapse overlapping detections into a single entry.
 *
 * Detections are visited in input order. Each one is compared (by box overlap
 * only; labels are not taken into account) with the detections kept so far:
 * - if no kept detection overlaps it with an IoU above `iouThreshold`, it is kept;
 * - otherwise it replaces the kept detection it overlaps the most, but only
 *   when its score is strictly higher.
 *
 * @param {Array<{x1: number, y1: number, x2: number, y2: number, score: number}>} detections
 *   Candidate detections with corner coordinates and a confidence score.
 * @param {number} iouThreshold IoU above which two boxes count as duplicates.
 * @returns {Array<object>} A new array with the retained detections; the input array is not modified.
 */
function removeDuplicates(detections, iouThreshold) {
  const filteredDetections = [];

  for (const detection of detections) {
    // Index of the kept detection with the largest IoU above the threshold
    // (-1 when nothing overlaps enough).
    let duplicateIndex = -1;
    let maxIoU = iouThreshold;

    for (let i = 0; i < filteredDetections.length; i++) {
      const iou = calculateIoU(detection, filteredDetections[i]);
      if (iou > maxIoU) {
        maxIoU = iou;
        duplicateIndex = i;
      }
    }

    if (duplicateIndex === -1) {
      filteredDetections.push(detection);
    } else if (detection.score > filteredDetections[duplicateIndex].score) {
      filteredDetections[duplicateIndex] = detection;
    }
  }

  return filteredDetections;
}

/**
 * Intersection-over-union of two axis-aligned boxes given by their corners.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection1
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection2
 * @returns {number} Overlap area divided by union area, or 0 when the union area is not positive.
 */
function calculateIoU(detection1, detection2) {
  const xOverlap = Math.max(0, Math.min(detection1.x2, detection2.x2) - Math.max(detection1.x1, detection2.x1));
  const yOverlap = Math.max(0, Math.min(detection1.y2, detection2.y2) - Math.max(detection1.y1, detection2.y1));
  const overlapArea = xOverlap * yOverlap;

  const area1 = (detection1.x2 - detection1.x1) * (detection1.y2 - detection1.y1);
  const area2 = (detection2.x2 - detection2.x1) * (detection2.y2 - detection2.y1);
  const unionArea = area1 + area2 - overlapArea;

  // Two zero-area boxes would otherwise give 0 / 0 = NaN.
  if (!(unionArea > 0)) return 0;

  return overlapArea / unionArea;
}
```

**Result**
```
[
  { "x1": 54.53195288479328, "y1": 170.06781649589539, "x2": 95.52642979323865, "y2": 186.62115139961244, "score": 0.8901662826538086, "label": "Text", "index": 9 },
  { "x1": 53.96503926515579, "y1": 195.67131299972536, "x2": 221.8717828631401, "y2":
212.6188931465149, "score": 0.8967247605323792, "label": "Text", "index": 9 }, { "x1": 54.53195288479328, "y1": 221.1506155014038, "x2": 98.4759178608656, "y2": 238.44384784698488, "score": 0.8795284032821655, "label": "Text", "index": 9 }, { "x1": 55.731045877933504, "y1": 338.1506155014038, "x2": 103.58089088201523, "y2": 355.22782917022704, "score": 0.9104153513908386, "label": "Section-header", "index": 7 }, { "x1": 54.501348263025285, "y1": 452.59601612091063, "x2": 144.76493505835532, "y2": 469.1547849655152, "score": 0.9181555509567261, "label": "Section-header", "index": 7 }, { "x1": 54.37510642111301, "y1": 568.1918724060059, "x2": 73.67877252995967, "y2": 584.1619010925293, "score": 0.899300754070282, "label": "Section-header", "index": 7 }, { "x1": 54.27563991844654, "y1": 840.2569072723389, "x2": 70.35437833964825, "y2": 859.4512378692626, "score": 0.6805046796798706, "label": "Section-header", "index": 7 }, { "x1": 309.2861147403717, "y1": 908.7717830657958, "x2": 373.8879840373993, "y2": 922.6841892242431, "score": 0.8969672918319702, "label": "Page-footer", "index": 4 }, { "x1": 311.53335428237915, "y1": 10.31740515232086, "x2": 607.2475433349609, "y2": 33.85392036437988, "score": 0.9498511552810669, "label": "Page-header", "index": 5 }, { "x1": 56.66784882545471, "y1": 289.38916368484496, "x2": 416.7734823703766, "y2": 306.94164075851444, "score": 0.856067419052124, "label": "Text", "index": 9 }, { "x1": 56.03344459533691, "y1": 309.5055012702942, "x2": 317.7232768535614, "y2": 325.49175367355343, "score": 0.8314194083213806, "label": "Text", "index": 9 }, { "x1": 53.00637502670288, "y1": 429.9619674682617, "x2": 414.61163306236267, "y2": 445.95904312133786, "score": 0.8927980661392212, "label": "Text", "index": 9 }, { "x1": 55.619012689590456, "y1": 638.6609138488769, "x2": 384.32462439537045, "y2": 656.8182655334473, "score": 0.9029342532157898, "label": "List-item", "index": 3 }, { "x1": 58.06927928924561, "y1": 794.932172012329, "x2": 
520.523375415802, "y2": 811.1884700775146, "score": 0.9037705063819885, "label": "List-item", "index": 3 }, { "x1": 54.25830144882202, "y1": 76.01902542114259, "x2": 552.8331304550171, "y2": 158.67227897644042, "score": 0.9725438356399536, "label": "Title", "index": 10 }, { "x1": 53.636448097229, "y1": 244.93504171371458, "x2": 610.1452471733094, "y2": 274.8768593788147, "score": 0.8954038619995117, "label": "Text", "index": 9 }, { "x1": 54.76330833435059, "y1": 364.74734601974484, "x2": 625.0439935684204, "y2": 405.74994478225705, "score": 0.7930819988250732, "label": "Text", "index": 9 }, { "x1": 55.78299608230591, "y1": 480.10940895080563, "x2": 623.4623931884765, "y2": 556.692225265503, "score": 0.9482676982879639, "label": "Text", "index": 9 }, { "x1": 52.160629177093504, "y1": 593.5841983795166, "x2": 609.7405840873719, "y2": 635.7749668121338, "score": 0.9440742135047913, "label": "Text", "index": 9 }, { "x1": 53.12467575073242, "y1": 654.1885282516479, "x2": 615.2034725189209, "y2": 697.286619758606, "score": 0.9134702086448669, "label": "List-item", "index": 3 }, { "x1": 52.52786092758179, "y1": 712.9350305557251, "x2": 622.7321027755737, "y2": 754.2832815170287, "score": 0.9259238243103027, "label": "Text", "index": 9 }, { "x1": 56.837522792816166, "y1": 758.6981185913086, "x2": 607.179635810852, "y2": 787.9486541748047, "score": 0.9015638828277588, "label": "List-item", "index": 3 }, { "x1": 56.57186779975891, "y1": 810.8556049346925, "x2": 446.48612236976624, "y2": 828.0084697723388, "score": 0.8806689977645874, "label": "List-item", "index": 3 } ] ``` ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64bad74f94c0e3be4aa7cd76/MIja7FCRhuXsjpv25_A8u.png) ## Labels - Caption - Footnote - Formula - List-item - Page-footer - Page-header - Picture - Section-header - Table - Text - Title