|
--- |
|
base_model: DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet |
|
library_name: transformers.js |
|
license: gpl-3.0 |
|
pipeline_tag: object-detection |
|
--- |
|
|
|
This repository contains [DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet](https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet) exported with ONNX weights so that it is compatible with Transformers.js.
|
|
|
## Usage (Transformers.js) |
|
|
|
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using: |
|
```bash |
|
npm i @xenova/transformers |
|
``` |
|
|
|
**Example:** Perform object-detection with `Oblix/yolov8x-doclaynet_ONNX`. |
|
|
|
```js |
|
import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers'; |
|
|
|
// Load the unquantized ONNX model together with its matching pre-processor.
const modelId = "Oblix/yolov8x-doclaynet_ONNX";
const model = await AutoModel.from_pretrained(modelId, { quantized: false });
const processor = await AutoProcessor.from_pretrained(modelId);

// Download the sample page, pre-process it and run the model on it.
const url = 'https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/sample1.png';
const rawImage = await RawImage.fromURL(url);
const { pixel_values } = await processor(rawImage);
const output = await model({ images: pixel_values });

// Post-process:
// Swap the two axes of the first batch entry so that each row describes one box.
const permuted = output.output0[0].transpose(1, 0);
// `permuted` is a Tensor of shape [ 8400, 15 ]:
//  - 8400 potential bounding boxes
//  - 15 parameters for each box:
//    - first 4 are coordinates for the bounding boxes (x-center, y-center, width, height)
//    - the remaining 11 are the probabilities for each class

// Example code to format it nicely:
const results = [];
const threshold = 0.5; // Adjust the threshold as needed
const [scaledHeight, scaledWidth] = pixel_values.dims.slice(-2);

for (const [xc, yc, w, h, ...scores] of permuted.tolist()) {
    // Pick the best class; on ties the earliest class index wins.
    let bestClass = 0;
    for (let classIndex = 1; classIndex < scores.length; classIndex++) {
        if (scores[classIndex] > scores[bestClass]) {
            bestClass = classIndex;
        }
    }
    const score = scores[bestClass];
    if (score < threshold) {
        continue; // Not confident enough
    }

    // Convert centre/size (in model-input pixels) to corner coordinates
    // expressed in pixels of the original image.
    const halfWidth = w / 2;
    const halfHeight = h / 2;
    const x1 = (xc - halfWidth) / scaledWidth * rawImage.width;
    const y1 = (yc - halfHeight) / scaledHeight * rawImage.height;
    const x2 = (xc + halfWidth) / scaledWidth * rawImage.width;
    const y2 = (yc + halfHeight) / scaledHeight * rawImage.height;

    const label = model.config.id2label[bestClass];
    results.push({ x1, x2, y1, y2, score, label, index: bestClass });
}

// Drop boxes that overlap an already accepted box too strongly.
const iouThreshold = 0.5; // Adjust the threshold as needed
const filteredResults = removeDuplicates(results, iouThreshold);
console.log(filteredResults);
|
|
|
/**
 * Removes overlapping detections with greedy non-maximum suppression.
 *
 * Detections are visited from the highest to the lowest score, and a detection
 * is kept only if its IoU with every detection kept so far is at most
 * `iouThreshold`. Visiting in score order makes the result independent of the
 * input order and guarantees that no two returned detections overlap by more
 * than the threshold.
 *
 * The comparison looks at the boxes only (labels are not consulted), so
 * overlapping boxes of different classes suppress each other as well.
 *
 * @param {Array<{x1: number, y1: number, x2: number, y2: number, score: number}>} detections
 *   Candidate detections. The array is not modified.
 * @param {number} iouThreshold
 *   A detection is suppressed when its IoU with a kept detection is strictly
 *   greater than this value.
 * @returns {Array<object>} The surviving detections (same object references),
 *   in the order in which they appear in `detections`.
 */
function removeDuplicates(detections, iouThreshold) {
    // Sort indices rather than the array itself so the caller's array is left
    // untouched; Array.prototype.sort is stable, so ties keep input order.
    const byScoreDescending = detections
        .map((_, index) => index)
        .sort((a, b) => detections[b].score - detections[a].score);

    const keptIndices = [];
    for (const candidate of byScoreDescending) {
        const overlapsKept = keptIndices.some(
            (kept) => calculateIoU(detections[candidate], detections[kept]) > iouThreshold,
        );
        if (!overlapsKept) {
            keptIndices.push(candidate);
        }
    }

    // Report the survivors in their original relative order.
    return keptIndices
        .sort((a, b) => a - b)
        .map((index) => detections[index]);
}
|
|
|
/**
 * Computes the intersection-over-union (IoU) of two axis-aligned boxes.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection1
 *   First box, given by its corners (x1, y1) and (x2, y2).
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection2
 *   Second box, in the same format.
 * @returns {number} Intersection area divided by union area, or 0 when the
 *   union has no positive area (e.g. two zero-sized boxes).
 */
function calculateIoU(detection1, detection2) {
    // Clamp at 0 so that disjoint boxes contribute no overlap.
    const xOverlap = Math.max(0, Math.min(detection1.x2, detection2.x2) - Math.max(detection1.x1, detection2.x1));
    const yOverlap = Math.max(0, Math.min(detection1.y2, detection2.y2) - Math.max(detection1.y1, detection2.y1));
    const overlapArea = xOverlap * yOverlap;

    const area1 = (detection1.x2 - detection1.x1) * (detection1.y2 - detection1.y1);
    const area2 = (detection2.x2 - detection2.x1) * (detection2.y2 - detection2.y1);
    const unionArea = area1 + area2 - overlapArea;

    // Avoid 0 / 0 (NaN) for degenerate boxes: treat them as not overlapping.
    return unionArea > 0 ? overlapArea / unionArea : 0;
}
|
``` |
|
|
|
**Result** |
|
``` |
|
[ |
|
{ |
|
"x1": 54.53195288479328, |
|
"y1": 170.06781649589539, |
|
"x2": 95.52642979323865, |
|
"y2": 186.62115139961244, |
|
"score": 0.8901662826538086, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 53.96503926515579, |
|
"y1": 195.67131299972536, |
|
"x2": 221.8717828631401, |
|
"y2": 212.6188931465149, |
|
"score": 0.8967247605323792, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 54.53195288479328, |
|
"y1": 221.1506155014038, |
|
"x2": 98.4759178608656, |
|
"y2": 238.44384784698488, |
|
"score": 0.8795284032821655, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 55.731045877933504, |
|
"y1": 338.1506155014038, |
|
"x2": 103.58089088201523, |
|
"y2": 355.22782917022704, |
|
"score": 0.9104153513908386, |
|
"label": "Section-header", |
|
"index": 7 |
|
}, |
|
{ |
|
"x1": 54.501348263025285, |
|
"y1": 452.59601612091063, |
|
"x2": 144.76493505835532, |
|
"y2": 469.1547849655152, |
|
"score": 0.9181555509567261, |
|
"label": "Section-header", |
|
"index": 7 |
|
}, |
|
{ |
|
"x1": 54.37510642111301, |
|
"y1": 568.1918724060059, |
|
"x2": 73.67877252995967, |
|
"y2": 584.1619010925293, |
|
"score": 0.899300754070282, |
|
"label": "Section-header", |
|
"index": 7 |
|
}, |
|
{ |
|
"x1": 54.27563991844654, |
|
"y1": 840.2569072723389, |
|
"x2": 70.35437833964825, |
|
"y2": 859.4512378692626, |
|
"score": 0.6805046796798706, |
|
"label": "Section-header", |
|
"index": 7 |
|
}, |
|
{ |
|
"x1": 309.2861147403717, |
|
"y1": 908.7717830657958, |
|
"x2": 373.8879840373993, |
|
"y2": 922.6841892242431, |
|
"score": 0.8969672918319702, |
|
"label": "Page-footer", |
|
"index": 4 |
|
}, |
|
{ |
|
"x1": 311.53335428237915, |
|
"y1": 10.31740515232086, |
|
"x2": 607.2475433349609, |
|
"y2": 33.85392036437988, |
|
"score": 0.9498511552810669, |
|
"label": "Page-header", |
|
"index": 5 |
|
}, |
|
{ |
|
"x1": 56.66784882545471, |
|
"y1": 289.38916368484496, |
|
"x2": 416.7734823703766, |
|
"y2": 306.94164075851444, |
|
"score": 0.856067419052124, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 56.03344459533691, |
|
"y1": 309.5055012702942, |
|
"x2": 317.7232768535614, |
|
"y2": 325.49175367355343, |
|
"score": 0.8314194083213806, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 53.00637502670288, |
|
"y1": 429.9619674682617, |
|
"x2": 414.61163306236267, |
|
"y2": 445.95904312133786, |
|
"score": 0.8927980661392212, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 55.619012689590456, |
|
"y1": 638.6609138488769, |
|
"x2": 384.32462439537045, |
|
"y2": 656.8182655334473, |
|
"score": 0.9029342532157898, |
|
"label": "List-item", |
|
"index": 3 |
|
}, |
|
{ |
|
"x1": 58.06927928924561, |
|
"y1": 794.932172012329, |
|
"x2": 520.523375415802, |
|
"y2": 811.1884700775146, |
|
"score": 0.9037705063819885, |
|
"label": "List-item", |
|
"index": 3 |
|
}, |
|
{ |
|
"x1": 54.25830144882202, |
|
"y1": 76.01902542114259, |
|
"x2": 552.8331304550171, |
|
"y2": 158.67227897644042, |
|
"score": 0.9725438356399536, |
|
"label": "Title", |
|
"index": 10 |
|
}, |
|
{ |
|
"x1": 53.636448097229, |
|
"y1": 244.93504171371458, |
|
"x2": 610.1452471733094, |
|
"y2": 274.8768593788147, |
|
"score": 0.8954038619995117, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 54.76330833435059, |
|
"y1": 364.74734601974484, |
|
"x2": 625.0439935684204, |
|
"y2": 405.74994478225705, |
|
"score": 0.7930819988250732, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 55.78299608230591, |
|
"y1": 480.10940895080563, |
|
"x2": 623.4623931884765, |
|
"y2": 556.692225265503, |
|
"score": 0.9482676982879639, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 52.160629177093504, |
|
"y1": 593.5841983795166, |
|
"x2": 609.7405840873719, |
|
"y2": 635.7749668121338, |
|
"score": 0.9440742135047913, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 53.12467575073242, |
|
"y1": 654.1885282516479, |
|
"x2": 615.2034725189209, |
|
"y2": 697.286619758606, |
|
"score": 0.9134702086448669, |
|
"label": "List-item", |
|
"index": 3 |
|
}, |
|
{ |
|
"x1": 52.52786092758179, |
|
"y1": 712.9350305557251, |
|
"x2": 622.7321027755737, |
|
"y2": 754.2832815170287, |
|
"score": 0.9259238243103027, |
|
"label": "Text", |
|
"index": 9 |
|
}, |
|
{ |
|
"x1": 56.837522792816166, |
|
"y1": 758.6981185913086, |
|
"x2": 607.179635810852, |
|
"y2": 787.9486541748047, |
|
"score": 0.9015638828277588, |
|
"label": "List-item", |
|
"index": 3 |
|
}, |
|
{ |
|
"x1": 56.57186779975891, |
|
"y1": 810.8556049346925, |
|
"x2": 446.48612236976624, |
|
"y2": 828.0084697723388, |
|
"score": 0.8806689977645874, |
|
"label": "List-item", |
|
"index": 3 |
|
} |
|
] |
|
``` |
|
![image/png](https://cdn-uploads.huggingface.co/production/uploads/64bad74f94c0e3be4aa7cd76/MIja7FCRhuXsjpv25_A8u.png) |
|
|
|
## Labels |
|
- Caption |
|
- Footnote |
|
- Formula |
|
- List-item |
|
- Page-footer |
|
- Page-header |
|
- Picture |
|
- Section-header |
|
- Table |
|
- Text |
|
- Title |