<!--
Spaces:
Running
Running
-->
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>xet-repo-dedupe</title>
  <link rel="stylesheet" href="style.css">
  <!--
    Vega runtime, Vega-Lite compiler and the vega-embed helper.
    These are intentionally loaded without defer/async: the inline script in
    the body calls vegaEmbed() as soon as it is parsed, so the global must
    already exist by then. The URLs float on a major version, so a fixed SRI
    hash cannot be attached without pinning exact versions first.
  -->
  <script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
  <script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
  <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
  <style>
    /* Chart container: span the page width and centre the embedded view. */
    #vis {
      width: 100%;
      text-align: center;
    }
  </style>
</head>
<body>
  <main>
    <div class="card">
      <h1>Visualizing Repo-level Dedupe</h1>
      <p>This visualization demonstrates the amount of <a href="https://huggingface.co/blog/from-files-to-chunks">chunk-level dedupe</a> within a repo or across a selection of repos. (For now, demonstrates a hardcoded selection.)</p>
    </div>
    <!-- Mount point that vegaEmbed() renders the chart into. -->
    <div id="vis"></div>
  </main>
  <script>
    // Vega-Lite spec: one thin strip per repo (row facet), made of rect marks
    // keyed by xorb_id and coloured by dedupe_factor.
    // Assumes each record in xorbs.json carries `xorb_id`, `dedupe_factor`
    // and `repo` fields; verify against the data file.
    var vlSpec = {
      "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
      // Each facet row gets its own x scale instead of sharing one.
      "resolve": {"scale": {"x": "independent"}},
      "width": 600,
      "height": 12,
      "data": {
        // Relative URL: resolved against the page's own location.
        "url": "xorbs.json"
      },
      "mark": "rect",
      "encoding": {
        "x": {
          "field": "xorb_id",
          "axis": null,
          "sort": {"field": "dedupe_factor", "order": "descending"},
          "stack": "normalize"
        },
        "color": {
          "field": "dedupe_factor",
          "type": "quantitative",
          // Fixed colour domain so every repo is drawn on the same 0-10 scale.
          "scale": {"domain": [0, 10]}
        },
        "tooltip": {"field": "dedupe_factor"},
        "row": {
          "field": "repo",
          "spacing": 1,
          "header": {"labelAngle": 0, "labelAlign": "left"},
          "sort": {"field": "dedupe_factor", "order": "descending"}
        }
      }
    };

    // vegaEmbed() returns a promise; report failures instead of letting a
    // rejection go unhandled and leaving an empty chart with no explanation.
    vegaEmbed('#vis', vlSpec).catch(function (err) {
      console.error(err);
    });
  </script>
</body>
</html>