znation HF Staff committed on
Commit
8200fff
·
1 Parent(s): 0bf9b37

Add xorb selection on click

Browse files
Files changed (1) hide show
  1. index.html +30 -0
index.html CHANGED
@@ -20,6 +20,7 @@
20
  <h1>Visualizing Repo-level Dedupe</h1>
21
  <p>This visualization demonstrates the amount of <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/blog/from-files-to-chunks">chunk-level dedupe</a> across all public repos.</p>
22
  <p>"Dedupe factor" is defined as the number of re-uses of a given "xorb". A "xorb" is a collection of content-defined chunks, typically around 1,000 chunks comprising up to 64 MB of total data.</p>
 
23
  </div>
24
  <div id="vis"></div>
25
  <script>
@@ -28,6 +29,27 @@
28
  "resolve": {"scale": {"x": "independent"}},
29
  "width": 600,
30
  "height": 12,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "data": {
32
  "url": "xorbs.json"
33
  },
@@ -44,6 +66,14 @@
44
  "type": "quantitative",
45
  "scale": {"domain": [0, 10]}
46
  },
 
 
 
 
 
 
 
 
47
  "tooltip": {"field": "dedupe_factor"},
48
  "row": {
49
  "field": "repo",
 
20
  <h1>Visualizing Repo-level Dedupe</h1>
21
  <p>This visualization demonstrates the amount of <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/blog/from-files-to-chunks">chunk-level dedupe</a> across all public repos.</p>
22
  <p>"Dedupe factor" is defined as the number of re-uses of a given "xorb". A "xorb" is a collection of content-defined chunks, typically around 1,000 chunks comprising up to 64 MB of total data.</p>
23
+ <p>Click to select a row, and highlight all xorbs in other repos that have overlap with the selected repo. Double-click to clear selection.</p>
24
  </div>
25
  <div id="vis"></div>
26
  <script>
 
29
  "resolve": {"scale": {"x": "independent"}},
30
  "width": 600,
31
  "height": 12,
32
+ "params": [
33
+ {
34
+ "name": "select",
35
+ "select": {"type": "point", "fields": ["repo"], "toggle": "false"}
36
+ },
37
+ {
38
+ "name": "xorbs_selected",
39
+ "expr": "pluck(data('source_0'), 'repo_xorb_selected')"
40
+ },
41
+ {"name": "any_xorbs_selected", "expr": "extent(xorbs_selected)[0] != null"}
42
+ ],
43
+ "transform": [
44
+ {
45
+ "calculate": "(select.repo != null ? indexof(select.repo, datum.repo) : -1) + 1",
46
+ "as": "repo_selected"
47
+ },
48
+ {
49
+ "calculate": "if(datum.repo_selected > 0, datum.xorb_id, null)",
50
+ "as": "repo_xorb_selected"
51
+ }
52
+ ],
53
  "data": {
54
  "url": "xorbs.json"
55
  },
 
66
  "type": "quantitative",
67
  "scale": {"domain": [0, 10]}
68
  },
69
+ "opacity": {
70
+ "condition": [
71
+ {
72
+ "test": "any_xorbs_selected && indexof(xorbs_selected, datum.xorb_id) == -1",
73
+ "value": 0.3
74
+ }
75
+ ]
76
+ },
77
  "tooltip": {"field": "dedupe_factor"},
78
  "row": {
79
  "field": "repo",