smilkov committed on
Commit
8307312
1 Parent(s): fca48a0

Upload folder using huggingface_hub

Browse files
.gitattributes ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ data/datasets/lilac/pile-of-law-constitutions/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
2
+ data/datasets/lilac/pile-of-law-constitutions/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
3
+ data/datasets/lilac/pile-of-law-constitutions/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
4
+ data/datasets/lilac/pile-of-law-constitutions/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
data/datasets/lilac/pile-of-law-constitutions/config.yml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ embeddings:
2
+ - embedding: gte-small
3
+ path: text
4
+ name: pile-of-law-constitutions
5
+ namespace: lilac
6
+ settings:
7
+ preferred_embedding: gte-small
8
+ ui:
9
+ media_paths:
10
+ - text
11
+ signals:
12
+ - path: text
13
+ signal:
14
+ signal_name: near_dup
15
+ - path: text
16
+ signal:
17
+ signal_name: pii
18
+ source:
19
+ config_name: constitutions
20
+ dataset_name: pile-of-law/pile-of-law
21
+ source_name: huggingface
22
+ tags:
23
+ - legal
data/datasets/lilac/pile-of-law-constitutions/data-00000-of-00001.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d5ed1a03bb7960e4404a2824e56edc196bc868f2653083d76bd15785354d9b
3
+ size 11644007
data/datasets/lilac/pile-of-law-constitutions/manifest.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [
3
+ "data-00000-of-00001.parquet"
4
+ ],
5
+ "data_schema": {
6
+ "fields": {
7
+ "text": {
8
+ "dtype": "string"
9
+ },
10
+ "created_timestamp": {
11
+ "dtype": "string"
12
+ },
13
+ "downloaded_timestamp": {
14
+ "dtype": "string"
15
+ },
16
+ "url": {
17
+ "dtype": "string"
18
+ },
19
+ "__hfsplit__": {
20
+ "dtype": "string"
21
+ },
22
+ "__rowid__": {
23
+ "dtype": "string"
24
+ }
25
+ }
26
+ }
27
+ }
data/datasets/lilac/pile-of-law-constitutions/text/gte-small/hnsw.hnswlib.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0191d5390ec25b1a5df6558e31eebfb6816ba4b0d056782a1734be53f38ec25
3
+ size 165963072
data/datasets/lilac/pile-of-law-constitutions/text/gte-small/hnsw.lookup.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0391b6a70d478587e8e4880da808b3964711040244e72030392f7c1cdac7ad4
3
+ size 1832634
data/datasets/lilac/pile-of-law-constitutions/text/gte-small/signal_manifest.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [],
3
+ "parquet_id": "gte-small(text)",
4
+ "data_schema": {
5
+ "fields": {
6
+ "__rowid__": {
7
+ "dtype": "string"
8
+ },
9
+ "text": {
10
+ "fields": {
11
+ "gte-small": {
12
+ "repeated_field": {
13
+ "fields": {
14
+ "embedding": {
15
+ "dtype": "embedding"
16
+ }
17
+ },
18
+ "dtype": "string_span"
19
+ },
20
+ "signal": {
21
+ "signal_name": "gte-small"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ }
27
+ },
28
+ "signal": {
29
+ "signal_name": "gte-small"
30
+ },
31
+ "enriched_path": [
32
+ "text"
33
+ ],
34
+ "vector_store": "hnsw"
35
+ }
data/datasets/lilac/pile-of-law-constitutions/text/gte-small/spans.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8daf511d5157878cacab7058e516638d8c3c354434cb2792f9dd2a83637b21b5
3
+ size 1029851
data/datasets/lilac/pile-of-law-constitutions/text/near_dup/data-00000-of-00001.parquet ADDED
Binary file (8.57 kB). View file
 
data/datasets/lilac/pile-of-law-constitutions/text/near_dup/signal_manifest.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [
3
+ "data-00000-of-00001.parquet"
4
+ ],
5
+ "parquet_id": "near_dup(text)",
6
+ "data_schema": {
7
+ "fields": {
8
+ "__rowid__": {
9
+ "dtype": "string"
10
+ },
11
+ "text": {
12
+ "fields": {
13
+ "near_dup": {
14
+ "fields": {
15
+ "cluster_id": {
16
+ "dtype": "uint32",
17
+ "categorical": true
18
+ }
19
+ },
20
+ "signal": {
21
+ "threshold": 0.85,
22
+ "signal_name": "near_dup"
23
+ }
24
+ }
25
+ }
26
+ }
27
+ }
28
+ },
29
+ "signal": {
30
+ "threshold": 0.85,
31
+ "signal_name": "near_dup"
32
+ },
33
+ "enriched_path": [
34
+ "text"
35
+ ]
36
+ }
data/datasets/lilac/pile-of-law-constitutions/text/pii/data-00000-of-00001.parquet ADDED
Binary file (9.94 kB). View file
 
data/datasets/lilac/pile-of-law-constitutions/text/pii/signal_manifest.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [
3
+ "data-00000-of-00001.parquet"
4
+ ],
5
+ "parquet_id": "pii(text)",
6
+ "data_schema": {
7
+ "fields": {
8
+ "__rowid__": {
9
+ "dtype": "string"
10
+ },
11
+ "text": {
12
+ "fields": {
13
+ "pii": {
14
+ "fields": {
15
+ "emails": {
16
+ "repeated_field": {
17
+ "dtype": "string_span"
18
+ }
19
+ },
20
+ "ip_addresses": {
21
+ "repeated_field": {
22
+ "dtype": "string_span"
23
+ }
24
+ },
25
+ "secrets": {
26
+ "repeated_field": {
27
+ "dtype": "string_span"
28
+ }
29
+ }
30
+ },
31
+ "signal": {
32
+ "signal_name": "pii"
33
+ }
34
+ }
35
+ }
36
+ }
37
+ }
38
+ },
39
+ "signal": {
40
+ "signal_name": "pii"
41
+ },
42
+ "enriched_path": [
43
+ "text"
44
+ ]
45
+ }