Ubuntu
committed on
Commit
•
6b1f9f6
1
Parent(s):
f81b82b
added dataset for content moderation
Browse files
- data/categories.csv +3 -0
- data/categories_refined.csv +3 -0
- data/categories_refined.json +29 -0
- data/categories_refined_reverse.json +29 -0
- data/final_adult_content.xlsx +0 -0
- data_categories/Adult.csv +3 -0
- data_categories/Arts_and_Entertainment.csv +3 -0
- data_categories/Autos_and_Vehicles.csv +3 -0
- data_categories/Beauty_and_Fitness.csv +3 -0
- data_categories/Books_and_Literature.csv +3 -0
- data_categories/Business_and_Industrial.csv +3 -0
- data_categories/Computers_and_Electronics.csv +3 -0
- data_categories/Finance.csv +3 -0
- data_categories/Food_and_Drink.csv +3 -0
- data_categories/Games.csv +3 -0
- data_categories/Health.csv +3 -0
- data_categories/Hobbies_and_Leisure.csv +3 -0
- data_categories/Home_and_Garden.csv +3 -0
- data_categories/Internet_and_Telecom.csv +3 -0
- data_categories/Jobs_and_Education.csv +3 -0
- data_categories/Law_and_Government.csv +3 -0
- data_categories/News.csv +3 -0
- data_categories/Online Communities.csv +3 -0
- data_categories/People_and_Society.csv +3 -0
- data_categories/Pets_and_Animals.csv +3 -0
- data_categories/Real Estate.csv +3 -0
- data_categories/Reference.csv +3 -0
- data_categories/Science.csv +3 -0
- data_categories/Sensitive Subjects.csv +3 -0
- data_categories/Shopping.csv +3 -0
- data_categories/Sports.csv +3 -0
- data_categories/Travel_and_Transportation.csv +3 -0
- requirements.txt +5 -1
- research/07_adult_content_dataset.ipynb +0 -0
- research/07_creating_data_for_categories.ipynb +0 -0
- research/trials.ipynb +7 -0
data/categories.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:778a6031897e58a22754cb61b8ca1fe3316d360708a855cd2fc7f3b3172dbff9
|
3 |
+
size 52011
|
data/categories_refined.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7dc99dd917f1d90c0e240fafcc0dc2c164d39ca692c5b538558c94213c284d6a
|
3 |
+
size 473
|
data/categories_refined.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Beauty_and_Fitness": 0,
|
3 |
+
"People_and_Society": 1,
|
4 |
+
"Travel_and_Transportation": 2,
|
5 |
+
"Shopping": 3,
|
6 |
+
"Adult": 4,
|
7 |
+
"Sports": 5,
|
8 |
+
"Science": 6,
|
9 |
+
"Food_and_Drink": 7,
|
10 |
+
"News": 8,
|
11 |
+
"Sensitive Subjects": 9,
|
12 |
+
"Autos_and_Vehicles": 10,
|
13 |
+
"Law_and_Government": 11,
|
14 |
+
"Business_and_Industrial": 12,
|
15 |
+
"Health": 13,
|
16 |
+
"Real Estate": 14,
|
17 |
+
"Books_and_Literature": 15,
|
18 |
+
"Computers_and_Electronics": 16,
|
19 |
+
"Internet_and_Telecom": 17,
|
20 |
+
"Home_and_Garden": 18,
|
21 |
+
"Jobs_and_Education": 19,
|
22 |
+
"Online Communities": 20,
|
23 |
+
"Finance": 21,
|
24 |
+
"Arts_and_Entertainment": 22,
|
25 |
+
"Games": 23,
|
26 |
+
"Hobbies_and_Leisure": 24,
|
27 |
+
"Reference": 25,
|
28 |
+
"Pets_and_Animals": 26
|
29 |
+
}
|
data/categories_refined_reverse.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"0": "Beauty_and_Fitness",
|
3 |
+
"1": "People_and_Society",
|
4 |
+
"2": "Travel_and_Transportation",
|
5 |
+
"3": "Shopping",
|
6 |
+
"4": "Adult",
|
7 |
+
"5": "Sports",
|
8 |
+
"6": "Science",
|
9 |
+
"7": "Food_and_Drink",
|
10 |
+
"8": "News",
|
11 |
+
"9": "Sensitive Subjects",
|
12 |
+
"10": "Autos_and_Vehicles",
|
13 |
+
"11": "Law_and_Government",
|
14 |
+
"12": "Business_and_Industrial",
|
15 |
+
"13": "Health",
|
16 |
+
"14": "Real Estate",
|
17 |
+
"15": "Books_and_Literature",
|
18 |
+
"16": "Computers_and_Electronics",
|
19 |
+
"17": "Internet_and_Telecom",
|
20 |
+
"18": "Home_and_Garden",
|
21 |
+
"19": "Jobs_and_Education",
|
22 |
+
"20": "Online Communities",
|
23 |
+
"21": "Finance",
|
24 |
+
"22": "Arts_and_Entertainment",
|
25 |
+
"23": "Games",
|
26 |
+
"24": "Hobbies_and_Leisure",
|
27 |
+
"25": "Reference",
|
28 |
+
"26": "Pets_and_Animals"
|
29 |
+
}
|
data/final_adult_content.xlsx
ADDED
Binary file (66.2 kB). View file
|
|
data_categories/Adult.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22de4244454e054a6eda1b3cb6c6172e7d5b82be16b54a1ce61a4595506a54c0
|
3 |
+
size 17372
|
data_categories/Arts_and_Entertainment.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57e1362eda4777624ea8a914bad2941f4be81692305e9dd58c8d785d3bf332e9
|
3 |
+
size 40121
|
data_categories/Autos_and_Vehicles.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eecc87c19910ad8106cd0469d653b648b8467d2e053da2bdb1daf8feadb8a41a
|
3 |
+
size 94471
|
data_categories/Beauty_and_Fitness.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25b6bfaa17ae6c8a0a0a2216e701d83bce11edd111b40d4d70e6139ef53a692b
|
3 |
+
size 32971
|
data_categories/Books_and_Literature.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cd3f1c35c45c75730dd5c5b40e6d783d38424a90205d24ba07037efc6d74fd1
|
3 |
+
size 41885
|
data_categories/Business_and_Industrial.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:235b21afaa1e2a4e8f9ffb22df3e5498a29ae47a95ff2bb538ea2947fa685a62
|
3 |
+
size 46290
|
data_categories/Computers_and_Electronics.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23ba5a3666e7d67ef57a7fcf301685ae7b66ffc1ed4f33ee6d7b01d11af136ed
|
3 |
+
size 27167
|
data_categories/Finance.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b061d6a2d4a9b298354add7aec0ae21eaff9f0b722b87a90c7e51f39c56b1f7
|
3 |
+
size 37987
|
data_categories/Food_and_Drink.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:317f8d87736b3523b83ec3fe0c8bcd2d36df06fce8076a4a414125bfc1ca249b
|
3 |
+
size 42073
|
data_categories/Games.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08d8c1f1995e43b827c6abd02781ac9e2aff08937fdf9d210ae3066728816308
|
3 |
+
size 27911
|
data_categories/Health.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ee5d6e90846d5ca5479c1e2a404b1cdc67a5e18bc464140c724a64efdf66d72
|
3 |
+
size 30005
|
data_categories/Hobbies_and_Leisure.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c6dd902daefb79a277d66511a78d9d9d68e8b5710ddbd1b9d5c346b06e29c0e
|
3 |
+
size 25404
|
data_categories/Home_and_Garden.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2037b08533f76de4f4c5512f9a697e76dc7c276fab230eee27bf6af93e8bca4
|
3 |
+
size 41020
|
data_categories/Internet_and_Telecom.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c2db27b427661774cd4f202a11070c7a586af3498256e836dbccdd951c9b83c
|
3 |
+
size 45065
|
data_categories/Jobs_and_Education.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa9facf376cc8f80a8c0c90591bf17665ae254a7545aaf9255a50b74ecd15a91
|
3 |
+
size 41746
|
data_categories/Law_and_Government.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:085a7cc338c3fb1efc9c62443950831d122453d1d7fab84fc317bce0772f90cf
|
3 |
+
size 41879
|
data_categories/News.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27e0a8d81bbe5cd978dea88d3eda0fc01e3528106a9a3bd2e9fc5bdfdeb50632
|
3 |
+
size 23200
|
data_categories/Online Communities.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:903899c0689fab2c6648a715e5e488429b847054af33c1d26b252eb36352cf29
|
3 |
+
size 46352
|
data_categories/People_and_Society.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ac0f67f4c684e3a5b56f925e60ed6d5883947b828283d4cbdebae31451487ee
|
3 |
+
size 36511
|
data_categories/Pets_and_Animals.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7d31c9e93973ac0a65c82b81c2eae9634b06fb3205560b347fd618567e517d7
|
3 |
+
size 57283
|
data_categories/Real Estate.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98584873072b57c4b8629f8071ae0ffe4548f2c1a953260693659f2f740c43a6
|
3 |
+
size 36020
|
data_categories/Reference.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d77e7335dab3965e50b81a91d2f4312e5408705847ad425b0261a0e45acd136
|
3 |
+
size 55091
|
data_categories/Science.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3057d6d3eb499516bd098f13399ab51b4eea95be7816769c8e409638af31d1e
|
3 |
+
size 42699
|
data_categories/Sensitive Subjects.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6851357c45f0aa9baad91b8263d4f85421ae16952682e44279b193e289e9ed32
|
3 |
+
size 10172
|
data_categories/Shopping.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88fe8138d63bd43921db7982b432d2d78bbcf8c17d182cf55877c616e856675f
|
3 |
+
size 46132
|
data_categories/Sports.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b063901d162d04321a70b2761764a9bf97276b746c5c0084ee974216a2ee4812
|
3 |
+
size 40242
|
data_categories/Travel_and_Transportation.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d7becc914296a422a71898a7126c9e5af1c2bc23a9ffe0eebef8f72499b68ad
|
3 |
+
size 44633
|
requirements.txt
CHANGED
@@ -8,4 +8,8 @@ tensorflow
|
|
8 |
tensorflow_hub
|
9 |
tensorflow_text
|
10 |
scikit-learn
|
11 |
-
evaluate
|
|
|
|
|
|
|
|
|
|
8 |
tensorflow_hub
|
9 |
tensorflow_text
|
10 |
scikit-learn
|
11 |
+
evaluate
|
12 |
+
openpyxl
|
13 |
+
summa
|
14 |
+
git+https://github.com/LIAAD/yake
|
15 |
+
multi_rake
|
research/07_adult_content_dataset.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
research/07_creating_data_for_categories.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
research/trials.ipynb
CHANGED
@@ -810,6 +810,13 @@
|
|
810 |
"# df.head(5).values[4][0].split(\"\\n\")\n"
|
811 |
]
|
812 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
813 |
{
|
814 |
"cell_type": "code",
|
815 |
"execution_count": null,
|
|
|
810 |
"# df.head(5).values[4][0].split(\"\\n\")\n"
|
811 |
]
|
812 |
},
|
813 |
+
{
|
814 |
+
"cell_type": "code",
|
815 |
+
"execution_count": null,
|
816 |
+
"metadata": {},
|
817 |
+
"outputs": [],
|
818 |
+
"source": []
|
819 |
+
},
|
820 |
{
|
821 |
"cell_type": "code",
|
822 |
"execution_count": null,
|