Commit
·
273867b
1
Parent(s):
3d59359
up
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from datasets import load_dataset
|
2 |
-
from collections import Counter
|
3 |
from random import sample, shuffle
|
4 |
import datasets
|
5 |
from pandas import DataFrame
|
@@ -65,10 +65,19 @@ def start():
|
|
65 |
|
66 |
# sort by count
|
67 |
ids = sorted(ids.items(), key=lambda x: x[1])
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
# get lowest count ids
|
71 |
-
id_candidates =
|
72 |
|
73 |
# get random `NUM_QUESTIONS` ids to check
|
74 |
image_ids = sample(id_candidates, k=NUM_QUESTIONS)
|
|
|
1 |
from datasets import load_dataset
|
2 |
+
from collections import Counter, defaultdict
|
3 |
from random import sample, shuffle
|
4 |
import datasets
|
5 |
from pandas import DataFrame
|
|
|
65 |
|
66 |
# sort by count
|
67 |
ids = sorted(ids.items(), key=lambda x: x[1])
|
68 |
+
freq_ids = defaultdict(list)
|
69 |
+
for k, v in ids:
|
70 |
+
freq_ids[v].append(k)
|
71 |
+
|
72 |
+
# shuffle the ids within each count category
|
73 |
+
for k, v_list in freq_ids.items():
|
74 |
+
shuffle(v_list)
|
75 |
+
freq_ids[v] = v_list
|
76 |
+
|
77 |
+
shuffled_ids = sum(list(freq_ids.values()), [])
|
78 |
|
79 |
# get lowest count ids
|
80 |
+
id_candidates = shuffled_ids[: (10 * NUM_QUESTIONS)]
|
81 |
|
82 |
# get random `NUM_QUESTIONS` ids to check
|
83 |
image_ids = sample(id_candidates, k=NUM_QUESTIONS)
|