LittleApple-fp16 commited on
Commit
4f8ad24
1 Parent(s): 26a1621

Upload 88 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. waifuc/__init__.py +0 -0
  2. waifuc/__pycache__/__init__.cpython-310.pyc +0 -0
  3. waifuc/action/__init__.py +13 -0
  4. waifuc/action/__pycache__/__init__.cpython-310.pyc +0 -0
  5. waifuc/action/__pycache__/align.cpython-310.pyc +0 -0
  6. waifuc/action/__pycache__/augument.cpython-310.pyc +0 -0
  7. waifuc/action/__pycache__/background.cpython-310.pyc +0 -0
  8. waifuc/action/__pycache__/base.cpython-310.pyc +0 -0
  9. waifuc/action/__pycache__/basic.cpython-310.pyc +0 -0
  10. waifuc/action/__pycache__/ccip.cpython-310.pyc +0 -0
  11. waifuc/action/__pycache__/count.cpython-310.pyc +0 -0
  12. waifuc/action/__pycache__/filename.cpython-310.pyc +0 -0
  13. waifuc/action/__pycache__/filter.cpython-310.pyc +0 -0
  14. waifuc/action/__pycache__/lpips.cpython-310.pyc +0 -0
  15. waifuc/action/__pycache__/split.cpython-310.pyc +0 -0
  16. waifuc/action/__pycache__/tagging.cpython-310.pyc +0 -0
  17. waifuc/action/align.py +51 -0
  18. waifuc/action/augument.py +68 -0
  19. waifuc/action/background.py +10 -0
  20. waifuc/action/base.py +51 -0
  21. waifuc/action/basic.py +16 -0
  22. waifuc/action/ccip.py +151 -0
  23. waifuc/action/count.py +72 -0
  24. waifuc/action/filename.py +46 -0
  25. waifuc/action/filter.py +110 -0
  26. waifuc/action/lpips.py +69 -0
  27. waifuc/action/split.py +145 -0
  28. waifuc/action/tagging.py +83 -0
  29. waifuc/config/__init__.py +0 -0
  30. waifuc/config/__pycache__/__init__.cpython-310.pyc +0 -0
  31. waifuc/config/__pycache__/meta.cpython-310.pyc +0 -0
  32. waifuc/config/meta.py +19 -0
  33. waifuc/export/__init__.py +3 -0
  34. waifuc/export/__pycache__/__init__.cpython-310.pyc +0 -0
  35. waifuc/export/__pycache__/base.cpython-310.pyc +0 -0
  36. waifuc/export/__pycache__/huggingface.cpython-310.pyc +0 -0
  37. waifuc/export/__pycache__/textual_inversion.cpython-310.pyc +0 -0
  38. waifuc/export/base.py +79 -0
  39. waifuc/export/huggingface.py +64 -0
  40. waifuc/export/textual_inversion.py +43 -0
  41. waifuc/model/__init__.py +1 -0
  42. waifuc/model/__pycache__/__init__.cpython-310.pyc +0 -0
  43. waifuc/model/__pycache__/item.cpython-310.pyc +0 -0
  44. waifuc/model/item.py +98 -0
  45. waifuc/source/__init__.py +19 -0
  46. waifuc/source/__pycache__/__init__.cpython-310.pyc +0 -0
  47. waifuc/source/__pycache__/anime_pictures.cpython-310.pyc +0 -0
  48. waifuc/source/__pycache__/base.cpython-310.pyc +0 -0
  49. waifuc/source/__pycache__/compose.cpython-310.pyc +0 -0
  50. waifuc/source/__pycache__/danbooru.cpython-310.pyc +0 -0
waifuc/__init__.py ADDED
File without changes
waifuc/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (131 Bytes). View file
 
waifuc/action/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .align import AlignMaxSizeAction, AlignMinSizeAction, PaddingAlignAction
2
+ from .augument import RandomFilenameAction, RandomChoiceAction, BaseRandomAction, MirrorAction
3
+ from .background import BackgroundRemovalAction
4
+ from .base import BaseAction, ProcessAction, FilterAction, ActionStop
5
+ from .basic import ModeConvertAction
6
+ from .ccip import CCIPAction
7
+ from .count import SliceSelectAction, FirstNSelectAction
8
+ from .filename import FileExtAction, FileOrderAction
9
+ from .filter import NoMonochromeAction, OnlyMonochromeAction, ClassFilterAction, RatingFilterAction, FaceCountAction, \
10
+ HeadCountAction, PersonRatioAction, MinSizeFilterAction, MinAreaFilterAction
11
+ from .lpips import FilterSimilarAction
12
+ from .split import PersonSplitAction, ThreeStageSplitAction
13
+ from .tagging import TaggingAction, TagFilterAction
waifuc/action/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.25 kB). View file
 
waifuc/action/__pycache__/align.cpython-310.pyc ADDED
Binary file (2.29 kB). View file
 
waifuc/action/__pycache__/augument.cpython-310.pyc ADDED
Binary file (3.33 kB). View file
 
waifuc/action/__pycache__/background.cpython-310.pyc ADDED
Binary file (688 Bytes). View file
 
waifuc/action/__pycache__/base.cpython-310.pyc ADDED
Binary file (2.43 kB). View file
 
waifuc/action/__pycache__/basic.cpython-310.pyc ADDED
Binary file (934 Bytes). View file
 
waifuc/action/__pycache__/ccip.cpython-310.pyc ADDED
Binary file (5.22 kB). View file
 
waifuc/action/__pycache__/count.cpython-310.pyc ADDED
Binary file (2.72 kB). View file
 
waifuc/action/__pycache__/filename.cpython-310.pyc ADDED
Binary file (1.99 kB). View file
 
waifuc/action/__pycache__/filter.cpython-310.pyc ADDED
Binary file (5.37 kB). View file
 
waifuc/action/__pycache__/lpips.cpython-310.pyc ADDED
Binary file (3.01 kB). View file
 
waifuc/action/__pycache__/split.cpython-310.pyc ADDED
Binary file (4.77 kB). View file
 
waifuc/action/__pycache__/tagging.cpython-310.pyc ADDED
Binary file (3.4 kB). View file
 
waifuc/action/align.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+
3
+ from PIL import Image
4
+ from imgutils.data import load_image
5
+
6
+ from .base import ProcessAction
7
+ from ..model import ImageItem
8
+
9
+
10
class AlignMaxSizeAction(ProcessAction):
    """Downscale an image so that its longer edge does not exceed ``max_size``.

    Images already within the limit pass through untouched; aspect ratio is
    always preserved.
    """

    def __init__(self, max_size: int):
        self._max_size = max_size

    def process(self, item: ImageItem) -> ImageItem:
        img = item.image
        longest = max(img.width, img.height)
        if longest > self._max_size:
            ratio = longest / self._max_size
            img = img.resize((int(img.width / ratio), int(img.height / ratio)))

        return ImageItem(img, item.meta)
22
+
23
+
24
class AlignMinSizeAction(ProcessAction):
    """Downscale an image so that its shorter edge does not exceed ``min_size``.

    Smaller images pass through untouched; aspect ratio is always preserved.
    """

    def __init__(self, min_size: int):
        self._min_size = min_size

    def process(self, item: ImageItem) -> ImageItem:
        img = item.image
        shortest = min(img.width, img.height)
        if shortest > self._min_size:
            ratio = shortest / self._min_size
            img = img.resize((int(img.width / ratio), int(img.height / ratio)))

        return ImageItem(img, item.meta)
36
+
37
+
38
class PaddingAlignAction(ProcessAction):
    """Fit an image onto a fixed-size canvas by scaling and centered padding.

    The image is scaled (aspect preserved) to fit inside ``size``, then pasted
    centered on a solid ``color`` canvas of exactly that size.
    """

    def __init__(self, size: Tuple[int, int], color: str = 'white'):
        self.width, self.height = size
        self.color = color

    def process(self, item: ImageItem) -> ImageItem:
        # Work in RGBA so the paste below can use the image itself as its alpha mask.
        image = load_image(item.image, force_background=None, mode='RGBA')
        r = min(self.width / image.width, self.height / image.height)
        resized = image.resize((int(image.width * r), int(image.height * r)))

        new_image = Image.new('RGBA', (self.width, self.height), self.color)
        left, top = int((new_image.width - resized.width) // 2), int((new_image.height - resized.height) // 2)
        new_image.paste(resized, (left, top, left + resized.width, top + resized.height), resized)
        # Convert back to the source item's mode so downstream actions see a consistent mode.
        return ImageItem(new_image.convert(item.image.mode), item.meta)
waifuc/action/augument.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from typing import Iterator, Optional, Tuple
4
+
5
+ from PIL import ImageOps
6
+ from hbutils.random import random_sha1
7
+
8
+ from .base import BaseAction
9
+ from ..model import ImageItem
10
+
11
+
12
class BaseRandomAction(BaseAction):
    """Base class for actions driven by a private, reproducibly-seeded RNG."""

    def __init__(self, seed=None):
        # The seed is kept so reset() can restore the identical random sequence.
        self.seed = seed
        self.random = random.Random(self.seed)

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        raise NotImplementedError  # pragma: no cover

    def reset(self):
        # Re-seed: replaying the same input stream reproduces the same decisions.
        self.random = random.Random(self.seed)
22
+
23
+
24
class RandomChoiceAction(BaseRandomAction):
    """Keep each item independently with probability ``p``."""

    def __init__(self, p=0.5, seed=None):
        BaseRandomAction.__init__(self, seed)
        self.p = p

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        draw = self.random.random()
        if draw <= self.p:
            yield item
32
+
33
+
34
class RandomFilenameAction(BaseRandomAction):
    """Rename every item to a random SHA-1-based filename.

    :param ext: Extension (with leading dot) for the new name. When falsy, the
        extension of the item's existing filename is reused.
    :param seed: Optional RNG seed for reproducible names.
    :raises NameError: When no ``ext`` is given and the item has no filename
        to take an extension from.
    """

    def __init__(self, ext: Optional[str] = '.png', seed=None):
        BaseRandomAction.__init__(self, seed)
        self.ext = ext

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            # BUG FIX: os.path.splitext(...) returns (root, ext); the original
            # indexed [0], which is the basename body, so fallback names ended
            # with the old basename instead of its extension.
            ext = self.ext or os.path.splitext(os.path.basename(item.meta['filename']))[1]
        else:
            if self.ext:
                ext = self.ext
            else:
                raise NameError(f'Extension (ext) must be specified '
                                f'when filename not in metadata of image item - {item!r}.')

        filename = random_sha1(rnd=self.random) + ext
        yield ImageItem(item.image, {**item.meta, 'filename': filename})
51
+
52
+
53
class MirrorAction(BaseAction):
    """Emit each item twice: the original image and its horizontal mirror.

    When the item carries a filename, the two outputs receive
    ``_<origin>`` / ``_<mirror>`` suffixes (configurable via ``names``);
    otherwise the metadata is passed through unchanged.
    """

    def __init__(self, names: Tuple[str, str] = ('origin', 'mirror')):
        self.origin_name, self.mirror_name = names

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' not in item.meta:
            yield ImageItem(item.image, item.meta)
            yield ImageItem(ImageOps.mirror(item.image), item.meta)
        else:
            body, ext = os.path.splitext(item.meta['filename'])
            yield ImageItem(item.image,
                            {**item.meta, 'filename': f'{body}_{self.origin_name}{ext}'})
            yield ImageItem(ImageOps.mirror(item.image),
                            {**item.meta, 'filename': f'{body}_{self.mirror_name}{ext}'})

    def reset(self):
        pass
waifuc/action/background.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from imgutils.segment import segment_rgba_with_isnetis
2
+
3
+ from .base import ProcessAction
4
+ from ..model import ImageItem
5
+
6
+
7
class BackgroundRemovalAction(ProcessAction):
    """Remove the background, keeping the segmented subject as an RGBA image."""

    def process(self, item: ImageItem) -> ImageItem:
        # segment_rgba_with_isnetis returns (mask, rgba_image); only the image is kept.
        segmented = segment_rgba_with_isnetis(item.image)[1]
        return ImageItem(segmented, item.meta)
waifuc/action/base.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Iterator, Iterable
2
+
3
+ from ..model import ImageItem
4
+
5
+
6
class ActionStop(Exception):
    """Raised by an action's iter() to terminate the whole stream early."""
    pass
8
+
9
+
10
class BaseAction:
    """Abstract pipeline stage mapping one item to zero or more items."""

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        """Yield output items produced from a single input item."""
        raise NotImplementedError  # pragma: no cover

    def iter_from(self, iter_: Iterable[ImageItem]) -> Iterator[ImageItem]:
        """Apply this action over a whole stream; ActionStop ends the stream."""
        for item in iter_:
            try:
                yield from self.iter(item)
            except ActionStop:
                break

    def reset(self):
        """Clear internal state so the action can be reused on a new stream."""
        raise NotImplementedError  # pragma: no cover
23
+
24
+
25
class ProcessAction(BaseAction):
    """An action that maps every input item to exactly one output item."""

    def process(self, item: ImageItem) -> ImageItem:
        """Transform a single item; subclasses implement this."""
        raise NotImplementedError  # pragma: no cover

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        # Adapt the 1-to-1 process() to the 1-to-many iter() protocol.
        yield self.process(item)

    def reset(self):
        # Stateless by default; stateful subclasses should override.
        pass

    def __call__(self, item: ImageItem) -> ImageItem:
        # Allow the action to be used directly as a callable transform.
        return self.process(item)
37
+
38
+
39
class FilterAction(BaseAction):
    """An action that either keeps or drops each item based on a predicate."""

    def check(self, item: ImageItem) -> bool:
        """Return True to keep the item; subclasses implement this."""
        raise NotImplementedError  # pragma: no cover

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        # Yield the item unchanged only when the predicate passes.
        if self.check(item):
            yield item

    def reset(self):
        # Stateless by default; stateful subclasses should override.
        pass

    def __call__(self, item: ImageItem) -> bool:
        # Allow the filter to be used directly as a predicate.
        return self.check(item)
waifuc/action/basic.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+
3
+ from imgutils.data import load_image
4
+
5
+ from .base import ProcessAction
6
+ from ..model import ImageItem
7
+
8
+
9
class ModeConvertAction(ProcessAction):
    """Convert every image to a fixed PIL mode, optionally flattening any
    transparency onto a solid background color."""

    def __init__(self, mode='RGB', force_background: Optional[str] = 'white'):
        self.mode = mode
        self.force_background = force_background

    def process(self, item: ImageItem) -> ImageItem:
        converted = load_image(item.image, mode=self.mode, force_background=self.force_background)
        return ImageItem(converted, item.meta)
waifuc/action/ccip.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from enum import IntEnum
3
+ from typing import Iterator, Optional, List, Tuple
4
+
5
+ import numpy as np
6
+ from hbutils.string import plural_word
7
+ from hbutils.testing import disable_output
8
+ from imgutils.metrics import ccip_extract_feature, ccip_default_threshold, ccip_clustering, ccip_batch_differences
9
+
10
+ from .base import BaseAction
11
+ from ..model import ImageItem
12
+
13
+
14
class CCIPStatus(IntEnum):
    """Phases of CCIPAction's streaming state machine."""
    INIT = 0x1              # buffering items until the first clustering attempt
    APPROACH = 0x2          # clustering failed so far; keep buffering and retrying
    EVAL = 0x3              # anchor established; filter incoming items one by one
    INIT_WITH_SOURCE = 0x4  # anchor features are seeded from an external init_source
19
+
20
+
21
class CCIPAction(BaseAction):
    """Character-consistency filter driven by CCIP visual features.

    Runs a small state machine over the stream: INIT/APPROACH buffer items
    until a dominant feature cluster can be found; EVAL then releases items
    that match the cluster (the "anchor"). INIT_WITH_SOURCE seeds the anchor
    from ``init_source`` instead of clustering.
    """

    def __init__(self, init_source=None, *, min_val_count: int = 15, step: int = 5,
                 ratio_threshold: float = 0.6, min_clu_dump_ratio: float = 0.3, cmp_threshold: float = 0.5,
                 eps: Optional[float] = None, min_samples: Optional[int] = None,
                 model='ccip-caformer-24-randaug-pruned', threshold: Optional[float] = None):
        self.init_source = init_source

        self.min_val_count = min_val_count          # samples needed before first clustering
        self.step = step                            # retry interval for clustering / dumping
        self.ratio_threshold = ratio_threshold      # minimum dominant-cluster share
        self.min_clu_dump_ratio = min_clu_dump_ratio
        self.cmp_threshold = cmp_threshold          # fraction of matches needed to accept a feature
        self.eps, self.min_samples = eps, min_samples
        self.model = model
        # Fall back to the model's default feature-difference threshold when not given.
        self.threshold = threshold or ccip_default_threshold(self.model)

        self.items = []          # buffered, not-yet-released items
        self.item_released = []  # parallel flags: item already yielded
        self.feats = []          # CCIP features (parallel to items; grows past it in EVAL)
        if self.init_source is not None:
            self.status = CCIPStatus.INIT_WITH_SOURCE
        else:
            self.status = CCIPStatus.INIT

    def _extract_feature(self, item: ImageItem):
        """Return the item's CCIP feature, reusing a precomputed one from meta."""
        if 'ccip_feature' in item.meta:
            return item.meta['ccip_feature']
        else:
            return ccip_extract_feature(item.image, model=self.model)

    def _try_cluster(self) -> bool:
        """Cluster buffered features and keep only a reliable dominant cluster.

        Returns True (pruning items/feats to the chosen cluster) when one
        cluster holds at least ``ratio_threshold`` of the clustered samples
        and is internally consistent enough; False otherwise.
        """
        with disable_output():
            clu_ids = ccip_clustering(self.feats, method='optics', model=self.model,
                                      eps=self.eps, min_samples=self.min_samples)
        clu_counts = {}
        for id_ in clu_ids:
            if id_ != -1:  # -1 marks noise samples in the clustering output
                clu_counts[id_] = clu_counts.get(id_, 0) + 1

        clu_total = sum(clu_counts.values()) if clu_counts else 0
        chosen_id = None
        for id_, count in clu_counts.items():
            if count >= clu_total * self.ratio_threshold:
                chosen_id = id_
                break

        if chosen_id is not None:
            feats = [feat for i, feat in enumerate(self.feats) if clu_ids[i] == chosen_id]
            # Fraction of cluster members that match the cluster itself.
            clu_dump_ratio = np.array([
                self._compare_to_exists(feat, base_set=feats)
                for feat in feats
            ]).astype(float).mean()

            if clu_dump_ratio >= self.min_clu_dump_ratio:
                self.items = [item for i, item in enumerate(self.items) if clu_ids[i] == chosen_id]
                self.item_released = [False] * len(self.items)
                self.feats = [feat for i, feat in enumerate(self.feats) if clu_ids[i] == chosen_id]
                return True
            else:
                return False
        else:
            return False

    def _compare_to_exists(self, feat, base_set=None) -> bool:
        """Whether ``feat`` matches at least ``cmp_threshold`` of the known features."""
        # NOTE(review): the original annotated this as Tuple[bool, List[int]],
        # but a single bool is returned; annotation corrected.
        diffs = ccip_batch_differences([feat, *(base_set or self.feats)], model=self.model)[0, 1:]
        matches = diffs <= self.threshold
        return matches.astype(float).mean() >= self.cmp_threshold

    def _dump_items(self) -> Iterator[ImageItem]:
        """Yield each buffered item at most once, when it matches the anchor set."""
        for i in range(len(self.items)):
            if not self.item_released[i]:
                if self._compare_to_exists(self.feats[i]):
                    self.item_released[i] = True
                    yield self.items[i]

    def _eval_iter(self, item: ImageItem) -> Iterator[ImageItem]:
        """EVAL phase: pass matching items through, growing the anchor set."""
        feat = self._extract_feature(item)
        if self._compare_to_exists(feat):
            self.feats.append(feat)
            yield item

            # Every `step` accepted features, retry releasing buffered items.
            if (len(self.feats) - len(self.items)) % self.step == 0:
                yield from self._dump_items()

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if self.status == CCIPStatus.INIT_WITH_SOURCE:
            # Seed the anchor from the external source, forwarding its items.
            cnt = 0
            logging.info('Existing anchor detected.')
            for item_ in self.init_source:
                self.feats.append(self._extract_feature(item_))
                yield item_
                cnt += 1
            logging.info(f'{plural_word(cnt, "items")} loaded from anchor.')

            self.status = CCIPStatus.EVAL
            yield from self._eval_iter(item)

        elif self.status == CCIPStatus.INIT:
            self.items.append(item)
            self.feats.append(self._extract_feature(item))

            # First clustering attempt once enough samples have accumulated.
            if len(self.items) >= self.min_val_count:
                if self._try_cluster():
                    self.status = CCIPStatus.EVAL
                    yield from self._dump_items()
                else:
                    self.status = CCIPStatus.APPROACH

        elif self.status == CCIPStatus.APPROACH:
            self.items.append(item)
            self.feats.append(self._extract_feature(item))

            # Retry clustering every `step` additional samples.
            if (len(self.items) - self.min_val_count) % self.step == 0:
                if self._try_cluster():
                    self.status = CCIPStatus.EVAL
                    yield from self._dump_items()

        elif self.status == CCIPStatus.EVAL:
            yield from self._eval_iter(item)

        else:
            raise ValueError(f'Unknown status for {self.__class__.__name__} - {self.status!r}.')

    def reset(self):
        """Restore the initial state so the action can process a fresh stream."""
        self.items.clear()
        self.item_released.clear()
        self.feats.clear()
        # NOTE(review): truthiness here vs `is not None` in __init__ — an empty
        # init_source restarts in INIT rather than INIT_WITH_SOURCE; confirm intended.
        if self.init_source:
            self.status = CCIPStatus.INIT_WITH_SOURCE
        else:
            self.status = CCIPStatus.INIT
waifuc/action/count.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Iterator
2
+
3
+ from .base import BaseAction, ActionStop
4
+ from ..model import ImageItem
5
+
6
+
7
class FirstNSelectAction(BaseAction):
    """Pass through only the first ``n`` items, then stop the whole stream."""

    def __init__(self, n: int):
        self._n = n
        self._passed = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if self._passed >= self._n:
            raise ActionStop
        yield item
        self._passed += 1

    def reset(self):
        self._passed = 0
21
+
22
+
23
+ def _slice_process(start, stop, step):
24
+ start = 0 if start is None else start
25
+ step = 1 if step is None else step
26
+ if not isinstance(start, int) or start < 0:
27
+ raise ValueError(f'Start should be an integer no less than 0, but {start!r} found.')
28
+ if stop is not None and (not isinstance(stop, int) or stop < 0):
29
+ raise ValueError(f'Stop should be an integer no less than 0, but {stop!r} found.')
30
+ if not isinstance(step, int) or step < 1:
31
+ raise ValueError(f'Step should be an integer no less than 1, but {step!r} found.')
32
+
33
+ return start, stop, step
34
+
35
+
36
class SliceSelectAction(BaseAction):
    """Select items by stream position with slice-like ``[start[, stop[, step]]]`` args.

    Mirrors ``slice`` semantics: ``SliceSelectAction(10)`` keeps items 0-9,
    ``SliceSelectAction(2, 10, 3)`` keeps items 2, 5, 8. Once no further item
    can match (bounded slices only), the whole stream is stopped early.
    """

    def __init__(self, *args):
        if len(args) == 0:
            slice_args = _slice_process(None, None, None)
        elif len(args) == 1:
            slice_args = _slice_process(None, args[0], None)
        elif len(args) == 2:
            slice_args = _slice_process(args[0], args[1], None)
        elif len(args) == 3:
            slice_args = _slice_process(args[0], args[1], args[2])
        else:
            # FIX: original message read "should no no more than 3".
            raise ValueError(f'Arguments of {self.__class__.__name__} should be no more than 3, but {args!r} found.')

        self._start, self._stop, self._step = slice_args
        if self._stop is not None:
            # Index of the last item that can possibly be selected.
            self._max = self._start + ((self._stop - self._start - 1) // self._step) * self._step
        else:
            self._max = None  # unbounded slice: never exhausts
        self._current = 0

    def _check_current(self):
        # True when the current index lies inside [start, stop) on the step grid.
        if self._stop is not None and self._current >= self._stop:
            return False
        if self._current < self._start:
            return False
        return (self._current - self._start) % self._step == 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        # BUG FIX: the original compared ``self._current > self._max`` even when
        # ``self._max`` is None (no stop given), raising TypeError on Python 3.
        if self._max is not None and self._current > self._max:
            raise ActionStop
        if self._check_current():
            yield item
        self._current += 1

    def reset(self):
        self._current = 0
waifuc/action/filename.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Iterator, Optional
3
+
4
+ from .base import BaseAction
5
+ from ..model import ImageItem
6
+
7
+
8
class FileExtAction(BaseAction):
    """Force a fixed file extension on every item, numbering untitled items."""

    def __init__(self, ext: str):
        self.ext = ext
        self.untitles = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            filename = os.path.splitext(item.meta['filename'])[0] + self.ext
        else:
            self.untitles += 1
            filename = f'untitled_{self.untitles}{self.ext}'

        yield ImageItem(item.image, {**item.meta, 'filename': filename})

    def reset(self):
        self.untitles = 0
25
+
26
+
27
class FileOrderAction(BaseAction):
    """Rename items to sequential numbers (1, 2, 3, ...), forcing ``ext`` or
    reusing each item's existing extension when ``ext`` is None."""

    def __init__(self, ext: Optional[str] = '.png'):
        self.ext = ext
        self._current = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        self._current += 1
        if 'filename' in item.meta:
            original_ext = os.path.splitext(item.meta['filename'])[1]
            new_filename = f'{self._current}{self.ext or original_ext}'
        elif self.ext:
            new_filename = f'{self._current}{self.ext}'
        else:
            raise ValueError('No extension name provided for unnamed file.')

        yield ImageItem(item.image, {**item.meta, 'filename': new_filename})

    def reset(self):
        self._current = 0
waifuc/action/filter.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Literal
2
+
3
+ from imgutils.detect import detect_faces, detect_heads, detect_person
4
+ from imgutils.validate import is_monochrome, anime_classify, anime_rating
5
+
6
+ from .base import FilterAction
7
+ from ..model import ImageItem
8
+
9
+
10
class NoMonochromeAction(FilterAction):
    """Drop images classified as monochrome."""

    def check(self, item: ImageItem) -> bool:
        return not is_monochrome(item.image)
13
+
14
+
15
class OnlyMonochromeAction(FilterAction):
    """Keep only images classified as monochrome."""

    def check(self, item: ImageItem) -> bool:
        return is_monochrome(item.image)
18
+
19
+
20
+ ImageClassTyping = Literal['illustration', 'bangumi', 'comic', '3d']
21
+
22
+
23
class ClassFilterAction(FilterAction):
    """Keep images whose predicted class is in ``classes``, optionally
    requiring the classification score to reach ``threshold``."""

    def __init__(self, classes: List[ImageClassTyping], threshold: Optional[float] = None, **kwargs):
        self.classes = classes
        self.threshold = threshold
        self.kwargs = kwargs

    def check(self, item: ImageItem) -> bool:
        category, confidence = anime_classify(item.image, **self.kwargs)
        if category not in self.classes:
            return False
        return self.threshold is None or confidence >= self.threshold
32
+
33
+
34
+ ImageRatingTyping = Literal['safe', 'r15', 'r18']
35
+
36
+
37
class RatingFilterAction(FilterAction):
    """Keep images whose predicted content rating is in ``ratings``, optionally
    requiring the rating score to reach ``threshold``."""

    def __init__(self, ratings: List[ImageRatingTyping], threshold: Optional[float] = None, **kwargs):
        self.ratings = ratings
        self.threshold = threshold
        self.kwargs = kwargs

    def check(self, item: ImageItem) -> bool:
        predicted, confidence = anime_rating(item.image, **self.kwargs)
        if predicted not in self.ratings:
            return False
        return self.threshold is None or confidence >= self.threshold
46
+
47
+
48
class FaceCountAction(FilterAction):
    """Keep only images whose detected face count equals ``count``.

    Detection parameters are forwarded to ``imgutils.detect.detect_faces``.
    """

    def __init__(self, count: int, level: str = 's', version: str = 'v1.4',
                 conf_threshold: float = 0.25, iou_threshold: float = 0.7):
        self.count = count
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        detection = detect_faces(item.image, self.level, self.version,
                                 conf_threshold=self.conf_threshold, iou_threshold=self.iou_threshold)
        return len(detection) == self.count
61
+
62
+
63
class HeadCountAction(FilterAction):
    """Keep only images whose detected head count equals ``count``."""

    def __init__(self, count: int, level: str = 's', conf_threshold: float = 0.3, iou_threshold: float = 0.7):
        self.count = count
        self.level = level
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        heads = detect_heads(item.image, self.level,
                             conf_threshold=self.conf_threshold,
                             iou_threshold=self.iou_threshold)
        return len(heads) == self.count
77
+
78
+
79
class PersonRatioAction(FilterAction):
    """Keep images with exactly one detected person whose bounding box covers
    at least ``ratio`` of the whole frame."""

    def __init__(self, ratio: float = 0.4, level: str = 'm', version: str = 'v1.1',
                 conf_threshold: float = 0.3, iou_threshold: float = 0.5):
        self.ratio = ratio
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        # NOTE(review): 640 is passed positionally — presumably detect_person's
        # inference input size; confirm against imgutils' signature.
        detections = detect_person(item.image, self.level, self.version, 640, self.conf_threshold, self.iou_threshold)
        if len(detections) != 1:
            return False

        # Compare bounding-box area against the full image area.
        (x0, y0, x1, y1), _, _ = detections[0]
        return abs((x1 - x0) * (y1 - y0)) >= self.ratio * (item.image.width * item.image.height)
95
+
96
+
97
class MinSizeFilterAction(FilterAction):
    """Keep only images whose shorter edge is at least ``min_size`` pixels."""

    def __init__(self, min_size: int):
        self.min_size = min_size

    def check(self, item: ImageItem) -> bool:
        shorter_edge = min(item.image.width, item.image.height)
        return shorter_edge >= self.min_size
103
+
104
+
105
class MinAreaFilterAction(FilterAction):
    """Keep only images whose pixel area is at least ``min_size`` squared,
    i.e. whose geometric-mean edge length is at least ``min_size``."""

    def __init__(self, min_size: int):
        self.min_size = min_size

    def check(self, item: ImageItem) -> bool:
        return (item.image.width * item.image.height) ** 0.5 >= self.min_size
waifuc/action/lpips.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Iterator, Literal
2
+
3
+ import numpy as np
4
+ from imgutils.metrics import lpips_difference, lpips_extract_feature
5
+
6
+ from .base import BaseAction
7
+ from ..model import ImageItem
8
+
9
+
10
class FeatureBucket:
    """Rolling store of LPIPS features for near-duplicate detection.

    Only features with a close aspect ratio are actually compared, which skips
    most of the expensive LPIPS calls. The store is trimmed back to
    ``capacity`` entries whenever it reaches twice that size.
    """

    def __init__(self, threshold: float = 0.45, capacity: int = 500, rtol=1.e-5, atol=1.e-8):
        self.threshold = threshold
        self.rtol, self.atol = rtol, atol
        self.features = []
        self.ratios = np.array([], dtype=float)
        self.capacity = capacity

    def check_duplicate(self, feat, ratio: float):
        """Return True when a stored feature with a similar aspect ratio is
        within ``threshold`` LPIPS difference of ``feat``."""
        candidates = np.where(np.isclose(self.ratios, ratio, rtol=self.rtol, atol=self.atol))[0]
        for idx in candidates:
            if lpips_difference(self.features[idx.item()], feat) <= self.threshold:
                return True

        return False

    def add(self, feat, ratio: float):
        """Store a feature/ratio pair, trimming once size reaches 2x capacity."""
        self.features.append(feat)
        self.ratios = np.append(self.ratios, ratio)
        if len(self.features) >= self.capacity * 2:
            self.features = self.features[-self.capacity:]
            self.ratios = self.ratios[-self.capacity:]
32
+
33
+
34
+ FilterSimilarModeTyping = Literal['all', 'group']
35
+
36
+
37
class FilterSimilarAction(BaseAction):
    """Drop images that are LPIPS-near-duplicates of previously seen ones.

    :param mode: 'all' compares against everything seen so far; 'group'
        compares only within items sharing the same ``group_id`` metadata.
    :param threshold: LPIPS difference at or below which images are duplicates.
    :param capacity: Rolling feature-store size (see FeatureBucket).
    :param rtol: Relative aspect-ratio tolerance for the cheap pre-filter.
    :param atol: Absolute aspect-ratio tolerance for the cheap pre-filter.
    """

    def __init__(self, mode: FilterSimilarModeTyping = 'all', threshold: float = 0.45,
                 capacity: int = 500, rtol=5.e-2, atol=2.e-2):
        self.mode = mode
        self.threshold, self.rtol, self.atol = threshold, rtol, atol
        self.capacity = capacity
        self.buckets: Dict[str, FeatureBucket] = {}
        self.global_bucket = FeatureBucket(threshold, self.capacity, rtol, atol)

    def _get_bin(self, group_id) -> FeatureBucket:
        """Pick (and, in 'group' mode, lazily create) the bucket for this item."""
        if self.mode == 'all':
            return self.global_bucket
        elif self.mode == 'group':
            if group_id not in self.buckets:
                self.buckets[group_id] = FeatureBucket(self.threshold, self.capacity, self.rtol, self.atol)

            return self.buckets[group_id]
        else:
            raise ValueError(f'Unknown mode for filter similar action - {self.mode!r}.')

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        image = item.image
        # Aspect ratio is a cheap pre-filter key; LPIPS only runs on close ratios.
        ratio = image.height * 1.0 / image.width
        feat = lpips_extract_feature(image)
        bucket = self._get_bin(item.meta.get('group_id'))

        if not bucket.check_duplicate(feat, ratio):
            bucket.add(feat, ratio)
            yield item

    def reset(self):
        self.buckets.clear()
        self.global_bucket = FeatureBucket(self.threshold, self.capacity, self.rtol, self.atol)
waifuc/action/split.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Iterator, Optional
3
+
4
+ from imgutils.detect import detect_person, detect_heads, detect_halfbody, detect_eyes
5
+
6
+ from .base import BaseAction
7
+ from ..model import ImageItem
8
+
9
+
10
class PersonSplitAction(BaseAction):
    """Split each image into one cropped item per detected person.

    :param keep_original: Also yield the uncropped source item first.
    :param level: Detector size level forwarded to ``detect_person``.
    :param version: Detector version forwarded to ``detect_person``.
    :param conf_threshold: Detection confidence threshold.
    :param iou_threshold: Detection IoU (NMS) threshold.
    :param keep_origin_tags: Keep the source 'tags' metadata on crops
        (dropped by default, since tags describe the full image).
    """

    def __init__(self, keep_original: bool = False, level: str = 'm', version: str = 'v1.1',
                 conf_threshold: float = 0.3, iou_threshold: float = 0.5, keep_origin_tags: bool = False):
        self.keep_original = keep_original
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.keep_origin_tags = keep_origin_tags

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        detection = detect_person(item.image, self.level, self.version,
                                  conf_threshold=self.conf_threshold, iou_threshold=self.iou_threshold)

        if 'filename' in item.meta:
            filename = item.meta['filename']
            filebody, ext = os.path.splitext(filename)
        else:
            filebody, ext = None, None

        if self.keep_original:
            yield item

        for i, (area, type_, score) in enumerate(detection):
            new_meta = {
                **item.meta,
                'crop': {'type': type_, 'score': score},
            }
            if 'tags' in new_meta and not self.keep_origin_tags:
                del new_meta['tags']
            if filebody is not None:
                # NOTE(review): numbering starts at 0 here, while
                # ThreeStageSplitAction numbers persons from 1.
                new_meta['filename'] = f'{filebody}_person{i}{ext}'
            yield ImageItem(item.image.crop(area), new_meta)

    def reset(self):
        pass
46
+
47
+
48
class ThreeStageSplitAction(BaseAction):
    """Split each image into person, half-body, head (and optionally eye) crops.

    For every detected person (or the whole image when ``split_person`` is
    False), yields the person crop, then the best half-body crop, then a
    scaled square head crop, then optionally one crop per detected eye.

    :param person_conf: kwargs forwarded to ``detect_person``.
    :param halfbody_conf: kwargs forwarded to ``detect_halfbody``.
    :param head_conf: kwargs forwarded to ``detect_heads``.
    :param head_scale: Square head crop edge = max(head box w, h) * this factor.
    :param split_eyes: Also emit eye crops (off by default).
    :param eye_conf: kwargs forwarded to ``detect_eyes``.
    :param eye_scale: Like ``head_scale``, applied within the head crop.
    :param split_person: Detect persons first; when False, the whole image is
        treated as a single person.
    :param keep_origin_tags: Keep the source 'tags' metadata on crops.
    """

    def __init__(self, person_conf: Optional[dict] = None, halfbody_conf: Optional[dict] = None,
                 head_conf: Optional[dict] = None, head_scale: float = 1.5,
                 split_eyes: bool = False, eye_conf: Optional[dict] = None, eye_scale: float = 2.4,
                 split_person: bool = True, keep_origin_tags: bool = False):
        self.person_conf = dict(person_conf or {})
        self.halfbody_conf = dict(halfbody_conf or {})
        self.head_conf = dict(head_conf or {})
        self.eye_conf = dict(eye_conf or {})
        self.head_scale = head_scale
        self.eye_scale = eye_scale
        self.split_eyes = split_eyes
        self.split_person = split_person
        self.keep_origin_tags = keep_origin_tags

    def _split_person(self, item: ImageItem, filebody, ext):
        """Yield (index, person item) pairs; person indices start at 1."""
        if self.split_person:
            for i, (px, type_, score) in enumerate(detect_person(item.image, **self.person_conf), start=1):
                person_image = item.image.crop(px)
                person_meta = {
                    **item.meta,
                    'crop': {'type': type_, 'score': score},
                }
                if 'tags' in person_meta and not self.keep_origin_tags:
                    del person_meta['tags']
                if filebody is not None:
                    person_meta['filename'] = f'{filebody}_person{i}{ext}'
                yield i, ImageItem(person_image, person_meta)

        else:
            # No person detection requested: treat the full image as person #1.
            yield 1, item

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            filename = item.meta['filename']
            filebody, ext = os.path.splitext(filename)
        else:
            filebody, ext = None, None

        for i, person_item in self._split_person(item, filebody, ext):
            person_image = person_item.image
            yield person_item

            # Stage 2: best half-body crop (first detection only).
            half_detects = detect_halfbody(person_image, **self.halfbody_conf)
            if half_detects:
                halfbody_area, halfbody_type, halfbody_score = half_detects[0]
                halfbody_image = person_image.crop(halfbody_area)
                halfbody_meta = {
                    **item.meta,
                    'crop': {'type': halfbody_type, 'score': halfbody_score},
                }
                if 'tags' in halfbody_meta and not self.keep_origin_tags:
                    del halfbody_meta['tags']
                if filebody is not None:
                    halfbody_meta['filename'] = f'{filebody}_person{i}_halfbody{ext}'
                yield ImageItem(halfbody_image, halfbody_meta)

            # Stage 3: square head crop, enlarged by head_scale, clamped to bounds.
            head_detects = detect_heads(person_image, **self.head_conf)
            if head_detects:
                (hx0, hy0, hx1, hy1), head_type, head_score = head_detects[0]
                cx, cy = (hx0 + hx1) / 2, (hy0 + hy1) / 2
                width, height = hx1 - hx0, hy1 - hy0
                width = height = max(width, height) * self.head_scale
                x0, y0 = int(max(cx - width / 2, 0)), int(max(cy - height / 2, 0))
                x1, y1 = int(min(cx + width / 2, person_image.width)), int(min(cy + height / 2, person_image.height))
                head_image = person_image.crop((x0, y0, x1, y1))
                head_meta = {
                    **item.meta,
                    'crop': {'type': head_type, 'score': head_score},
                }
                if 'tags' in head_meta and not self.keep_origin_tags:
                    del head_meta['tags']
                if filebody is not None:
                    head_meta['filename'] = f'{filebody}_person{i}_head{ext}'
                yield ImageItem(head_image, head_meta)

                # Optional stage 4: eye crops taken within the head crop.
                if self.split_eyes:
                    eye_detects = detect_eyes(head_image, **self.eye_conf)
                    for j, ((ex0, ey0, ex1, ey1), eye_type, eye_score) in enumerate(eye_detects):
                        cx, cy = (ex0 + ex1) / 2, (ey0 + ey1) / 2
                        width, height = ex1 - ex0, ey1 - ey0
                        width = height = max(width, height) * self.eye_scale
                        x0, y0 = int(max(cx - width / 2, 0)), int(max(cy - height / 2, 0))
                        x1, y1 = int(min(cx + width / 2, head_image.width)), \
                                 int(min(cy + height / 2, head_image.height))
                        eye_image = head_image.crop((x0, y0, x1, y1))
                        eye_meta = {
                            **item.meta,
                            'crop': {'type': eye_type, 'score': eye_score},
                        }
                        if 'tags' in eye_meta and not self.keep_origin_tags:
                            del eye_meta['tags']
                        if filebody is not None:
                            eye_meta['filename'] = f'{filebody}_person{i}_head_eye{j}{ext}'
                        yield ImageItem(eye_image, eye_meta)

    def reset(self):
        pass
waifuc/action/tagging.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+ from typing import Iterator, Union, List, Mapping, Literal
3
+
4
+ from PIL import Image
5
+ from imgutils.tagging import get_deepdanbooru_tags, get_wd14_tags, get_mldanbooru_tags
6
+
7
+ from .base import ProcessAction, BaseAction
8
+ from ..model import ImageItem
9
+
10
+
11
def _deepdanbooru_tagging(image: Image.Image, use_real_name: bool = False,
                          general_threshold: float = 0.5, character_threshold: float = 0.5, **kwargs):
    """Tag *image* with DeepDanbooru and merge general + character tags into one dict."""
    _ = kwargs  # unrelated keyword arguments are accepted but deliberately ignored
    rating, features, characters = get_deepdanbooru_tags(
        image, use_real_name, general_threshold, character_threshold)
    _ = rating  # rating scores are not part of the returned tag mapping
    merged = dict(features)
    merged.update(characters)
    return merged
16
+
17
+
18
def _wd14_tagging(image: Image.Image, model_name: str,
                  general_threshold: float = 0.35, character_threshold: float = 0.85, **kwargs):
    """Tag *image* with the given WD14 tagger model, merging general + character tags."""
    _ = kwargs  # unrelated keyword arguments are accepted but deliberately ignored
    rating, features, characters = get_wd14_tags(image, model_name, general_threshold, character_threshold)
    _ = rating  # rating scores are not part of the returned tag mapping
    merged = dict(features)
    merged.update(characters)
    return merged
23
+
24
+
25
def _mldanbooru_tagging(image: Image.Image, use_real_name: bool = False, general_threshold: float = 0.7, **kwargs):
    """Tag *image* with ML-Danbooru; only a general tag mapping is produced."""
    _ = kwargs  # unrelated keyword arguments are accepted but deliberately ignored
    return get_mldanbooru_tags(image, use_real_name, general_threshold)
29
+
30
+
31
#: Registry of supported taggers.  Each value is a callable taking
#: ``image=...`` plus tagger-specific keyword arguments and returning a
#: ``{tag: score}`` mapping.
_TAGGING_METHODS = {
    'deepdanbooru': _deepdanbooru_tagging,
    'wd14_vit': partial(_wd14_tagging, model_name='ViT'),
    'wd14_convnext': partial(_wd14_tagging, model_name='ConvNext'),
    'wd14_convnextv2': partial(_wd14_tagging, model_name='ConvNextV2'),
    'wd14_swinv2': partial(_wd14_tagging, model_name='SwinV2'),
    'mldanbooru': _mldanbooru_tagging,
}

#: Valid tagging method names; must stay in sync with ``_TAGGING_METHODS`` keys.
TaggingMethodTyping = Literal[
    'deepdanbooru', 'wd14_vit', 'wd14_convnext', 'wd14_convnextv2', 'wd14_swinv2', 'mldanbooru']
42
+
43
+
44
class TaggingAction(ProcessAction):
    """Attach a ``tags`` score mapping to each item's meta using the chosen tagger.

    :param method: name of the tagging method (key of the tagger registry).
    :param force: when True, re-tag items even if they already carry tags.
    :param kwargs: extra keyword arguments forwarded to the tagger callable.
    """

    def __init__(self, method: TaggingMethodTyping = 'wd14_convnextv2', force: bool = False, **kwargs):
        # Resolve the tagger once at construction; unknown names raise KeyError here.
        self.method = _TAGGING_METHODS[method]
        self.force = force
        self.kwargs = kwargs

    def process(self, item: ImageItem) -> ImageItem:
        # Already-tagged items pass through untouched unless a re-tag is forced.
        if not self.force and 'tags' in item.meta:
            return item

        tags = self.method(image=item.image, **self.kwargs)
        new_meta = dict(item.meta)
        new_meta['tags'] = tags
        return ImageItem(item.image, new_meta)
56
+
57
+
58
class TagFilterAction(BaseAction):
    """Filter items by tag scores: an item survives only if every required
    tag reaches its minimum score.

    :param tags: either a list of tag names (any strictly-positive score
        passes, via a ``1e-6`` threshold) or a mapping ``{tag: min_score}``.
    :param method: tagging method used when an item has no tags yet.
    :param kwargs: extra keyword arguments forwarded to :class:`TaggingAction`.
    :raises TypeError: when ``tags`` is neither a list/tuple nor a dict.
    """

    def __init__(self, tags: Union[List[str], Mapping[str, float]],
                 method: TaggingMethodTyping = 'wd14_convnextv2', **kwargs):
        if isinstance(tags, (list, tuple)):
            # Any strictly-positive score counts as "tag present".
            self.tags = {tag: 1e-6 for tag in tags}
        elif isinstance(tags, dict):
            self.tags = dict(tags)
        else:
            raise TypeError(f'Unknown type of tags - {tags!r}.')
        self.tagger = TaggingAction(method, force=False, **kwargs)

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        item = self.tagger(item)
        tags = item.meta['tags']

        # Bug fix: previously ``tags[tag]`` raised KeyError when the tagger
        # did not emit a required tag at all.  A missing tag now counts as
        # score 0.0, so the item is simply filtered out.
        if all(tags.get(tag, 0.0) >= min_score for tag, min_score in self.tags.items()):
            yield item

    def reset(self):
        self.tagger.reset()
waifuc/config/__init__.py ADDED
File without changes
waifuc/config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (138 Bytes). View file
 
waifuc/config/__pycache__/meta.cpython-310.pyc ADDED
Binary file (395 Bytes). View file
 
waifuc/config/meta.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Overview:
3
+ Meta information for waifuc package.
4
+ """
5
+
6
+ #: Title of this project (should be `waifuc`).
7
+ __TITLE__ = 'waifuc'
8
+
9
+ #: Version of this project.
10
+ __VERSION__ = '0.0.1'
11
+
12
+ #: Short description of the project, will be included in ``setup.py``.
13
+ __DESCRIPTION__ = 'Efficient Train Data Collector for Anime Waifu'
14
+
15
+ #: Author of this project.
16
+ __AUTHOR__ = 'narugo1992'
17
+
18
+ #: Email of the authors'.
19
+ __AUTHOR_EMAIL__ = '[email protected]'
waifuc/export/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .base import BaseExporter, SaveExporter, LocalDirectoryExporter
2
+ from .huggingface import HuggingFaceExporter
3
+ from .textual_inversion import TextualInversionExporter
waifuc/export/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (356 Bytes). View file
 
waifuc/export/__pycache__/base.cpython-310.pyc ADDED
Binary file (3.32 kB). View file
 
waifuc/export/__pycache__/huggingface.cpython-310.pyc ADDED
Binary file (2.64 kB). View file
 
waifuc/export/__pycache__/textual_inversion.cpython-310.pyc ADDED
Binary file (1.81 kB). View file
 
waifuc/export/base.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ from typing import Iterator
3
+
4
+ from hbutils.system import remove
5
+ from tqdm.auto import tqdm
6
+
7
+ from ..model import ImageItem
8
+ from ..utils import get_task_names
9
+
10
+
11
class BaseExporter:
    """Abstract base for exporters consuming a stream of :class:`ImageItem`.

    Subclasses implement ``pre_export`` / ``export_item`` / ``post_export``;
    ``export_from`` drives the full cycle with a progress bar.
    """

    def pre_export(self):
        raise NotImplementedError  # pragma: no cover

    def export_item(self, item: ImageItem):
        raise NotImplementedError  # pragma: no cover

    def post_export(self):
        raise NotImplementedError  # pragma: no cover

    def export_from(self, items: Iterator[ImageItem]):
        """Export every item in *items*, wrapping the loop in a tqdm bar."""
        self.pre_export()
        # Progress-bar label: class name, optionally suffixed with task names.
        names = get_task_names()
        desc = self.__class__.__name__
        if names:
            desc = f'{desc} - {".".join(names)}'
        for current_item in tqdm(items, desc=desc):
            self.export_item(current_item)
        self.post_export()

    def reset(self):
        raise NotImplementedError  # pragma: no cover
34
+
35
+
36
class LocalDirectoryExporter(BaseExporter):
    """Exporter writing items into a local directory.

    :param output_dir: destination directory, created on demand.
    :param clear: when True, wipe any previous contents before exporting.
    """

    def __init__(self, output_dir, clear: bool = False):
        self.output_dir = output_dir
        self.clear = clear

    def pre_export(self):
        # Optionally remove stale output, then make sure the directory exists.
        if self.clear and os.path.exists(self.output_dir):
            remove(self.output_dir)
        os.makedirs(self.output_dir, exist_ok=True)

    def export_item(self, item: ImageItem):
        raise NotImplementedError  # pragma: no cover

    def post_export(self):
        # Nothing to finalize for a plain directory target.
        pass

    def reset(self):
        raise NotImplementedError  # pragma: no cover
55
+
56
+
57
class SaveExporter(LocalDirectoryExporter):
    """Save each item as an image file plus (optionally) a sidecar meta file.

    :param output_dir: destination directory.
    :param clear: wipe the directory before exporting.
    :param no_meta: when True, skip writing the hidden ``_meta.json`` sidecars.
    :param skip_when_image_exist: skip writing images that already exist on disk.
    """

    def __init__(self, output_dir, clear: bool = False, no_meta: bool = False,
                 skip_when_image_exist: bool = False):
        LocalDirectoryExporter.__init__(self, output_dir, clear)
        self.no_meta = no_meta
        # Counter for naming items that carry no 'filename' in their meta.
        self.untitles = 0
        self.skip_when_image_exist = skip_when_image_exist

    def export_item(self, item: ImageItem):
        if 'filename' in item.meta:
            filename = item.meta['filename']
        else:
            self.untitles += 1
            # Bug fix: fallback name was misspelled 'untited_...'.
            filename = f'untitled_{self.untitles}.png'

        full_filename = os.path.join(self.output_dir, filename)
        # The meta filename may contain subdirectories; create them first.
        full_directory = os.path.dirname(full_filename)
        if full_directory:
            os.makedirs(full_directory, exist_ok=True)
        item.save(full_filename, no_meta=self.no_meta, skip_when_image_exist=self.skip_when_image_exist)

    def reset(self):
        self.untitles = 0
waifuc/export/huggingface.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+ from typing import Type, Optional, Mapping, Any
4
+
5
+ from hbutils.system import TemporaryDirectory
6
+ from huggingface_hub import HfApi
7
+
8
+ from .base import LocalDirectoryExporter, BaseExporter
9
+ from ..model import ImageItem
10
+
11
+
12
class HuggingFaceExporter(BaseExporter):
    """Export items through a local exporter, then upload the result as a
    single zip archive to a HuggingFace repository.

    :param repository: target repo id (e.g. ``user/dataset``).
    :param file_in_repo: destination path of the zip inside the repo.
    :param cls: :class:`LocalDirectoryExporter` subclass used to lay out
        the files locally before zipping.
    :param args: positional arguments for ``cls`` (after the output dir).
    :param kwargs: keyword arguments for ``cls``.
    :param repo_type: HuggingFace repo type, defaults to ``'dataset'``.
    :param revision: branch/revision to commit to.
    :param hf_token: HuggingFace token; falls back to the ``HF_TOKEN``
        environment variable when not given.
    """

    def __init__(self, repository: str, file_in_repo: str,
                 cls: Type[LocalDirectoryExporter], args: tuple = (), kwargs: Optional[Mapping[str, Any]] = None,
                 repo_type: str = 'dataset', revision: str = 'main', hf_token: Optional[str] = None):
        self.repository = repository
        self.repo_type, self.revision = repo_type, revision
        self.file_in_repo = file_in_repo
        self.cls, self.args, self.kwargs = (cls, args, kwargs or {})
        # Populated in pre_export(), torn down at the end of post_export().
        self._tempdir: Optional[TemporaryDirectory] = None
        self._exporter: Optional[LocalDirectoryExporter] = None
        self.hf_token = hf_token or os.environ.get('HF_TOKEN')

    def pre_export(self):
        # Stage the export in a temporary directory via the inner exporter.
        self._tempdir = TemporaryDirectory()
        self._exporter = self.cls(self._tempdir.name, *self.args, **self.kwargs)
        self._exporter.pre_export()

    def export_item(self, item: ImageItem):
        # Delegate each item to the inner local exporter.
        self._exporter.export_item(item)

    def post_export(self):
        self._exporter.post_export()

        # upload to huggingface: zip the staged directory and push one file
        hf_api = HfApi(token=self.hf_token)
        hf_api.create_repo(self.repository, repo_type=self.repo_type, exist_ok=True)
        with TemporaryDirectory() as td:
            zip_file = os.path.join(td, 'package.zip')
            with zipfile.ZipFile(zip_file, mode='w') as zf:
                for directory, _, files in os.walk(self._tempdir.name):
                    for file in files:
                        file_path = os.path.join(directory, file)
                        rel_file_path = os.path.relpath(file_path, self._tempdir.name)
                        # Archive entries always use forward slashes,
                        # regardless of the local OS separator.
                        zf.write(
                            file_path,
                            '/'.join(rel_file_path.split(os.sep))
                        )

            hf_api.upload_file(
                path_or_fileobj=zip_file,
                repo_id=self.repository,
                repo_type=self.repo_type,
                path_in_repo=self.file_in_repo,
                revision=self.revision,
                commit_message=f'Upload {self.file_in_repo} with waifuc'
            )

        # Release the staging directory; a new one is made on the next run.
        self._exporter = None
        self._tempdir.cleanup()
        self._tempdir = None

    def reset(self):
        # All per-run state is rebuilt in pre_export(), nothing to clear here.
        pass
waifuc/export/textual_inversion.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from imgutils.tagging import tags_to_text
4
+
5
+ from .base import LocalDirectoryExporter
6
+ from ..model import ImageItem
7
+
8
+
9
class TextualInversionExporter(LocalDirectoryExporter):
    """Save each item as an image plus a ``.txt`` caption of its tags
    (the layout expected by textual-inversion / LoRA training tools).

    :param output_dir: destination directory.
    :param clear: wipe the directory before exporting.
    :param use_spaces: use spaces instead of underscores in tag text.
    :param use_escape: escape special characters in tag text.
    :param include_score: include tag scores in the caption.
    :param score_descend: order tags by descending score.
    :param skip_when_image_exist: skip writing images that already exist.
    """

    def __init__(self, output_dir: str, clear: bool = False,
                 use_spaces: bool = False, use_escape: bool = True,
                 include_score: bool = False, score_descend: bool = True,
                 skip_when_image_exist: bool = False):
        LocalDirectoryExporter.__init__(self, output_dir, clear)
        self.use_spaces = use_spaces
        self.use_escape = use_escape
        self.include_score = include_score
        self.score_descend = score_descend
        # Counter for naming items that carry no 'filename' in their meta.
        self.untitles = 0
        self.skip_when_image_exist = skip_when_image_exist

    def export_item(self, item: ImageItem):
        if 'filename' in item.meta:
            filename = item.meta['filename']
        else:
            self.untitles += 1
            # Bug fix: fallback name was misspelled 'untited_...'.
            filename = f'untitled_{self.untitles}.png'

        tags = item.meta.get('tags', None) or {}

        full_filename = os.path.join(self.output_dir, filename)
        full_tagname = os.path.join(self.output_dir, os.path.splitext(filename)[0] + '.txt')
        full_directory = os.path.dirname(full_filename)
        if full_directory:
            os.makedirs(full_directory, exist_ok=True)

        if not self.skip_when_image_exist or not os.path.exists(full_filename):
            self.image_save(item, full_filename) if False else item.image.save(full_filename)
        # NOTE(review): the caption file is rewritten even when the image
        # write is skipped — this refreshes tags for existing images;
        # confirm this is intended.
        with open(full_tagname, 'w', encoding='utf-8') as f:
            f.write(tags_to_text(tags, self.use_spaces, self.use_escape, self.include_score, self.score_descend))

    def reset(self):
        self.untitles = 0
waifuc/model/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .item import load_meta, dump_meta, ImageItem
waifuc/model/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (215 Bytes). View file
 
waifuc/model/__pycache__/item.cpython-310.pyc ADDED
Binary file (4.02 kB). View file
 
waifuc/model/item.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os.path
3
+ import pickle
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+ from PIL import Image
8
+ from hbutils.encoding import base64_decode, base64_encode
9
+ from hbutils.reflection import quick_import_object
10
+
11
# Alias for None's type (``types.NoneType`` only exists from Python 3.10).
NoneType = type(None)

# Reserved dict keys marking a pickled object inside the JSON meta format.
_TYPE_META = '__type'
_BASE64_META = 'base64'
15
+
16
+
17
def load_meta(data, path=()):
    """Reconstruct a meta structure previously produced by ``dump_meta``.

    JSON scalars, lists and plain dicts are decoded recursively as-is; a dict
    carrying the reserved ``__type`` key is base64-decoded and unpickled back
    into the original object, which must match the recorded type.

    NOTE(review): this calls ``pickle.loads`` on embedded data — loading a
    meta file from an untrusted source can execute arbitrary code; only load
    files produced by this package itself.

    :param data: JSON-compatible structure to decode.
    :param path: location inside the structure, used only in error messages.
    :raises TypeError: on unsupported values, or when an unpickled object
        does not match its recorded type.
    """
    if isinstance(data, (int, float, str, NoneType)):
        return data
    elif isinstance(data, list):
        return [load_meta(item, (*path, i)) for i, item in enumerate(data)]
    elif isinstance(data, dict):
        if _TYPE_META not in data:
            return {key: load_meta(value, (*path, key)) for key, value in data.items()}
        else:
            cls, _, _ = quick_import_object(data[_TYPE_META])
            binary = base64_decode(data[_BASE64_META])
            obj = pickle.loads(binary)
            if isinstance(obj, cls):
                return obj
            else:
                raise TypeError(f'{cls!r} expected but {obj!r} found at {path!r}.')
    else:
        raise TypeError(f'Unknown type {data!r} at {path!r}.')
35
+
36
+
37
def dump_meta(data, path=()):
    """Encode a meta structure into a JSON-compatible form.

    Scalars pass through unchanged; lists and dicts recurse.  Any other
    object is pickled and stored as a base64 payload tagged with its
    fully-qualified type name, so ``load_meta`` can restore it.

    :param data: meta structure to encode.
    :param path: location inside the structure (kept for symmetry with
        ``load_meta``; not used in messages here).
    """
    if isinstance(data, (int, float, str, NoneType)):
        return data
    if isinstance(data, list):
        return [dump_meta(element, (*path, index)) for index, element in enumerate(data)]
    if isinstance(data, dict):
        return {name: dump_meta(child, (*path, name)) for name, child in data.items()}

    # Fallback: serialize arbitrary objects via pickle + base64.
    cls = type(data)
    if hasattr(cls, '__module__'):
        type_str = f'{cls.__module__}.{cls.__name__}'
    else:
        type_str = cls.__name__
    return {
        _TYPE_META: type_str,
        _BASE64_META: base64_encode(pickle.dumps(data)),
    }
52
+
53
+
54
@dataclass
class ImageItem:
    """A PIL image paired with a free-form ``meta`` dict.

    Note: the explicit ``__init__`` below overrides the dataclass-generated
    one so that ``meta`` may be omitted or passed as ``None``.
    """
    # The image payload.
    image: Image.Image
    # Arbitrary metadata (tags, filename, crop info, ...).
    meta: dict

    def __init__(self, image: Image.Image, meta: Optional[dict] = None):
        self.image = image
        self.meta = meta or {}

    @classmethod
    def _image_file_to_meta_file(cls, image_file):
        # Sidecar convention: 'dir/name.png' -> 'dir/.name_meta.json'
        # (leading dot keeps the meta file hidden next to the image).
        directory, filename = os.path.split(image_file)
        filebody, _ = os.path.splitext(filename)
        meta_file = os.path.join(directory, f'.{filebody}_meta.json')
        return meta_file

    @classmethod
    def load_from_image(cls, image_file):
        """Load an item from an image file, plus its meta sidecar if present."""
        image = Image.open(image_file)
        meta_file = cls._image_file_to_meta_file(image_file)

        if os.path.exists(meta_file):
            with open(meta_file, 'r', encoding='utf-8') as f:
                meta = load_meta(json.load(f))
        else:
            meta = {}

        return cls(image, meta)

    def save(self, image_file, no_meta: bool = False, skip_when_image_exist: bool = False):
        """Save the image (unless skipped) and, unless disabled, its meta sidecar."""
        if not skip_when_image_exist or not os.path.exists(image_file):
            self.image.save(image_file)
        # Meta is written even when the image write was skipped, keeping the
        # sidecar in sync with the latest in-memory metadata.
        if not no_meta and self.meta:
            meta_file = self._image_file_to_meta_file(image_file)
            with open(meta_file, 'w', encoding='utf-8') as f:
                json.dump(dump_meta(self.meta), f)

    def __repr__(self):
        # Show size plus only the scalar meta entries, sorted for stability.
        values = {'size': self.image.size}
        for key, value in self.meta.items():
            if isinstance(value, (int, float, str)):
                values[key] = value

        content = ', '.join(f'{key}: {values[key]!r}' for key in sorted(values.keys()))
        return f'<{self.__class__.__name__} {content}>'
waifuc/source/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .anime_pictures import AnimePicturesSource
2
+ from .base import BaseDataSource, EmptySource
3
+ from .compose import ParallelDataSource, ComposedDataSource
4
+ from .danbooru import DanbooruSource, SafebooruSource, ATFBooruSource, E621LikeSource, E621Source, E926Source
5
+ from .derpibooru import DerpibooruLikeSource, DerpibooruSource, FurbooruSource
6
+ from .duitang import DuitangSource
7
+ from .gchar import GcharAutoSource
8
+ from .huashi6 import Huashi6Source
9
+ from .konachan import KonachanLikeSource, YandeSource, KonachanSource, KonachanNetSource, LolibooruSource, \
10
+ Rule34LikeSource, Rule34Source, HypnoHubSource, GelbooruSource, XbooruLikeSource, XbooruSource, \
11
+ SafebooruOrgSource, TBIBSource
12
+ from .local import LocalSource, LocalTISource
13
+ from .paheal import PahealSource
14
+ from .pixiv import BasePixivSource, PixivSearchSource, PixivUserSource, PixivRankingSource
15
+ from .sankaku import SankakuSource, PostOrder, Rating, FileType
16
+ from .video import VideoSource
17
+ from .wallhaven import WallHavenSource
18
+ from .web import WebDataSource
19
+ from .zerochan import ZerochanSource
waifuc/source/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.61 kB). View file
 
waifuc/source/__pycache__/anime_pictures.cpython-310.pyc ADDED
Binary file (4.1 kB). View file
 
waifuc/source/__pycache__/base.cpython-310.pyc ADDED
Binary file (3.82 kB). View file
 
waifuc/source/__pycache__/compose.cpython-310.pyc ADDED
Binary file (2.02 kB). View file
 
waifuc/source/__pycache__/danbooru.cpython-310.pyc ADDED
Binary file (6.36 kB). View file