LittleApple-fp16 commited on
Commit
4f8ad24
1 Parent(s): 26a1621

Upload 88 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. waifuc/__init__.py +0 -0
  2. waifuc/__pycache__/__init__.cpython-310.pyc +0 -0
  3. waifuc/action/__init__.py +13 -0
  4. waifuc/action/__pycache__/__init__.cpython-310.pyc +0 -0
  5. waifuc/action/__pycache__/align.cpython-310.pyc +0 -0
  6. waifuc/action/__pycache__/augument.cpython-310.pyc +0 -0
  7. waifuc/action/__pycache__/background.cpython-310.pyc +0 -0
  8. waifuc/action/__pycache__/base.cpython-310.pyc +0 -0
  9. waifuc/action/__pycache__/basic.cpython-310.pyc +0 -0
  10. waifuc/action/__pycache__/ccip.cpython-310.pyc +0 -0
  11. waifuc/action/__pycache__/count.cpython-310.pyc +0 -0
  12. waifuc/action/__pycache__/filename.cpython-310.pyc +0 -0
  13. waifuc/action/__pycache__/filter.cpython-310.pyc +0 -0
  14. waifuc/action/__pycache__/lpips.cpython-310.pyc +0 -0
  15. waifuc/action/__pycache__/split.cpython-310.pyc +0 -0
  16. waifuc/action/__pycache__/tagging.cpython-310.pyc +0 -0
  17. waifuc/action/align.py +51 -0
  18. waifuc/action/augument.py +68 -0
  19. waifuc/action/background.py +10 -0
  20. waifuc/action/base.py +51 -0
  21. waifuc/action/basic.py +16 -0
  22. waifuc/action/ccip.py +151 -0
  23. waifuc/action/count.py +72 -0
  24. waifuc/action/filename.py +46 -0
  25. waifuc/action/filter.py +110 -0
  26. waifuc/action/lpips.py +69 -0
  27. waifuc/action/split.py +145 -0
  28. waifuc/action/tagging.py +83 -0
  29. waifuc/config/__init__.py +0 -0
  30. waifuc/config/__pycache__/__init__.cpython-310.pyc +0 -0
  31. waifuc/config/__pycache__/meta.cpython-310.pyc +0 -0
  32. waifuc/config/meta.py +19 -0
  33. waifuc/export/__init__.py +3 -0
  34. waifuc/export/__pycache__/__init__.cpython-310.pyc +0 -0
  35. waifuc/export/__pycache__/base.cpython-310.pyc +0 -0
  36. waifuc/export/__pycache__/huggingface.cpython-310.pyc +0 -0
  37. waifuc/export/__pycache__/textual_inversion.cpython-310.pyc +0 -0
  38. waifuc/export/base.py +79 -0
  39. waifuc/export/huggingface.py +64 -0
  40. waifuc/export/textual_inversion.py +43 -0
  41. waifuc/model/__init__.py +1 -0
  42. waifuc/model/__pycache__/__init__.cpython-310.pyc +0 -0
  43. waifuc/model/__pycache__/item.cpython-310.pyc +0 -0
  44. waifuc/model/item.py +98 -0
  45. waifuc/source/__init__.py +19 -0
  46. waifuc/source/__pycache__/__init__.cpython-310.pyc +0 -0
  47. waifuc/source/__pycache__/anime_pictures.cpython-310.pyc +0 -0
  48. waifuc/source/__pycache__/base.cpython-310.pyc +0 -0
  49. waifuc/source/__pycache__/compose.cpython-310.pyc +0 -0
  50. waifuc/source/__pycache__/danbooru.cpython-310.pyc +0 -0
waifuc/__init__.py ADDED
File without changes
waifuc/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (131 Bytes). View file
 
waifuc/action/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .align import AlignMaxSizeAction, AlignMinSizeAction, PaddingAlignAction
2
+ from .augument import RandomFilenameAction, RandomChoiceAction, BaseRandomAction, MirrorAction
3
+ from .background import BackgroundRemovalAction
4
+ from .base import BaseAction, ProcessAction, FilterAction, ActionStop
5
+ from .basic import ModeConvertAction
6
+ from .ccip import CCIPAction
7
+ from .count import SliceSelectAction, FirstNSelectAction
8
+ from .filename import FileExtAction, FileOrderAction
9
+ from .filter import NoMonochromeAction, OnlyMonochromeAction, ClassFilterAction, RatingFilterAction, FaceCountAction, \
10
+ HeadCountAction, PersonRatioAction, MinSizeFilterAction, MinAreaFilterAction
11
+ from .lpips import FilterSimilarAction
12
+ from .split import PersonSplitAction, ThreeStageSplitAction
13
+ from .tagging import TaggingAction, TagFilterAction
waifuc/action/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.25 kB). View file
 
waifuc/action/__pycache__/align.cpython-310.pyc ADDED
Binary file (2.29 kB). View file
 
waifuc/action/__pycache__/augument.cpython-310.pyc ADDED
Binary file (3.33 kB). View file
 
waifuc/action/__pycache__/background.cpython-310.pyc ADDED
Binary file (688 Bytes). View file
 
waifuc/action/__pycache__/base.cpython-310.pyc ADDED
Binary file (2.43 kB). View file
 
waifuc/action/__pycache__/basic.cpython-310.pyc ADDED
Binary file (934 Bytes). View file
 
waifuc/action/__pycache__/ccip.cpython-310.pyc ADDED
Binary file (5.22 kB). View file
 
waifuc/action/__pycache__/count.cpython-310.pyc ADDED
Binary file (2.72 kB). View file
 
waifuc/action/__pycache__/filename.cpython-310.pyc ADDED
Binary file (1.99 kB). View file
 
waifuc/action/__pycache__/filter.cpython-310.pyc ADDED
Binary file (5.37 kB). View file
 
waifuc/action/__pycache__/lpips.cpython-310.pyc ADDED
Binary file (3.01 kB). View file
 
waifuc/action/__pycache__/split.cpython-310.pyc ADDED
Binary file (4.77 kB). View file
 
waifuc/action/__pycache__/tagging.cpython-310.pyc ADDED
Binary file (3.4 kB). View file
 
waifuc/action/align.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+
3
+ from PIL import Image
4
+ from imgutils.data import load_image
5
+
6
+ from .base import ProcessAction
7
+ from ..model import ImageItem
8
+
9
+
10
class AlignMaxSizeAction(ProcessAction):
    """Downscale an image so that its longer edge does not exceed ``max_size``.

    Images already within the limit pass through untouched; aspect ratio is
    always preserved.
    """

    def __init__(self, max_size: int):
        self._max_size = max_size

    def process(self, item: ImageItem) -> ImageItem:
        img = item.image
        longest = max(img.width, img.height)
        if longest > self._max_size:
            ratio = longest / self._max_size
            img = img.resize((int(img.width / ratio), int(img.height / ratio)))

        return ImageItem(img, item.meta)
22
+
23
+
24
class AlignMinSizeAction(ProcessAction):
    """Downscale an image so that its shorter edge does not exceed ``min_size``.

    Smaller images pass through untouched; aspect ratio is always preserved.
    """

    def __init__(self, min_size: int):
        self._min_size = min_size

    def process(self, item: ImageItem) -> ImageItem:
        img = item.image
        shortest = min(img.width, img.height)
        if shortest > self._min_size:
            ratio = shortest / self._min_size
            img = img.resize((int(img.width / ratio), int(img.height / ratio)))

        return ImageItem(img, item.meta)
36
+
37
+
38
class PaddingAlignAction(ProcessAction):
    """Fit an image onto a fixed-size canvas by scaling and centered padding.

    The image is scaled (aspect preserved) to fit inside ``size``, then pasted
    centered on a solid ``color`` canvas of exactly that size.
    """

    def __init__(self, size: Tuple[int, int], color: str = 'white'):
        self.width, self.height = size
        self.color = color

    def process(self, item: ImageItem) -> ImageItem:
        # Work in RGBA so the paste below can use the image itself as its alpha mask.
        image = load_image(item.image, force_background=None, mode='RGBA')
        r = min(self.width / image.width, self.height / image.height)
        resized = image.resize((int(image.width * r), int(image.height * r)))

        new_image = Image.new('RGBA', (self.width, self.height), self.color)
        left, top = int((new_image.width - resized.width) // 2), int((new_image.height - resized.height) // 2)
        new_image.paste(resized, (left, top, left + resized.width, top + resized.height), resized)
        # Convert back to the source item's mode so downstream actions see a consistent mode.
        return ImageItem(new_image.convert(item.image.mode), item.meta)
waifuc/action/augument.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from typing import Iterator, Optional, Tuple
4
+
5
+ from PIL import ImageOps
6
+ from hbutils.random import random_sha1
7
+
8
+ from .base import BaseAction
9
+ from ..model import ImageItem
10
+
11
+
12
class BaseRandomAction(BaseAction):
    """Base class for actions driven by a private, reproducibly-seeded RNG."""

    def __init__(self, seed=None):
        # The seed is kept so reset() can restore the identical random sequence.
        self.seed = seed
        self.random = random.Random(self.seed)

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        raise NotImplementedError  # pragma: no cover

    def reset(self):
        # Re-seed: replaying the same input stream reproduces the same decisions.
        self.random = random.Random(self.seed)
22
+
23
+
24
class RandomChoiceAction(BaseRandomAction):
    """Keep each item independently with probability ``p``."""

    def __init__(self, p=0.5, seed=None):
        BaseRandomAction.__init__(self, seed)
        self.p = p

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        draw = self.random.random()
        if draw <= self.p:
            yield item
32
+
33
+
34
class RandomFilenameAction(BaseRandomAction):
    """Rename every item to a random SHA-1-based filename.

    :param ext: Extension (with leading dot) for the new name. When falsy, the
        extension of the item's existing filename is reused.
    :param seed: Optional RNG seed for reproducible names.
    :raises NameError: When no ``ext`` is given and the item has no filename
        to take an extension from.
    """

    def __init__(self, ext: Optional[str] = '.png', seed=None):
        BaseRandomAction.__init__(self, seed)
        self.ext = ext

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            # BUG FIX: os.path.splitext(...) returns (root, ext); the original
            # indexed [0], which is the basename body, so fallback names ended
            # with the old basename instead of its extension.
            ext = self.ext or os.path.splitext(os.path.basename(item.meta['filename']))[1]
        else:
            if self.ext:
                ext = self.ext
            else:
                raise NameError(f'Extension (ext) must be specified '
                                f'when filename not in metadata of image item - {item!r}.')

        filename = random_sha1(rnd=self.random) + ext
        yield ImageItem(item.image, {**item.meta, 'filename': filename})
51
+
52
+
53
class MirrorAction(BaseAction):
    """Emit each item twice: the original image and its horizontal mirror.

    When the item carries a filename, the two outputs receive
    ``_<origin>`` / ``_<mirror>`` suffixes (configurable via ``names``);
    otherwise the metadata is passed through unchanged.
    """

    def __init__(self, names: Tuple[str, str] = ('origin', 'mirror')):
        self.origin_name, self.mirror_name = names

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' not in item.meta:
            yield ImageItem(item.image, item.meta)
            yield ImageItem(ImageOps.mirror(item.image), item.meta)
        else:
            body, ext = os.path.splitext(item.meta['filename'])
            yield ImageItem(item.image,
                            {**item.meta, 'filename': f'{body}_{self.origin_name}{ext}'})
            yield ImageItem(ImageOps.mirror(item.image),
                            {**item.meta, 'filename': f'{body}_{self.mirror_name}{ext}'})

    def reset(self):
        pass
waifuc/action/background.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from imgutils.segment import segment_rgba_with_isnetis
2
+
3
+ from .base import ProcessAction
4
+ from ..model import ImageItem
5
+
6
+
7
class BackgroundRemovalAction(ProcessAction):
    """Remove the background, keeping the segmented subject as an RGBA image."""

    def process(self, item: ImageItem) -> ImageItem:
        # segment_rgba_with_isnetis returns (mask, rgba_image); only the image is kept.
        segmented = segment_rgba_with_isnetis(item.image)[1]
        return ImageItem(segmented, item.meta)
waifuc/action/base.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Iterator, Iterable
2
+
3
+ from ..model import ImageItem
4
+
5
+
6
class ActionStop(Exception):
    """Raised by an action's iter() to terminate the whole stream early."""
    pass
8
+
9
+
10
class BaseAction:
    """Abstract pipeline stage mapping one item to zero or more items."""

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        """Yield output items produced from a single input item."""
        raise NotImplementedError  # pragma: no cover

    def iter_from(self, iter_: Iterable[ImageItem]) -> Iterator[ImageItem]:
        """Apply this action over a whole stream; ActionStop ends the stream."""
        for item in iter_:
            try:
                yield from self.iter(item)
            except ActionStop:
                break

    def reset(self):
        """Clear internal state so the action can be reused on a new stream."""
        raise NotImplementedError  # pragma: no cover
23
+
24
+
25
class ProcessAction(BaseAction):
    """An action that maps every input item to exactly one output item."""

    def process(self, item: ImageItem) -> ImageItem:
        """Transform a single item; subclasses implement this."""
        raise NotImplementedError  # pragma: no cover

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        # Adapt the 1-to-1 process() to the 1-to-many iter() protocol.
        yield self.process(item)

    def reset(self):
        # Stateless by default; stateful subclasses should override.
        pass

    def __call__(self, item: ImageItem) -> ImageItem:
        # Allow the action to be used directly as a callable transform.
        return self.process(item)
37
+
38
+
39
class FilterAction(BaseAction):
    """An action that either keeps or drops each item based on a predicate."""

    def check(self, item: ImageItem) -> bool:
        """Return True to keep the item; subclasses implement this."""
        raise NotImplementedError  # pragma: no cover

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        # Yield the item unchanged only when the predicate passes.
        if self.check(item):
            yield item

    def reset(self):
        # Stateless by default; stateful subclasses should override.
        pass

    def __call__(self, item: ImageItem) -> bool:
        # Allow the filter to be used directly as a predicate.
        return self.check(item)
waifuc/action/basic.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+
3
+ from imgutils.data import load_image
4
+
5
+ from .base import ProcessAction
6
+ from ..model import ImageItem
7
+
8
+
9
class ModeConvertAction(ProcessAction):
    """Convert every image to a fixed PIL mode, optionally flattening any
    transparency onto a solid background color."""

    def __init__(self, mode='RGB', force_background: Optional[str] = 'white'):
        self.mode = mode
        self.force_background = force_background

    def process(self, item: ImageItem) -> ImageItem:
        converted = load_image(item.image, mode=self.mode, force_background=self.force_background)
        return ImageItem(converted, item.meta)
waifuc/action/ccip.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from enum import IntEnum
3
+ from typing import Iterator, Optional, List, Tuple
4
+
5
+ import numpy as np
6
+ from hbutils.string import plural_word
7
+ from hbutils.testing import disable_output
8
+ from imgutils.metrics import ccip_extract_feature, ccip_default_threshold, ccip_clustering, ccip_batch_differences
9
+
10
+ from .base import BaseAction
11
+ from ..model import ImageItem
12
+
13
+
14
class CCIPStatus(IntEnum):
    """Phases of CCIPAction's streaming state machine."""
    INIT = 0x1              # buffering items until the first clustering attempt
    APPROACH = 0x2          # clustering failed so far; keep buffering and retrying
    EVAL = 0x3              # anchor established; filter incoming items one by one
    INIT_WITH_SOURCE = 0x4  # anchor features are seeded from an external init_source
19
+
20
+
21
class CCIPAction(BaseAction):
    """Character-consistency filter driven by CCIP visual features.

    Runs a small state machine over the stream: INIT/APPROACH buffer items
    until a dominant feature cluster can be found; EVAL then releases items
    that match the cluster (the "anchor"). INIT_WITH_SOURCE seeds the anchor
    from ``init_source`` instead of clustering.
    """

    def __init__(self, init_source=None, *, min_val_count: int = 15, step: int = 5,
                 ratio_threshold: float = 0.6, min_clu_dump_ratio: float = 0.3, cmp_threshold: float = 0.5,
                 eps: Optional[float] = None, min_samples: Optional[int] = None,
                 model='ccip-caformer-24-randaug-pruned', threshold: Optional[float] = None):
        self.init_source = init_source

        self.min_val_count = min_val_count          # samples needed before first clustering
        self.step = step                            # retry interval for clustering / dumping
        self.ratio_threshold = ratio_threshold      # minimum dominant-cluster share
        self.min_clu_dump_ratio = min_clu_dump_ratio
        self.cmp_threshold = cmp_threshold          # fraction of matches needed to accept a feature
        self.eps, self.min_samples = eps, min_samples
        self.model = model
        # Fall back to the model's default feature-difference threshold when not given.
        self.threshold = threshold or ccip_default_threshold(self.model)

        self.items = []          # buffered, not-yet-released items
        self.item_released = []  # parallel flags: item already yielded
        self.feats = []          # CCIP features (parallel to items; grows past it in EVAL)
        if self.init_source is not None:
            self.status = CCIPStatus.INIT_WITH_SOURCE
        else:
            self.status = CCIPStatus.INIT

    def _extract_feature(self, item: ImageItem):
        """Return the item's CCIP feature, reusing a precomputed one from meta."""
        if 'ccip_feature' in item.meta:
            return item.meta['ccip_feature']
        else:
            return ccip_extract_feature(item.image, model=self.model)

    def _try_cluster(self) -> bool:
        """Cluster buffered features and keep only a reliable dominant cluster.

        Returns True (pruning items/feats to the chosen cluster) when one
        cluster holds at least ``ratio_threshold`` of the clustered samples
        and is internally consistent enough; False otherwise.
        """
        with disable_output():
            clu_ids = ccip_clustering(self.feats, method='optics', model=self.model,
                                      eps=self.eps, min_samples=self.min_samples)
        clu_counts = {}
        for id_ in clu_ids:
            if id_ != -1:  # -1 marks noise samples in the clustering output
                clu_counts[id_] = clu_counts.get(id_, 0) + 1

        clu_total = sum(clu_counts.values()) if clu_counts else 0
        chosen_id = None
        for id_, count in clu_counts.items():
            if count >= clu_total * self.ratio_threshold:
                chosen_id = id_
                break

        if chosen_id is not None:
            feats = [feat for i, feat in enumerate(self.feats) if clu_ids[i] == chosen_id]
            # Fraction of cluster members that match the cluster itself.
            clu_dump_ratio = np.array([
                self._compare_to_exists(feat, base_set=feats)
                for feat in feats
            ]).astype(float).mean()

            if clu_dump_ratio >= self.min_clu_dump_ratio:
                self.items = [item for i, item in enumerate(self.items) if clu_ids[i] == chosen_id]
                self.item_released = [False] * len(self.items)
                self.feats = [feat for i, feat in enumerate(self.feats) if clu_ids[i] == chosen_id]
                return True
            else:
                return False
        else:
            return False

    def _compare_to_exists(self, feat, base_set=None) -> bool:
        """Whether ``feat`` matches at least ``cmp_threshold`` of the known features."""
        # NOTE(review): the original annotated this as Tuple[bool, List[int]],
        # but a single bool is returned; annotation corrected.
        diffs = ccip_batch_differences([feat, *(base_set or self.feats)], model=self.model)[0, 1:]
        matches = diffs <= self.threshold
        return matches.astype(float).mean() >= self.cmp_threshold

    def _dump_items(self) -> Iterator[ImageItem]:
        """Yield each buffered item at most once, when it matches the anchor set."""
        for i in range(len(self.items)):
            if not self.item_released[i]:
                if self._compare_to_exists(self.feats[i]):
                    self.item_released[i] = True
                    yield self.items[i]

    def _eval_iter(self, item: ImageItem) -> Iterator[ImageItem]:
        """EVAL phase: pass matching items through, growing the anchor set."""
        feat = self._extract_feature(item)
        if self._compare_to_exists(feat):
            self.feats.append(feat)
            yield item

            # Every `step` accepted features, retry releasing buffered items.
            if (len(self.feats) - len(self.items)) % self.step == 0:
                yield from self._dump_items()

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if self.status == CCIPStatus.INIT_WITH_SOURCE:
            # Seed the anchor from the external source, forwarding its items.
            cnt = 0
            logging.info('Existing anchor detected.')
            for item_ in self.init_source:
                self.feats.append(self._extract_feature(item_))
                yield item_
                cnt += 1
            logging.info(f'{plural_word(cnt, "items")} loaded from anchor.')

            self.status = CCIPStatus.EVAL
            yield from self._eval_iter(item)

        elif self.status == CCIPStatus.INIT:
            self.items.append(item)
            self.feats.append(self._extract_feature(item))

            # First clustering attempt once enough samples have accumulated.
            if len(self.items) >= self.min_val_count:
                if self._try_cluster():
                    self.status = CCIPStatus.EVAL
                    yield from self._dump_items()
                else:
                    self.status = CCIPStatus.APPROACH

        elif self.status == CCIPStatus.APPROACH:
            self.items.append(item)
            self.feats.append(self._extract_feature(item))

            # Retry clustering every `step` additional samples.
            if (len(self.items) - self.min_val_count) % self.step == 0:
                if self._try_cluster():
                    self.status = CCIPStatus.EVAL
                    yield from self._dump_items()

        elif self.status == CCIPStatus.EVAL:
            yield from self._eval_iter(item)

        else:
            raise ValueError(f'Unknown status for {self.__class__.__name__} - {self.status!r}.')

    def reset(self):
        """Restore the initial state so the action can process a fresh stream."""
        self.items.clear()
        self.item_released.clear()
        self.feats.clear()
        # NOTE(review): truthiness here vs `is not None` in __init__ — an empty
        # init_source restarts in INIT rather than INIT_WITH_SOURCE; confirm intended.
        if self.init_source:
            self.status = CCIPStatus.INIT_WITH_SOURCE
        else:
            self.status = CCIPStatus.INIT
waifuc/action/count.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Iterator
2
+
3
+ from .base import BaseAction, ActionStop
4
+ from ..model import ImageItem
5
+
6
+
7
class FirstNSelectAction(BaseAction):
    """Pass through only the first ``n`` items, then stop the whole stream."""

    def __init__(self, n: int):
        self._n = n
        self._passed = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if self._passed >= self._n:
            raise ActionStop
        yield item
        self._passed += 1

    def reset(self):
        self._passed = 0
21
+
22
+
23
+ def _slice_process(start, stop, step):
24
+ start = 0 if start is None else start
25
+ step = 1 if step is None else step
26
+ if not isinstance(start, int) or start < 0:
27
+ raise ValueError(f'Start should be an integer no less than 0, but {start!r} found.')
28
+ if stop is not None and (not isinstance(stop, int) or stop < 0):
29
+ raise ValueError(f'Stop should be an integer no less than 0, but {stop!r} found.')
30
+ if not isinstance(step, int) or step < 1:
31
+ raise ValueError(f'Step should be an integer no less than 1, but {step!r} found.')
32
+
33
+ return start, stop, step
34
+
35
+
36
class SliceSelectAction(BaseAction):
    """Select items by stream position with slice-like ``[start[, stop[, step]]]`` args.

    Mirrors ``slice`` semantics: ``SliceSelectAction(10)`` keeps items 0-9,
    ``SliceSelectAction(2, 10, 3)`` keeps items 2, 5, 8. Once no further item
    can match (bounded slices only), the whole stream is stopped early.
    """

    def __init__(self, *args):
        if len(args) == 0:
            slice_args = _slice_process(None, None, None)
        elif len(args) == 1:
            slice_args = _slice_process(None, args[0], None)
        elif len(args) == 2:
            slice_args = _slice_process(args[0], args[1], None)
        elif len(args) == 3:
            slice_args = _slice_process(args[0], args[1], args[2])
        else:
            # FIX: original message read "should no no more than 3".
            raise ValueError(f'Arguments of {self.__class__.__name__} should be no more than 3, but {args!r} found.')

        self._start, self._stop, self._step = slice_args
        if self._stop is not None:
            # Index of the last item that can possibly be selected.
            self._max = self._start + ((self._stop - self._start - 1) // self._step) * self._step
        else:
            self._max = None  # unbounded slice: never exhausts
        self._current = 0

    def _check_current(self):
        # True when the current index lies inside [start, stop) on the step grid.
        if self._stop is not None and self._current >= self._stop:
            return False
        if self._current < self._start:
            return False
        return (self._current - self._start) % self._step == 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        # BUG FIX: the original compared ``self._current > self._max`` even when
        # ``self._max`` is None (no stop given), raising TypeError on Python 3.
        if self._max is not None and self._current > self._max:
            raise ActionStop
        if self._check_current():
            yield item
        self._current += 1

    def reset(self):
        self._current = 0
waifuc/action/filename.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Iterator, Optional
3
+
4
+ from .base import BaseAction
5
+ from ..model import ImageItem
6
+
7
+
8
class FileExtAction(BaseAction):
    """Force a fixed file extension on every item, numbering untitled items."""

    def __init__(self, ext: str):
        self.ext = ext
        self.untitles = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            filename = os.path.splitext(item.meta['filename'])[0] + self.ext
        else:
            self.untitles += 1
            filename = f'untitled_{self.untitles}{self.ext}'

        yield ImageItem(item.image, {**item.meta, 'filename': filename})

    def reset(self):
        self.untitles = 0
25
+
26
+
27
class FileOrderAction(BaseAction):
    """Rename items to sequential numbers (1, 2, 3, ...), forcing ``ext`` or
    reusing each item's existing extension when ``ext`` is None."""

    def __init__(self, ext: Optional[str] = '.png'):
        self.ext = ext
        self._current = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        self._current += 1
        if 'filename' in item.meta:
            original_ext = os.path.splitext(item.meta['filename'])[1]
            new_filename = f'{self._current}{self.ext or original_ext}'
        elif self.ext:
            new_filename = f'{self._current}{self.ext}'
        else:
            raise ValueError('No extension name provided for unnamed file.')

        yield ImageItem(item.image, {**item.meta, 'filename': new_filename})

    def reset(self):
        self._current = 0
waifuc/action/filter.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Literal
2
+
3
+ from imgutils.detect import detect_faces, detect_heads, detect_person
4
+ from imgutils.validate import is_monochrome, anime_classify, anime_rating
5
+
6
+ from .base import FilterAction
7
+ from ..model import ImageItem
8
+
9
+
10
class NoMonochromeAction(FilterAction):
    """Drop images classified as monochrome."""

    def check(self, item: ImageItem) -> bool:
        return not is_monochrome(item.image)
13
+
14
+
15
class OnlyMonochromeAction(FilterAction):
    """Keep only images classified as monochrome."""

    def check(self, item: ImageItem) -> bool:
        return is_monochrome(item.image)
18
+
19
+
20
+ ImageClassTyping = Literal['illustration', 'bangumi', 'comic', '3d']
21
+
22
+
23
class ClassFilterAction(FilterAction):
    """Keep images whose predicted class is in ``classes``, optionally
    requiring the classification score to reach ``threshold``."""

    def __init__(self, classes: List[ImageClassTyping], threshold: Optional[float] = None, **kwargs):
        self.classes = classes
        self.threshold = threshold
        self.kwargs = kwargs

    def check(self, item: ImageItem) -> bool:
        category, confidence = anime_classify(item.image, **self.kwargs)
        if category not in self.classes:
            return False
        return self.threshold is None or confidence >= self.threshold
32
+
33
+
34
+ ImageRatingTyping = Literal['safe', 'r15', 'r18']
35
+
36
+
37
class RatingFilterAction(FilterAction):
    """Keep images whose predicted content rating is in ``ratings``, optionally
    requiring the rating score to reach ``threshold``."""

    def __init__(self, ratings: List[ImageRatingTyping], threshold: Optional[float] = None, **kwargs):
        self.ratings = ratings
        self.threshold = threshold
        self.kwargs = kwargs

    def check(self, item: ImageItem) -> bool:
        predicted, confidence = anime_rating(item.image, **self.kwargs)
        if predicted not in self.ratings:
            return False
        return self.threshold is None or confidence >= self.threshold
46
+
47
+
48
class FaceCountAction(FilterAction):
    """Keep only images whose detected face count equals ``count``.

    Detection parameters are forwarded to ``imgutils.detect.detect_faces``.
    """

    def __init__(self, count: int, level: str = 's', version: str = 'v1.4',
                 conf_threshold: float = 0.25, iou_threshold: float = 0.7):
        self.count = count
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        detection = detect_faces(item.image, self.level, self.version,
                                 conf_threshold=self.conf_threshold, iou_threshold=self.iou_threshold)
        return len(detection) == self.count
61
+
62
+
63
class HeadCountAction(FilterAction):
    """Keep only images whose detected head count equals ``count``."""

    def __init__(self, count: int, level: str = 's', conf_threshold: float = 0.3, iou_threshold: float = 0.7):
        self.count = count
        self.level = level
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        heads = detect_heads(item.image, self.level,
                             conf_threshold=self.conf_threshold,
                             iou_threshold=self.iou_threshold)
        return len(heads) == self.count
77
+
78
+
79
class PersonRatioAction(FilterAction):
    """Keep images with exactly one detected person whose bounding box covers
    at least ``ratio`` of the whole frame."""

    def __init__(self, ratio: float = 0.4, level: str = 'm', version: str = 'v1.1',
                 conf_threshold: float = 0.3, iou_threshold: float = 0.5):
        self.ratio = ratio
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        # NOTE(review): 640 is passed positionally — presumably detect_person's
        # inference input size; confirm against imgutils' signature.
        detections = detect_person(item.image, self.level, self.version, 640, self.conf_threshold, self.iou_threshold)
        if len(detections) != 1:
            return False

        # Compare bounding-box area against the full image area.
        (x0, y0, x1, y1), _, _ = detections[0]
        return abs((x1 - x0) * (y1 - y0)) >= self.ratio * (item.image.width * item.image.height)
95
+
96
+
97
class MinSizeFilterAction(FilterAction):
    """Keep only images whose shorter edge is at least ``min_size`` pixels."""

    def __init__(self, min_size: int):
        self.min_size = min_size

    def check(self, item: ImageItem) -> bool:
        shorter_edge = min(item.image.width, item.image.height)
        return shorter_edge >= self.min_size
103
+
104
+
105
class MinAreaFilterAction(FilterAction):
    """Keep only images whose pixel area is at least ``min_size`` squared,
    i.e. whose geometric-mean edge length is at least ``min_size``."""

    def __init__(self, min_size: int):
        self.min_size = min_size

    def check(self, item: ImageItem) -> bool:
        return (item.image.width * item.image.height) ** 0.5 >= self.min_size
waifuc/action/lpips.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Iterator, Literal
2
+
3
+ import numpy as np
4
+ from imgutils.metrics import lpips_difference, lpips_extract_feature
5
+
6
+ from .base import BaseAction
7
+ from ..model import ImageItem
8
+
9
+
10
class FeatureBucket:
    """Rolling store of LPIPS features for near-duplicate detection.

    Only features with a close aspect ratio are actually compared, which skips
    most of the expensive LPIPS calls. The store is trimmed back to
    ``capacity`` entries whenever it reaches twice that size.
    """

    def __init__(self, threshold: float = 0.45, capacity: int = 500, rtol=1.e-5, atol=1.e-8):
        self.threshold = threshold
        self.rtol, self.atol = rtol, atol
        self.features = []
        self.ratios = np.array([], dtype=float)
        self.capacity = capacity

    def check_duplicate(self, feat, ratio: float):
        """Return True when a stored feature with a similar aspect ratio is
        within ``threshold`` LPIPS difference of ``feat``."""
        candidates = np.where(np.isclose(self.ratios, ratio, rtol=self.rtol, atol=self.atol))[0]
        for idx in candidates:
            if lpips_difference(self.features[idx.item()], feat) <= self.threshold:
                return True

        return False

    def add(self, feat, ratio: float):
        """Store a feature/ratio pair, trimming once size reaches 2x capacity."""
        self.features.append(feat)
        self.ratios = np.append(self.ratios, ratio)
        if len(self.features) >= self.capacity * 2:
            self.features = self.features[-self.capacity:]
            self.ratios = self.ratios[-self.capacity:]
32
+
33
+
34
+ FilterSimilarModeTyping = Literal['all', 'group']
35
+
36
+
37
class FilterSimilarAction(BaseAction):
    """Drop images that are LPIPS-near-duplicates of previously seen ones.

    :param mode: 'all' compares against everything seen so far; 'group'
        compares only within items sharing the same ``group_id`` metadata.
    :param threshold: LPIPS difference at or below which images are duplicates.
    :param capacity: Rolling feature-store size (see FeatureBucket).
    :param rtol: Relative aspect-ratio tolerance for the cheap pre-filter.
    :param atol: Absolute aspect-ratio tolerance for the cheap pre-filter.
    """

    def __init__(self, mode: FilterSimilarModeTyping = 'all', threshold: float = 0.45,
                 capacity: int = 500, rtol=5.e-2, atol=2.e-2):
        self.mode = mode
        self.threshold, self.rtol, self.atol = threshold, rtol, atol
        self.capacity = capacity
        self.buckets: Dict[str, FeatureBucket] = {}
        self.global_bucket = FeatureBucket(threshold, self.capacity, rtol, atol)

    def _get_bin(self, group_id) -> FeatureBucket:
        """Pick (and, in 'group' mode, lazily create) the bucket for this item."""
        if self.mode == 'all':
            return self.global_bucket
        elif self.mode == 'group':
            if group_id not in self.buckets:
                self.buckets[group_id] = FeatureBucket(self.threshold, self.capacity, self.rtol, self.atol)

            return self.buckets[group_id]
        else:
            raise ValueError(f'Unknown mode for filter similar action - {self.mode!r}.')

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        image = item.image
        # Aspect ratio is a cheap pre-filter key; LPIPS only runs on close ratios.
        ratio = image.height * 1.0 / image.width
        feat = lpips_extract_feature(image)
        bucket = self._get_bin(item.meta.get('group_id'))

        if not bucket.check_duplicate(feat, ratio):
            bucket.add(feat, ratio)
            yield item

    def reset(self):
        self.buckets.clear()
        self.global_bucket = FeatureBucket(self.threshold, self.capacity, self.rtol, self.atol)
waifuc/action/split.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Iterator, Optional
3
+
4
+ from imgutils.detect import detect_person, detect_heads, detect_halfbody, detect_eyes
5
+
6
+ from .base import BaseAction
7
+ from ..model import ImageItem
8
+
9
+
10
class PersonSplitAction(BaseAction):
    """Split each image into one cropped item per detected person.

    :param keep_original: Also yield the uncropped source item first.
    :param level: Detector size level forwarded to ``detect_person``.
    :param version: Detector version forwarded to ``detect_person``.
    :param conf_threshold: Detection confidence threshold.
    :param iou_threshold: Detection IoU (NMS) threshold.
    :param keep_origin_tags: Keep the source 'tags' metadata on crops
        (dropped by default, since tags describe the full image).
    """

    def __init__(self, keep_original: bool = False, level: str = 'm', version: str = 'v1.1',
                 conf_threshold: float = 0.3, iou_threshold: float = 0.5, keep_origin_tags: bool = False):
        self.keep_original = keep_original
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.keep_origin_tags = keep_origin_tags

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        detection = detect_person(item.image, self.level, self.version,
                                  conf_threshold=self.conf_threshold, iou_threshold=self.iou_threshold)

        if 'filename' in item.meta:
            filename = item.meta['filename']
            filebody, ext = os.path.splitext(filename)
        else:
            filebody, ext = None, None

        if self.keep_original:
            yield item

        for i, (area, type_, score) in enumerate(detection):
            new_meta = {
                **item.meta,
                'crop': {'type': type_, 'score': score},
            }
            if 'tags' in new_meta and not self.keep_origin_tags:
                del new_meta['tags']
            if filebody is not None:
                # NOTE(review): numbering starts at 0 here, while
                # ThreeStageSplitAction numbers persons from 1.
                new_meta['filename'] = f'{filebody}_person{i}{ext}'
            yield ImageItem(item.image.crop(area), new_meta)

    def reset(self):
        pass
46
+
47
+
48
class ThreeStageSplitAction(BaseAction):
    """Split each image into person, half-body, head (and optionally eye) crops.

    For every detected person (or the whole image when ``split_person`` is
    False), yields the person crop, then the best half-body crop, then a
    scaled square head crop, then optionally one crop per detected eye.

    :param person_conf: kwargs forwarded to ``detect_person``.
    :param halfbody_conf: kwargs forwarded to ``detect_halfbody``.
    :param head_conf: kwargs forwarded to ``detect_heads``.
    :param head_scale: Square head crop edge = max(head box w, h) * this factor.
    :param split_eyes: Also emit eye crops (off by default).
    :param eye_conf: kwargs forwarded to ``detect_eyes``.
    :param eye_scale: Like ``head_scale``, applied within the head crop.
    :param split_person: Detect persons first; when False, the whole image is
        treated as a single person.
    :param keep_origin_tags: Keep the source 'tags' metadata on crops.
    """

    def __init__(self, person_conf: Optional[dict] = None, halfbody_conf: Optional[dict] = None,
                 head_conf: Optional[dict] = None, head_scale: float = 1.5,
                 split_eyes: bool = False, eye_conf: Optional[dict] = None, eye_scale: float = 2.4,
                 split_person: bool = True, keep_origin_tags: bool = False):
        self.person_conf = dict(person_conf or {})
        self.halfbody_conf = dict(halfbody_conf or {})
        self.head_conf = dict(head_conf or {})
        self.eye_conf = dict(eye_conf or {})
        self.head_scale = head_scale
        self.eye_scale = eye_scale
        self.split_eyes = split_eyes
        self.split_person = split_person
        self.keep_origin_tags = keep_origin_tags

    def _split_person(self, item: ImageItem, filebody, ext):
        """Yield (index, person item) pairs; person indices start at 1."""
        if self.split_person:
            for i, (px, type_, score) in enumerate(detect_person(item.image, **self.person_conf), start=1):
                person_image = item.image.crop(px)
                person_meta = {
                    **item.meta,
                    'crop': {'type': type_, 'score': score},
                }
                if 'tags' in person_meta and not self.keep_origin_tags:
                    del person_meta['tags']
                if filebody is not None:
                    person_meta['filename'] = f'{filebody}_person{i}{ext}'
                yield i, ImageItem(person_image, person_meta)

        else:
            # No person detection requested: treat the full image as person #1.
            yield 1, item

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            filename = item.meta['filename']
            filebody, ext = os.path.splitext(filename)
        else:
            filebody, ext = None, None

        for i, person_item in self._split_person(item, filebody, ext):
            person_image = person_item.image
            yield person_item

            # Stage 2: best half-body crop (first detection only).
            half_detects = detect_halfbody(person_image, **self.halfbody_conf)
            if half_detects:
                halfbody_area, halfbody_type, halfbody_score = half_detects[0]
                halfbody_image = person_image.crop(halfbody_area)
                halfbody_meta = {
                    **item.meta,
                    'crop': {'type': halfbody_type, 'score': halfbody_score},
                }
                if 'tags' in halfbody_meta and not self.keep_origin_tags:
                    del halfbody_meta['tags']
                if filebody is not None:
                    halfbody_meta['filename'] = f'{filebody}_person{i}_halfbody{ext}'
                yield ImageItem(halfbody_image, halfbody_meta)

            # Stage 3: square head crop, enlarged by head_scale, clamped to bounds.
            head_detects = detect_heads(person_image, **self.head_conf)
            if head_detects:
                (hx0, hy0, hx1, hy1), head_type, head_score = head_detects[0]
                cx, cy = (hx0 + hx1) / 2, (hy0 + hy1) / 2
                width, height = hx1 - hx0, hy1 - hy0
                width = height = max(width, height) * self.head_scale
                x0, y0 = int(max(cx - width / 2, 0)), int(max(cy - height / 2, 0))
                x1, y1 = int(min(cx + width / 2, person_image.width)), int(min(cy + height / 2, person_image.height))
                head_image = person_image.crop((x0, y0, x1, y1))
                head_meta = {
                    **item.meta,
                    'crop': {'type': head_type, 'score': head_score},
                }
                if 'tags' in head_meta and not self.keep_origin_tags:
                    del head_meta['tags']
                if filebody is not None:
                    head_meta['filename'] = f'{filebody}_person{i}_head{ext}'
                yield ImageItem(head_image, head_meta)

                # Optional stage 4: eye crops taken within the head crop.
                if self.split_eyes:
                    eye_detects = detect_eyes(head_image, **self.eye_conf)
                    for j, ((ex0, ey0, ex1, ey1), eye_type, eye_score) in enumerate(eye_detects):
                        cx, cy = (ex0 + ex1) / 2, (ey0 + ey1) / 2
                        width, height = ex1 - ex0, ey1 - ey0
                        width = height = max(width, height) * self.eye_scale
                        x0, y0 = int(max(cx - width / 2, 0)), int(max(cy - height / 2, 0))
                        x1, y1 = int(min(cx + width / 2, head_image.width)), \
                                 int(min(cy + height / 2, head_image.height))
                        eye_image = head_image.crop((x0, y0, x1, y1))
                        eye_meta = {
                            **item.meta,
                            'crop': {'type': eye_type, 'score': eye_score},
                        }
                        if 'tags' in eye_meta and not self.keep_origin_tags:
                            del eye_meta['tags']
                        if filebody is not None:
                            eye_meta['filename'] = f'{filebody}_person{i}_head_eye{j}{ext}'
                        yield ImageItem(eye_image, eye_meta)

    def reset(self):
        pass
waifuc/action/tagging.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+ from typing import Iterator, Union, List, Mapping, Literal
3
+
4
+ from PIL import Image
5
+ from imgutils.tagging import get_deepdanbooru_tags, get_wd14_tags, get_mldanbooru_tags
6
+
7
+ from .base import ProcessAction, BaseAction
8
+ from ..model import ImageItem
9
+
10
+
11
def _deepdanbooru_tagging(image: Image.Image, use_real_name: bool = False,
                          general_threshold: float = 0.5, character_threshold: float = 0.5, **kwargs):
    """Tag *image* with DeepDanbooru and merge general + character tags into one dict."""
    _ = kwargs  # unrelated keyword arguments are accepted but deliberately ignored
    rating, features, characters = get_deepdanbooru_tags(
        image, use_real_name, general_threshold, character_threshold)
    _ = rating  # rating scores are not part of the returned tag mapping
    merged = dict(features)
    merged.update(characters)
    return merged
16
+
17
+
18
def _wd14_tagging(image: Image.Image, model_name: str,
                  general_threshold: float = 0.35, character_threshold: float = 0.85, **kwargs):
    """Tag *image* with the given WD14 tagger model, merging general + character tags."""
    _ = kwargs  # unrelated keyword arguments are accepted but deliberately ignored
    rating, features, characters = get_wd14_tags(image, model_name, general_threshold, character_threshold)
    _ = rating  # rating scores are not part of the returned tag mapping
    merged = dict(features)
    merged.update(characters)
    return merged
23
+
24
+
25
def _mldanbooru_tagging(image: Image.Image, use_real_name: bool = False, general_threshold: float = 0.7, **kwargs):
    """Tag *image* with ML-Danbooru; only a general tag mapping is produced."""
    _ = kwargs  # unrelated keyword arguments are accepted but deliberately ignored
    return get_mldanbooru_tags(image, use_real_name, general_threshold)
29
+
30
+
31
#: Registry of supported taggers.  Each value is a callable taking
#: ``image=...`` plus tagger-specific keyword arguments and returning a
#: ``{tag: score}`` mapping.
_TAGGING_METHODS = {
    'deepdanbooru': _deepdanbooru_tagging,
    'wd14_vit': partial(_wd14_tagging, model_name='ViT'),
    'wd14_convnext': partial(_wd14_tagging, model_name='ConvNext'),
    'wd14_convnextv2': partial(_wd14_tagging, model_name='ConvNextV2'),
    'wd14_swinv2': partial(_wd14_tagging, model_name='SwinV2'),
    'mldanbooru': _mldanbooru_tagging,
}

#: Valid tagging method names; must stay in sync with ``_TAGGING_METHODS`` keys.
TaggingMethodTyping = Literal[
    'deepdanbooru', 'wd14_vit', 'wd14_convnext', 'wd14_convnextv2', 'wd14_swinv2', 'mldanbooru']
42
+
43
+
44
class TaggingAction(ProcessAction):
    """Attach a ``tags`` score mapping to each item's meta using the chosen tagger.

    :param method: name of the tagging method (key of the tagger registry).
    :param force: when True, re-tag items even if they already carry tags.
    :param kwargs: extra keyword arguments forwarded to the tagger callable.
    """

    def __init__(self, method: TaggingMethodTyping = 'wd14_convnextv2', force: bool = False, **kwargs):
        # Resolve the tagger once at construction; unknown names raise KeyError here.
        self.method = _TAGGING_METHODS[method]
        self.force = force
        self.kwargs = kwargs

    def process(self, item: ImageItem) -> ImageItem:
        # Already-tagged items pass through untouched unless a re-tag is forced.
        if not self.force and 'tags' in item.meta:
            return item

        tags = self.method(image=item.image, **self.kwargs)
        new_meta = dict(item.meta)
        new_meta['tags'] = tags
        return ImageItem(item.image, new_meta)
56
+
57
+
58
class TagFilterAction(BaseAction):
    """Filter items by tag scores: an item survives only if every required
    tag reaches its minimum score.

    :param tags: either a list of tag names (any strictly-positive score
        passes, via a ``1e-6`` threshold) or a mapping ``{tag: min_score}``.
    :param method: tagging method used when an item has no tags yet.
    :param kwargs: extra keyword arguments forwarded to :class:`TaggingAction`.
    :raises TypeError: when ``tags`` is neither a list/tuple nor a dict.
    """

    def __init__(self, tags: Union[List[str], Mapping[str, float]],
                 method: TaggingMethodTyping = 'wd14_convnextv2', **kwargs):
        if isinstance(tags, (list, tuple)):
            # Any strictly-positive score counts as "tag present".
            self.tags = {tag: 1e-6 for tag in tags}
        elif isinstance(tags, dict):
            self.tags = dict(tags)
        else:
            raise TypeError(f'Unknown type of tags - {tags!r}.')
        self.tagger = TaggingAction(method, force=False, **kwargs)

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        item = self.tagger(item)
        tags = item.meta['tags']

        # Bug fix: previously ``tags[tag]`` raised KeyError when the tagger
        # did not emit a required tag at all.  A missing tag now counts as
        # score 0.0, so the item is simply filtered out.
        if all(tags.get(tag, 0.0) >= min_score for tag, min_score in self.tags.items()):
            yield item

    def reset(self):
        self.tagger.reset()
waifuc/config/__init__.py ADDED
File without changes
waifuc/config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (138 Bytes). View file
 
waifuc/config/__pycache__/meta.cpython-310.pyc ADDED
Binary file (395 Bytes). View file
 
waifuc/config/meta.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Overview:
3
+ Meta information for waifuc package.
4
+ """
5
+
6
+ #: Title of this project (should be `waifuc`).
7
+ __TITLE__ = 'waifuc'
8
+
9
+ #: Version of this project.
10
+ __VERSION__ = '0.0.1'
11
+
12
+ #: Short description of the project, will be included in ``setup.py``.
13
+ __DESCRIPTION__ = 'Efficient Train Data Collector for Anime Waifu'
14
+
15
+ #: Author of this project.
16
+ __AUTHOR__ = 'narugo1992'
17
+
18
+ #: Email of the authors'.
19
+ __AUTHOR_EMAIL__ = '[email protected]'
waifuc/export/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .base import BaseExporter, SaveExporter, LocalDirectoryExporter
2
+ from .huggingface import HuggingFaceExporter
3
+ from .textual_inversion import TextualInversionExporter
waifuc/export/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (356 Bytes). View file
 
waifuc/export/__pycache__/base.cpython-310.pyc ADDED
Binary file (3.32 kB). View file
 
waifuc/export/__pycache__/huggingface.cpython-310.pyc ADDED
Binary file (2.64 kB). View file
 
waifuc/export/__pycache__/textual_inversion.cpython-310.pyc ADDED
Binary file (1.81 kB). View file
 
waifuc/export/base.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ from typing import Iterator
3
+
4
+ from hbutils.system import remove
5
+ from tqdm.auto import tqdm
6
+
7
+ from ..model import ImageItem
8
+ from ..utils import get_task_names
9
+
10
+
11
class BaseExporter:
    """Abstract base for exporters consuming a stream of :class:`ImageItem`.

    Subclasses implement ``pre_export`` / ``export_item`` / ``post_export``;
    ``export_from`` drives the full cycle with a progress bar.
    """

    def pre_export(self):
        raise NotImplementedError  # pragma: no cover

    def export_item(self, item: ImageItem):
        raise NotImplementedError  # pragma: no cover

    def post_export(self):
        raise NotImplementedError  # pragma: no cover

    def export_from(self, items: Iterator[ImageItem]):
        """Export every item in *items*, wrapping the loop in a tqdm bar."""
        self.pre_export()
        # Progress-bar label: class name, optionally suffixed with task names.
        names = get_task_names()
        desc = self.__class__.__name__
        if names:
            desc = f'{desc} - {".".join(names)}'
        for current_item in tqdm(items, desc=desc):
            self.export_item(current_item)
        self.post_export()

    def reset(self):
        raise NotImplementedError  # pragma: no cover
34
+
35
+
36
class LocalDirectoryExporter(BaseExporter):
    """Exporter writing items into a local directory.

    :param output_dir: destination directory, created on demand.
    :param clear: when True, wipe any previous contents before exporting.
    """

    def __init__(self, output_dir, clear: bool = False):
        self.output_dir = output_dir
        self.clear = clear

    def pre_export(self):
        # Optionally remove stale output, then make sure the directory exists.
        if self.clear and os.path.exists(self.output_dir):
            remove(self.output_dir)
        os.makedirs(self.output_dir, exist_ok=True)

    def export_item(self, item: ImageItem):
        raise NotImplementedError  # pragma: no cover

    def post_export(self):
        # Nothing to finalize for a plain directory target.
        pass

    def reset(self):
        raise NotImplementedError  # pragma: no cover
55
+
56
+
57
class SaveExporter(LocalDirectoryExporter):
    """Save each item as an image file plus (optionally) a sidecar meta file.

    :param output_dir: destination directory.
    :param clear: wipe the directory before exporting.
    :param no_meta: when True, skip writing the hidden ``_meta.json`` sidecars.
    :param skip_when_image_exist: skip writing images that already exist on disk.
    """

    def __init__(self, output_dir, clear: bool = False, no_meta: bool = False,
                 skip_when_image_exist: bool = False):
        LocalDirectoryExporter.__init__(self, output_dir, clear)
        self.no_meta = no_meta
        # Counter for naming items that carry no 'filename' in their meta.
        self.untitles = 0
        self.skip_when_image_exist = skip_when_image_exist

    def export_item(self, item: ImageItem):
        if 'filename' in item.meta:
            filename = item.meta['filename']
        else:
            self.untitles += 1
            # Bug fix: fallback name was misspelled 'untited_...'.
            filename = f'untitled_{self.untitles}.png'

        full_filename = os.path.join(self.output_dir, filename)
        # The meta filename may contain subdirectories; create them first.
        full_directory = os.path.dirname(full_filename)
        if full_directory:
            os.makedirs(full_directory, exist_ok=True)
        item.save(full_filename, no_meta=self.no_meta, skip_when_image_exist=self.skip_when_image_exist)

    def reset(self):
        self.untitles = 0
waifuc/export/huggingface.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+ from typing import Type, Optional, Mapping, Any
4
+
5
+ from hbutils.system import TemporaryDirectory
6
+ from huggingface_hub import HfApi
7
+
8
+ from .base import LocalDirectoryExporter, BaseExporter
9
+ from ..model import ImageItem
10
+
11
+
12
class HuggingFaceExporter(BaseExporter):
    """Export items through a local exporter, then upload the result as a
    single zip archive to a HuggingFace repository.

    :param repository: target repo id (e.g. ``user/dataset``).
    :param file_in_repo: destination path of the zip inside the repo.
    :param cls: :class:`LocalDirectoryExporter` subclass used to lay out
        the files locally before zipping.
    :param args: positional arguments for ``cls`` (after the output dir).
    :param kwargs: keyword arguments for ``cls``.
    :param repo_type: HuggingFace repo type, defaults to ``'dataset'``.
    :param revision: branch/revision to commit to.
    :param hf_token: HuggingFace token; falls back to the ``HF_TOKEN``
        environment variable when not given.
    """

    def __init__(self, repository: str, file_in_repo: str,
                 cls: Type[LocalDirectoryExporter], args: tuple = (), kwargs: Optional[Mapping[str, Any]] = None,
                 repo_type: str = 'dataset', revision: str = 'main', hf_token: Optional[str] = None):
        self.repository = repository
        self.repo_type, self.revision = repo_type, revision
        self.file_in_repo = file_in_repo
        self.cls, self.args, self.kwargs = (cls, args, kwargs or {})
        # Populated in pre_export(), torn down at the end of post_export().
        self._tempdir: Optional[TemporaryDirectory] = None
        self._exporter: Optional[LocalDirectoryExporter] = None
        self.hf_token = hf_token or os.environ.get('HF_TOKEN')

    def pre_export(self):
        # Stage the export in a temporary directory via the inner exporter.
        self._tempdir = TemporaryDirectory()
        self._exporter = self.cls(self._tempdir.name, *self.args, **self.kwargs)
        self._exporter.pre_export()

    def export_item(self, item: ImageItem):
        # Delegate each item to the inner local exporter.
        self._exporter.export_item(item)

    def post_export(self):
        self._exporter.post_export()

        # upload to huggingface: zip the staged directory and push one file
        hf_api = HfApi(token=self.hf_token)
        hf_api.create_repo(self.repository, repo_type=self.repo_type, exist_ok=True)
        with TemporaryDirectory() as td:
            zip_file = os.path.join(td, 'package.zip')
            with zipfile.ZipFile(zip_file, mode='w') as zf:
                for directory, _, files in os.walk(self._tempdir.name):
                    for file in files:
                        file_path = os.path.join(directory, file)
                        rel_file_path = os.path.relpath(file_path, self._tempdir.name)
                        # Archive entries always use forward slashes,
                        # regardless of the local OS separator.
                        zf.write(
                            file_path,
                            '/'.join(rel_file_path.split(os.sep))
                        )

            hf_api.upload_file(
                path_or_fileobj=zip_file,
                repo_id=self.repository,
                repo_type=self.repo_type,
                path_in_repo=self.file_in_repo,
                revision=self.revision,
                commit_message=f'Upload {self.file_in_repo} with waifuc'
            )

        # Release the staging directory; a new one is made on the next run.
        self._exporter = None
        self._tempdir.cleanup()
        self._tempdir = None

    def reset(self):
        # All per-run state is rebuilt in pre_export(), nothing to clear here.
        pass
waifuc/export/textual_inversion.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from imgutils.tagging import tags_to_text
4
+
5
+ from .base import LocalDirectoryExporter
6
+ from ..model import ImageItem
7
+
8
+
9
class TextualInversionExporter(LocalDirectoryExporter):
    """Save each item as an image plus a ``.txt`` caption of its tags
    (the layout expected by textual-inversion / LoRA training tools).

    :param output_dir: destination directory.
    :param clear: wipe the directory before exporting.
    :param use_spaces: use spaces instead of underscores in tag text.
    :param use_escape: escape special characters in tag text.
    :param include_score: include tag scores in the caption.
    :param score_descend: order tags by descending score.
    :param skip_when_image_exist: skip writing images that already exist.
    """

    def __init__(self, output_dir: str, clear: bool = False,
                 use_spaces: bool = False, use_escape: bool = True,
                 include_score: bool = False, score_descend: bool = True,
                 skip_when_image_exist: bool = False):
        LocalDirectoryExporter.__init__(self, output_dir, clear)
        self.use_spaces = use_spaces
        self.use_escape = use_escape
        self.include_score = include_score
        self.score_descend = score_descend
        # Counter for naming items that carry no 'filename' in their meta.
        self.untitles = 0
        self.skip_when_image_exist = skip_when_image_exist

    def export_item(self, item: ImageItem):
        if 'filename' in item.meta:
            filename = item.meta['filename']
        else:
            self.untitles += 1
            # Bug fix: fallback name was misspelled 'untited_...'.
            filename = f'untitled_{self.untitles}.png'

        tags = item.meta.get('tags', None) or {}

        full_filename = os.path.join(self.output_dir, filename)
        full_tagname = os.path.join(self.output_dir, os.path.splitext(filename)[0] + '.txt')
        full_directory = os.path.dirname(full_filename)
        if full_directory:
            os.makedirs(full_directory, exist_ok=True)

        if not self.skip_when_image_exist or not os.path.exists(full_filename):
            self.image_save(item, full_filename) if False else item.image.save(full_filename)
        # NOTE(review): the caption file is rewritten even when the image
        # write is skipped — this refreshes tags for existing images;
        # confirm this is intended.
        with open(full_tagname, 'w', encoding='utf-8') as f:
            f.write(tags_to_text(tags, self.use_spaces, self.use_escape, self.include_score, self.score_descend))

    def reset(self):
        self.untitles = 0
waifuc/model/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .item import load_meta, dump_meta, ImageItem
waifuc/model/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (215 Bytes). View file
 
waifuc/model/__pycache__/item.cpython-310.pyc ADDED
Binary file (4.02 kB). View file
 
waifuc/model/item.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os.path
3
+ import pickle
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+ from PIL import Image
8
+ from hbutils.encoding import base64_decode, base64_encode
9
+ from hbutils.reflection import quick_import_object
10
+
11
# Alias for None's type (``types.NoneType`` only exists from Python 3.10).
NoneType = type(None)

# Reserved dict keys marking a pickled object inside the JSON meta format.
_TYPE_META = '__type'
_BASE64_META = 'base64'
15
+
16
+
17
def load_meta(data, path=()):
    """Reconstruct a meta structure previously produced by ``dump_meta``.

    JSON scalars, lists and plain dicts are decoded recursively as-is; a dict
    carrying the reserved ``__type`` key is base64-decoded and unpickled back
    into the original object, which must match the recorded type.

    NOTE(review): this calls ``pickle.loads`` on embedded data — loading a
    meta file from an untrusted source can execute arbitrary code; only load
    files produced by this package itself.

    :param data: JSON-compatible structure to decode.
    :param path: location inside the structure, used only in error messages.
    :raises TypeError: on unsupported values, or when an unpickled object
        does not match its recorded type.
    """
    if isinstance(data, (int, float, str, NoneType)):
        return data
    elif isinstance(data, list):
        return [load_meta(item, (*path, i)) for i, item in enumerate(data)]
    elif isinstance(data, dict):
        if _TYPE_META not in data:
            return {key: load_meta(value, (*path, key)) for key, value in data.items()}
        else:
            cls, _, _ = quick_import_object(data[_TYPE_META])
            binary = base64_decode(data[_BASE64_META])
            obj = pickle.loads(binary)
            if isinstance(obj, cls):
                return obj
            else:
                raise TypeError(f'{cls!r} expected but {obj!r} found at {path!r}.')
    else:
        raise TypeError(f'Unknown type {data!r} at {path!r}.')
35
+
36
+
37
def dump_meta(data, path=()):
    """Encode a meta structure into a JSON-compatible form.

    Scalars pass through unchanged; lists and dicts recurse.  Any other
    object is pickled and stored as a base64 payload tagged with its
    fully-qualified type name, so ``load_meta`` can restore it.

    :param data: meta structure to encode.
    :param path: location inside the structure (kept for symmetry with
        ``load_meta``; not used in messages here).
    """
    if isinstance(data, (int, float, str, NoneType)):
        return data
    if isinstance(data, list):
        return [dump_meta(element, (*path, index)) for index, element in enumerate(data)]
    if isinstance(data, dict):
        return {name: dump_meta(child, (*path, name)) for name, child in data.items()}

    # Fallback: serialize arbitrary objects via pickle + base64.
    cls = type(data)
    if hasattr(cls, '__module__'):
        type_str = f'{cls.__module__}.{cls.__name__}'
    else:
        type_str = cls.__name__
    return {
        _TYPE_META: type_str,
        _BASE64_META: base64_encode(pickle.dumps(data)),
    }
52
+
53
+
54
@dataclass
class ImageItem:
    """A PIL image paired with a free-form ``meta`` dict.

    Note: the explicit ``__init__`` below overrides the dataclass-generated
    one so that ``meta`` may be omitted or passed as ``None``.
    """
    # The image payload.
    image: Image.Image
    # Arbitrary metadata (tags, filename, crop info, ...).
    meta: dict

    def __init__(self, image: Image.Image, meta: Optional[dict] = None):
        self.image = image
        self.meta = meta or {}

    @classmethod
    def _image_file_to_meta_file(cls, image_file):
        # Sidecar convention: 'dir/name.png' -> 'dir/.name_meta.json'
        # (leading dot keeps the meta file hidden next to the image).
        directory, filename = os.path.split(image_file)
        filebody, _ = os.path.splitext(filename)
        meta_file = os.path.join(directory, f'.{filebody}_meta.json')
        return meta_file

    @classmethod
    def load_from_image(cls, image_file):
        """Load an item from an image file, plus its meta sidecar if present."""
        image = Image.open(image_file)
        meta_file = cls._image_file_to_meta_file(image_file)

        if os.path.exists(meta_file):
            with open(meta_file, 'r', encoding='utf-8') as f:
                meta = load_meta(json.load(f))
        else:
            meta = {}

        return cls(image, meta)

    def save(self, image_file, no_meta: bool = False, skip_when_image_exist: bool = False):
        """Save the image (unless skipped) and, unless disabled, its meta sidecar."""
        if not skip_when_image_exist or not os.path.exists(image_file):
            self.image.save(image_file)
        # Meta is written even when the image write was skipped, keeping the
        # sidecar in sync with the latest in-memory metadata.
        if not no_meta and self.meta:
            meta_file = self._image_file_to_meta_file(image_file)
            with open(meta_file, 'w', encoding='utf-8') as f:
                json.dump(dump_meta(self.meta), f)

    def __repr__(self):
        # Show size plus only the scalar meta entries, sorted for stability.
        values = {'size': self.image.size}
        for key, value in self.meta.items():
            if isinstance(value, (int, float, str)):
                values[key] = value

        content = ', '.join(f'{key}: {values[key]!r}' for key in sorted(values.keys()))
        return f'<{self.__class__.__name__} {content}>'
waifuc/source/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .anime_pictures import AnimePicturesSource
2
+ from .base import BaseDataSource, EmptySource
3
+ from .compose import ParallelDataSource, ComposedDataSource
4
+ from .danbooru import DanbooruSource, SafebooruSource, ATFBooruSource, E621LikeSource, E621Source, E926Source
5
+ from .derpibooru import DerpibooruLikeSource, DerpibooruSource, FurbooruSource
6
+ from .duitang import DuitangSource
7
+ from .gchar import GcharAutoSource
8
+ from .huashi6 import Huashi6Source
9
+ from .konachan import KonachanLikeSource, YandeSource, KonachanSource, KonachanNetSource, LolibooruSource, \
10
+ Rule34LikeSource, Rule34Source, HypnoHubSource, GelbooruSource, XbooruLikeSource, XbooruSource, \
11
+ SafebooruOrgSource, TBIBSource
12
+ from .local import LocalSource, LocalTISource
13
+ from .paheal import PahealSource
14
+ from .pixiv import BasePixivSource, PixivSearchSource, PixivUserSource, PixivRankingSource
15
+ from .sankaku import SankakuSource, PostOrder, Rating, FileType
16
+ from .video import VideoSource
17
+ from .wallhaven import WallHavenSource
18
+ from .web import WebDataSource
19
+ from .zerochan import ZerochanSource
waifuc/source/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.61 kB). View file
 
waifuc/source/__pycache__/anime_pictures.cpython-310.pyc ADDED
Binary file (4.1 kB). View file
 
waifuc/source/__pycache__/base.cpython-310.pyc ADDED
Binary file (3.82 kB). View file
 
waifuc/source/__pycache__/compose.cpython-310.pyc ADDED
Binary file (2.02 kB). View file
 
waifuc/source/__pycache__/danbooru.cpython-310.pyc ADDED
Binary file (6.36 kB). View file