diff --git a/waifuc/__init__.py b/waifuc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/waifuc/__pycache__/__init__.cpython-310.pyc b/waifuc/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2052eafc697c8e9cf21abf1617f35b0009fa755d Binary files /dev/null and b/waifuc/__pycache__/__init__.cpython-310.pyc differ diff --git a/waifuc/action/__init__.py b/waifuc/action/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1305e1870b7b640634e62c896c08d985e89e33d6 --- /dev/null +++ b/waifuc/action/__init__.py @@ -0,0 +1,13 @@ +from .align import AlignMaxSizeAction, AlignMinSizeAction, PaddingAlignAction +from .augument import RandomFilenameAction, RandomChoiceAction, BaseRandomAction, MirrorAction +from .background import BackgroundRemovalAction +from .base import BaseAction, ProcessAction, FilterAction, ActionStop +from .basic import ModeConvertAction +from .ccip import CCIPAction +from .count import SliceSelectAction, FirstNSelectAction +from .filename import FileExtAction, FileOrderAction +from .filter import NoMonochromeAction, OnlyMonochromeAction, ClassFilterAction, RatingFilterAction, FaceCountAction, \ + HeadCountAction, PersonRatioAction, MinSizeFilterAction, MinAreaFilterAction +from .lpips import FilterSimilarAction +from .split import PersonSplitAction, ThreeStageSplitAction +from .tagging import TaggingAction, TagFilterAction diff --git a/waifuc/action/__pycache__/__init__.cpython-310.pyc b/waifuc/action/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d580cc008cfc7e24e71beffc7f62b6b0c7ba9f48 Binary files /dev/null and b/waifuc/action/__pycache__/__init__.cpython-310.pyc differ diff --git a/waifuc/action/__pycache__/align.cpython-310.pyc b/waifuc/action/__pycache__/align.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..af6399c3ecc68a9a7f54ac79d9259cfe05479740 Binary files /dev/null and b/waifuc/action/__pycache__/align.cpython-310.pyc differ diff --git a/waifuc/action/__pycache__/augument.cpython-310.pyc b/waifuc/action/__pycache__/augument.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f5abe604e428b23135235d05f91c1d166ff4c3a Binary files /dev/null and b/waifuc/action/__pycache__/augument.cpython-310.pyc differ diff --git a/waifuc/action/__pycache__/background.cpython-310.pyc b/waifuc/action/__pycache__/background.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f834cce44a5d072bf0b90cb953d44c1d70b6c8ed Binary files /dev/null and b/waifuc/action/__pycache__/background.cpython-310.pyc differ diff --git a/waifuc/action/__pycache__/base.cpython-310.pyc b/waifuc/action/__pycache__/base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69fa0ddf938ee3965c32a457b027596022e80668 Binary files /dev/null and b/waifuc/action/__pycache__/base.cpython-310.pyc differ diff --git a/waifuc/action/__pycache__/basic.cpython-310.pyc b/waifuc/action/__pycache__/basic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb6c9fbe45fde4a19f200c03ca8cb4117fb06dfe Binary files /dev/null and b/waifuc/action/__pycache__/basic.cpython-310.pyc differ diff --git a/waifuc/action/__pycache__/ccip.cpython-310.pyc b/waifuc/action/__pycache__/ccip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1a723411349015e8277b330600bd6060dc6004c Binary files /dev/null and b/waifuc/action/__pycache__/ccip.cpython-310.pyc differ diff --git a/waifuc/action/__pycache__/count.cpython-310.pyc b/waifuc/action/__pycache__/count.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed4e1c39e73aed617dbaa4e88c9504e9ff78a9df Binary files /dev/null and 
class AlignMaxSizeAction(ProcessAction):
    """Shrink images whose longer edge exceeds ``max_size``.

    Aspect ratio is preserved; images already within the limit pass through
    unchanged.
    """

    def __init__(self, max_size: int):
        self._max_size = max_size

    def process(self, item: ImageItem) -> ImageItem:
        image = item.image
        longest_edge = max(image.width, image.height)
        if longest_edge > self._max_size:
            # Same arithmetic as a plain downscale: divide both edges by the
            # overshoot ratio and truncate to integers.
            ratio = longest_edge / self._max_size
            image = image.resize((int(image.width / ratio), int(image.height / ratio)))
        return ImageItem(image, item.meta)


class AlignMinSizeAction(ProcessAction):
    """Shrink images whose shorter edge exceeds ``min_size``.

    Aspect ratio is preserved; images whose shorter edge is already at or
    below the limit pass through unchanged.
    """

    def __init__(self, min_size: int):
        self._min_size = min_size

    def process(self, item: ImageItem) -> ImageItem:
        image = item.image
        shortest_edge = min(image.width, image.height)
        if shortest_edge > self._min_size:
            ratio = shortest_edge / self._min_size
            image = image.resize((int(image.width / ratio), int(image.height / ratio)))
        return ImageItem(image, item.meta)


class PaddingAlignAction(ProcessAction):
    """Fit each image onto a fixed-size canvas, padding with ``color``.

    The image is scaled to fit inside ``size`` without distortion, then
    centered on a canvas of exactly ``size``; the result is converted back
    to the original image's mode.
    """

    def __init__(self, size: Tuple[int, int], color: str = 'white'):
        self.width, self.height = size
        self.color = color

    def process(self, item: ImageItem) -> ImageItem:
        # Work in RGBA so the alpha channel can be used as the paste mask.
        source = load_image(item.image, force_background=None, mode='RGBA')
        scale = min(self.width / source.width, self.height / source.height)
        resized = source.resize((int(source.width * scale), int(source.height * scale)))

        canvas = Image.new('RGBA', (self.width, self.height), self.color)
        left = int((canvas.width - resized.width) // 2)
        top = int((canvas.height - resized.height) // 2)
        canvas.paste(resized, (left, top, left + resized.width, top + resized.height), resized)
        return ImageItem(canvas.convert(item.image.mode), item.meta)
class BaseRandomAction(BaseAction):
    """Base class for actions backed by a seedable random generator.

    :param seed: Optional seed; :meth:`reset` re-creates the generator so a
        re-run reproduces the same random sequence.
    """

    def __init__(self, seed=None):
        self.seed = seed
        self.random = random.Random(self.seed)

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        raise NotImplementedError  # pragma: no cover

    def reset(self):
        # Rebuild the generator from the stored seed for deterministic reruns.
        self.random = random.Random(self.seed)


class RandomChoiceAction(BaseRandomAction):
    """Randomly keep each item with probability ``p``."""

    def __init__(self, p=0.5, seed=None):
        BaseRandomAction.__init__(self, seed)
        self.p = p

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if self.random.random() <= self.p:
            yield item


class RandomFilenameAction(BaseRandomAction):
    """Replace each item's filename with a random SHA1-based name.

    :param ext: Extension for the new filename. When falsy, the extension of
        the item's existing filename is reused; if the item has no filename
        either, a ``NameError`` is raised.
    :param seed: Optional random seed.
    """

    def __init__(self, ext: Optional[str] = '.png', seed=None):
        BaseRandomAction.__init__(self, seed)
        self.ext = ext

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            # BUGFIX: os.path.splitext returns (root, ext); index [1] is the
            # extension. The original used [0], which fell back to the whole
            # file body (e.g. 'image' instead of '.png') when self.ext is None.
            ext = self.ext or os.path.splitext(os.path.basename(item.meta['filename']))[1]
        else:
            if self.ext:
                ext = self.ext
            else:
                raise NameError(f'Extension (ext) must be specified '
                                f'when filename not in metadata of image item - {item!r}.')

        filename = random_sha1(rnd=self.random) + ext
        yield ImageItem(item.image, {**item.meta, 'filename': filename})


class MirrorAction(BaseAction):
    """Emit each item twice: the original image and its horizontal mirror.

    When the item has a filename, the two outputs get ``_<origin>`` /
    ``_<mirror>`` suffixes (names taken from ``names``); otherwise both reuse
    the original metadata unchanged.
    """

    def __init__(self, names: Tuple[str, str] = ('origin', 'mirror')):
        self.origin_name, self.mirror_name = names

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            filebody, ext = os.path.splitext(item.meta['filename'])
            yield ImageItem(item.image, {**item.meta, 'filename': f'{filebody}_{self.origin_name}{ext}'})
            yield ImageItem(ImageOps.mirror(item.image),
                            {**item.meta, 'filename': f'{filebody}_{self.mirror_name}{ext}'})
        else:
            yield ImageItem(item.image, item.meta)
            yield ImageItem(ImageOps.mirror(item.image), item.meta)

    def reset(self):
        pass
class ActionStop(Exception):
    """Raised by an action to terminate the remaining pipeline early."""


class BaseAction:
    """Abstract pipeline stage mapping one item to zero or more items."""

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        raise NotImplementedError  # pragma: no cover

    def iter_from(self, iter_: Iterable[ImageItem]) -> Iterator[ImageItem]:
        """Apply :meth:`iter` across a stream; :class:`ActionStop` ends it."""
        for source_item in iter_:
            try:
                yield from self.iter(source_item)
            except ActionStop:
                break

    def reset(self):
        raise NotImplementedError  # pragma: no cover


class ProcessAction(BaseAction):
    """One-to-one action: every input item yields exactly one output item."""

    def process(self, item: ImageItem) -> ImageItem:
        raise NotImplementedError  # pragma: no cover

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        yield self.process(item)

    def reset(self):
        pass

    def __call__(self, item: ImageItem) -> ImageItem:
        # Convenience: a ProcessAction can be used as a plain callable.
        return self.process(item)


class FilterAction(BaseAction):
    """One-or-zero action: an item is kept only when :meth:`check` passes."""

    def check(self, item: ImageItem) -> bool:
        raise NotImplementedError  # pragma: no cover

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if self.check(item):
            yield item

    def reset(self):
        pass

    def __call__(self, item: ImageItem) -> bool:
        # Convenience: a FilterAction can be used as a plain predicate.
        return self.check(item)


class BackgroundRemovalAction(ProcessAction):
    """Remove the background, keeping the segmented subject as RGBA."""

    def process(self, item: ImageItem) -> ImageItem:
        _, segmented = segment_rgba_with_isnetis(item.image)
        return ImageItem(segmented, item.meta)
class ModeConvertAction(ProcessAction):
    """Convert each image to a fixed PIL mode.

    :param mode: Target PIL mode (default ``'RGB'``).
    :param force_background: Background color used to flatten transparency
        during conversion; ``None`` leaves transparency handling to the loader.
    """

    def __init__(self, mode='RGB', force_background: Optional[str] = 'white'):
        self.mode = mode
        self.force_background = force_background

    def process(self, item: ImageItem) -> ImageItem:
        image = load_image(item.image, mode=self.mode, force_background=self.force_background)
        return ImageItem(image, item.meta)


class CCIPStatus(IntEnum):
    """Internal state machine of :class:`CCIPAction`."""
    INIT = 0x1              # collecting initial items, no anchor yet
    APPROACH = 0x2          # clustering failed so far; retried every `step` items
    EVAL = 0x3              # anchor established; items compared against it
    INIT_WITH_SOURCE = 0x4  # an external anchor source was supplied


class CCIPAction(BaseAction):
    """Filter a stream of images down to a single character identity.

    The action buffers items until a dominant cluster of CCIP features can be
    found (or loads a ready-made anchor from ``init_source``), then releases
    only items matching that anchor.

    :param init_source: Optional iterable of anchor :class:`ImageItem` objects.
    :param min_val_count: Items to buffer before the first clustering attempt.
    :param step: Interval (in items) between clustering / dump retries.
    :param ratio_threshold: Minimum share of clustered samples a cluster must
        hold to be chosen as the anchor.
    :param min_clu_dump_ratio: Minimum self-consistency ratio required of the
        chosen cluster.
    :param cmp_threshold: Minimum fraction of anchor features an image must
        match to be accepted.
    :param eps: Optional clustering radius forwarded to OPTICS.
    :param min_samples: Optional minimum sample count forwarded to OPTICS.
    :param model: CCIP model name.
    :param threshold: Feature-difference threshold; defaults to the model's
        recommended value.
    """

    def __init__(self, init_source=None, *, min_val_count: int = 15, step: int = 5,
                 ratio_threshold: float = 0.6, min_clu_dump_ratio: float = 0.3, cmp_threshold: float = 0.5,
                 eps: Optional[float] = None, min_samples: Optional[int] = None,
                 model='ccip-caformer-24-randaug-pruned', threshold: Optional[float] = None):
        self.init_source = init_source

        self.min_val_count = min_val_count
        self.step = step
        self.ratio_threshold = ratio_threshold
        self.min_clu_dump_ratio = min_clu_dump_ratio
        self.cmp_threshold = cmp_threshold
        self.eps, self.min_samples = eps, min_samples
        self.model = model
        self.threshold = threshold or ccip_default_threshold(self.model)

        self.items = []          # buffered items not yet released
        self.item_released = []  # parallel flags: item already yielded
        self.feats = []          # CCIP features of accepted/buffered items
        if self.init_source is not None:
            self.status = CCIPStatus.INIT_WITH_SOURCE
        else:
            self.status = CCIPStatus.INIT

    def _extract_feature(self, item: ImageItem):
        # Reuse a pre-computed feature from metadata when available.
        if 'ccip_feature' in item.meta:
            return item.meta['ccip_feature']
        else:
            return ccip_extract_feature(item.image, model=self.model)

    def _try_cluster(self) -> bool:
        """Attempt to find a dominant, self-consistent cluster; on success,
        drop all buffered items outside it and return True."""
        with disable_output():
            clu_ids = ccip_clustering(self.feats, method='optics', model=self.model,
                                      eps=self.eps, min_samples=self.min_samples)
        clu_counts = {}
        for id_ in clu_ids:
            if id_ != -1:  # -1 marks noise / unclustered samples
                clu_counts[id_] = clu_counts.get(id_, 0) + 1

        clu_total = sum(clu_counts.values()) if clu_counts else 0
        chosen_id = None
        for id_, count in clu_counts.items():
            if count >= clu_total * self.ratio_threshold:
                chosen_id = id_
                break

        if chosen_id is not None:
            feats = [feat for i, feat in enumerate(self.feats) if clu_ids[i] == chosen_id]
            # Fraction of cluster members that match the cluster itself.
            clu_dump_ratio = np.array([
                self._compare_to_exists(feat, base_set=feats)
                for feat in feats
            ]).astype(float).mean()

            if clu_dump_ratio >= self.min_clu_dump_ratio:
                self.items = [item for i, item in enumerate(self.items) if clu_ids[i] == chosen_id]
                self.item_released = [False] * len(self.items)
                self.feats = [feat for i, feat in enumerate(self.feats) if clu_ids[i] == chosen_id]
                return True
            else:
                return False
        else:
            return False

    def _compare_to_exists(self, feat, base_set=None) -> bool:
        # FIX: the return annotation claimed Tuple[bool, List[int]], but a
        # single bool has always been returned.
        diffs = ccip_batch_differences([feat, *(base_set or self.feats)], model=self.model)[0, 1:]
        matches = diffs <= self.threshold
        return matches.astype(float).mean() >= self.cmp_threshold

    def _dump_items(self) -> Iterator[ImageItem]:
        """Yield buffered items (once each) that now match the anchor set."""
        for i in range(len(self.items)):
            if not self.item_released[i]:
                if self._compare_to_exists(self.feats[i]):
                    self.item_released[i] = True
                    yield self.items[i]

    def _eval_iter(self, item: ImageItem) -> Iterator[ImageItem]:
        feat = self._extract_feature(item)
        if self._compare_to_exists(feat):
            self.feats.append(feat)
            yield item

        # Periodically re-check buffered items against the grown feature set.
        if (len(self.feats) - len(self.items)) % self.step == 0:
            yield from self._dump_items()

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if self.status == CCIPStatus.INIT_WITH_SOURCE:
            cnt = 0
            logging.info('Existing anchor detected.')
            for item_ in self.init_source:
                self.feats.append(self._extract_feature(item_))
                yield item_
                cnt += 1
            # FIX: plural_word expects the singular form of the word.
            logging.info(f'{plural_word(cnt, "item")} loaded from anchor.')

            self.status = CCIPStatus.EVAL
            yield from self._eval_iter(item)

        elif self.status == CCIPStatus.INIT:
            self.items.append(item)
            self.feats.append(self._extract_feature(item))

            if len(self.items) >= self.min_val_count:
                if self._try_cluster():
                    self.status = CCIPStatus.EVAL
                    yield from self._dump_items()
                else:
                    self.status = CCIPStatus.APPROACH

        elif self.status == CCIPStatus.APPROACH:
            self.items.append(item)
            self.feats.append(self._extract_feature(item))

            if (len(self.items) - self.min_val_count) % self.step == 0:
                if self._try_cluster():
                    self.status = CCIPStatus.EVAL
                    yield from self._dump_items()

        elif self.status == CCIPStatus.EVAL:
            yield from self._eval_iter(item)

        else:
            raise ValueError(f'Unknown status for {self.__class__.__name__} - {self.status!r}.')

    def reset(self):
        self.items.clear()
        self.item_released.clear()
        self.feats.clear()
        # FIX: match __init__'s `is not None` check so an empty (but present)
        # init_source resets into the same state it was constructed with.
        if self.init_source is not None:
            self.status = CCIPStatus.INIT_WITH_SOURCE
        else:
            self.status = CCIPStatus.INIT
class FirstNSelectAction(BaseAction):
    """Pass through the first ``n`` items, then stop the pipeline."""

    def __init__(self, n: int):
        self._n = n
        self._passed = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if self._passed < self._n:
            yield item
            self._passed += 1
        else:
            raise ActionStop

    def reset(self):
        self._passed = 0


def _slice_process(start, stop, step):
    """Normalize and validate slice-style arguments.

    ``start`` defaults to 0 and ``step`` to 1; ``stop`` may stay ``None``
    (unbounded). Raises :class:`ValueError` on non-integer or out-of-range
    values.
    """
    start = 0 if start is None else start
    step = 1 if step is None else step
    if not isinstance(start, int) or start < 0:
        raise ValueError(f'Start should be an integer no less than 0, but {start!r} found.')
    if stop is not None and (not isinstance(stop, int) or stop < 0):
        raise ValueError(f'Stop should be an integer no less than 0, but {stop!r} found.')
    if not isinstance(step, int) or step < 1:
        raise ValueError(f'Step should be an integer no less than 1, but {step!r} found.')

    return start, stop, step


class SliceSelectAction(BaseAction):
    """Select items by position with ``slice``-like arguments.

    Mirrors the builtin ``slice`` signature: ``()``, ``(stop)``,
    ``(start, stop)`` or ``(start, stop, step)``. Once the last selectable
    position is passed, the pipeline is stopped.
    """

    def __init__(self, *args):
        if len(args) == 0:
            slice_args = _slice_process(None, None, None)
        elif len(args) == 1:
            slice_args = _slice_process(None, args[0], None)
        elif len(args) == 2:
            slice_args = _slice_process(args[0], args[1], None)
        elif len(args) == 3:
            slice_args = _slice_process(args[0], args[1], args[2])
        else:
            # FIX: repaired the garbled error message ("should no no more").
            raise ValueError(f'Arguments of {self.__class__.__name__} should be '
                             f'no more than 3, but {args!r} found.')

        self._start, self._stop, self._step = slice_args
        if self._stop is not None:
            # Last index that can actually be selected; used for early stop.
            self._max = self._start + ((self._stop - self._start - 1) // self._step) * self._step
        else:
            self._max = None
        self._current = 0

    def _check_current(self):
        if self._stop is not None and self._current >= self._stop:
            return False
        if self._current < self._start:
            return False
        return (self._current - self._start) % self._step == 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        # BUGFIX: when no stop was given, self._max is None and the original
        # comparison `self._current > self._max` raised TypeError (int > None)
        # on every item. Unbounded slices must never early-stop.
        if self._max is not None and self._current > self._max:
            raise ActionStop
        if self._check_current():
            yield item
        self._current += 1

    def reset(self):
        self._current = 0
class FileExtAction(BaseAction):
    """Force every item's filename to carry the given extension.

    Items without a filename are named ``untitled_<k><ext>`` with a running
    counter.
    """

    def __init__(self, ext: str):
        self.ext = ext
        self.untitles = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        try:
            stem, _ = os.path.splitext(item.meta['filename'])
            new_name = f'{stem}{self.ext}'
        except KeyError:
            self.untitles += 1
            new_name = f'untitled_{self.untitles}{self.ext}'

        yield ImageItem(item.image, {**item.meta, 'filename': new_name})

    def reset(self):
        self.untitles = 0


class FileOrderAction(BaseAction):
    """Rename items sequentially: ``1<ext>``, ``2<ext>``, ...

    When ``ext`` is falsy, the item's own extension is reused; unnamed items
    then raise :class:`ValueError`.
    """

    def __init__(self, ext: Optional[str] = '.png'):
        self.ext = ext
        self._current = 0

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        self._current += 1
        if 'filename' in item.meta:
            _, old_ext = os.path.splitext(item.meta['filename'])
            new_name = f'{self._current}{self.ext or old_ext}'
        elif self.ext:
            new_name = f'{self._current}{self.ext}'
        else:
            raise ValueError('No extension name provided for unnamed file.')

        yield ImageItem(item.image, {**item.meta, 'filename': new_name})

    def reset(self):
        self._current = 0


class NoMonochromeAction(FilterAction):
    """Drop monochrome (greyscale/sketch-like) images."""

    def check(self, item: ImageItem) -> bool:
        return not is_monochrome(item.image)


class OnlyMonochromeAction(FilterAction):
    """Keep only monochrome (greyscale/sketch-like) images."""

    def check(self, item: ImageItem) -> bool:
        return is_monochrome(item.image)
ImageClassTyping = Literal['illustration', 'bangumi', 'comic', '3d']


class ClassFilterAction(FilterAction):
    """Keep images whose predicted class is one of ``classes``.

    An optional ``threshold`` additionally requires the classifier's score to
    reach that value; extra keyword arguments are forwarded to the classifier.
    """

    def __init__(self, classes: List[ImageClassTyping], threshold: Optional[float] = None, **kwargs):
        self.classes = classes
        self.threshold = threshold
        self.kwargs = kwargs

    def check(self, item: ImageItem) -> bool:
        category, confidence = anime_classify(item.image, **self.kwargs)
        if category not in self.classes:
            return False
        return self.threshold is None or confidence >= self.threshold


ImageRatingTyping = Literal['safe', 'r15', 'r18']


class RatingFilterAction(FilterAction):
    """Keep images whose predicted content rating is one of ``ratings``."""

    def __init__(self, ratings: List[ImageRatingTyping], threshold: Optional[float] = None, **kwargs):
        self.ratings = ratings
        self.threshold = threshold
        self.kwargs = kwargs

    def check(self, item: ImageItem) -> bool:
        rating, confidence = anime_rating(item.image, **self.kwargs)
        if rating not in self.ratings:
            return False
        return self.threshold is None or confidence >= self.threshold


class FaceCountAction(FilterAction):
    """Keep images containing exactly ``count`` detected faces."""

    def __init__(self, count: int, level: str = 's', version: str = 'v1.4',
                 conf_threshold: float = 0.25, iou_threshold: float = 0.7):
        self.count = count
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        found = detect_faces(item.image, self.level, self.version,
                             conf_threshold=self.conf_threshold, iou_threshold=self.iou_threshold)
        return len(found) == self.count


class HeadCountAction(FilterAction):
    """Keep images containing exactly ``count`` detected heads."""

    def __init__(self, count: int, level: str = 's', conf_threshold: float = 0.3, iou_threshold: float = 0.7):
        self.count = count
        self.level = level
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        found = detect_heads(
            item.image, self.level,
            conf_threshold=self.conf_threshold,
            iou_threshold=self.iou_threshold
        )
        return len(found) == self.count


class PersonRatioAction(FilterAction):
    """Keep images with exactly one person covering at least ``ratio`` of
    the image area."""

    def __init__(self, ratio: float = 0.4, level: str = 'm', version: str = 'v1.1',
                 conf_threshold: float = 0.3, iou_threshold: float = 0.5):
        self.ratio = ratio
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def check(self, item: ImageItem) -> bool:
        detections = detect_person(item.image, self.level, self.version, 640, self.conf_threshold, self.iou_threshold)
        if len(detections) != 1:
            return False

        (x0, y0, x1, y1), _, _ = detections[0]
        box_area = abs((x1 - x0) * (y1 - y0))
        return box_area >= self.ratio * (item.image.width * item.image.height)


class MinSizeFilterAction(FilterAction):
    """Keep images whose shorter edge is at least ``min_size`` pixels."""

    def __init__(self, min_size: int):
        self.min_size = min_size

    def check(self, item: ImageItem) -> bool:
        shorter_edge = min(item.image.width, item.image.height)
        return shorter_edge >= self.min_size


class MinAreaFilterAction(FilterAction):
    """Keep images whose area corresponds to a square of edge at least
    ``min_size`` (i.e. sqrt(width * height) >= min_size)."""

    def __init__(self, min_size: int):
        self.min_size = min_size

    def check(self, item: ImageItem) -> bool:
        edge_equivalent = (item.image.width * item.image.height) ** 0.5
        return edge_equivalent >= self.min_size
class FeatureBucket:
    """Bounded store of LPIPS features for near-duplicate detection.

    Features are stored alongside their aspect ratios; only stored entries
    with a close aspect ratio are compared, which keeps lookups cheap.
    """

    def __init__(self, threshold: float = 0.45, capacity: int = 500, rtol=1.e-5, atol=1.e-8):
        self.threshold = threshold
        self.rtol, self.atol = rtol, atol
        self.features = []
        self.ratios = np.array([], dtype=float)
        self.capacity = capacity

    def check_duplicate(self, feat, ratio: float):
        """Return True when a stored feature with a similar aspect ratio lies
        within the LPIPS difference threshold."""
        candidate_ids = np.where(np.isclose(self.ratios, ratio, rtol=self.rtol, atol=self.atol))[0]
        return any(
            lpips_difference(self.features[idx.item()], feat) <= self.threshold
            for idx in candidate_ids
        )

    def add(self, feat, ratio: float):
        """Store a feature; once the store reaches twice ``capacity``, keep
        only the newest ``capacity`` entries."""
        self.features.append(feat)
        self.ratios = np.append(self.ratios, ratio)
        if len(self.features) >= self.capacity * 2:
            self.features = self.features[-self.capacity:]
            self.ratios = self.ratios[-self.capacity:]


FilterSimilarModeTyping = Literal['all', 'group']


class FilterSimilarAction(BaseAction):
    """Drop images that are LPIPS-similar to previously seen ones.

    ``mode='all'`` compares against everything seen so far; ``mode='group'``
    keeps an independent bucket per ``group_id`` metadata value.
    """

    def __init__(self, mode: FilterSimilarModeTyping = 'all', threshold: float = 0.45,
                 capacity: int = 500, rtol=5.e-2, atol=2.e-2):
        self.mode = mode
        self.threshold, self.rtol, self.atol = threshold, rtol, atol
        self.capacity = capacity
        self.buckets: Dict[str, FeatureBucket] = {}
        self.global_bucket = FeatureBucket(threshold, self.capacity, rtol, atol)

    def _get_bin(self, group_id):
        if self.mode == 'all':
            return self.global_bucket
        if self.mode == 'group':
            # Lazily create one bucket per group.
            if group_id not in self.buckets:
                self.buckets[group_id] = FeatureBucket(self.threshold, self.capacity, self.rtol, self.atol)
            return self.buckets[group_id]
        raise ValueError(f'Unknown mode for filter similar action - {self.mode!r}.')

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        image = item.image
        aspect = image.height * 1.0 / image.width
        feat = lpips_extract_feature(image)
        bucket = self._get_bin(item.meta.get('group_id'))

        if bucket.check_duplicate(feat, aspect):
            return
        bucket.add(feat, aspect)
        yield item

    def reset(self):
        self.buckets.clear()
        self.global_bucket = FeatureBucket(self.threshold, self.capacity, self.rtol, self.atol)
class PersonSplitAction(BaseAction):
    """Split an image into one item per detected person.

    Each yielded crop gets a ``meta['crop']`` record describing the detection;
    existing ``tags`` are dropped unless ``keep_origin_tags`` is set, since
    whole-image tags may no longer describe the crop.
    """

    def __init__(self, keep_original: bool = False, level: str = 'm', version: str = 'v1.1',
                 conf_threshold: float = 0.3, iou_threshold: float = 0.5, keep_origin_tags: bool = False):
        self.keep_original = keep_original
        self.level = level
        self.version = version
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.keep_origin_tags = keep_origin_tags

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        detection = detect_person(item.image, self.level, self.version,
                                  conf_threshold=self.conf_threshold, iou_threshold=self.iou_threshold)

        # Derive the filename stem once; crops get a ``_person<i>`` suffix.
        if 'filename' in item.meta:
            filename = item.meta['filename']
            filebody, ext = os.path.splitext(filename)
        else:
            filebody, ext = None, None

        if self.keep_original:
            yield item

        # NOTE: person indices start at 0 here (unlike ThreeStageSplitAction,
        # whose person indices start at 1).
        for i, (area, type_, score) in enumerate(detection):
            new_meta = {
                **item.meta,
                'crop': {'type': type_, 'score': score},
            }
            if 'tags' in new_meta and not self.keep_origin_tags:
                del new_meta['tags']
            if filebody is not None:
                new_meta['filename'] = f'{filebody}_person{i}{ext}'
            yield ImageItem(item.image.crop(area), new_meta)

    def reset(self):
        pass


class ThreeStageSplitAction(BaseAction):
    """Cascade crop: person -> half body -> head (optionally -> eyes).

    For each detected person, the person crop, a half-body crop and a head
    crop (expanded by ``head_scale``) are yielded; with ``split_eyes`` an
    additional eye crop (expanded by ``eye_scale``) per detected eye is
    produced from the head crop. ``split_person=False`` treats the whole
    image as a single person.
    """

    def __init__(self, person_conf: Optional[dict] = None, halfbody_conf: Optional[dict] = None,
                 head_conf: Optional[dict] = None, head_scale: float = 1.5,
                 split_eyes: bool = False, eye_conf: Optional[dict] = None, eye_scale: float = 2.4,
                 split_person: bool = True, keep_origin_tags: bool = False):
        # Per-stage keyword arguments forwarded to the respective detectors.
        self.person_conf = dict(person_conf or {})
        self.halfbody_conf = dict(halfbody_conf or {})
        self.head_conf = dict(head_conf or {})
        self.eye_conf = dict(eye_conf or {})
        self.head_scale = head_scale
        self.eye_scale = eye_scale
        self.split_eyes = split_eyes
        self.split_person = split_person
        self.keep_origin_tags = keep_origin_tags

    def _split_person(self, item: ImageItem, filebody, ext):
        # Yields (person_index, person_item) pairs; indices start at 1.
        if self.split_person:
            for i, (px, type_, score) in enumerate(detect_person(item.image, **self.person_conf), start=1):
                person_image = item.image.crop(px)
                person_meta = {
                    **item.meta,
                    'crop': {'type': type_, 'score': score},
                }
                if 'tags' in person_meta and not self.keep_origin_tags:
                    del person_meta['tags']
                if filebody is not None:
                    person_meta['filename'] = f'{filebody}_person{i}{ext}'
                yield i, ImageItem(person_image, person_meta)

        else:
            # Whole image treated as the single person #1.
            yield 1, item

    def iter(self, item: ImageItem) -> Iterator[ImageItem]:
        if 'filename' in item.meta:
            filename = item.meta['filename']
            filebody, ext = os.path.splitext(filename)
        else:
            filebody, ext = None, None

        for i, person_item in self._split_person(item, filebody, ext):
            person_image = person_item.image
            yield person_item

            # Stage 2: best half-body detection (first result only).
            half_detects = detect_halfbody(person_image, **self.halfbody_conf)
            if half_detects:
                halfbody_area, halfbody_type, halfbody_score = half_detects[0]
                halfbody_image = person_image.crop(halfbody_area)
                halfbody_meta = {
                    **item.meta,
                    'crop': {'type': halfbody_type, 'score': halfbody_score},
                }
                if 'tags' in halfbody_meta and not self.keep_origin_tags:
                    del halfbody_meta['tags']
                if filebody is not None:
                    halfbody_meta['filename'] = f'{filebody}_person{i}_halfbody{ext}'
                yield ImageItem(halfbody_image, halfbody_meta)

            # Stage 3: best head detection, expanded to a square scaled by
            # ``head_scale`` and clamped to the person crop's bounds.
            head_detects = detect_heads(person_image, **self.head_conf)
            if head_detects:
                (hx0, hy0, hx1, hy1), head_type, head_score = head_detects[0]
                cx, cy = (hx0 + hx1) / 2, (hy0 + hy1) / 2
                width, height = hx1 - hx0, hy1 - hy0
                width = height = max(width, height) * self.head_scale
                x0, y0 = int(max(cx - width / 2, 0)), int(max(cy - height / 2, 0))
                x1, y1 = int(min(cx + width / 2, person_image.width)), int(min(cy + height / 2, person_image.height))
                head_image = person_image.crop((x0, y0, x1, y1))
                head_meta = {
                    **item.meta,
                    'crop': {'type': head_type, 'score': head_score},
                }
                if 'tags' in head_meta and not self.keep_origin_tags:
                    del head_meta['tags']
                if filebody is not None:
                    head_meta['filename'] = f'{filebody}_person{i}_head{ext}'
                yield ImageItem(head_image, head_meta)

                # Optional stage 4: square eye crops from the head crop,
                # expanded by ``eye_scale``. Eye indices start at 0.
                if self.split_eyes:
                    eye_detects = detect_eyes(head_image, **self.eye_conf)
                    for j, ((ex0, ey0, ex1, ey1), eye_type, eye_score) in enumerate(eye_detects):
                        cx, cy = (ex0 + ex1) / 2, (ey0 + ey1) / 2
                        width, height = ex1 - ex0, ey1 - ey0
                        width = height = max(width, height) * self.eye_scale
                        x0, y0 = int(max(cx - width / 2, 0)), int(max(cy - height / 2, 0))
                        x1, y1 = int(min(cx + width / 2, head_image.width)), \
                            int(min(cy + height / 2, head_image.height))
                        eye_image = head_image.crop((x0, y0, x1, y1))
                        eye_meta = {
                            **item.meta,
                            'crop': {'type': eye_type, 'score': eye_score},
                        }
                        if 'tags' in eye_meta and not self.keep_origin_tags:
                            del eye_meta['tags']
                        if filebody is not None:
                            eye_meta['filename'] = f'{filebody}_person{i}_head_eye{j}{ext}'
                        yield ImageItem(eye_image, eye_meta)

    def reset(self):
        pass
general_threshold, character_threshold) + return {**features, **characters} + + +def _mldanbooru_tagging(image: Image.Image, use_real_name: bool = False, general_threshold: float = 0.7, **kwargs): + _ = kwargs + features = get_mldanbooru_tags(image, use_real_name, general_threshold) + return features + + +_TAGGING_METHODS = { + 'deepdanbooru': _deepdanbooru_tagging, + 'wd14_vit': partial(_wd14_tagging, model_name='ViT'), + 'wd14_convnext': partial(_wd14_tagging, model_name='ConvNext'), + 'wd14_convnextv2': partial(_wd14_tagging, model_name='ConvNextV2'), + 'wd14_swinv2': partial(_wd14_tagging, model_name='SwinV2'), + 'mldanbooru': _mldanbooru_tagging, +} + +TaggingMethodTyping = Literal[ + 'deepdanbooru', 'wd14_vit', 'wd14_convnext', 'wd14_convnextv2', 'wd14_swinv2', 'mldanbooru'] + + +class TaggingAction(ProcessAction): + def __init__(self, method: TaggingMethodTyping = 'wd14_convnextv2', force: bool = False, **kwargs): + self.method = _TAGGING_METHODS[method] + self.force = force + self.kwargs = kwargs + + def process(self, item: ImageItem) -> ImageItem: + if 'tags' in item.meta and not self.force: + return item + else: + tags = self.method(image=item.image, **self.kwargs) + return ImageItem(item.image, {**item.meta, 'tags': tags}) + + +class TagFilterAction(BaseAction): + def __init__(self, tags: Union[List[str], Mapping[str, float]], + method: TaggingMethodTyping = 'wd14_convnextv2', **kwargs): + if isinstance(tags, (list, tuple)): + self.tags = {tag: 1e-6 for tag in tags} + elif isinstance(tags, dict): + self.tags = dict(tags) + else: + raise TypeError(f'Unknown type of tags - {tags!r}.') + self.tagger = TaggingAction(method, force=False, **kwargs) + + def iter(self, item: ImageItem) -> Iterator[ImageItem]: + item = self.tagger(item) + tags = item.meta['tags'] + + valid = True + for tag, min_score in self.tags.items(): + if tags[tag] < min_score: + valid = False + break + + if valid: + yield item + + def reset(self): + self.tagger.reset() diff --git 
a/waifuc/config/__init__.py b/waifuc/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/waifuc/config/__pycache__/__init__.cpython-310.pyc b/waifuc/config/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64e9186a709311f1e7d5400fa72996f9f0072114 Binary files /dev/null and b/waifuc/config/__pycache__/__init__.cpython-310.pyc differ diff --git a/waifuc/config/__pycache__/meta.cpython-310.pyc b/waifuc/config/__pycache__/meta.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d98d2f6096dbfecf53ae9725f908a08d810f4ff7 Binary files /dev/null and b/waifuc/config/__pycache__/meta.cpython-310.pyc differ diff --git a/waifuc/config/meta.py b/waifuc/config/meta.py new file mode 100644 index 0000000000000000000000000000000000000000..6155456c454bbc47c770bc5c05cdaa49fdc56938 --- /dev/null +++ b/waifuc/config/meta.py @@ -0,0 +1,19 @@ +""" +Overview: + Meta information for waifuc package. +""" + +#: Title of this project (should be `waifuc`). +__TITLE__ = 'waifuc' + +#: Version of this project. +__VERSION__ = '0.0.1' + +#: Short description of the project, will be included in ``setup.py``. +__DESCRIPTION__ = 'Efficient Train Data Collector for Anime Waifu' + +#: Author of this project. +__AUTHOR__ = 'narugo1992' + +#: Email of the authors'. 
+__AUTHOR_EMAIL__ = 'narugo992@gmail.com' diff --git a/waifuc/export/__init__.py b/waifuc/export/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0a4f9cae0b1df0077bf1bdae6a1a832600637f8b --- /dev/null +++ b/waifuc/export/__init__.py @@ -0,0 +1,3 @@ +from .base import BaseExporter, SaveExporter, LocalDirectoryExporter +from .huggingface import HuggingFaceExporter +from .textual_inversion import TextualInversionExporter diff --git a/waifuc/export/__pycache__/__init__.cpython-310.pyc b/waifuc/export/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4d1c97eabb0806ccd59b26bb1fba8df62beabff Binary files /dev/null and b/waifuc/export/__pycache__/__init__.cpython-310.pyc differ diff --git a/waifuc/export/__pycache__/base.cpython-310.pyc b/waifuc/export/__pycache__/base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f239c5c38aa59caf30a131f0481f85199d95319e Binary files /dev/null and b/waifuc/export/__pycache__/base.cpython-310.pyc differ diff --git a/waifuc/export/__pycache__/huggingface.cpython-310.pyc b/waifuc/export/__pycache__/huggingface.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e9b4f14d242de1fe9f20a689d6b574bd207987c Binary files /dev/null and b/waifuc/export/__pycache__/huggingface.cpython-310.pyc differ diff --git a/waifuc/export/__pycache__/textual_inversion.cpython-310.pyc b/waifuc/export/__pycache__/textual_inversion.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f58b6500767ecbcedc8411e8ac5a7d5d0e148aba Binary files /dev/null and b/waifuc/export/__pycache__/textual_inversion.cpython-310.pyc differ diff --git a/waifuc/export/base.py b/waifuc/export/base.py new file mode 100644 index 0000000000000000000000000000000000000000..d95778e42ba99659a56ca669dced3abdafacd05b --- /dev/null +++ b/waifuc/export/base.py @@ -0,0 +1,79 @@ +import os.path +from typing import 
Iterator + +from hbutils.system import remove +from tqdm.auto import tqdm + +from ..model import ImageItem +from ..utils import get_task_names + + +class BaseExporter: + def pre_export(self): + raise NotImplementedError # pragma: no cover + + def export_item(self, item: ImageItem): + raise NotImplementedError # pragma: no cover + + def post_export(self): + raise NotImplementedError # pragma: no cover + + def export_from(self, items: Iterator[ImageItem]): + self.pre_export() + names = get_task_names() + if names: + desc = f'{self.__class__.__name__} - {".".join(names)}' + else: + desc = f'{self.__class__.__name__}' + for item in tqdm(items, desc=desc): + self.export_item(item) + self.post_export() + + def reset(self): + raise NotImplementedError # pragma: no cover + + +class LocalDirectoryExporter(BaseExporter): + def __init__(self, output_dir, clear: bool = False): + self.output_dir = output_dir + self.clear = clear + + def pre_export(self): + if self.clear and os.path.exists(self.output_dir): + remove(self.output_dir) + + os.makedirs(self.output_dir, exist_ok=True) + + def export_item(self, item: ImageItem): + raise NotImplementedError # pragma: no cover + + def post_export(self): + pass + + def reset(self): + raise NotImplementedError # pragma: no cover + + +class SaveExporter(LocalDirectoryExporter): + def __init__(self, output_dir, clear: bool = False, no_meta: bool = False, + skip_when_image_exist: bool = False): + LocalDirectoryExporter.__init__(self, output_dir, clear) + self.no_meta = no_meta + self.untitles = 0 + self.skip_when_image_exist = skip_when_image_exist + + def export_item(self, item: ImageItem): + if 'filename' in item.meta: + filename = item.meta['filename'] + else: + self.untitles += 1 + filename = f'untited_{self.untitles}.png' + + full_filename = os.path.join(self.output_dir, filename) + full_directory = os.path.dirname(full_filename) + if full_directory: + os.makedirs(full_directory, exist_ok=True) + item.save(full_filename, 
no_meta=self.no_meta, skip_when_image_exist=self.skip_when_image_exist) + + def reset(self): + self.untitles = 0 diff --git a/waifuc/export/huggingface.py b/waifuc/export/huggingface.py new file mode 100644 index 0000000000000000000000000000000000000000..2d1232a1bba2340a893a517468e2357e45b70174 --- /dev/null +++ b/waifuc/export/huggingface.py @@ -0,0 +1,64 @@ +import os +import zipfile +from typing import Type, Optional, Mapping, Any + +from hbutils.system import TemporaryDirectory +from huggingface_hub import HfApi + +from .base import LocalDirectoryExporter, BaseExporter +from ..model import ImageItem + + +class HuggingFaceExporter(BaseExporter): + def __init__(self, repository: str, file_in_repo: str, + cls: Type[LocalDirectoryExporter], args: tuple = (), kwargs: Optional[Mapping[str, Any]] = None, + repo_type: str = 'dataset', revision: str = 'main', hf_token: Optional[str] = None): + self.repository = repository + self.repo_type, self.revision = repo_type, revision + self.file_in_repo = file_in_repo + self.cls, self.args, self.kwargs = (cls, args, kwargs or {}) + self._tempdir: Optional[TemporaryDirectory] = None + self._exporter: Optional[LocalDirectoryExporter] = None + self.hf_token = hf_token or os.environ.get('HF_TOKEN') + + def pre_export(self): + self._tempdir = TemporaryDirectory() + self._exporter = self.cls(self._tempdir.name, *self.args, **self.kwargs) + self._exporter.pre_export() + + def export_item(self, item: ImageItem): + self._exporter.export_item(item) + + def post_export(self): + self._exporter.post_export() + + # upload to huggingface + hf_api = HfApi(token=self.hf_token) + hf_api.create_repo(self.repository, repo_type=self.repo_type, exist_ok=True) + with TemporaryDirectory() as td: + zip_file = os.path.join(td, 'package.zip') + with zipfile.ZipFile(zip_file, mode='w') as zf: + for directory, _, files in os.walk(self._tempdir.name): + for file in files: + file_path = os.path.join(directory, file) + rel_file_path = 
os.path.relpath(file_path, self._tempdir.name) + zf.write( + file_path, + '/'.join(rel_file_path.split(os.sep)) + ) + + hf_api.upload_file( + path_or_fileobj=zip_file, + repo_id=self.repository, + repo_type=self.repo_type, + path_in_repo=self.file_in_repo, + revision=self.revision, + commit_message=f'Upload {self.file_in_repo} with waifuc' + ) + + self._exporter = None + self._tempdir.cleanup() + self._tempdir = None + + def reset(self): + pass diff --git a/waifuc/export/textual_inversion.py b/waifuc/export/textual_inversion.py new file mode 100644 index 0000000000000000000000000000000000000000..22f0098c1494b58994193871dfb5f635976a1aa7 --- /dev/null +++ b/waifuc/export/textual_inversion.py @@ -0,0 +1,43 @@ +import os + +from imgutils.tagging import tags_to_text + +from .base import LocalDirectoryExporter +from ..model import ImageItem + + +class TextualInversionExporter(LocalDirectoryExporter): + def __init__(self, output_dir: str, clear: bool = False, + use_spaces: bool = False, use_escape: bool = True, + include_score: bool = False, score_descend: bool = True, + skip_when_image_exist: bool = False): + LocalDirectoryExporter.__init__(self, output_dir, clear) + self.use_spaces = use_spaces + self.use_escape = use_escape + self.include_score = include_score + self.score_descend = score_descend + self.untitles = 0 + self.skip_when_image_exist = skip_when_image_exist + + def export_item(self, item: ImageItem): + if 'filename' in item.meta: + filename = item.meta['filename'] + else: + self.untitles += 1 + filename = f'untited_{self.untitles}.png' + + tags = item.meta.get('tags', None) or {} + + full_filename = os.path.join(self.output_dir, filename) + full_tagname = os.path.join(self.output_dir, os.path.splitext(filename)[0] + '.txt') + full_directory = os.path.dirname(full_filename) + if full_directory: + os.makedirs(full_directory, exist_ok=True) + + if not self.skip_when_image_exist or not os.path.exists(full_filename): + item.image.save(full_filename) + with 
open(full_tagname, 'w', encoding='utf-8') as f: + f.write(tags_to_text(tags, self.use_spaces, self.use_escape, self.include_score, self.score_descend)) + + def reset(self): + self.untitles = 0 diff --git a/waifuc/model/__init__.py b/waifuc/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ea87ea1e21fe4ab2c26bb7d1e9496fe96c427420 --- /dev/null +++ b/waifuc/model/__init__.py @@ -0,0 +1 @@ +from .item import load_meta, dump_meta, ImageItem diff --git a/waifuc/model/__pycache__/__init__.cpython-310.pyc b/waifuc/model/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ead3e4a61e1291d5810733ee82cc5bf3ac8bf077 Binary files /dev/null and b/waifuc/model/__pycache__/__init__.cpython-310.pyc differ diff --git a/waifuc/model/__pycache__/item.cpython-310.pyc b/waifuc/model/__pycache__/item.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8fba78024460d71031ee03593ed6336815e5c90 Binary files /dev/null and b/waifuc/model/__pycache__/item.cpython-310.pyc differ diff --git a/waifuc/model/item.py b/waifuc/model/item.py new file mode 100644 index 0000000000000000000000000000000000000000..4f585706d26b596677aa353fd82b3385fd0f82df --- /dev/null +++ b/waifuc/model/item.py @@ -0,0 +1,98 @@ +import json +import os.path +import pickle +from dataclasses import dataclass +from typing import Optional + +from PIL import Image +from hbutils.encoding import base64_decode, base64_encode +from hbutils.reflection import quick_import_object + +NoneType = type(None) + +_TYPE_META = '__type' +_BASE64_META = 'base64' + + +def load_meta(data, path=()): + if isinstance(data, (int, float, str, NoneType)): + return data + elif isinstance(data, list): + return [load_meta(item, (*path, i)) for i, item in enumerate(data)] + elif isinstance(data, dict): + if _TYPE_META not in data: + return {key: load_meta(value, (*path, key)) for key, value in data.items()} + else: + cls, _, _ = 
quick_import_object(data[_TYPE_META]) + binary = base64_decode(data[_BASE64_META]) + obj = pickle.loads(binary) + if isinstance(obj, cls): + return obj + else: + raise TypeError(f'{cls!r} expected but {obj!r} found at {path!r}.') + else: + raise TypeError(f'Unknown type {data!r} at {path!r}.') + + +def dump_meta(data, path=()): + if isinstance(data, (int, float, str, NoneType)): + return data + elif isinstance(data, list): + return [dump_meta(item, (*path, i)) for i, item in enumerate(data)] + elif isinstance(data, dict): + return {key: dump_meta(value, (*path, key)) for key, value in data.items()} + else: + cls = type(data) + type_str = f'{cls.__module__}.{cls.__name__}' if hasattr(cls, '__module__') else cls.__name__ + base64_str = base64_encode(pickle.dumps(data)) + return { + _TYPE_META: type_str, + _BASE64_META: base64_str + } + + +@dataclass +class ImageItem: + image: Image.Image + meta: dict + + def __init__(self, image: Image.Image, meta: Optional[dict] = None): + self.image = image + self.meta = meta or {} + + @classmethod + def _image_file_to_meta_file(cls, image_file): + directory, filename = os.path.split(image_file) + filebody, _ = os.path.splitext(filename) + meta_file = os.path.join(directory, f'.{filebody}_meta.json') + return meta_file + + @classmethod + def load_from_image(cls, image_file): + image = Image.open(image_file) + meta_file = cls._image_file_to_meta_file(image_file) + + if os.path.exists(meta_file): + with open(meta_file, 'r', encoding='utf-8') as f: + meta = load_meta(json.load(f)) + else: + meta = {} + + return cls(image, meta) + + def save(self, image_file, no_meta: bool = False, skip_when_image_exist: bool = False): + if not skip_when_image_exist or not os.path.exists(image_file): + self.image.save(image_file) + if not no_meta and self.meta: + meta_file = self._image_file_to_meta_file(image_file) + with open(meta_file, 'w', encoding='utf-8') as f: + json.dump(dump_meta(self.meta), f) + + def __repr__(self): + values = {'size': 
self.image.size} + for key, value in self.meta.items(): + if isinstance(value, (int, float, str)): + values[key] = value + + content = ', '.join(f'{key}: {values[key]!r}' for key in sorted(values.keys())) + return f'<{self.__class__.__name__} {content}>' diff --git a/waifuc/source/__init__.py b/waifuc/source/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..28d3c82ca71185f84769654072a7e95e8b0c1b82 --- /dev/null +++ b/waifuc/source/__init__.py @@ -0,0 +1,19 @@ +from .anime_pictures import AnimePicturesSource +from .base import BaseDataSource, EmptySource +from .compose import ParallelDataSource, ComposedDataSource +from .danbooru import DanbooruSource, SafebooruSource, ATFBooruSource, E621LikeSource, E621Source, E926Source +from .derpibooru import DerpibooruLikeSource, DerpibooruSource, FurbooruSource +from .duitang import DuitangSource +from .gchar import GcharAutoSource +from .huashi6 import Huashi6Source +from .konachan import KonachanLikeSource, YandeSource, KonachanSource, KonachanNetSource, LolibooruSource, \ + Rule34LikeSource, Rule34Source, HypnoHubSource, GelbooruSource, XbooruLikeSource, XbooruSource, \ + SafebooruOrgSource, TBIBSource +from .local import LocalSource, LocalTISource +from .paheal import PahealSource +from .pixiv import BasePixivSource, PixivSearchSource, PixivUserSource, PixivRankingSource +from .sankaku import SankakuSource, PostOrder, Rating, FileType +from .video import VideoSource +from .wallhaven import WallHavenSource +from .web import WebDataSource +from .zerochan import ZerochanSource diff --git a/waifuc/source/__pycache__/__init__.cpython-310.pyc b/waifuc/source/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..22eb1ff549e426949d796d07fc055785c1c9c04f Binary files /dev/null and b/waifuc/source/__pycache__/__init__.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/anime_pictures.cpython-310.pyc 
b/waifuc/source/__pycache__/anime_pictures.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08ad1347805b4741dc349e4bb1e2f839564ab1b5 Binary files /dev/null and b/waifuc/source/__pycache__/anime_pictures.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/base.cpython-310.pyc b/waifuc/source/__pycache__/base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dbf620c7effaed99d6e8b11404778c9eb778c919 Binary files /dev/null and b/waifuc/source/__pycache__/base.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/compose.cpython-310.pyc b/waifuc/source/__pycache__/compose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c963bcb81cf5b7cb36cbbf19c06a80a9689da3a Binary files /dev/null and b/waifuc/source/__pycache__/compose.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/danbooru.cpython-310.pyc b/waifuc/source/__pycache__/danbooru.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab195856340d5ebbf1f25f2326ee5264188b7178 Binary files /dev/null and b/waifuc/source/__pycache__/danbooru.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/derpibooru.cpython-310.pyc b/waifuc/source/__pycache__/derpibooru.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b8afd327a8c5648fa8d2452a83fb854a45cf7bc Binary files /dev/null and b/waifuc/source/__pycache__/derpibooru.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/duitang.cpython-310.pyc b/waifuc/source/__pycache__/duitang.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fde9e5004228c061c9dddd75259fd8cd5783cf6 Binary files /dev/null and b/waifuc/source/__pycache__/duitang.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/gchar.cpython-310.pyc b/waifuc/source/__pycache__/gchar.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..cd002c12d5d5b7b042b6743a759b83987a072233 Binary files /dev/null and b/waifuc/source/__pycache__/gchar.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/huashi6.cpython-310.pyc b/waifuc/source/__pycache__/huashi6.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..48c1416556e413c0760ffe7017e922f4fd8962b2 Binary files /dev/null and b/waifuc/source/__pycache__/huashi6.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/konachan.cpython-310.pyc b/waifuc/source/__pycache__/konachan.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..39f03b7b1f5120b3f72f58aac4dfbe46695d9c72 Binary files /dev/null and b/waifuc/source/__pycache__/konachan.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/local.cpython-310.pyc b/waifuc/source/__pycache__/local.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e906c84bc7779ccc14b14a4a16a45c5a193d81f9 Binary files /dev/null and b/waifuc/source/__pycache__/local.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/paheal.cpython-310.pyc b/waifuc/source/__pycache__/paheal.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a51b24210ac8d35caa117f69e9146d4095d99c45 Binary files /dev/null and b/waifuc/source/__pycache__/paheal.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/pixiv.cpython-310.pyc b/waifuc/source/__pycache__/pixiv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fde867817e54f196b8f29212aa5c684ec3497482 Binary files /dev/null and b/waifuc/source/__pycache__/pixiv.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/sankaku.cpython-310.pyc b/waifuc/source/__pycache__/sankaku.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..67e7f911cbbe1bfb9d5afdc042a37687d51dc1bc Binary files /dev/null and 
b/waifuc/source/__pycache__/sankaku.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/video.cpython-310.pyc b/waifuc/source/__pycache__/video.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c652536e5eda0df63bdf40429021410c5bda8caa Binary files /dev/null and b/waifuc/source/__pycache__/video.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/wallhaven.cpython-310.pyc b/waifuc/source/__pycache__/wallhaven.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ca09caf53c845083405b180c3d9d3c5bb5d6191 Binary files /dev/null and b/waifuc/source/__pycache__/wallhaven.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/web.cpython-310.pyc b/waifuc/source/__pycache__/web.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..213ad4270e396d83f46212743ae5c555b0fd62da Binary files /dev/null and b/waifuc/source/__pycache__/web.cpython-310.pyc differ diff --git a/waifuc/source/__pycache__/zerochan.cpython-310.pyc b/waifuc/source/__pycache__/zerochan.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..237e02a4890ec7cd808f20da879517e3aab702fe Binary files /dev/null and b/waifuc/source/__pycache__/zerochan.cpython-310.pyc differ diff --git a/waifuc/source/anime_pictures.py b/waifuc/source/anime_pictures.py new file mode 100644 index 0000000000000000000000000000000000000000..be308f285a7d92ce5f529fd3e410a7fb6d04ca0c --- /dev/null +++ b/waifuc/source/anime_pictures.py @@ -0,0 +1,111 @@ +import os +from enum import Enum +from typing import Iterator, Tuple, Union, List, Literal + +import cloudscraper +from hbutils.system import urlsplit +from pyquery import PyQuery as pq + +from .web import WebDataSource +from ..utils import get_requests_session, srequest + + +class OrderBy(str, Enum): + STAR_DATE = "stars_date" + DATE = "date" + DATE_REVERS = "date_r" + RATING = "rating" + DOWNLOADS = "views" + SIZE = "size" + TAG_COUNT = 
"tag_num" + + +class Period(str, Enum): + ANYTIME = "0" + PAST_DAY = "3" + PAST_WEEK = "1" + PAST_MONTH = "2" + PAST_6_MONTHS = "4" + PAST_YEAR = "5" + PAST_2_YEARS = "6" + PAST_3_YEARS = "7" + + +class AnimePicturesSource(WebDataSource): + __root__ = 'https://anime-pictures.net' + + def __init__(self, tags: List[str], tag_mode: Literal['or', 'and'] = 'and', + denied_tags: List[str] = None, denied_tag_mode: Literal['or', 'and'] = 'or', + order_by: OrderBy = OrderBy.RATING, period: Period = Period.ANYTIME, + select: Literal['thumbnail', 'preview', 'original'] = 'original', + group_name: str = 'anime_pictures', download_silent: bool = True, **kwargs): + WebDataSource.__init__( + self, group_name, + get_requests_session(session=cloudscraper.create_scraper()), + download_silent, + ) + self.tags, self.tag_mode = tags, tag_mode + self.denied_tags, self.denied_tag_mode = (denied_tags or []), denied_tag_mode + self.tag_mode = tag_mode + self.order_by = order_by + self.period = period + self.select = select + self.kwargs = kwargs + + def _params(self, page): + params = { + 'order_by': self.order_by.value, + 'ldate': self.period.value, + 'lang': 'en', + 'page': str(page), + } + if self.tag_mode == 'and': + params['search_tag'] = '&&'.join(self.tags) + else: + params['search_tag'] = '||'.join(self.tags) + if self.denied_tags: + if self.denied_tag_mode == 'and': + params['denied_tags'] = '&&'.join(self.denied_tags) + else: + params['denied_tags'] = '||'.join(self.denied_tags) + + return {**params, **self.kwargs} + + def _get_url(self, post, resp): + id_, md5 = post['id'], post['md5'] + if self.select == 'thumbnail': + return f'https://cdn.anime-pictures.net/previews/{md5[:3]}/{md5}_bp.jpg' + elif self.select == 'preview': + return f'https://cdn.anime-pictures.net/previews/{md5[:3]}/{md5}_cp.jpg' + elif self.select == 'original': + return pq(resp.text)('#rating a.download_icon').attr('href') + else: + raise ValueError(f'Invalid image selection - {self.select!r}.') + + def 
_iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: + page = 0 + while True: + resp = srequest(self.session, 'GET', f'{self.__root__}/api/v3/posts', params=self._params(page)) + resp.raise_for_status() + + posts = resp.json()['posts'] + if not posts: + break + + for post in posts: + resp_page = srequest(self.session, 'GET', f'{self.__root__}/posts/{post["id"]}?lang=en') + resp_page.raise_for_status() + + url = self._get_url(post, resp_page) + tags = [item.text().replace(' ', '_') for item in pq(resp_page.text)('ul.tags li > a').items()] + _, ext_name = os.path.splitext(urlsplit(url).filename) + filename = f'{self.group_name}_{post["id"]}{ext_name}' + meta = { + 'anime_pictures': post, + 'group_id': f'{self.group_name}_{post["id"]}', + 'filename': filename, + 'tags': {key: 1.0 for key in tags} + } + yield post['id'], url, meta + + page += 1 diff --git a/waifuc/source/base.py b/waifuc/source/base.py new file mode 100644 index 0000000000000000000000000000000000000000..0f4ced0a70e1b85fa29e493a03604c905bef329a --- /dev/null +++ b/waifuc/source/base.py @@ -0,0 +1,96 @@ +import copy +from typing import Iterator, Optional + +from tqdm.auto import tqdm + +from ..action import BaseAction +from ..export import BaseExporter +from ..model import ImageItem +from ..utils import task_ctx, get_task_names + + +class BaseDataSource: + def _iter(self) -> Iterator[ImageItem]: + raise NotImplementedError # pragma: no cover + + def _iter_from(self) -> Iterator[ImageItem]: + yield from self._iter() + + def __iter__(self) -> Iterator[ImageItem]: + yield from self._iter_from() + + def __or__(self, other): + from .compose import ParallelDataSource + if isinstance(self, ParallelDataSource): + if isinstance(other, ParallelDataSource): + return ParallelDataSource(*self.sources, *other.sources) + else: + return ParallelDataSource(*self.sources, other) + else: + if isinstance(other, ParallelDataSource): + return ParallelDataSource(self, *other.sources) + else: + return 
ParallelDataSource(self, other) + + def __add__(self, other): + from .compose import ComposedDataSource + if isinstance(self, ComposedDataSource): + if isinstance(other, ComposedDataSource): + return ComposedDataSource(*self.sources, *other.sources) + else: + return ComposedDataSource(*self.sources, other) + else: + if isinstance(other, ComposedDataSource): + return ComposedDataSource(self, *other.sources) + else: + return ComposedDataSource(self, other) + + def __getitem__(self, item): + from ..action import SliceSelectAction + if isinstance(item, slice): + return self.attach(SliceSelectAction(item.start, item.stop, item.step)) + else: + raise TypeError(f'Data source\'s getitem only accept slices, but {item!r} found.') + + def attach(self, *actions: BaseAction) -> 'AttachedDataSource': + return AttachedDataSource(self, *actions) + + def export(self, exporter: BaseExporter, name: Optional[str] = None): + exporter = copy.deepcopy(exporter) + exporter.reset() + with task_ctx(name): + return exporter.export_from(iter(self)) + + +class RootDataSource(BaseDataSource): + def _iter(self) -> Iterator[ImageItem]: + raise NotImplementedError # pragma: no cover + + def _iter_from(self) -> Iterator[ImageItem]: + names = get_task_names() + if names: + desc = f'{self.__class__.__name__} - {".".join(names)}' + else: + desc = f'{self.__class__.__name__}' + for item in tqdm(self._iter(), desc=desc): + yield item + + +class AttachedDataSource(BaseDataSource): + def __init__(self, source: BaseDataSource, *actions: BaseAction): + self.source = source + self.actions = actions + + def _iter(self) -> Iterator[ImageItem]: + t = self.source + for action in self.actions: + action = copy.deepcopy(action) + action.reset() + t = action.iter_from(t) + + yield from t + + +class EmptySource(BaseDataSource): + def _iter(self) -> Iterator[ImageItem]: + yield from [] diff --git a/waifuc/source/compose.py b/waifuc/source/compose.py new file mode 100644 index 
0000000000000000000000000000000000000000..49d449fff95f2d42f0d3f8e726bdf8d6c5d97532 --- /dev/null +++ b/waifuc/source/compose.py @@ -0,0 +1,37 @@ +import random +from typing import Iterator, Optional + +from .base import BaseDataSource +from ..model import ImageItem + + +class ComposedDataSource(BaseDataSource): + def __init__(self, *sources: BaseDataSource): + self.sources = sources + + def _iter(self) -> Iterator[ImageItem]: + for source in self.sources: + yield from iter(source) + + def _iter_from(self) -> Iterator[ImageItem]: + yield from self._iter() + + +class ParallelDataSource(BaseDataSource): + def __init__(self, *sources: BaseDataSource, seed: Optional[int] = None): + self.sources = sources + self.random = random.Random(seed) + + def _iter(self) -> Iterator[ImageItem]: + iters = [iter(source) for source in self.sources] + while len(iters) > 0: + id_ = self.random.choice(range(len(iters))) + iter_ = iters[id_] + + try: + yield next(iter_) + except StopIteration: + iters.pop(id_) + + def _iter_from(self) -> Iterator[ImageItem]: + yield from self._iter() diff --git a/waifuc/source/danbooru.py b/waifuc/source/danbooru.py new file mode 100644 index 0000000000000000000000000000000000000000..90ef68dfa30fccbe55ed5d5da4f1619c9dea3380 --- /dev/null +++ b/waifuc/source/danbooru.py @@ -0,0 +1,167 @@ +import os.path +import re +from typing import Optional, Iterator, List, Tuple, Union, Literal + +from hbutils.system import urlsplit +from requests.auth import HTTPBasicAuth + +from .web import NoURL, WebDataSource +from ..config.meta import __TITLE__, __VERSION__ +from ..utils import get_requests_session, srequest + +_DanbooruSiteTyping = Literal['konachan', 'yandere', 'danbooru', 'safebooru', 'lolibooru'] + + +class DanbooruLikeSource(WebDataSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, download_silent: bool = True, + username: Optional[str] = None, api_key: Optional[str] = None, + site_name: Optional[str] = 'danbooru', site_url: 
Optional[str] = 'https://danbooru.donmai.us/', + group_name: Optional[str] = None): + WebDataSource.__init__(self, group_name or site_name, None, download_silent) + self.session = get_requests_session(headers={ + "User-Agent": f"{__TITLE__}/{__VERSION__}", + 'Content-Type': 'application/json; charset=utf-8', + }) + self.auth = HTTPBasicAuth(username, api_key) if username and api_key else None + self.site_name, self.site_url = site_name, site_url + self.tags = tags + self.min_size = min_size + + def _get_data_from_raw(self, raw): + return raw + + def _select_url(self, data): + if self.min_size is not None and "media_asset" in data and "variants" in data["media_asset"]: + variants = data["media_asset"]["variants"] + width, height, url = None, None, None + for item in variants: + if 'width' in item and 'height' in item and \ + item['width'] >= self.min_size and item['height'] >= self.min_size: + if url is None or item['width'] < width: + width, height, url = item['width'], item['height'], item['url'] + + if url is not None: + return url + + if 'file_url' not in data: + raise NoURL + + return data['file_url'] + + def _get_tags(self, data): + return re.split(r'\s+', data["tag_string"]) + + def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: + page = 1 + while True: + resp = srequest(self.session, 'GET', f'{self.site_url}/posts.json', params={ + "format": "json", + "limit": "100", + "page": str(page), + "tags": ' '.join(self.tags), + }, auth=self.auth) + resp.raise_for_status() + page_items = self._get_data_from_raw(resp.json()) + if not page_items: + break + + for data in page_items: + try: + url = self._select_url(data) + except NoURL: + continue + + _, ext_name = os.path.splitext(urlsplit(url).filename) + filename = f'{self.group_name}_{data["id"]}{ext_name}' + meta = { + self.site_name: data, + 'group_id': f'{self.group_name}_{data["id"]}', + 'filename': filename, + 'tags': {key: 1.0 for key in self._get_tags(data)} + } + yield data['id'], url, meta 
+ + page += 1 + + +class DanbooruSource(DanbooruLikeSource): + def __init__(self, tags: List[str], + min_size: Optional[int] = 800, download_silent: bool = True, + username: Optional[str] = None, api_key: Optional[str] = None, + group_name: Optional[str] = None): + DanbooruLikeSource.__init__(self, tags, min_size, download_silent, username, api_key, + 'danbooru', 'https://danbooru.donmai.us/', group_name) + + +class SafebooruSource(DanbooruLikeSource): + def __init__(self, tags: List[str], + min_size: Optional[int] = 800, download_silent: bool = True, + username: Optional[str] = None, api_key: Optional[str] = None, + group_name: Optional[str] = None): + DanbooruLikeSource.__init__(self, tags, min_size, download_silent, username, api_key, + 'safebooru', 'https://safebooru.donmai.us', group_name) + + +class ATFBooruSource(DanbooruLikeSource): + def __init__(self, tags: List[str], + min_size: Optional[int] = 800, download_silent: bool = True, + username: Optional[str] = None, api_key: Optional[str] = None, + group_name: Optional[str] = None): + DanbooruLikeSource.__init__(self, tags, min_size, download_silent, username, api_key, + 'danbooru', 'https://booru.allthefallen.moe', group_name) + + +class E621LikeSource(DanbooruLikeSource): + def __init__(self, tags: List[str], + min_size: Optional[int] = 800, download_silent: bool = True, + username: Optional[str] = None, api_key: Optional[str] = None, + site_name: Optional[str] = 'e621', site_url: Optional[str] = 'https://e621.net/', + group_name: Optional[str] = None): + DanbooruLikeSource.__init__(self, tags, min_size, download_silent, username, api_key, + site_name, site_url, group_name or site_name) + + def _get_data_from_raw(self, raw): + return raw['posts'] + + def _select_url(self, data): + urls = [] + urls.append((data['file']['url'], data['file']['width'], data['file']['height'])) + urls.append((data['preview']['url'], data['preview']['width'], data['preview']['height'])) + if 'sample' in data and 
data['sample']['has']: + urls.append((data['sample']['url'], data['sample']['width'], data['sample']['height'])) + + if self.min_size is not None: + f_url, f_width, f_height = None, None, None + for url, width, height in urls: + if width >= self.min_size and height >= self.min_size: + if f_url is None or width < f_width: + f_url, f_width, f_height = url, width, height + + if f_url is not None: + return f_url + + return urls[0][0] + + def _get_tags(self, data): + tags = [] + for value in data['tags'].values(): + tags.extend(value) + return tags + + +class E621Source(E621LikeSource): + def __init__(self, tags: List[str], + min_size: Optional[int] = 800, download_silent: bool = True, + username: Optional[str] = None, api_key: Optional[str] = None, + group_name: Optional[str] = 'e621'): + E621LikeSource.__init__(self, tags, min_size, download_silent, username, api_key, + 'e621', 'https://e621.net/', group_name) + + +class E926Source(E621LikeSource): + def __init__(self, tags: List[str], + min_size: Optional[int] = 800, download_silent: bool = True, + username: Optional[str] = None, api_key: Optional[str] = None, + group_name: Optional[str] = 'e926'): + E621LikeSource.__init__(self, tags, min_size, download_silent, username, api_key, + 'e926', 'https://e926.net/', group_name) diff --git a/waifuc/source/derpibooru.py b/waifuc/source/derpibooru.py new file mode 100644 index 0000000000000000000000000000000000000000..bc6f0ed1d21ab2feebe1a6024e920779f75d0fac --- /dev/null +++ b/waifuc/source/derpibooru.py @@ -0,0 +1,74 @@ +import os +from typing import Literal, Optional, Iterator, Tuple, Union, List + +from hbutils.system import urlsplit + +from .web import WebDataSource +from ..utils import get_requests_session, srequest + +SelectTyping = Literal['thumb', 'small', 'medium', 'large', 'full'] + + +class DerpibooruLikeSource(WebDataSource): + def __init__(self, site_name: str, site_url: str, + tags: List[str], key: Optional[str] = None, select: SelectTyping = 'large', + 
download_silent: bool = True, group_name: Optional[str] = None): + WebDataSource.__init__(self, group_name or site_name, get_requests_session(), download_silent) + self.tags = tags + self.key = key + self.select = select + self.site_name = site_name + self.site_url = site_url + + def _params(self, page): + params = { + 'q': ' '.join(self.tags), + 'per_page': '100', + 'page': str(page), + } + if self.key: + params['key'] = self.key + + return params + + def _get_url(self, data): + if self.select in data['representations']: + return data['representations'][self.select] + else: + return data['representations']['full'] + + def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: + page = 1 + while True: + resp = srequest(self.session, 'GET', f'{self.site_url}/api/v1/json/search/images', + params=self._params(page)) + resp.raise_for_status() + + posts = resp.json()['images'] + for data in posts: + url = self._get_url(data) + _, ext_name = os.path.splitext(urlsplit(url).filename) + filename = f'{self.group_name}_{data["id"]}{ext_name}' + meta = { + self.site_name: data, + 'group_id': f'{self.group_name}_{data["id"]}', + 'filename': filename, + 'tags': {key.replace(' ', '_'): 1.0 for key in data['tags']} + } + yield data['id'], url, meta + + page += 1 + + +class DerpibooruSource(DerpibooruLikeSource): + def __init__(self, tags: List[str], key: Optional[str] = None, select: SelectTyping = 'large', + download_silent: bool = True, group_name: str = 'derpibooru'): + DerpibooruLikeSource.__init__(self, 'derpibooru', 'https://derpibooru.org', + tags, key, select, download_silent, group_name) + + +class FurbooruSource(DerpibooruLikeSource): + def __init__(self, tags: List[str], key: Optional[str] = None, select: SelectTyping = 'large', + download_silent: bool = True, group_name: str = 'furbooru'): + DerpibooruLikeSource.__init__(self, 'furbooru', 'https://furbooru.com', + tags, key, select, download_silent, group_name) diff --git a/waifuc/source/duitang.py 
b/waifuc/source/duitang.py new file mode 100644 index 0000000000000000000000000000000000000000..8cde661bed8f3c9729b5fcb4ba86ef987faaaab1 --- /dev/null +++ b/waifuc/source/duitang.py @@ -0,0 +1,63 @@ +import os +import re +from typing import Iterator, Tuple, Union + +from hbutils.system import urlsplit + +from .web import WebDataSource +from ..utils import get_requests_session, srequest + + +def _extract_words(keyword): + return list(filter(bool, re.split(r'[\W_]+', keyword))) + + +class DuitangSource(WebDataSource): + def __init__(self, keyword: str, strict: bool = True, page_size: int = 100, + group_name: str = 'duitang', download_silent: bool = True): + WebDataSource.__init__(self, group_name, get_requests_session(), download_silent) + self.keyword = keyword + self.words = set(_extract_words(keyword)) + self.page_size: int = page_size + self.strict = strict + + def _check_title(self, title): + if not self.strict: + return True + else: + t_words = set(_extract_words(title)) + return len(t_words & self.words) == len(self.words) + + def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: + offset = 0 + while True: + resp = srequest(self.session, 'GET', 'https://www.duitang.com/napi/blog/list/by_search/', params={ + 'kw': self.keyword, + 'start': str(offset), + 'limit': str(self.page_size), + }) + resp.raise_for_status() + + raw = resp.json() + if 'data' not in raw or 'object_list' not in raw['data']: + break + + posts = raw['data']['object_list'] + if not posts: + break + + for post in posts: + if not self._check_title(post['msg']): + continue + + url = post['photo']['path'] + _, ext_name = os.path.splitext(urlsplit(url).filename) + filename = f'{self.group_name}_{post["id"]}{ext_name}' + meta = { + 'duitang': post, + 'group_id': f'{self.group_name}_{post["id"]}', + 'filename': filename, + } + yield post['id'], url, meta + + offset += self.page_size diff --git a/waifuc/source/gchar.py b/waifuc/source/gchar.py new file mode 100644 index 
0000000000000000000000000000000000000000..adbdc57eb44c9d3ce609bd40701d9e19c9b9f4da --- /dev/null +++ b/waifuc/source/gchar.py @@ -0,0 +1,166 @@ +import logging +from functools import reduce +from operator import __or__ +from typing import Iterator, Tuple, Optional, List, Mapping + +from hbutils.string import plural_word + +from .anime_pictures import AnimePicturesSource +from .base import BaseDataSource +from .danbooru import ATFBooruSource, DanbooruSource, DanbooruLikeSource +from .konachan import KonachanSource, KonachanNetSource, HypnoHubSource, LolibooruSource, XbooruSource, YandeSource, \ + Rule34Source, KonachanLikeSource +from .pixiv import PixivSearchSource +from .sankaku import SankakuSource +from .wallhaven import WallHavenSource +from .zerochan import ZerochanSource +from ..model import ImageItem + +_PRESET_SITES = ('zerochan', 'danbooru') +_REGISTERED_SITE_SOURCES = { + 'anime_pictures': AnimePicturesSource, + 'atfbooru': ATFBooruSource, + # 'sankaku': SankakuSource, # still something wrong with sankaku source + 'danbooru': DanbooruSource, + 'hypnohub': HypnoHubSource, + 'konachan': KonachanSource, + 'konachan_net': KonachanNetSource, + 'lolibooru': LolibooruSource, + 'rule34': Rule34Source, + # 'safebooru': SafebooruSource, + 'xbooru': XbooruSource, + 'yande': YandeSource, + 'zerochan': ZerochanSource, + 'wallhaven': WallHavenSource, + 'pixiv': PixivSearchSource, +} + + +class GcharAutoSource(BaseDataSource): + def __init__(self, ch, allow_fuzzy: bool = False, fuzzy_threshold: int = 80, contains_extra: bool = True, + sure_only: bool = True, preset_sites: Tuple[str, ...] = _PRESET_SITES, + max_preset_limit: Optional[int] = None, main_sources_count: int = 3, + blacklist_sites: Tuple[str, ...] 
= (), pixiv_refresh_token: Optional[str] = None, + extra_cfg: Optional[Mapping[str, dict]] = None): + from gchar.games import get_character + from gchar.games.base import Character + + if isinstance(ch, Character): + self.ch = ch + else: + self.ch = get_character(ch, allow_fuzzy, fuzzy_threshold, contains_extra) + if not self.ch: + raise ValueError(f'Character {ch!r} not found.') + logging.info(f'Character {self.ch!r} found in gchar.') + + self.sure_only = sure_only + self.pixiv_refresh_token = pixiv_refresh_token + self.extra_cfg = dict(extra_cfg or {}) + + for site in preset_sites: + assert site in _REGISTERED_SITE_SOURCES, f'Preset site {site!r} not available.' + self.preset_sites = sorted(preset_sites) + self.max_preset_limit = max_preset_limit + if 'pixiv' in self.preset_sites and not self.pixiv_refresh_token: + raise ValueError('Pixiv refresh token not given for presetting pixiv source!') + self.main_sources_count = main_sources_count + + self.blacklist_sites = blacklist_sites + + def _select_keyword_for_site(self, site) -> Tuple[Optional[str], Optional[int]]: + from gchar.resources.sites import list_site_tags + from gchar.resources.pixiv import get_pixiv_keywords, get_pixiv_posts + + if site == 'pixiv': + keyword = get_pixiv_keywords(self.ch) + cnt = get_pixiv_posts(self.ch) + count = 0 if cnt is None else cnt[0] + return keyword, count + + else: + tags: List[Tuple[str, int]] = list_site_tags(self.ch, site, sure_only=self.sure_only, with_posts=True) + tags = sorted(tags, key=lambda x: (-x[1], x[0])) + if tags: + return tags[0] + else: + return None, None + + def _build_source_on_site(self, site) -> Optional[BaseDataSource]: + site_class = _REGISTERED_SITE_SOURCES[site] + keyword, count = self._select_keyword_for_site(site) + if keyword is not None: + extra_cfg = dict(self.extra_cfg.get(site, None) or {}) + logging.info(f'Recommended keyword for site {site!r} is {keyword!r}, ' + f'with {plural_word(count, "known post")}.') + if issubclass(site_class, 
(DanbooruLikeSource, AnimePicturesSource)): + return site_class([keyword, 'solo'], **extra_cfg) + elif issubclass(site_class, (KonachanLikeSource, SankakuSource)): + return site_class([keyword], **extra_cfg) + elif issubclass(site_class, ZerochanSource): + return ZerochanSource(keyword, strict=True, **extra_cfg) + elif issubclass(site_class, WallHavenSource): + return site_class(keyword, **extra_cfg) + elif issubclass(site_class, (PixivSearchSource,)): + return site_class(keyword, refresh_token=self.pixiv_refresh_token, **extra_cfg) + else: + raise TypeError(f'Unknown class {site_class!r} for keyword {keyword!r}.') # pragma: no cover + else: + logging.info(f'No keyword recommendation for site {site!r}.') + return None + + def _build_preset_source(self) -> Optional[BaseDataSource]: + logging.info('Building preset sites sources ...') + sources = [ + self._build_source_on_site(site) + for site in self.preset_sites + ] + sources = [source for source in sources if source is not None] + if sources: + retval = reduce(__or__, sources) + if self.max_preset_limit is not None: + retval = retval[:self.max_preset_limit] + return retval + else: + return None + + def _build_main_source(self) -> Optional[BaseDataSource]: + _all_sites = set(_REGISTERED_SITE_SOURCES.keys()) + if not self.pixiv_refresh_token: + _all_sites.remove('pixiv') + _all_sites = sorted(_all_sites - set(self.preset_sites) - set(self.blacklist_sites)) + logging.info(f'Available sites for main sources: {_all_sites!r}.') + + site_pairs = [] + for site in _all_sites: + keyword, count = self._select_keyword_for_site(site) + if keyword is not None: + site_pairs.append((site, keyword, count)) + site_pairs = sorted(site_pairs, key=lambda x: -x[2])[:self.main_sources_count] + logging.info(f'Selected main sites: {site_pairs!r}') + + sources = [ + self._build_source_on_site(site) + for site, _, _ in site_pairs + ] + sources = [source for source in sources if source is not None] + if sources: + return reduce(__or__, 
sources) + else: + return None + + def _build_source(self) -> Optional[BaseDataSource]: + preset_source = self._build_preset_source() + main_source = self._build_main_source() + if preset_source and main_source: + return preset_source + main_source + elif preset_source: + return preset_source + elif main_source: + return main_source + else: + return None + + def _iter(self) -> Iterator[ImageItem]: + source = self._build_source() + if source is not None: + yield from source._iter() diff --git a/waifuc/source/huashi6.py b/waifuc/source/huashi6.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e8f164a5c2d3f65f7ff6f89a1a8b87de195a48 --- /dev/null +++ b/waifuc/source/huashi6.py @@ -0,0 +1,50 @@ +import os +from functools import lru_cache +from typing import Iterator, Tuple, Union +from urllib.parse import quote_plus, urljoin + +from hbutils.system import urlsplit + +from .web import WebDataSource +from ..utils import get_requests_session, srequest + + +class Huashi6Source(WebDataSource): + __img_site_url__ = 'https://img2.huashi6.com' + + def __init__(self, word: str, + group_name: str = 'huashi6', download_silent: bool = True): + WebDataSource.__init__(self, group_name, get_requests_session(), download_silent) + self.word = word + + @classmethod + @lru_cache() + def _get_img_site_url(cls): + return cls.__img_site_url__ + + def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: + page = 1 + + while True: + resp = srequest(self.session, 'POST', "https://rt.huashi6.com/search/all", data={ + 'word': self.word, + 'index': str(page), + }, headers={ + "referrer": f"https://www.huashi6.com/search?searchText={quote_plus(self.word)}&p={page}", + }) + raw = resp.json()['data'] + if 'works' not in raw or not raw['works']: + break + + for post in raw['works']: + url = urljoin(self._get_img_site_url(), post['coverImage']['path']) + _, ext_name = os.path.splitext(urlsplit(url).filename) + filename = f'{self.group_name}_{post["id"]}{ext_name}' + 
meta = { + 'huashi6': post, + 'group_id': f'{self.group_name}_{post["id"]}', + 'filename': filename, + } + yield post['id'], url, meta + + page += 1 diff --git a/waifuc/source/konachan.py b/waifuc/source/konachan.py new file mode 100644 index 0000000000000000000000000000000000000000..afcc8c6f7cf6ed8924a244550427ecc405536285 --- /dev/null +++ b/waifuc/source/konachan.py @@ -0,0 +1,221 @@ +import os +import re +from typing import Iterator, Tuple, Union, List, Optional + +from hbutils.system import urlsplit + +from .web import WebDataSource, NoURL +from ..utils import get_requests_session, srequest + + +class KonachanLikeSource(WebDataSource): + def __init__(self, site_name: str, site_url: str, + tags: List[str], start_page: int = 1, min_size: Optional[int] = 800, + group_name: Optional[str] = None, download_silent: bool = True): + WebDataSource.__init__(self, group_name or site_name, get_requests_session(), download_silent) + self.site_name = site_name + self.site_url = site_url + self.start_page = start_page + self.min_size = min_size + self.tags: List[str] = tags + + def _select_url(self, data): + if self.min_size is not None: + url_names = [key for key in data.keys() if key.endswith('_url')] + name_pairs = [ + *( + (name, f'{name[:-4]}_width', f'{name[:-4]}_height') + for name in url_names + ), + ('file_url', 'width', 'height'), + ] + + f_url, f_width, f_height = None, None, None + for url_name, width_name, height_name in name_pairs: + if url_name in data and width_name in data and height_name in data: + url, width, height = data[url_name], data[width_name], data[height_name] + if width >= self.min_size and height >= self.min_size: + if f_url is None or width < f_width: + f_url, f_width, f_height = url, width, height + + if f_url is not None: + return f_url + + if 'file_url' in data: + return data['file_url'] + else: + raise NoURL + + def _request(self, page): + return srequest(self.session, 'GET', f'{self.site_url}/post.json', params={ + 'tags': ' 
'.join(self.tags), + 'limit': '100', + 'page': str(page), + }) + + def _get_data_from_raw(self, raw): + return raw + + def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: + page = self.start_page + while True: + resp = self._request(page) + resp.raise_for_status() + + # response may be simply empty in rule34.xxx and xbooru.com + if not resp.text.strip(): + break + page_list = self._get_data_from_raw(resp.json()) + if not page_list: + break + + for data in page_list: + try: + url = self._select_url(data) + except NoURL: + continue + + _, ext_name = os.path.splitext(urlsplit(url).filename) + filename = f'{self.group_name}_{data["id"]}{ext_name}' + meta = { + self.site_name: data, + 'group_id': f'{self.group_name}_{data["id"]}', + 'filename': filename, + 'tags': {key: 1.0 for key in re.split(r'\s+', data['tags'])} + } + yield data["id"], url, meta + + page += 1 + + +class YandeSource(KonachanLikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'yande', download_silent: bool = True): + KonachanLikeSource.__init__(self, 'yande', 'https://yande.re', + tags, 1, min_size, group_name, download_silent) + + +class KonachanSource(KonachanLikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'konachan', download_silent: bool = True): + KonachanLikeSource.__init__(self, 'konachan', 'https://konachan.com', + tags, 1, min_size, group_name, download_silent) + + +class KonachanNetSource(KonachanLikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'konachan_net', download_silent: bool = True): + KonachanLikeSource.__init__(self, 'konachan_net', 'https://konachan.net', + tags, 1, min_size, group_name, download_silent) + + +class LolibooruSource(KonachanLikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'lolibooru', download_silent: bool = True): + 
KonachanLikeSource.__init__(self, 'lolibooru', 'https://lolibooru.moe', + tags, 1, min_size, group_name, download_silent) + + def _request(self, page): + return srequest(self.session, 'GET', f'{self.site_url}/post/index.json', params={ + 'tags': ' '.join(self.tags), + 'limit': '100', + 'page': str(page), + }) + + +class Rule34LikeSource(KonachanLikeSource): + def __init__(self, site_name: str, site_url: str, + tags: List[str], min_size: Optional[int] = 800, + group_name: Optional[str] = None, download_silent: bool = True): + KonachanLikeSource.__init__(self, site_name, site_url, tags, 0, min_size, group_name, download_silent) + + def _request(self, page): + return srequest(self.session, 'GET', f'{self.site_url}/index.php', params={ + 'page': 'dapi', + 's': 'post', + 'q': 'index', + 'tags': ' '.join(self.tags), + 'json': '1', + 'limit': '100', + 'pid': str(page), + }) + + +class Rule34Source(Rule34LikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'rule34', download_silent: bool = True): + Rule34LikeSource.__init__(self, 'rule34', 'https://rule34.xxx', + tags, min_size, group_name, download_silent) + + +class HypnoHubSource(Rule34LikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'hypnohub', download_silent: bool = True): + Rule34LikeSource.__init__(self, 'hypnohub', 'https://hypnohub.net', + tags, min_size, group_name, download_silent) + + +class GelbooruSource(Rule34LikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'gelbooru', download_silent: bool = True): + Rule34LikeSource.__init__(self, 'gelbooru', 'https://gelbooru.com', + tags, min_size, group_name, download_silent) + + def _get_data_from_raw(self, raw): + return raw['post'] if 'post' in raw else None + + +class XbooruLikeSource(Rule34LikeSource): + def __init__(self, site_name: str, site_url: str, img_site_url: str, + tags: List[str], min_size: 
Optional[int] = 800, + group_name: Optional[str] = None, download_silent: bool = True): + Rule34LikeSource.__init__(self, site_name, site_url, tags, min_size, group_name, download_silent) + self.img_site_url = img_site_url + + def _select_url(self, data): + name, _ = os.path.splitext(data['image']) + urls = [(f'{self.img_site_url}/images/{data["directory"]}/{data["image"]}', data['width'], data['height'])] + if data['sample']: + urls.append(( + f'{self.img_site_url}/samples/{data["directory"]}/sample_{name}.jpg?{data["id"]}', + data['sample_width'], data['sample_height'], + )) + + if self.min_size is not None: + f_url, f_width, f_height = None, None, None + for url, width, height in urls: + if width >= self.min_size and height >= self.min_size: + if f_url is None or width < f_width: + f_url, f_width, f_height = url, width, height + + if f_url is not None: + return f_url + + return urls[0][0] + + +class XbooruSource(XbooruLikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'xbooru', download_silent: bool = True): + XbooruLikeSource.__init__( + self, 'xbooru', 'https://xbooru.com', 'https://img.xbooru.com', + tags, min_size, group_name, download_silent, + ) + + +class SafebooruOrgSource(XbooruLikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'safebooru_org', download_silent: bool = True): + XbooruLikeSource.__init__( + self, 'safebooru_org', 'https://safebooru.org', 'https://safebooru.org', + tags, min_size, group_name, download_silent, + ) + + +class TBIBSource(XbooruLikeSource): + def __init__(self, tags: List[str], min_size: Optional[int] = 800, + group_name: str = 'tbib', download_silent: bool = True): + XbooruLikeSource.__init__( + self, 'tbib', 'https://tbib.org', 'https://tbib.org', + tags, min_size, group_name, download_silent, + ) diff --git a/waifuc/source/local.py b/waifuc/source/local.py new file mode 100644 index 
0000000000000000000000000000000000000000..1a8432495f74c53264e2808e3ea02061dedd7e9b --- /dev/null +++ b/waifuc/source/local.py @@ -0,0 +1,84 @@ +import glob +import os +import pathlib +import random +import re +from typing import Iterator + +from PIL import UnidentifiedImageError +from imgutils.data import load_image + +from .base import RootDataSource +from ..model import ImageItem + + +class LocalSource(RootDataSource): + def __init__(self, directory: str, recursive: bool = True, shuffle: bool = False): + self.directory = directory + self.recursive = recursive + self.shuffle = shuffle + + def _iter_files(self): + if self.recursive: + for directory, _, files in os.walk(self.directory): + group_name = re.sub(r'[\W_]+', '_', directory).strip('_') + for file in files: + yield os.path.join(directory, file), group_name + else: + group_name = re.sub(r'[\W_]+', '_', self.directory).strip('_') + for file in os.listdir(self.directory): + yield os.path.join(self.directory, file), group_name + + def _actual_iter_files(self): + lst = list(self._iter_files()) + if self.shuffle: + random.shuffle(lst) + yield from lst + + def _iter(self) -> Iterator[ImageItem]: + for file, group_name in self._iter_files(): + try: + origin_item = ImageItem.load_from_image(file) + origin_item.image.load() + except UnidentifiedImageError: + continue + + meta = origin_item.meta or { + 'path': os.path.abspath(file), + 'group_id': group_name, + 'filename': os.path.basename(file), + } + yield ImageItem(origin_item.image, meta) + + +class LocalTISource(RootDataSource): + def __init__(self, directory: str): + self.directory = directory + + def _iter(self) -> Iterator[ImageItem]: + group_name = re.sub(r'[\W_]+', '_', self.directory).strip('_') + for f in glob.glob(os.path.join(self.directory, '*')): + if not os.path.isfile(f): + continue + + try: + image = load_image(f) + except UnidentifiedImageError: + continue + + id_ = os.path.splitext(os.path.basename(f))[0] + txt_file = os.path.join(self.directory, 
f'{id_}.txt') + if os.path.exists(txt_file): + full_text = pathlib.Path(txt_file).read_text(encoding='utf-8') + words = re.split(r'\s*,\s*', full_text) + tags = {word: 1.0 for word in words} + else: + tags = {} + + meta = { + 'path': os.path.abspath(f), + 'group_id': group_name, + 'filename': os.path.basename(f), + 'tags': tags, + } + yield ImageItem(image, meta) diff --git a/waifuc/source/paheal.py b/waifuc/source/paheal.py new file mode 100644 index 0000000000000000000000000000000000000000..d47e8e83ccf906edb9690e81ac44ccdfa7fd5527 --- /dev/null +++ b/waifuc/source/paheal.py @@ -0,0 +1,86 @@ +import os +import re +from typing import Optional, List, Iterator, Tuple, Union + +import requests +import xmltodict +from hbutils.system import urlsplit + +from .web import WebDataSource, NoURL +from ..utils import get_requests_session + + +class PahealSource(WebDataSource): + def __init__(self, tags: List[str], user_id: Optional[str] = None, api_key: Optional[str] = None, + min_size: Optional[int] = 800, download_silent: bool = True, group_name: str = 'paheal'): + WebDataSource.__init__(self, group_name, get_requests_session(), download_silent) + self.tags = tags + self.min_size = min_size + self.user_id, self.api_key = user_id, api_key + + def _params(self, page): + params = { + 'tags': ' '.join(self.tags), + 'limit': '100', + 'page': str(page), + } + if self.user_id and self.api_key: + params['user_id'] = self.user_id + params['api_key'] = self.api_key + + return params + + def _select_url(self, data): + if self.min_size is not None: + url_names = [key for key in data.keys() if key.endswith('_url')] + name_pairs = [ + *( + (name, f'{name[:-4]}_width', f'{name[:-4]}_height') + for name in url_names + ), + ('file_url', 'width', 'height'), + ] + + f_url, f_width, f_height = None, None, None + for url_name, width_name, height_name in name_pairs: + if url_name in data and width_name in data and height_name in data: + url, width, height = data[url_name], int(data[width_name]), 
int(data[height_name]) + if width >= self.min_size and height >= self.min_size: + if f_url is None or width < f_width: + f_url, f_width, f_height = url, width, height + + if f_url is not None: + return f_url + + if 'file_url' in data: + return data['file_url'] + else: + raise NoURL + + def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: + page = 1 + while True: + resp = requests.get('https://rule34.paheal.net/api/danbooru/find_posts/index.xml', + params=self._params(page)) + resp.raise_for_status() + posts = xmltodict.parse(resp.text)['posts']['tag'] + + for data in posts: + data = {key.lstrip('@'): value for key, value in data.items()} + + try: + url = self._select_url(data) + except NoURL: + continue + + _, ext_name = os.path.splitext(urlsplit(url).filename) + filename = f'{self.group_name}_{data["id"]}{ext_name}' + meta = { + 'paheal': data, + 'group_id': f'{self.group_name}_{data["id"]}', + 'filename': filename, + 'tags': {key: 1.0 for key in re.split(r'\s+', data['tags'])} + } + yield data["id"], url, meta + + page += 1 diff --git a/waifuc/source/pixiv.py b/waifuc/source/pixiv.py new file mode 100644 index 0000000000000000000000000000000000000000..35047ef38acc2dfc7e92571b22c456c1e06c52b5 --- /dev/null +++ b/waifuc/source/pixiv.py @@ -0,0 +1,168 @@ +import logging +import os +from typing import Iterator, Optional, Union, Tuple, Literal + +from hbutils.system import urlsplit +from pixivpy3 import AppPixivAPI + +from .web import WebDataSource +from ..utils import get_requests_session + +_FILTER = Literal["for_ios", ""] +_TYPE = Literal["illust", "manga", ""] +_RESTRICT = Literal["public", "private", ""] +_CONTENT_TYPE = Literal["illust", "manga", ""] +_MODE = Literal[ + "day", + "week", + "month", + "day_male", + "day_female", + "week_original", + "week_rookie", + "day_manga", + "day_r18", + "day_male_r18", + "day_female_r18", + "week_r18", + "week_r18g", + "", +] +_SEARCH_TARGET = Literal[ + "partial_match_for_tags", "exact_match_for_tags", 
class BasePixivSource(WebDataSource):
    """Base class for Pixiv-backed data sources.

    Wraps an ``AppPixivAPI`` client and turns Pixiv illustration records into
    ``(id, url, meta)`` download tuples. Subclasses only implement
    :meth:`_iter_illustration`, typically by delegating to :meth:`_paginate`.

    :param group_name: Prefix used for ids and filenames.
    :param select: Image size to download ('square_medium', 'medium',
        'large' or 'original').
    :param no_ai: Skip illustrations with ``illust_ai_type == 2``
        (presumably Pixiv's AI-generated flag — confirm against the API docs).
    :param refresh_token: Optional OAuth refresh token; when set, the client
        authenticates at the start of iteration.
    :param download_silent: Suppress per-file download progress bars.
    """

    def __init__(self, group_name: str = 'pixiv', select: _SELECT = 'large',
                 no_ai: bool = False, refresh_token: Optional[str] = None, download_silent: bool = True):
        self.select = select
        self.no_ai = no_ai
        self.refresh_token = refresh_token
        self.client = AppPixivAPI()
        self.client.requests = get_requests_session(session=self.client.requests)
        # Pixiv's image CDN expects an app-api referer on download requests.
        self.client.requests.headers.update({"Referer": "https://app-api.pixiv.net/"})
        WebDataSource.__init__(self, group_name, self.client.requests, download_silent)

    def _iter_illustration(self) -> Iterator[dict]:
        """Yield raw illustration dicts; implemented by subclasses."""
        raise NotImplementedError  # pragma: no cover

    def _paginate(self, fetch) -> Iterator[dict]:
        """Iterate every illustration of an offset-paged Pixiv endpoint.

        Shared by all subclasses (previously this loop was duplicated
        verbatim three times).

        :param fetch: Callable taking the current offset and returning one
            page of the API response (a dict carrying an ``illusts`` list).
        """
        offset = 0
        while True:
            data = fetch(offset)
            if 'illusts' not in data:
                # Error page (e.g. auth failure / rate limit) — stop here.
                logging.warning(f'Illusts not found in page (offset: {offset!r}), skipped: {data!r}.')
                break
            illustrations = data['illusts']
            yield from illustrations

            offset += len(illustrations)
            if not illustrations:  # empty page means we are done
                break

    def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]:
        """Yield ``(id, url, meta)`` triples for each page of each illustration."""
        if self.refresh_token:
            self.client.auth(refresh_token=self.refresh_token)

        for illust in self._iter_illustration():
            if illust['type'] != 'illust':
                continue
            if self.no_ai and illust['illust_ai_type'] == 2:
                continue

            if illust['page_count'] == 1:
                # Single-page posts keep the original URL in a separate field.
                if self.select != 'original':
                    urls = [illust['image_urls'][self.select]]
                else:
                    urls = [illust['meta_single_page']['original_image_url']]
            else:
                urls = [page['image_urls'][self.select] for page in illust['meta_pages']]

            for i, url in enumerate(urls):
                _, ext_name = os.path.splitext(urlsplit(url).filename)
                filename = f'{self.group_name}_{illust["id"]}_{i}{ext_name}'
                meta = {
                    'pixiv': illust,
                    'group_id': f'{self.group_name}_{illust["id"]}',
                    'instance_id': f'{self.group_name}_{illust["id"]}_{i}',
                    'filename': filename,
                }
                yield f'{illust["id"]}_{i}', url, meta


class PixivSearchSource(BasePixivSource):
    """Pixiv source backed by the keyword-search endpoint."""

    def __init__(self, word: str, search_target: _SEARCH_TARGET = "partial_match_for_tags",
                 sort: _SORT = "date_desc", duration: _DURATION = None, start_date: Optional[str] = None,
                 end_date: Optional[str] = None, filter: _FILTER = "for_ios", req_auth: bool = True,
                 group_name: str = 'pixiv', select: _SELECT = 'large',
                 no_ai: bool = False, refresh_token: Optional[str] = None, download_silent: bool = True):
        BasePixivSource.__init__(self, group_name, select, no_ai, refresh_token, download_silent)
        self.word = word
        self.search_target = search_target
        self.sort = sort
        self.duration = duration
        self.start_date = start_date
        self.end_date = end_date
        self.filter = filter
        self.req_auth = req_auth

    def _iter_illustration(self) -> Iterator[dict]:
        return self._paginate(lambda offset: self.client.search_illust(
            self.word, self.search_target, self.sort, self.duration,
            self.start_date, self.end_date, self.filter, offset, self.req_auth))


class PixivUserSource(BasePixivSource):
    """Pixiv source that lists the works of a single user."""

    def __init__(self, user_id: Union[int, str], type: _TYPE = "illust",
                 filter: _FILTER = "for_ios", req_auth: bool = True,
                 group_name: str = 'pixiv', select: _SELECT = 'large',
                 no_ai: bool = False, refresh_token: Optional[str] = None, download_silent: bool = True):
        BasePixivSource.__init__(self, group_name, select, no_ai, refresh_token, download_silent)
        self.user_id = user_id
        self.type = type
        self.filter = filter
        self.req_auth = req_auth

    def _iter_illustration(self) -> Iterator[dict]:
        return self._paginate(lambda offset: self.client.user_illusts(
            self.user_id, self.type, self.filter, offset, self.req_auth))


class PixivRankingSource(BasePixivSource):
    """Pixiv source backed by the ranking (leaderboard) endpoint."""

    def __init__(self, mode: _MODE = "day", filter: _FILTER = "for_ios",
                 date: Optional[str] = None, req_auth: bool = True,
                 group_name: str = 'pixiv', select: _SELECT = 'large',
                 no_ai: bool = False, refresh_token: Optional[str] = None, download_silent: bool = True):
        BasePixivSource.__init__(self, group_name, select, no_ai, refresh_token, download_silent)
        self.mode = mode
        self.filter = filter
        self.date = date
        self.req_auth = req_auth

    def _iter_illustration(self) -> Iterator[dict]:
        return self._paginate(lambda offset: self.client.illust_ranking(
            self.mode, self.filter, self.date, offset, self.req_auth))
pass + elif k in {"order", "rating", "file_type"} and v is not FileType.IMAGE: # noqa + tags.append(f"{k}:{v.value}") + elif k in {"threshold", "recommended_for", "voted"}: + tags.append(f"{k}:{v}") + elif k == "date": + date = "..".join(d.strftime("%Y-%m-%dT%H:%M") for d in self.date) # type: ignore[union-attr] + tags.append(f"date:{date}") + elif k == "added_by": + for user in self.added_by: # type: ignore[union-attr] + tags.append(f"user:{user}") + + return tags + + +class SankakuSource(WebDataSource): + def __init__(self, tags: List[str], order: Optional[PostOrder] = None, + rating: Optional[Rating] = None, file_type: Optional[FileType] = None, + date: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, + username: Optional[str] = None, password: Optional[str] = None, access_token: Optional[str] = None, + min_size: Optional[int] = 800, download_silent: bool = True, group_name: str = 'sankaku', **kwargs): + WebDataSource.__init__(self, group_name, get_requests_session(), download_silent) + self.tags = tags + _tags_by_kwargs(order=order, rating=rating, file_type=file_type, date=date, **kwargs) + self.username, self.password = username, password + self.access_token = access_token + + self.min_size = min_size + self.auth_session = get_requests_session(headers={ + 'Content-Type': 'application/json; charset=utf-8', + 'Accept-Encoding': 'gzip, deflate, br', + 'Host': 'capi-v2.sankakucomplex.com', + 'X-Requested-With': 'com.android.browser', + }) + + _FILE_URLS = [ + ('sample_url', 'sample_width', 'sample_height'), + ('preview_url', 'preview_width', 'preview_height'), + ('file_url', 'width', 'height'), + ] + + def _select_url(self, data): + if self.min_size is not None: + f_url, f_width, f_height = None, None, None + for url_name, width_name, height_name in self._FILE_URLS: + if url_name in data and width_name in data and height_name in data: + url, width, height = data[url_name], data[width_name], data[height_name] + if width and height and width >= 
    def _login(self):
        """Attach authentication to ``self.auth_session``.

        Prefers a pre-issued bearer ``access_token``; otherwise posts the
        username/password pair to the sankaku login endpoint and installs the
        returned ``token_type``/``access_token`` as the Authorization header.
        No-op when neither credential form was provided.
        """
        if self.access_token:
            self.auth_session.headers.update({
                "Authorization": f"Bearer {self.access_token}",
            })
        elif self.username and self.password:
            resp = srequest(self.auth_session, 'POST', 'https://login.sankakucomplex.com/auth/token',
                            json={"login": self.username, "password": self.password})
            resp.raise_for_status()
            login_data = resp.json()
            self.auth_session.headers.update({
                "Authorization": f"{login_data['token_type']} {login_data['access_token']}",
            })

    def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]:
        """Yield ``(id, url, meta)`` triples for image posts matching ``self.tags``.

        Pages through the capi-v2 posts endpoint (100 posts per page) until an
        empty page is returned. Non-image posts (``file_type`` without the
        'image' substring, i.e. gif/video) and posts with no usable URL are
        skipped. Tags are flattened into a ``{name: 1.0}`` confidence map.
        """
        self._login()

        page = 1
        while True:
            resp = srequest(self.auth_session, 'GET', 'https://capi-v2.sankakucomplex.com/posts', params={
                'lang': 'en',
                'page': str(page),
                'limit': '100',
                'tags': ' '.join(self.tags),
            })
            resp.raise_for_status()
            # Empty response body (JSON empty list) marks the last page.
            if not resp.json():
                break

            for data in resp.json():
                # file_type is a MIME-ish string; substring test keeps e.g. 'image/png'.
                if 'file_type' not in data or 'image' not in data['file_type']:
                    continue

                try:
                    url = self._select_url(data)
                except NoURL:
                    continue

                _, ext_name = os.path.splitext(urlsplit(url).filename)
                filename = f'{self.group_name}_{data["id"]}{ext_name}'
                meta = {
                    'sankaku': data,
                    'group_id': f'{self.group_name}_{data["id"]}',
                    'filename': filename,
                    'tags': {key: 1.0 for key in [t_item['name'] for t_item in data['tags']]}
                }
                yield data["id"], url, meta

            page += 1
class VideoSource(BaseDataSource):
    """Data source that decodes the keyframes of a video file into images.

    Requires the optional ``pyav`` dependency (imported as ``av`` at module
    level; ``av`` is ``None`` when the import failed).
    """

    def __init__(self, video_file):
        # Fail fast when pyav is missing rather than erroring mid-iteration.
        if av is None:
            raise ImportError(f'pyav not installed, {self.__class__.__name__} is unavailable. '
                              f'Please install this with `pip install git+https://github.com/deepghs/waifuc.git@main#egg=waifuc[video]` to solve this problem.')
        self.video_file = video_file

    def _iter(self) -> Iterator[ImageItem]:
        """Yield one ImageItem per decoded keyframe, with time/index metadata."""
        try:
            # av.datasets.curated resolves the path; it may fetch remote
            # sample assets, hence the HTTPError handling below.
            content = av.datasets.curated(self.video_file)
        except HTTPError:
            logging.error(f'Video {self.video_file!r} is invalid, skipped')
            return

        try:
            with av.open(content) as container:
                stream = container.streams.video[0]
                # Decode keyframes only — non-key frames are skipped entirely.
                stream.codec_context.skip_frame = "NONKEY"

                for i, frame in enumerate(tqdm(
                        container.decode(stream),
                        desc=f'Video Extracting - {os.path.basename(self.video_file)}')):
                    meta = {
                        'video_file': self.video_file,
                        'time': frame.time,  # presentation time in seconds, from pyav
                        'index': i,
                    }
                    yield ImageItem(frame.to_image(), meta)
        except (InvalidDataError, av.error.ValueError, IndexError) as err:
            # Corrupt containers, decode errors, or files with no video stream.
            logging.warning(f'Video extraction skipped due to error - {err!r}')

    @classmethod
    def from_directory(cls, directory: str, recursive: bool = True) -> BaseDataSource:
        """Concatenate a VideoSource for every readable file under *directory*.

        :param directory: Root directory to scan.
        :param recursive: Also descend into subdirectories.
        :return: The sum of per-file sources (EmptySource when none found).
        """
        if recursive:
            files = glob.glob(os.path.join(glob.escape(directory), '**', '*'), recursive=True)
        else:
            files = glob.glob(os.path.join(glob.escape(directory), '*'))

        source = EmptySource()
        for file in files:
            if os.path.isfile(file) and os.access(file, os.R_OK):
                source = source + cls(file)
        return source
class Category(IntFlag):
    """Wallhaven wallpaper categories, combinable as bit flags."""
    GENERAL = 0x4
    ANIME = 0x2
    PEOPLE = 0x1

    DEFAULT = GENERAL | ANIME
    ALL = GENERAL | ANIME | PEOPLE

    @property
    def mark(self) -> str:
        """Encode the flags as wallhaven's 3-digit bitmask string (e.g. '110')."""
        return f'{"1" if self & self.GENERAL else "0"}' \
               f'{"1" if self & self.ANIME else "0"}' \
               f'{"1" if self & self.PEOPLE else "0"}'


class Purity(IntFlag):
    """Wallhaven purity (content-rating) levels, combinable as bit flags."""
    SFW = 0x4
    SKETCHY = 0x2
    NSFW = 0x1

    DEFAULT = SFW | SKETCHY
    ALL = SFW | SKETCHY | NSFW

    @property
    def mark(self) -> str:
        """Encode the flags as wallhaven's 3-digit bitmask string (e.g. '110')."""
        return f'{"1" if self & self.SFW else "0"}' \
               f'{"1" if self & self.SKETCHY else "0"}' \
               f'{"1" if self & self.NSFW else "0"}'


SortingTyping = Literal['date_added', 'relevance', 'random', 'views', 'favorites', 'toplist']
SelectTyping = Literal['original', 'thumbnail']


class WallHavenSource(WebDataSource):
    """Data source for wallhaven.cc search results.

    :param query: Search query string (`q` API parameter).
    :param category: Categories to include in results.
    :param purity: Purity levels to include.
    :param sorting: Result ordering. BUGFIX: the default was the misspelled
        'relavance', which is not even a member of ``SortingTyping`` and was
        sent verbatim to the API; corrected to 'relevance'.
    :param no_ai: Enable wallhaven's AI-art filter.
    :param min_size: Minimum (width, height) passed as the `atleast` parameter.
    :param select: Download the 'original' image or its 'thumbnail'.
    :param api_key: Optional wallhaven API key, sent as the X-API-Key header.
    :param group_name: Prefix used for ids and filenames.
    :param download_silent: Suppress per-file download progress bars.
    """

    def __init__(self, query: str, category: Category = Category.DEFAULT,
                 purity: Purity = Purity.DEFAULT, sorting: SortingTyping = 'relevance',
                 no_ai: bool = True, min_size: Tuple[int, int] = (1, 1),
                 select: SelectTyping = 'original', api_key: Optional[str] = None,
                 group_name: str = 'wallhaven', download_silent: bool = True):
        # cloudscraper transparently solves Cloudflare challenges on wallhaven.cc.
        session = get_requests_session(session=cloudscraper.create_scraper())
        if api_key:
            session.headers.update({'X-API-Key': api_key})
        WebDataSource.__init__(self, group_name, session, download_silent)

        self.query = query
        self.category = category
        self.purity = purity
        self.sorting = sorting
        self.no_ai = no_ai
        self.min_size = min_size
        self.select = select

    def _select_url(self, data):
        """Pick the download URL of one result according to ``self.select``."""
        if self.select == 'original':
            return data['path']
        elif self.select == 'thumbnail':
            return data['thumbs']['original']
        else:
            raise ValueError(f'Unknown image selection - {self.select!r}.')

    def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]:
        """Yield ``(id, url, meta)`` triples, paging until an empty result set."""
        page = 1
        while True:
            resp = srequest(self.session, 'GET', 'https://wallhaven.cc/api/v1/search', params={
                'q': self.query,
                'categories': self.category.mark,
                'purity': self.purity.mark,
                'sorting': self.sorting,
                'ai_art_filter': "1" if self.no_ai else "0",
                'atleast': f'{self.min_size[0]}x{self.min_size[1]}',
                'page': str(page),
            })
            raw = resp.json()
            # .get() tolerates a malformed response without a 'data' key.
            if not raw or not raw.get('data'):
                break

            for data in raw['data']:
                url = self._select_url(data)

                _, ext_name = os.path.splitext(urlsplit(url).filename)
                filename = f'{self.group_name}_{data["id"]}{ext_name}'
                meta = {
                    'wallhaven': data,
                    'group_id': f'{self.group_name}_{data["id"]}',
                    'filename': filename,
                }
                yield data['id'], url, meta

            page += 1
class Sort(str, Enum):
    # Result ordering; value is sent as the `s` query parameter (see `_params`).
    ID = 'id'
    FAV = 'fav'


class Time(str, Enum):
    # Time window; value is sent as the `t` query parameter (see `_params`).
    ALL = '0'
    LAST_7000 = '1'
    LAST_15000 = '2'


class Dimension(str, Enum):
    # Size/aspect filter; value is sent as the `d` query parameter (see `_params`).
    LARGE = 'large'
    HUGE = 'huge'
    LANDSCAPE = 'landscape'
    PORTRAIT = 'portrait'
    SQUARE = 'square'


# Which image variant to download: the API-provided thumbnail ('medium') or
# the guessed 600px ('large') / full-resolution ('full') CDN URLs built in
# `_get_urls`.
SelectTyping = Literal['medium', 'large', 'full']
Optional[str] = None): + if user_agent: + headers = {'User-Agent': user_agent} + else: + headers = {} + WebDataSource.__init__(self, group_name, get_requests_session(headers=headers), download_silent) + self.word = word + self.sort = sort + self.time = time + self.dimension = dimension + self.color = color + self.strict = strict + self.select = select + + self.username = username + self._password = password + self._is_authed = False + + def _auth(self): + if not self._is_authed and self.username is not None: + resp = self.session.post( + 'https://www.zerochan.net/login', + data={ + 'ref': '/', + 'name': self.username, + 'password': self._password, + 'login': 'Login' + }, + headers={ + 'Referrer': "https://www.zerochan.net/login?ref=%2F", + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,' + 'image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Accept-Encoding': 'gzip, deflate, br', + 'Content-Type': 'application/x-www-form-urlencoded', + }, + allow_redirects=False, + ) + if resp.status_code != 303: + raise ConnectionError('Username or password wrong, failed to login to zerochan.net.') + + self._is_authed = True + + @property + def _base_url(self) -> str: + if isinstance(self.word, str): + return f'{self.__SITE__}/{quote_plus(self.word)}' + elif isinstance(self.word, (list, tuple)): + return f'{self.__SITE__}/{",".join(map(quote_plus, self.word))}' + else: + raise TypeError(f'Unknown type of word - {self.word!r}.') + + @property + def _params(self) -> Mapping[str, str]: + params = { + 'json': '1', + 's': self.sort.value, + 't': self.time.value, + } + if self.dimension is not None: + params['d'] = self.dimension.value + if self.color is not None: + params['c'] = self.color + if self.strict: + params['strict'] = '1' + + return params + + @classmethod + def _get_urls(cls, data): + id_ = data['id'] + medium_url = data['thumbnail'] + prefix = quote_plus(data['tag'].replace(' ', '.')) + large_urls = 
[f'https://s1.zerochan.net/{prefix}.600.{id_}.jpg'] + full_urls = [ + f"https://static.zerochan.net/{prefix}.full.{id_}{ext}" + for ext in ['.jpg', '.png'] + ] + + return {'medium': medium_url, 'large': large_urls, 'full': full_urls} + + def _get_url(self, data): + urls = self._get_urls(data) + if self.select == 'full': + url_fallbacks = [*urls['full'], *urls['large']] + elif self.select == 'large': + url_fallbacks = urls['large'] + else: + url_fallbacks = [] + + for url in url_fallbacks: + resp = srequest(self.session, 'HEAD', url, raise_for_status=False) + if resp.ok: + return url + else: + return urls['medium'] + + def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: + self._auth() + page = 1 + while True: + quit_ = False + _base_url = self._base_url + while True: + resp = srequest(self.session, 'GET', _base_url, + params={**self._params, 'p': str(page), 'l': '200'}, + allow_redirects=False, raise_for_status=False) + if resp.status_code // 100 == 3: + _base_url = urljoin(_base_url, resp.headers['Location']) + elif resp.status_code in {403, 404}: + quit_ = True + break + else: + resp.raise_for_status() + break + + if quit_: + break + + json_ = resp.json() + if 'items' in json_: + items = json_['items'] + for data in items: + url = self._get_url(data) + _, ext_name = os.path.splitext(urlsplit(url).filename) + filename = f'{self.group_name}_{data["id"]}{ext_name}' + meta = { + 'zerochan': { + **data, + 'url': url, + }, + 'group_id': f'{self.group_name}_{data["id"]}', + 'filename': filename, + } + yield data["id"], url, meta + else: + break + + page += 1 diff --git a/waifuc/utils/__init__.py b/waifuc/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..83eb0e64aed0dea55199f52ad6a323751e1f8577 --- /dev/null +++ b/waifuc/utils/__init__.py @@ -0,0 +1,3 @@ +from .context import task_ctx, get_task_names +from .download import download_file +from .session import get_requests_session, srequest, TimeoutHTTPAdapter diff --git 
# Context variable key under which the current task-name stack is stored.
WAIFUC_TASK_NAME = 'waifuc_task_name'


@contextmanager
def task_ctx(name: Optional[str]):
    """Push *name* onto the task-name stack for the duration of the block.

    Names accumulate across nested ``task_ctx`` calls; a falsy *name* leaves
    the stack untouched. Uses hbutils' ``context()`` variables, so the stack
    is restored automatically when the block exits.

    :param name: Task name to append, or None/'' to do nothing.
    """
    ctx = context()
    names = tuple(ctx.get(WAIFUC_TASK_NAME, None) or ())
    if name:
        with ctx.vars(**{WAIFUC_TASK_NAME: (*names, name)}):
            yield
    else:
        yield


def get_task_names() -> Tuple[str, ...]:
    """Return the tuple of task names pushed by the enclosing ``task_ctx`` calls.

    Empty tuple when no task context is active.
    """
    ctx = context()
    names = tuple(ctx.get(WAIFUC_TASK_NAME, None) or ())
    return names
class _FakeClass:
    # Stand-in for a tqdm bar in silent mode; swallows update() calls.
    def update(self, *args, **kwargs):
        pass


@contextmanager
def _with_tqdm(expected_size, desc, silent: bool = False):
    """Yield a byte-scaled tqdm bar, or a no-op stub when *silent* is set.

    :param expected_size: Total byte count for the bar (may be None).
    :param desc: Progress-bar label.
    :param silent: When True, yield a dummy object instead of a real bar.
    """
    if not silent:
        with tqdm(total=expected_size, unit='B', unit_scale=True, unit_divisor=1024, desc=desc) as pbar:
            yield pbar
    else:
        yield _FakeClass()


def download_file(url, filename, expected_size: int = None, desc=None, session=None, silent: bool = False, **kwargs):
    """Stream *url* into *filename*, optionally showing download progress.

    Creates the target directory if needed and verifies the final file size
    when a size is known, deleting the partial file on mismatch.

    :param url: URL to download (redirects are followed).
    :param filename: Destination path on disk.
    :param expected_size: Expected byte size; falls back to Content-Length.
    :param desc: Progress-bar label; defaults to the file's base name.
    :param session: Optional requests session; a default one is created if omitted.
    :param silent: Suppress the progress bar.
    :param kwargs: Extra arguments forwarded to ``srequest``.
    :return: *filename*, for chaining.
    :raises requests.exceptions.HTTPError: If the downloaded size differs
        from the expected size.
    """
    session = session or get_requests_session()
    response = srequest(session, 'GET', url, stream=True, allow_redirects=True, **kwargs)
    # Prefer the caller-supplied size; otherwise use the Content-Length header.
    expected_size = expected_size or response.headers.get('Content-Length', None)
    expected_size = int(expected_size) if expected_size is not None else expected_size

    desc = desc or os.path.basename(filename)
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(filename, 'wb') as f:
        with _with_tqdm(expected_size, desc, silent) as pbar:
            for chunk in response.iter_content(chunk_size=1024):
                f.write(chunk)
                pbar.update(len(chunk))

    actual_size = os.path.getsize(filename)
    # NOTE(review): Content-Length may not equal the decoded size when the
    # server compresses the stream — confirm against real endpoints if
    # spurious size-mismatch failures appear.
    if expected_size is not None and actual_size != expected_size:
        os.remove(filename)  # drop the partial/corrupt file before raising
        raise requests.exceptions.HTTPError(f"Downloaded file is not of expected size, "
                                            f"{expected_size} expected but {actual_size} found.")

    return filename
DEFAULT_TIMEOUT = 10  # seconds


class TimeoutHTTPAdapter(HTTPAdapter):
    """HTTPAdapter that applies a default timeout to every request.

    Accepts an extra ``timeout`` keyword (stripped before it reaches
    ``HTTPAdapter``); requests made without an explicit timeout use it.
    """

    def __init__(self, *args, **kwargs):
        # pop() both reads and removes the extra kwarg in one step.
        self.timeout = kwargs.pop("timeout", DEFAULT_TIMEOUT)
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        # Only fill in the timeout when the caller did not pass one explicitly.
        if kwargs.get("timeout") is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)


def get_requests_session(max_retries: int = 5, timeout: int = DEFAULT_TIMEOUT,
                         headers: Optional[Dict[str, str]] = None, session: Optional[requests.Session] = None) \
        -> requests.Session:
    """Create (or decorate) a requests session with retry, timeout and UA defaults.

    :param max_retries: Total urllib3 retries for transient HTTP status codes.
    :param timeout: Default per-request timeout, in seconds.
    :param headers: Extra headers merged over the default User-Agent.
    :param session: Existing session to configure; a new one is made if omitted.
    :return: The configured session.
    """
    session = session or requests.session()
    retries = Retry(
        total=max_retries, backoff_factor=1,
        status_forcelist=[413, 429, 500, 501, 502, 503, 504, 505, 506, 507, 509, 510, 511],
        allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"],
    )
    adapter = TimeoutHTTPAdapter(max_retries=retries, timeout=timeout)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    session.headers.update({
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
        **dict(headers or {}),
    })

    return session


def srequest(session: requests.Session, method, url, *, max_retries: int = 5,
             sleep_time: float = 5.0, raise_for_status: bool = True, **kwargs) -> requests.Response:
    """Issue a request, retrying transport-level failures with a fixed delay.

    :param session: Session to issue the request on.
    :param method: HTTP method name.
    :param url: Target URL.
    :param max_retries: Attempts before giving up on RequestException.
    :param sleep_time: Seconds to sleep between failed attempts.
    :param raise_for_status: Raise for non-2xx responses when True.
    :param kwargs: Extra arguments forwarded to ``session.request``.
    :return: The (possibly status-checked) response.
    :raises requests.exceptions.RequestException: The last transport error,
        when every attempt failed. (BUGFIX: the original used ``assert`` for
        this, which is stripped under ``python -O`` and hid the real cause.)
    """
    resp, last_error = None, None
    for attempt in range(max_retries):
        try:
            resp = session.request(method, url, **kwargs)
        except RequestException as err:
            last_error = err
            # Don't sleep after the final attempt — there is nothing left to retry.
            if attempt + 1 < max_retries:
                time.sleep(sleep_time)
        else:
            break

    if resp is None:
        if last_error is not None:
            raise last_error
        raise RuntimeError(f'Request failed for {max_retries} time(s) - [{method}] {url!r}.')
    if raise_for_status:
        resp.raise_for_status()

    return resp