mcding committed
Commit ad552d8 · 0 Parent(s)

published version

This view is limited to 50 files because it contains too many changes. See the raw diff for the full changeset.
Files changed (50)
  1. .gitattributes +2 -0
  2. .gitignore +164 -0
  3. README.md +10 -0
  4. app.py +426 -0
  5. icon.jpg +0 -0
  6. image.png +0 -0
  7. kit/__init__.py +121 -0
  8. kit/metrics/__init__.py +27 -0
  9. kit/metrics/aesthetics.py +38 -0
  10. kit/metrics/aesthetics_scorer/__init__.py +4 -0
  11. kit/metrics/aesthetics_scorer/model.py +104 -0
  12. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_bigg_14.config +8 -0
  13. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_bigg_14.pth +3 -0
  14. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_h_14.config +8 -0
  15. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_h_14.pth +3 -0
  16. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_l_14.config +8 -0
  17. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_l_14.pth +3 -0
  18. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_bigg_14.config +8 -0
  19. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_bigg_14.pth +3 -0
  20. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_h_14.config +8 -0
  21. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_h_14.pth +3 -0
  22. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_l_14.config +8 -0
  23. kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_l_14.pth +3 -0
  24. kit/metrics/clean_fid/__init__.py +3 -0
  25. kit/metrics/clean_fid/clip_features.py +40 -0
  26. kit/metrics/clean_fid/downloads_helper.py +75 -0
  27. kit/metrics/clean_fid/features.py +117 -0
  28. kit/metrics/clean_fid/fid.py +836 -0
  29. kit/metrics/clean_fid/inception_pytorch.py +329 -0
  30. kit/metrics/clean_fid/inception_torchscript.py +59 -0
  31. kit/metrics/clean_fid/leaderboard.py +58 -0
  32. kit/metrics/clean_fid/resize.py +108 -0
  33. kit/metrics/clean_fid/utils.py +75 -0
  34. kit/metrics/clean_fid/wrappers.py +111 -0
  35. kit/metrics/clip.py +32 -0
  36. kit/metrics/distributional.py +104 -0
  37. kit/metrics/image.py +112 -0
  38. kit/metrics/lpips/__init__.py +4 -0
  39. kit/metrics/lpips/lpips.py +338 -0
  40. kit/metrics/lpips/pretrained_networks.py +188 -0
  41. kit/metrics/lpips/trainer.py +314 -0
  42. kit/metrics/lpips/utils.py +137 -0
  43. kit/metrics/lpips/weights/v0.0/alex.pth +3 -0
  44. kit/metrics/lpips/weights/v0.0/squeeze.pth +3 -0
  45. kit/metrics/lpips/weights/v0.0/vgg.pth +3 -0
  46. kit/metrics/lpips/weights/v0.1/alex.pth +3 -0
  47. kit/metrics/lpips/weights/v0.1/squeeze.pth +3 -0
  48. kit/metrics/lpips/weights/v0.1/vgg.pth +3 -0
  49. kit/metrics/perceptual.py +93 -0
  50. kit/metrics/prompt.py +39 -0
.gitattributes ADDED
@@ -0,0 +1,2 @@
1
+ *.pth filter=lfs diff=lfs merge=lfs -text
2
+ *.onnx filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,164 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110
+ .pdm.toml
111
+ .pdm-python
112
+ .pdm-build/
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
163
+
164
+ .env
README.md ADDED
@@ -0,0 +1,10 @@
1
+ ---
2
+ title: Erasing the Invisible
3
+ emoji: 🌊
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: true
10
+ ---
app.py ADDED
@@ -0,0 +1,426 @@
1
+ import os
2
+ import gradio as gr
3
+ import numpy as np
4
+ import json
5
+ import redis
6
+ import plotly.graph_objects as go
7
+ from datetime import datetime
8
+ from PIL import Image
9
+ from kit import compute_performance, compute_quality
10
+ import dotenv
11
+ import pandas as pd
12
+
13
+ dotenv.load_dotenv()
14
+
15
+ CSS = """
16
+ .tabs button{
17
+ font-size: 20px;
18
+ }
19
+ #download_btn {
20
+ height: 91.6px;
21
+ }
22
+ #submit_btn {
23
+ height: 91.6px;
24
+ }
25
+ #original_image {
26
+ display: block;
27
+ margin-left: auto;
28
+ margin-right: auto;
29
+ }
30
+ #uploaded_image {
31
+ display: block;
32
+ margin-left: auto;
33
+ margin-right: auto;
34
+ }
35
+ #leaderboard_plot {
36
+ display: block;
37
+ margin-left: auto;
38
+ margin-right: auto;
39
+ width: 640px; /* Adjust width as needed */
40
+ height: 640px; /* Adjust height as needed */ }
41
+ #leaderboard_table {
42
+ display: block;
43
+ margin-left: auto;
44
+ margin-right: auto;
45
+ }
46
+ """
47
+
48
+ JS = """
49
+ function refresh() {
50
+ const url = new URL(window.location);
51
+
52
+ if (url.searchParams.get('__theme') !== 'dark') {
53
+ url.searchParams.set('__theme', 'dark');
54
+ window.location.href = url.href;
55
+ }
56
+ }
57
+ """
58
+
59
+ QUALITY_POST_FUNC = lambda x: x / 4 * 8
60
+ PERFORMANCE_POST_FUNC = lambda x: abs(x - 0.5) * 2
61
+
62
+
63
+ # Connect to Redis
64
+ redis_client = redis.Redis(
65
+ host=os.getenv("REDIS_HOST"),
66
+ port=os.getenv("REDIS_PORT"),
67
+ username=os.getenv("REDIS_USERNAME"),
68
+ password=os.getenv("REDIS_PASSWORD"),
69
+ decode_responses=True,
70
+ )
71
+
72
+
73
+ def save_to_redis(name, performance, quality):
74
+ submission = {
75
+ "name": name,
76
+ "performance": performance,
77
+ "quality": quality,
78
+ "timestamp": datetime.now().isoformat(),
79
+ }
80
+ redis_client.lpush("submissions", json.dumps(submission))
81
+
82
+
83
+ def get_submissions_from_redis():
84
+ submissions = redis_client.lrange("submissions", 0, -1)
85
+ submissions = [json.loads(submission) for submission in submissions]
86
+ for s in submissions:
87
+ s["quality"] = QUALITY_POST_FUNC(s["quality"])
88
+ s["performance"] = PERFORMANCE_POST_FUNC(s["performance"])
89
+ s["score"] = np.sqrt(float(s["quality"]) ** 2 + float(s["performance"]) ** 2)
90
+ return submissions
91
+
92
+
93
+ def update_plot(
94
+ submissions,
95
+ current_name=None,
96
+ ):
97
+ names = [sub["name"] for sub in submissions]
98
+ performances = [float(sub["performance"]) for sub in submissions]
99
+ qualities = [float(sub["quality"]) for sub in submissions]
100
+
101
+ # Create scatter plot
102
+ fig = go.Figure()
103
+
104
+ for name, quality, performance in zip(names, qualities, performances):
105
+ if name == current_name:
106
+ marker = dict(symbol="star", size=15, color="orange")
107
+ elif name.startswith("Baseline: "):
108
+ marker = dict(symbol="square", size=8, color="blue")
109
+ else:
110
+ marker = dict(symbol="circle", size=10, color="green")
111
+
112
+ fig.add_trace(
113
+ go.Scatter(
114
+ x=[quality],
115
+ y=[performance],
116
+ mode="markers+text",
117
+ text=[name if not name.startswith("Baseline: ") else ""],
118
+ textposition="top center",
119
+ name=name,
120
+ marker=marker,
121
+ customdata=[
122
+ name if name.startswith("Baseline: ") else f"User: {name}",
123
+ ],
124
+ hovertemplate="<b>%{customdata}</b><br>"
125
+ + "Performance: %{y:.3f}<br>"
126
+ + "Quality: %{x:.3f}<br>"
127
+ + "<extra></extra>",
128
+ )
129
+ )
130
+
131
+ # Add circles
132
+ circle_radii = np.linspace(0, 1, 5)
133
+ for radius in circle_radii:
134
+ theta = np.linspace(0, 2 * np.pi, 100)
135
+ x = radius * np.cos(theta)
136
+ y = radius * np.sin(theta)
137
+ fig.add_trace(
138
+ go.Scatter(
139
+ x=x,
140
+ y=y,
141
+ mode="lines",
142
+ line=dict(color="gray", dash="dash"),
143
+ showlegend=False,
144
+ hovertemplate="Performance: %{x:.3f}<br>"
145
+ + "Quality: %{y:.3f}<br>"
146
+ + "<extra></extra>",
147
+ )
148
+ )
149
+
150
+ # Update layout
151
+ fig.update_layout(
152
+ xaxis_title="Image Quality Degradation",
153
+ yaxis_title="Watermark Detection Performance",
154
+ xaxis=dict(
155
+ range=[0, 1.1], titlefont=dict(size=16) # Adjust this value as needed
156
+ ),
157
+ yaxis=dict(
158
+ range=[0, 1.1], titlefont=dict(size=16) # Adjust this value as needed
159
+ ),
160
+ width=640,
161
+ height=640,
162
+ showlegend=False, # Remove legend
163
+ modebar=dict(remove=["all"]),
164
+ )
165
+ fig.update_xaxes(title_font_size=20)
166
+ fig.update_yaxes(title_font_size=20)
167
+
168
+ return fig
169
+
170
+
171
+ def update_table(
172
+ submissions,
173
+ current_name=None,
174
+ ):
175
+ def tp(timestamp):
176
+ return timestamp.replace("T", " ").split(".")[0]
177
+
178
+ names = [
179
+ (
180
+ sub["name"][len("Baseline: ") :]
181
+ if sub["name"].startswith("Baseline: ")
182
+ else sub["name"]
183
+ )
184
+ for sub in submissions
185
+ ]
186
+ times = [
187
+ (
188
+ ""
189
+ if sub["name"].startswith("Baseline: ")
190
+ else (
191
+ tp(sub["timestamp"]) + " (Current)"
192
+ if sub["name"] == current_name
193
+ else tp(sub["timestamp"])
194
+ )
195
+ )
196
+ for sub in submissions
197
+ ]
198
+ performances = ["%.4f" % (float(sub["performance"])) for sub in submissions]
199
+ qualities = ["%.4f" % (float(sub["quality"])) for sub in submissions]
200
+ scores = ["%.4f" % (float(sub["score"])) for sub in submissions]
201
+ df = pd.DataFrame(
202
+ {
203
+ "Name": names,
204
+ "Submission Time": times,
205
+ "Performance": performances,
206
+ "Quality": qualities,
207
+ "Score": scores,
208
+ }
209
+ ).sort_values(by=["Score"])
210
+ df.insert(0, "Rank #", list(np.arange(len(names)) + 1), True)
211
+
212
+ def highlight_null(s):
213
+ con = s.copy()
214
+ con[:] = None
215
+ if s["Submission Time"] == "":
216
+ con[:] = "background-color: darkgrey"
217
+ return con
218
+
219
+ return df.style.apply(highlight_null, axis=1)
220
+
221
+
222
+ def process_submission(name, image):
223
+ original_image = Image.open("./image.png")
224
+ progress = gr.Progress()
225
+ progress(0, desc="Detecting Watermark")
226
+ performance = compute_performance(image)
227
+ progress(0.4, desc="Evaluating Image Quality")
228
+ quality = compute_quality(image, original_image)
229
+ progress(1.0, desc="Uploading Results")
230
+
231
+ # Save unprocessed values but display processed values
232
+ save_to_redis(name, performance, quality)
233
+ quality = QUALITY_POST_FUNC(quality)
234
+ performance = PERFORMANCE_POST_FUNC(performance)
235
+
236
+ submissions = get_submissions_from_redis()
237
+ leaderboard_table = update_table(submissions, current_name=name)
238
+ leaderboard_plot = update_plot(submissions, current_name=name)
239
+
240
+ # Calculate rank
241
+ distances = [
242
+ np.sqrt(float(s["quality"]) ** 2 + float(s["performance"]) ** 2)
243
+ for s in submissions
244
+ ]
245
+ rank = sorted(distances).index(np.sqrt(quality**2 + performance**2)) + 1
246
+ gr.Info(f"You ranked {rank} out of {len(submissions)}!")
247
+ return (
248
+ leaderboard_plot,
249
+ leaderboard_table,
250
+ f"{rank} out of {len(submissions)}",
251
+ name,
252
+ f"{performance:.3f}",
253
+ f"{quality:.3f}",
254
+ f"{np.sqrt(quality**2 + performance**2):.3f}",
255
+ )
256
+
257
+
258
+ def upload_and_evaluate(name, image):
259
+ if name == "":
260
+ raise gr.Error("Please enter your name before submitting.")
261
+ if image is None:
262
+ raise gr.Error("Please upload an image before submitting.")
263
+ return process_submission(name, image)
264
+
265
+
266
+ def create_interface():
267
+ with gr.Blocks(theme=gr.themes.Soft(), css=CSS, js=JS) as demo:
268
+ gr.Markdown(
269
+ """
270
+ # Erasing the Invisible (Demo of NeurIPS'24 competition)
271
+
272
+ Welcome to the demo of the NeurIPS'24 competition [Erasing the Invisible: A Stress-Test Challenge for Image Watermarks](https://erasinginvisible.github.io/).
273
+
274
+ You can use this demo to better understand the competition pipeline, or just for fun! 🎮
275
+
276
+ Here, we provide an image embedded with an invisible watermark. You only need to:
277
+
278
+ 1. **Download** the original watermarked image. 🌊
279
+ 2. **Remove** the invisible watermark using your preferred attack. 🧼
280
+ 3. **Upload** your image. We will evaluate and rank your attack. 📊
281
+
282
+ That's it! 🚀
283
+
284
+ *Note: This is just a demo. The watermark used here is not necessarily representative of those used for the competition. To officially participate in the competition, please follow the guidelines [here](https://erasinginvisible.github.io/).*
285
+ """
286
+ )
287
+
288
+ with gr.Tabs(elem_classes=["tabs"]) as tabs:
289
+ with gr.Tab("Original Watermarked Image", id="download"):
290
+ with gr.Column():
291
+ original_image = gr.Image(
292
+ value="./image.png",
293
+ format="png",
294
+ label="Original Watermarked Image",
295
+ show_label=True,
296
+ height=512,
297
+ width=512,
298
+ type="filepath",
299
+ show_download_button=False,
300
+ show_share_button=False,
301
+ show_fullscreen_button=False,
302
+ container=True,
303
+ elem_id="original_image",
304
+ )
305
+ with gr.Row():
306
+ download_btn = gr.DownloadButton(
307
+ "Download Watermarked Image",
308
+ value="./image.png",
309
+ elem_id="download_btn",
310
+ )
311
+ submit_btn = gr.Button(
312
+ "Submit Your Removal", elem_id="submit_btn"
313
+ )
314
+
315
+ with gr.Tab(
316
+ "Submit Watermark Removed Image",
317
+ id="submit",
318
+ elem_classes="gr-tab-header",
319
+ ):
320
+
321
+ with gr.Column():
322
+ uploaded_image = gr.Image(
323
+ label="Your Watermark Removed Image",
324
+ format="png",
325
+ show_label=True,
326
+ height=512,
327
+ width=512,
328
+ sources=["upload"],
329
+ type="pil",
330
+ show_download_button=False,
331
+ show_share_button=False,
332
+ show_fullscreen_button=False,
333
+ container=True,
334
+ placeholder="Upload your watermark removed image",
335
+ elem_id="uploaded_image",
336
+ )
337
+ with gr.Row():
338
+ name_input = gr.Textbox(
339
+ label="Your Name", placeholder="Anonymous"
340
+ )
341
+ upload_btn = gr.Button("Upload and Evaluate")
342
+
343
+ with gr.Tab(
344
+ "Evaluation Results",
345
+ id="plot",
346
+ elem_classes="gr-tab-header",
347
+ ):
348
+ gr.Markdown(
349
+ "The evaluation is based on two metrics, watermark performance ($$A$$) and image quality degradation ($$Q$$).",
350
+ latex_delimiters=[{"left": "$$", "right": "$$", "display": False}],
351
+ )
352
+ gr.Markdown(
353
+ "The lower the watermark performance and less quality degradation, the more effective the attack is. The overall score is $$\sqrt{Q^2+A^2}$$, the smaller the better.",
354
+ latex_delimiters=[{"left": "$$", "right": "$$", "display": False}],
355
+ )
356
+ gr.Markdown(
357
+ """
358
+ <p>
359
+ <span style="display: inline-block; width: 20px;"></span>🟦: Baseline attacks
360
+ <span style="display: inline-block; width: 20px;"></span>🟢: Users' submissions
361
+ <span style="display: inline-block; width: 20px;"></span>⭐: Your current submission
362
+ </p>
363
+ <p><em>Note: The performance and quality metrics differ from those in the competition (as only one image is used here), but they still give you an idea of how effective your attack is.</em></p>
364
+ """
365
+ )
366
+ with gr.Column():
367
+ leaderboard_plot = gr.Plot(
368
+ value=update_plot(get_submissions_from_redis()),
369
+ show_label=False,
370
+ elem_id="leaderboard_plot",
371
+ )
372
+ with gr.Row():
373
+ rank_output = gr.Textbox(label="Your Ranking")
374
+ name_output = gr.Textbox(label="Your Name")
375
+ performance_output = gr.Textbox(label="Watermark Performance")
376
+ quality_output = gr.Textbox(label="Quality Degradation")
377
+ overall_output = gr.Textbox(label="Overall Score")
378
+ with gr.Tab(
379
+ "Leaderboard",
380
+ id="leaderboard",
381
+ elem_classes="gr-tab-header",
382
+ ):
383
+ gr.Markdown("Find your ranking on the leaderboard!")
384
+ gr.Markdown(
385
+ "Gray-shaded rows are baseline results provided by the organziers."
386
+ )
387
+ with gr.Column():
388
+ leaderboard_table = gr.Dataframe(
389
+ value=update_table(get_submissions_from_redis()),
390
+ show_label=False,
391
+ elem_id="leaderboard_table",
392
+ )
393
+ submit_btn.click(lambda: gr.Tabs(selected="submit"), None, tabs)
394
+
395
+ upload_btn.click(lambda: gr.Tabs(selected="plot"), None, tabs).then(
396
+ upload_and_evaluate,
397
+ inputs=[name_input, uploaded_image],
398
+ outputs=[
399
+ leaderboard_plot,
400
+ leaderboard_table,
401
+ rank_output,
402
+ name_output,
403
+ performance_output,
404
+ quality_output,
405
+ overall_output,
406
+ ],
407
+ )
408
+
409
+ demo.load(
410
+ lambda: [
411
+ gr.Image(value="./image.png", height=512, width=512),
412
+ gr.Plot(update_plot(get_submissions_from_redis())),
413
+ gr.Dataframe(update_table(get_submissions_from_redis())),
414
+ ],
415
+ outputs=[original_image, leaderboard_plot, leaderboard_table],
416
+ )
417
+
418
+ return demo
419
+
420
+
421
+ # Create the demo object
422
+ demo = create_interface()
423
+
424
+ # Launch the app
425
+ if __name__ == "__main__":
426
+ demo.launch(share=False)
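For readers tracing the scoring logic above, the post-processing in app.py can be restated as a small standalone sketch. It only mirrors formulas already present in the file (the QUALITY_POST_FUNC / PERFORMANCE_POST_FUNC lambdas and the sqrt(Q^2 + A^2) combination); the example input values are hypothetical.

import numpy as np

def overall_score(raw_performance: float, raw_quality: float) -> float:
    """Illustrative mirror of the post-processing applied in app.py."""
    # Raw performance is a bit accuracy in [0, 1]; 0.5 is chance level,
    # so it is mapped to a distance from random guessing in [0, 1].
    performance = abs(raw_performance - 0.5) * 2
    # Raw quality is the normalized degradation value; app.py rescales it by 2 (x / 4 * 8).
    quality = raw_quality / 4 * 8
    # Overall score: Euclidean distance from the origin; smaller is better.
    return float(np.sqrt(quality**2 + performance**2))

# Hypothetical example, not taken from any real submission.
print(overall_score(raw_performance=0.55, raw_quality=0.05))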
icon.jpg ADDED
image.png ADDED
kit/__init__.py ADDED
@@ -0,0 +1,121 @@
1
+ import os
2
+ import io
3
+ import numpy as np
4
+ import onnxruntime as ort
5
+ from PIL import Image
6
+ import dotenv
7
+
8
+ dotenv.load_dotenv()
9
+
10
+ GT_MESSAGE = os.environ["GT_MESSAGE"]
11
+
12
+
13
+ QUALITY_COEFFICIENTS = {
14
+ "psnr": -0.0022186489180419534,
15
+ "ssim": -0.11337077856710862,
16
+ "nmi": -0.09878221979274945,
17
+ "lpips": 0.3412626374646173,
18
+ }
19
+
20
+ QUALITY_OFFSETS = {
21
+ "psnr": 43.54757854447622,
22
+ "ssim": 0.984229018845295,
23
+ "nmi": 1.7536553655336136,
24
+ "lpips": 0.014247652621287854,
25
+ }
26
+
27
+
28
+ def compute_performance(image):
29
+ session_options = ort.SessionOptions()
30
+ session_options.intra_op_num_threads = 1
31
+ session_options.inter_op_num_threads = 1
32
+ session_options.log_severity_level = 3
33
+ model = ort.InferenceSession(
34
+ "./kit/models/stable_signature.onnx",
35
+ sess_options=session_options,
36
+ )
37
+ inputs = np.stack(
38
+ [
39
+ (
40
+ (
41
+ np.array(
42
+ image,
43
+ dtype=np.float32,
44
+ )
45
+ / 255.0
46
+ - [0.485, 0.456, 0.406]
47
+ )
48
+ / [0.229, 0.224, 0.225]
49
+ )
50
+ .transpose((2, 0, 1))
51
+ .astype(np.float32)
52
+ ],
53
+ axis=0,
54
+ )
55
+
56
+ outputs = model.run(
57
+ None,
58
+ {
59
+ "image": inputs,
60
+ },
61
+ )
62
+ decoded = (outputs[0] > 0).astype(int)[0]
63
+ gt_message = np.array([int(bit) for bit in GT_MESSAGE])
64
+ return 1 - np.mean(gt_message != decoded)
65
+
66
+
67
+ from .metrics import (
68
+ compute_image_distance_repeated,
69
+ load_perceptual_models,
70
+ compute_perceptual_metric_repeated,
71
+ load_aesthetics_and_artifacts_models,
72
+ compute_aesthetics_and_artifacts_scores,
73
+ )
74
+
75
+
76
+ def compute_quality(attacked_image, clean_image, quiet=True):
77
+
78
+ # Compress the image
79
+ buffer = io.BytesIO()
80
+ attacked_image.save(buffer, format="JPEG", quality=95)
81
+ buffer.seek(0)
82
+
83
+ # Update attacked_image with the compressed version
84
+ attacked_image = Image.open(buffer)
85
+
86
+ modes = ["psnr", "ssim", "nmi", "lpips"]
87
+
88
+ results = {}
89
+ for mode in modes:
90
+ if mode in ["psnr", "ssim", "nmi"]:
91
+ metrics = compute_image_distance_repeated(
92
+ [clean_image],
93
+ [attacked_image],
94
+ metric_name=mode,
95
+ num_workers=1,
96
+ verbose=not quiet,
97
+ )
98
+ results[mode] = metrics
99
+
100
+ elif mode == "lpips":
101
+ model = load_perceptual_models(
102
+ mode,
103
+ mode="alex",
104
+ device="cpu",
105
+ )
106
+ metrics = compute_perceptual_metric_repeated(
107
+ [clean_image],
108
+ [attacked_image],
109
+ metric_name=mode,
110
+ mode="alex",
111
+ model=model,
112
+ device="cpu",
113
+ )
114
+ results[mode] = metrics
115
+
116
+ normalized_quality = 0
117
+ for key, value in results.items():
118
+ normalized_quality += (value[0] - QUALITY_OFFSETS[key]) * QUALITY_COEFFICIENTS[
119
+ key
120
+ ]
121
+ return normalized_quality
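A minimal usage sketch of the two entry points defined above, assuming the layout this module expects (the ONNX detector at ./kit/models/stable_signature.onnx and a GT_MESSAGE environment variable) and two local images; the file names are hypothetical, and the attacked image must have the resolution the bundled detector expects.

from PIL import Image
from kit import compute_performance, compute_quality

original = Image.open("image.png").convert("RGB")              # watermarked original
attacked = Image.open("my_attacked_image.png").convert("RGB")  # hypothetical attack output

performance = compute_performance(attacked)    # bit accuracy against GT_MESSAGE
quality = compute_quality(attacked, original)  # weighted PSNR/SSIM/NMI/LPIPS degradation
print(f"performance={performance:.4f}, quality={quality:.4f}")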
kit/metrics/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ from .distributional import compute_fid
2
+ from .image import (
3
+ compute_mse,
4
+ compute_psnr,
5
+ compute_ssim,
6
+ compute_nmi,
7
+ compute_mse_repeated,
8
+ compute_psnr_repeated,
9
+ compute_ssim_repeated,
10
+ compute_nmi_repeated,
11
+ compute_image_distance_repeated,
12
+ )
13
+ from .perceptual import (
14
+ load_perceptual_models,
15
+ compute_lpips,
16
+ compute_lpips_repeated,
17
+ compute_perceptual_metric_repeated,
18
+ )
19
+ from .aesthetics import (
20
+ load_aesthetics_and_artifacts_models,
21
+ compute_aesthetics_and_artifacts_scores,
22
+ )
23
+ from .clip import load_open_clip_model_preprocess_and_tokenizer, compute_clip_score
24
+ from .prompt import (
25
+ load_perplexity_model_and_tokenizer,
26
+ compute_prompt_perplexity,
27
+ )
kit/metrics/aesthetics.py ADDED
@@ -0,0 +1,38 @@
1
+ import torch
2
+ from PIL import Image
3
+ from transformers import CLIPModel, CLIPProcessor
4
+ from .aesthetics_scorer import preprocess, load_model
5
+
6
+
7
+ def load_aesthetics_and_artifacts_models(device=torch.device("cuda")):
8
+ model = CLIPModel.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
9
+ vision_model = model.vision_model
10
+ vision_model.to(device)
11
+ del model
12
+ clip_processor = CLIPProcessor.from_pretrained(
13
+ "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
14
+ )
15
+ rating_model = load_model("aesthetics_scorer_rating_openclip_vit_h_14").to(device)
16
+ artifacts_model = load_model("aesthetics_scorer_artifacts_openclip_vit_h_14").to(
17
+ device
18
+ )
19
+ return vision_model, clip_processor, rating_model, artifacts_model
20
+
21
+
22
+ def compute_aesthetics_and_artifacts_scores(
23
+ images, models, device=torch.device("cuda")
24
+ ):
25
+ vision_model, clip_processor, rating_model, artifacts_model = models
26
+
27
+ inputs = clip_processor(images=images, return_tensors="pt").to(device)
28
+ with torch.no_grad():
29
+ vision_output = vision_model(**inputs)
30
+ pooled_output = vision_output.pooler_output
31
+ embedding = preprocess(pooled_output)
32
+ with torch.no_grad():
33
+ rating = rating_model(embedding)
34
+ artifact = artifacts_model(embedding)
35
+ return (
36
+ rating.detach().cpu().numpy().flatten().tolist(),
37
+ artifact.detach().cpu().numpy().flatten().tolist(),
38
+ )
kit/metrics/aesthetics_scorer/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """
2
+ From https://github.com/kenjiqq/aesthetics-scorer#validation-split-of-diffusiondb-dataset
3
+ """
4
+ from .model import preprocess, load_model
kit/metrics/aesthetics_scorer/model.py ADDED
@@ -0,0 +1,104 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import json
4
+ import os
5
+ import inspect
6
+
7
+
8
+ class AestheticScorer(nn.Module):
9
+ def __init__(
10
+ self,
11
+ input_size=0,
12
+ use_activation=False,
13
+ dropout=0.2,
14
+ config=None,
15
+ hidden_dim=1024,
16
+ reduce_dims=False,
17
+ output_activation=None,
18
+ ):
19
+ super().__init__()
20
+ self.config = {
21
+ "input_size": input_size,
22
+ "use_activation": use_activation,
23
+ "dropout": dropout,
24
+ "hidden_dim": hidden_dim,
25
+ "reduce_dims": reduce_dims,
26
+ "output_activation": output_activation,
27
+ }
28
+ if config != None:
29
+ self.config.update(config)
30
+
31
+ layers = [
32
+ nn.Linear(self.config["input_size"], self.config["hidden_dim"]),
33
+ nn.ReLU() if self.config["use_activation"] else None,
34
+ nn.Dropout(self.config["dropout"]),
35
+ nn.Linear(
36
+ self.config["hidden_dim"],
37
+ round(self.config["hidden_dim"] / (2 if reduce_dims else 1)),
38
+ ),
39
+ nn.ReLU() if self.config["use_activation"] else None,
40
+ nn.Dropout(self.config["dropout"]),
41
+ nn.Linear(
42
+ round(self.config["hidden_dim"] / (2 if reduce_dims else 1)),
43
+ round(self.config["hidden_dim"] / (4 if reduce_dims else 1)),
44
+ ),
45
+ nn.ReLU() if self.config["use_activation"] else None,
46
+ nn.Dropout(self.config["dropout"]),
47
+ nn.Linear(
48
+ round(self.config["hidden_dim"] / (4 if reduce_dims else 1)),
49
+ round(self.config["hidden_dim"] / (8 if reduce_dims else 1)),
50
+ ),
51
+ nn.ReLU() if self.config["use_activation"] else None,
52
+ nn.Linear(round(self.config["hidden_dim"] / (8 if reduce_dims else 1)), 1),
53
+ ]
54
+ if self.config["output_activation"] == "sigmoid":
55
+ layers.append(nn.Sigmoid())
56
+ layers = [x for x in layers if x is not None]
57
+ self.layers = nn.Sequential(*layers)
58
+
59
+ def forward(self, x):
60
+ if self.config["output_activation"] == "sigmoid":
61
+ upper, lower = 10, 1
62
+ scale = upper - lower
63
+ return (self.layers(x) * scale) + lower
64
+ else:
65
+ return self.layers(x)
66
+
67
+ def save(self, save_name):
68
+ split_name = os.path.splitext(save_name)
69
+ with open(f"{split_name[0]}.config", "w") as outfile:
70
+ outfile.write(json.dumps(self.config, indent=4))
71
+
72
+ for i in range(
73
+ 6
74
+ ): # saving sometiles fails, so retry 5 times, might be windows issue
75
+ try:
76
+ torch.save(self.state_dict(), save_name)
77
+ break
78
+ except RuntimeError as e:
79
+ # check if error contains string "File"
80
+ if "cannot be opened" in str(e) and i < 5:
81
+ print("Model save failed, retrying...")
82
+ else:
83
+ raise e
84
+
85
+
86
+ def preprocess(embeddings):
87
+ return embeddings / embeddings.norm(p=2, dim=-1, keepdim=True)
88
+
89
+
90
+ def load_model(weight_name, device="cuda" if torch.cuda.is_available() else "cpu"):
91
+ weight_folder = os.path.abspath(
92
+ os.path.join(
93
+ inspect.getfile(load_model),
94
+ "../weights",
95
+ )
96
+ )
97
+ weight_path = os.path.join(weight_folder, f"{weight_name}.pth")
98
+ config_path = os.path.join(weight_folder, f"{weight_name}.config")
99
+ with open(config_path, "r") as config_file:
100
+ config = json.load(config_file)
101
+ model = AestheticScorer(config=config)
102
+ model.load_state_dict(torch.load(weight_path, map_location=device))
103
+ model.eval()
104
+ return model
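A brief sketch of how load_model and preprocess fit together, assuming the bundled weight files are present; the random tensor merely stands in for a real CLIP ViT-H/14 pooled embedding (dimension 1280 per the matching .config file).

import torch
from kit.metrics.aesthetics_scorer import load_model, preprocess

# Hypothetical stand-in for a pooled CLIP ViT-H/14 embedding (input_size = 1280).
embedding = preprocess(torch.randn(1, 1280))
rating_model = load_model("aesthetics_scorer_rating_openclip_vit_h_14", device="cpu")
with torch.no_grad():
    rating = rating_model(embedding)
print(rating.item())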
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_bigg_14.config ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "input_size": 1664,
3
+ "use_activation": false,
4
+ "dropout": 0.0,
5
+ "hidden_dim": 1024,
6
+ "reduce_dims": false,
7
+ "output_activation": null
8
+ }
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_bigg_14.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39a5d014670226d52c408e0dfec840b7626d80a73d003a6a144caafd5e02d031
3
+ size 19423219
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_h_14.config ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "input_size": 1280,
3
+ "use_activation": false,
4
+ "dropout": 0.0,
5
+ "hidden_dim": 1024,
6
+ "reduce_dims": false,
7
+ "output_activation": null
8
+ }
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_h_14.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc48a8a2315cfdbc7bb8278be55f645e8a995e1a2fa234baec5eb41c4d33e070
3
+ size 17850319
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_l_14.config ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "input_size": 1024,
3
+ "use_activation": false,
4
+ "dropout": 0.0,
5
+ "hidden_dim": 1024,
6
+ "reduce_dims": false,
7
+ "output_activation": null
8
+ }
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_l_14.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a9481fdbce5ff02b252bcb25109b9f3b29841289fadf7e79e884d59f9357d5
3
+ size 16801743
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_bigg_14.config ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "input_size": 1664,
3
+ "use_activation": false,
4
+ "dropout": 0.0,
5
+ "hidden_dim": 1024,
6
+ "reduce_dims": false,
7
+ "output_activation": null
8
+ }
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_bigg_14.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19b016304f54ae866e27f1eb498c0861f704958e7c37693adc5ce094e63904a8
3
+ size 19423099
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_h_14.config ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "input_size": 1280,
3
+ "use_activation": false,
4
+ "dropout": 0.0,
5
+ "hidden_dim": 1024,
6
+ "reduce_dims": false,
7
+ "output_activation": null
8
+ }
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_h_14.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03603eee1864c2e5e97ef7079229609653db5b10594ca8b1de9e541d838cae9c
3
+ size 17850199
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_l_14.config ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "input_size": 1024,
3
+ "use_activation": false,
4
+ "dropout": 0.0,
5
+ "hidden_dim": 1024,
6
+ "reduce_dims": false,
7
+ "output_activation": null
8
+ }
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_l_14.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb7fe561369ab6c7dad34b9316a56d2c6070582f0323656148e1107a242cd666
3
+ size 16801623
kit/metrics/clean_fid/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """
2
+ From https://github.com/GaParmar/clean-fid/tree/main
3
+ """
kit/metrics/clean_fid/clip_features.py ADDED
@@ -0,0 +1,40 @@
1
+ # pip install git+https://github.com/openai/CLIP.git
2
+ import pdb
3
+ from PIL import Image
4
+ import numpy as np
5
+ import torch
6
+ import torchvision.transforms as transforms
7
+ import clip
8
+ from .fid import compute_fid
9
+
10
+
11
+ def img_preprocess_clip(img_np):
12
+ x = Image.fromarray(img_np.astype(np.uint8)).convert("RGB")
13
+ T = transforms.Compose(
14
+ [
15
+ transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
16
+ transforms.CenterCrop(224),
17
+ ]
18
+ )
19
+ return np.asarray(T(x)).clip(0, 255).astype(np.uint8)
20
+
21
+
22
+ class CLIP_fx:
23
+ def __init__(self, name="ViT-B/32", device="cuda"):
24
+ self.model, _ = clip.load(name, device=device)
25
+ self.model.eval()
26
+ self.name = "clip_" + name.lower().replace("-", "_").replace("/", "_")
27
+
28
+ def __call__(self, img_t):
29
+ img_x = img_t / 255.0
30
+ T_norm = transforms.Normalize(
31
+ (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)
32
+ )
33
+ img_x = T_norm(img_x)
34
+ assert torch.is_tensor(img_x)
35
+ if len(img_x.shape) == 3:
36
+ img_x = img_x.unsqueeze(0)
37
+ B, C, H, W = img_x.shape
38
+ with torch.no_grad():
39
+ z = self.model.encode_image(img_x)
40
+ return z
kit/metrics/clean_fid/downloads_helper.py ADDED
@@ -0,0 +1,75 @@
1
+ import os
2
+ import urllib.request
3
+ import requests
4
+ import shutil
5
+
6
+
7
+ inception_url = "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt"
8
+
9
+
10
+ """
11
+ Download the pretrained Inception weights if they do not exist
12
+ ARGS:
13
+ fpath - output folder path
14
+ """
15
+
16
+
17
+ def check_download_inception(fpath="./"):
18
+ inception_path = os.path.join(fpath, "inception-2015-12-05.pt")
19
+ if not os.path.exists(inception_path):
20
+ # download the file
21
+ with urllib.request.urlopen(inception_url) as response, open(
22
+ inception_path, "wb"
23
+ ) as f:
24
+ shutil.copyfileobj(response, f)
25
+ return inception_path
26
+
27
+
28
+ """
29
+ Download a URL if the file does not already exist locally
30
+ ARGS:
31
+ local_folder - output folder path
32
+ url - the weburl to download
33
+ """
34
+
35
+
36
+ def check_download_url(local_folder, url):
37
+ name = os.path.basename(url)
38
+ local_path = os.path.join(local_folder, name)
39
+ if not os.path.exists(local_path):
40
+ os.makedirs(local_folder, exist_ok=True)
41
+ print(f"downloading statistics to {local_path}")
42
+ with urllib.request.urlopen(url) as response, open(local_path, "wb") as f:
43
+ shutil.copyfileobj(response, f)
44
+ return local_path
45
+
46
+
47
+ """
48
+ Download a file from google drive
49
+ ARGS:
50
+ file_id - id of the google drive file
51
+ out_path - output folder path
52
+ """
53
+
54
+
55
+ def download_google_drive(file_id, out_path):
56
+ def get_confirm_token(response):
57
+ for key, value in response.cookies.items():
58
+ if key.startswith("download_warning"):
59
+ return value
60
+ return None
61
+
62
+ URL = "https://drive.google.com/uc?export=download"
63
+ session = requests.Session()
64
+ response = session.get(URL, params={"id": file_id}, stream=True)
65
+ token = get_confirm_token(response)
66
+
67
+ if token:
68
+ params = {"id": file_id, "confirm": token}
69
+ response = session.get(URL, params=params, stream=True)
70
+
71
+ CHUNK_SIZE = 32768
72
+ with open(out_path, "wb") as f:
73
+ for chunk in response.iter_content(CHUNK_SIZE):
74
+ if chunk:
75
+ f.write(chunk)
kit/metrics/clean_fid/features.py ADDED
@@ -0,0 +1,117 @@
1
+ """
2
+ helpers for extracting features from image
3
+ """
4
+ import os
5
+ import platform
6
+ import numpy as np
7
+ import torch
8
+ from torch.hub import get_dir
9
+ from .downloads_helper import check_download_url
10
+ from .inception_pytorch import InceptionV3
11
+ from .inception_torchscript import InceptionV3W
12
+
13
+
14
+ """
15
+ returns a function that takes an image in range [0,255]
16
+ and outputs a feature embedding vector
17
+ """
18
+
19
+
20
+ def feature_extractor(
21
+ name="torchscript_inception",
22
+ device=torch.device("cuda"),
23
+ resize_inside=False,
24
+ use_dataparallel=True,
25
+ ):
26
+ if name == "torchscript_inception":
27
+ path = "./" if platform.system() == "Windows" else "/tmp"
28
+ model = InceptionV3W(path, download=True, resize_inside=resize_inside).to(
29
+ device
30
+ )
31
+ model.eval()
32
+ if use_dataparallel:
33
+ model = torch.nn.DataParallel(model)
34
+
35
+ def model_fn(x):
36
+ return model(x)
37
+
38
+ elif name == "pytorch_inception":
39
+ model = InceptionV3(output_blocks=[3], resize_input=False).to(device)
40
+ model.eval()
41
+ if use_dataparallel:
42
+ model = torch.nn.DataParallel(model)
43
+
44
+ def model_fn(x):
45
+ return model(x / 255)[0].squeeze(-1).squeeze(-1)
46
+
47
+ else:
48
+ raise ValueError(f"{name} feature extractor not implemented")
49
+ return model_fn
50
+
51
+
52
+ """
53
+ Build a feature extractor for each of the modes
54
+ """
55
+
56
+
57
+ def build_feature_extractor(mode, device=torch.device("cuda"), use_dataparallel=True):
58
+ if mode == "legacy_pytorch":
59
+ feat_model = feature_extractor(
60
+ name="pytorch_inception",
61
+ resize_inside=False,
62
+ device=device,
63
+ use_dataparallel=use_dataparallel,
64
+ )
65
+ elif mode == "legacy_tensorflow":
66
+ feat_model = feature_extractor(
67
+ name="torchscript_inception",
68
+ resize_inside=True,
69
+ device=device,
70
+ use_dataparallel=use_dataparallel,
71
+ )
72
+ elif mode == "clean":
73
+ feat_model = feature_extractor(
74
+ name="torchscript_inception",
75
+ resize_inside=False,
76
+ device=device,
77
+ use_dataparallel=use_dataparallel,
78
+ )
79
+ return feat_model
80
+
81
+
82
+ """
83
+ Load precomputed reference statistics for commonly used datasets
84
+ """
85
+
86
+
87
+ def get_reference_statistics(
88
+ name,
89
+ res,
90
+ mode="clean",
91
+ model_name="inception_v3",
92
+ seed=0,
93
+ split="test",
94
+ metric="FID",
95
+ ):
96
+ base_url = "https://www.cs.cmu.edu/~clean-fid/stats/"
97
+ if split == "custom":
98
+ res = "na"
99
+ if model_name == "inception_v3":
100
+ model_modifier = ""
101
+ else:
102
+ model_modifier = "_" + model_name
103
+ if metric == "FID":
104
+ rel_path = (f"{name}_{mode}{model_modifier}_{split}_{res}.npz")
105
+ url = f"{base_url}/{rel_path}"
106
+ stats_folder = os.path.join(get_dir(), "fid_stats")
107
+ fpath = check_download_url(local_folder=stats_folder, url=url)
108
+ stats = np.load(fpath)
109
+ mu, sigma = stats["mu"], stats["sigma"]
110
+ return mu, sigma
111
+ elif metric == "KID":
112
+ rel_path = (f"{name}_{mode}{model_modifier}_{split}_{res}_kid.npz")
113
+ url = f"{base_url}/{rel_path}"
114
+ stats_folder = os.path.join(get_dir(), "fid_stats")
115
+ fpath = check_download_url(local_folder=stats_folder, url=url)
116
+ stats = np.load(fpath)
117
+ return stats["feats"]
kit/metrics/clean_fid/fid.py ADDED
@@ -0,0 +1,836 @@
1
+ import os
2
+ import random
3
+ from tqdm.auto import tqdm
4
+ from glob import glob
5
+ import torch
6
+ import numpy as np
7
+ from PIL import Image
8
+ from scipy import linalg
9
+ import zipfile
10
+ from torch.hub import get_dir
11
+ from .utils import *
12
+ from .features import build_feature_extractor, get_reference_statistics
13
+ from .resize import *
14
+
15
+
16
+ """
17
+ Numpy implementation of the Frechet Distance.
18
+ The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
19
+ and X_2 ~ N(mu_2, C_2) is
20
+ d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
21
+ Stable version by Danica J. Sutherland.
22
+ Params:
23
+ mu1 : Numpy array containing the activations of a layer of the
24
+ inception net (like returned by the function 'get_predictions')
25
+ for generated samples.
26
+ mu2 : The sample mean over activations, precalculated on an
27
+ representative data set.
28
+ sigma1: The covariance matrix over activations for generated samples.
29
+ sigma2: The covariance matrix over activations, precalculated on a
30
+ representative data set.
31
+ """
32
+
33
+
34
+ def frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
35
+ mu1 = np.atleast_1d(mu1)
36
+ mu2 = np.atleast_1d(mu2)
37
+ sigma1 = np.atleast_2d(sigma1)
38
+ sigma2 = np.atleast_2d(sigma2)
39
+
40
+ assert (
41
+ mu1.shape == mu2.shape
42
+ ), "Training and test mean vectors have different lengths"
43
+ assert (
44
+ sigma1.shape == sigma2.shape
45
+ ), "Training and test covariances have different dimensions"
46
+
47
+ diff = mu1 - mu2
48
+
49
+ # Product might be almost singular
50
+ covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
51
+ if not np.isfinite(covmean).all():
52
+ msg = (
53
+ "fid calculation produces singular product; "
54
+ "adding %s to diagonal of cov estimates"
55
+ ) % eps
56
+ print(msg)
57
+ offset = np.eye(sigma1.shape[0]) * eps
58
+ covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
59
+
60
+ # Numerical error might give slight imaginary component
61
+ if np.iscomplexobj(covmean):
62
+ if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
63
+ m = np.max(np.abs(covmean.imag))
64
+ raise ValueError("Imaginary component {}".format(m))
65
+ covmean = covmean.real
66
+
67
+ tr_covmean = np.trace(covmean)
68
+
69
+ return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
70
+
71
+
72
+ """
73
+ Compute the KID score given the sets of features
74
+ """
75
+
76
+
77
+ def kernel_distance(feats1, feats2, num_subsets=100, max_subset_size=1000):
78
+ n = feats1.shape[1]
79
+ m = min(min(feats1.shape[0], feats2.shape[0]), max_subset_size)
80
+ t = 0
81
+ for _subset_idx in range(num_subsets):
82
+ x = feats2[np.random.choice(feats2.shape[0], m, replace=False)]
83
+ y = feats1[np.random.choice(feats1.shape[0], m, replace=False)]
84
+ a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3
85
+ b = (x @ y.T / n + 1) ** 3
86
+ t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m
87
+ kid = t / num_subsets / m
88
+ return float(kid)
89
+
90
+
91
+ """
92
+ Compute the inception features for a batch of images
93
+ """
94
+
95
+
96
+ def get_batch_features(batch, model, device):
97
+ with torch.no_grad():
98
+ feat = model(batch.to(device))
99
+ return feat.detach().cpu().numpy()
100
+
101
+
102
+ """
103
+ Compute the inception features for a list of files
104
+ """
105
+
106
+
107
+ def get_files_features(
108
+ l_files,
109
+ model=None,
110
+ num_workers=12,
111
+ batch_size=128,
112
+ device=torch.device("cuda"),
113
+ mode="clean",
114
+ custom_fn_resize=None,
115
+ description="",
116
+ fdir=None,
117
+ verbose=True,
118
+ custom_image_tranform=None,
119
+ ):
120
+ # wrap the images in a dataloader for parallelizing the resize operation
121
+ dataset = ResizeDataset(l_files, fdir=fdir, mode=mode)
122
+ if custom_image_tranform is not None:
123
+ dataset.custom_image_tranform = custom_image_tranform
124
+ if custom_fn_resize is not None:
125
+ dataset.fn_resize = custom_fn_resize
126
+
127
+ dataloader = torch.utils.data.DataLoader(
128
+ dataset,
129
+ batch_size=batch_size,
130
+ shuffle=False,
131
+ drop_last=False,
132
+ num_workers=num_workers,
133
+ )
134
+
135
+ # collect all inception features
136
+ l_feats = []
137
+ if verbose:
138
+ pbar = tqdm(dataloader, desc=description)
139
+ else:
140
+ pbar = dataloader
141
+
142
+ for batch in pbar:
143
+ l_feats.append(get_batch_features(batch, model, device))
144
+ np_feats = np.concatenate(l_feats)
145
+ return np_feats
146
+
147
+
148
+ """
149
+ Compute the inception features for a folder of image files
150
+ """
151
+
152
+
153
+ def get_folder_features(
154
+ fdir,
155
+ model=None,
156
+ num_workers=12,
157
+ num=None,
158
+ shuffle=False,
159
+ seed=0,
160
+ batch_size=128,
161
+ device=torch.device("cuda"),
162
+ mode="clean",
163
+ custom_fn_resize=None,
164
+ description="",
165
+ verbose=True,
166
+ custom_image_tranform=None,
167
+ ):
168
+ # get all relevant files in the dataset
169
+ if ".zip" in fdir:
170
+ files = list(set(zipfile.ZipFile(fdir).namelist()))
171
+ # remove the non-image files inside the zip
172
+ files = [x for x in files if os.path.splitext(x)[1].lower()[1:] in EXTENSIONS]
173
+ else:
174
+ files = sorted(
175
+ [
176
+ file
177
+ for ext in EXTENSIONS
178
+ for file in glob(os.path.join(fdir, f"**/*.{ext}"), recursive=True)
179
+ ]
180
+ )
181
+ # use a subset number of files if needed
182
+ if num is not None:
183
+ if shuffle:
184
+ random.seed(seed)
185
+ random.shuffle(files)
186
+ files = files[:num]
187
+ np_feats = get_files_features(
188
+ files,
189
+ model,
190
+ num_workers=num_workers,
191
+ batch_size=batch_size,
192
+ device=device,
193
+ mode=mode,
194
+ custom_fn_resize=custom_fn_resize,
195
+ custom_image_tranform=custom_image_tranform,
196
+ description=description,
197
+ fdir=fdir,
198
+ verbose=verbose,
199
+ )
200
+ return np_feats
201
+
202
+
203
+ """
204
+ Compute the FID score given the inception features stack
205
+ """
206
+
207
+
208
+ def fid_from_feats(feats1, feats2):
209
+ mu1, sig1 = np.mean(feats1, axis=0), np.cov(feats1, rowvar=False)
210
+ mu2, sig2 = np.mean(feats2, axis=0), np.cov(feats2, rowvar=False)
211
+ return frechet_distance(mu1, sig1, mu2, sig2)
212
+
213
+
214
+ """
215
+ Computes the FID score for a folder of images for a specific dataset
216
+ and a specific resolution
217
+ """
218
+
219
+
220
+ def fid_folder(
221
+ fdir,
222
+ dataset_name,
223
+ dataset_res,
224
+ dataset_split,
225
+ model=None,
226
+ mode="clean",
227
+ model_name="inception_v3",
228
+ num_workers=12,
229
+ batch_size=128,
230
+ device=torch.device("cuda"),
231
+ verbose=True,
232
+ custom_image_tranform=None,
233
+ custom_fn_resize=None,
234
+ ):
235
+ # Load reference FID statistics (download if needed)
236
+ ref_mu, ref_sigma = get_reference_statistics(
237
+ dataset_name,
238
+ dataset_res,
239
+ mode=mode,
240
+ model_name=model_name,
241
+ seed=0,
242
+ split=dataset_split,
243
+ )
244
+ fbname = os.path.basename(fdir)
245
+ # get all inception features for folder images
246
+ np_feats = get_folder_features(
247
+ fdir,
248
+ model,
249
+ num_workers=num_workers,
250
+ batch_size=batch_size,
251
+ device=device,
252
+ mode=mode,
253
+ description=f"FID {fbname} : ",
254
+ verbose=verbose,
255
+ custom_image_tranform=custom_image_tranform,
256
+ custom_fn_resize=custom_fn_resize,
257
+ )
258
+ mu = np.mean(np_feats, axis=0)
259
+ sigma = np.cov(np_feats, rowvar=False)
260
+ fid = frechet_distance(mu, sigma, ref_mu, ref_sigma)
261
+ return fid
262
+
263
+
264
+ """
265
+ Compute the FID stats from a generator model
266
+ """
267
+
268
+
269
+ def get_model_features(
270
+ G,
271
+ model,
272
+ mode="clean",
273
+ z_dim=512,
274
+ num_gen=50_000,
275
+ batch_size=128,
276
+ device=torch.device("cuda"),
277
+ desc="FID model: ",
278
+ verbose=True,
279
+ return_z=False,
280
+ custom_image_tranform=None,
281
+ custom_fn_resize=None,
282
+ ):
283
+ if custom_fn_resize is None:
284
+ fn_resize = build_resizer(mode)
285
+ else:
286
+ fn_resize = custom_fn_resize
287
+
288
+ # Generate test features
289
+ num_iters = int(np.ceil(num_gen / batch_size))
290
+ l_feats = []
291
+ latents = []
292
+ if verbose:
293
+ pbar = tqdm(range(num_iters), desc=desc)
294
+ else:
295
+ pbar = range(num_iters)
296
+ for idx in pbar:
297
+ with torch.no_grad():
298
+ z_batch = torch.randn((batch_size, z_dim)).to(device)
299
+ if return_z:
300
+ latents.append(z_batch)
301
+ # generated image is in range [0,255]
302
+ img_batch = G(z_batch)
303
+ # split into individual batches for resizing if needed
304
+ if mode != "legacy_tensorflow":
305
+ l_resized_batch = []
306
+ for idx in range(batch_size):
307
+ curr_img = img_batch[idx]
308
+ img_np = curr_img.cpu().numpy().transpose((1, 2, 0))
309
+ if custom_image_tranform is not None:
310
+ img_np = custom_image_tranform(img_np)
311
+ img_resize = fn_resize(img_np)
312
+ l_resized_batch.append(
313
+ torch.tensor(img_resize.transpose((2, 0, 1))).unsqueeze(0)
314
+ )
315
+ resized_batch = torch.cat(l_resized_batch, dim=0)
316
+ else:
317
+ resized_batch = img_batch
318
+ feat = get_batch_features(resized_batch, model, device)
319
+ l_feats.append(feat)
320
+ np_feats = np.concatenate(l_feats)[:num_gen]
321
+ if return_z:
322
+ latents = torch.cat(latents, 0)
323
+ return np_feats, latents
324
+ return np_feats
325
+
326
+
327
+ """
328
+ Computes the FID score for a generator model for a specific dataset
329
+ and a specific resolution
330
+ """
331
+
332
+
333
+ def fid_model(
334
+ G,
335
+ dataset_name,
336
+ dataset_res,
337
+ dataset_split,
338
+ model=None,
339
+ model_name="inception_v3",
340
+ z_dim=512,
341
+ num_gen=50_000,
342
+ mode="clean",
343
+ num_workers=0,
344
+ batch_size=128,
345
+ device=torch.device("cuda"),
346
+ verbose=True,
347
+ custom_image_tranform=None,
348
+ custom_fn_resize=None,
349
+ ):
350
+ # Load reference FID statistics (download if needed)
351
+ ref_mu, ref_sigma = get_reference_statistics(
352
+ dataset_name,
353
+ dataset_res,
354
+ mode=mode,
355
+ model_name=model_name,
356
+ seed=0,
357
+ split=dataset_split,
358
+ )
359
+ # Generate features of images generated by the model
360
+ np_feats = get_model_features(
361
+ G,
362
+ model,
363
+ mode=mode,
364
+ z_dim=z_dim,
365
+ num_gen=num_gen,
366
+ batch_size=batch_size,
367
+ device=device,
368
+ verbose=verbose,
369
+ custom_image_tranform=custom_image_tranform,
370
+ custom_fn_resize=custom_fn_resize,
371
+ )
372
+ mu = np.mean(np_feats, axis=0)
373
+ sigma = np.cov(np_feats, rowvar=False)
374
+ fid = frechet_distance(mu, sigma, ref_mu, ref_sigma)
375
+ return fid
376
+
377
+
378
+ """
379
+ Computes the FID score between the two given folders
380
+ """
381
+
382
+
383
+ def compare_folders(
384
+ fdir1,
385
+ fdir2,
386
+ feat_model,
387
+ mode,
388
+ num_workers=0,
389
+ batch_size=8,
390
+ device=torch.device("cuda"),
391
+ verbose=True,
392
+ custom_image_tranform=None,
393
+ custom_fn_resize=None,
394
+ ):
395
+ # get all inception features for the first folder
396
+ fbname1 = os.path.basename(fdir1)
397
+ np_feats1 = get_folder_features(
398
+ fdir1,
399
+ feat_model,
400
+ num_workers=num_workers,
401
+ batch_size=batch_size,
402
+ device=device,
403
+ mode=mode,
404
+ description=f"FID {fbname1} : ",
405
+ verbose=verbose,
406
+ custom_image_tranform=custom_image_tranform,
407
+ custom_fn_resize=custom_fn_resize,
408
+ )
409
+ mu1 = np.mean(np_feats1, axis=0)
410
+ sigma1 = np.cov(np_feats1, rowvar=False)
411
+ # get all inception features for the second folder
412
+ fbname2 = os.path.basename(fdir2)
413
+ np_feats2 = get_folder_features(
414
+ fdir2,
415
+ feat_model,
416
+ num_workers=num_workers,
417
+ batch_size=batch_size,
418
+ device=device,
419
+ mode=mode,
420
+ description=f"FID {fbname2} : ",
421
+ verbose=verbose,
422
+ custom_image_tranform=custom_image_tranform,
423
+ custom_fn_resize=custom_fn_resize,
424
+ )
425
+ mu2 = np.mean(np_feats2, axis=0)
426
+ sigma2 = np.cov(np_feats2, rowvar=False)
427
+ fid = frechet_distance(mu1, sigma1, mu2, sigma2)
428
+ return fid
429
+
430
+
431
+ """
432
+ Test if a custom statistic exists
433
+ """
434
+
435
+
436
+ def test_stats_exists(name, mode, model_name="inception_v3", metric="FID"):
437
+ stats_folder = os.path.join(get_dir(), "fid_stats")
438
+ split, res = "custom", "na"
439
+ if model_name == "inception_v3":
440
+ model_modifier = ""
441
+ else:
442
+ model_modifier = "_" + model_name
443
+ if metric == "FID":
444
+ fname = f"{name}_{mode}{model_modifier}_{split}_{res}.npz"
445
+ elif metric == "KID":
446
+ fname = f"{name}_{mode}{model_modifier}_{split}_{res}_kid.npz"
447
+ fpath = os.path.join(stats_folder, fname)
448
+ return os.path.exists(fpath)
449
+
450
+
451
+ """
452
+ Remove the custom FID features from the stats folder
453
+ """
454
+
455
+
456
+ def remove_custom_stats(name, mode="clean", model_name="inception_v3"):
457
+ stats_folder = os.path.join(get_dir(), "fid_stats")
458
+ # remove the FID stats
459
+ split, res = "custom", "na"
460
+ if model_name == "inception_v3":
461
+ model_modifier = ""
462
+ else:
463
+ model_modifier = "_" + model_name
464
+ outf = os.path.join(
465
+ stats_folder, f"{name}_{mode}{model_modifier}_{split}_{res}.npz"
466
+ )
467
+ if not os.path.exists(outf):
468
+ msg = f"The stats file {name} does not exist."
469
+ raise Exception(msg)
470
+ os.remove(outf)
471
+ # remove the KID stats
472
+ outf = os.path.join(
473
+ stats_folder, f"{name}_{mode}{model_modifier}_{split}_{res}_kid.npz"
474
+ )
475
+ if not os.path.exists(outf):
476
+ msg = f"The stats file {name} does not exist."
477
+ raise Exception(msg)
478
+ os.remove(outf)
479
+
480
+
481
+ """
482
+ Cache a custom dataset statistics file
483
+ """
484
+
485
+
486
+ def make_custom_stats(
487
+ name,
488
+ fdir,
489
+ num=None,
490
+ mode="clean",
491
+ model_name="inception_v3",
492
+ num_workers=0,
493
+ batch_size=64,
494
+ device=torch.device("cuda"),
495
+ verbose=True,
496
+ ):
497
+ stats_folder = os.path.join(get_dir(), "fid_stats")
498
+ os.makedirs(stats_folder, exist_ok=True)
499
+ split, res = "custom", "na"
500
+ if model_name == "inception_v3":
501
+ model_modifier = ""
502
+ else:
503
+ model_modifier = "_" + model_name
504
+ outf = os.path.join(
505
+ stats_folder, f"{name}_{mode}{model_modifier}_{split}_{res}.npz"
506
+ )
507
+ # if the custom stat file already exists
508
+ if os.path.exists(outf):
509
+ msg = f"The statistics file {name} already exists. "
510
+ msg += "Use remove_custom_stats function to delete it first."
511
+ raise Exception(msg)
512
+ if model_name == "inception_v3":
513
+ feat_model = build_feature_extractor(mode, device)
514
+ custom_fn_resize = None
515
+ custom_image_tranform = None
516
+ elif model_name == "clip_vit_b_32":
517
+ from .clip_features import CLIP_fx, img_preprocess_clip
518
+
519
+ clip_fx = CLIP_fx("ViT-B/32")
520
+ feat_model = clip_fx
521
+ custom_fn_resize = img_preprocess_clip
522
+ custom_image_tranform = None
523
+ else:
524
+ raise ValueError(f"The entered model name - {model_name} was not recognized.")
525
+
526
+ # get all inception features for folder images
527
+ np_feats = get_folder_features(
528
+ fdir,
529
+ feat_model,
530
+ num_workers=num_workers,
531
+ num=num,
532
+ batch_size=batch_size,
533
+ device=device,
534
+ verbose=verbose,
535
+ mode=mode,
536
+ description=f"custom stats: {os.path.basename(fdir)} : ",
537
+ custom_image_tranform=custom_image_tranform,
538
+ custom_fn_resize=custom_fn_resize,
539
+ )
540
+
541
+ mu = np.mean(np_feats, axis=0)
542
+ sigma = np.cov(np_feats, rowvar=False)
543
+ # print(f"saving custom FID stats to {outf}")
544
+ np.savez_compressed(outf, mu=mu, sigma=sigma)
545
+
546
+ # KID stats
547
+ outf = os.path.join(
548
+ stats_folder, f"{name}_{mode}{model_modifier}_{split}_{res}_kid.npz"
549
+ )
550
+ # print(f"saving custom KID stats to {outf}")
551
+ np.savez_compressed(outf, feats=np_feats)
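
A minimal sketch of the custom-statistics workflow above, assuming a hypothetical folder of reference images and an arbitrary statistics name (a CUDA device is used by default):

# "my_refs" and the folder path below are placeholders.
if not test_stats_exists("my_refs", mode="clean"):
    make_custom_stats("my_refs", fdir="/path/to/reference_images", mode="clean")
# Cached statistics can later be deleted with:
# remove_custom_stats("my_refs", mode="clean")
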
552
+
553
+
554
+ def compute_kid(
555
+ fdir1=None,
556
+ fdir2=None,
557
+ gen=None,
558
+ mode="clean",
559
+ num_workers=12,
560
+ batch_size=32,
561
+ device=torch.device("cuda"),
562
+ dataset_name="FFHQ",
563
+ dataset_res=1024,
564
+ dataset_split="train",
565
+ num_gen=50_000,
566
+ z_dim=512,
567
+ verbose=True,
568
+ use_dataparallel=True,
569
+ ):
570
+ # build the feature extractor based on the mode
571
+ feat_model = build_feature_extractor(
572
+ mode, device, use_dataparallel=use_dataparallel
573
+ )
574
+
575
+ # if both dirs are specified, compute KID between folders
576
+ if fdir1 is not None and fdir2 is not None:
577
+ # get all inception features for the first folder
578
+ fbname1 = os.path.basename(fdir1)
579
+ np_feats1 = get_folder_features(
580
+ fdir1,
581
+ feat_model,
582
+ num_workers=num_workers,
583
+ batch_size=batch_size,
584
+ device=device,
585
+ mode=mode,
586
+ description=f"KID {fbname1} : ",
587
+ verbose=verbose,
588
+ )
589
+ # get all inception features for the second folder
590
+ fbname2 = os.path.basename(fdir2)
591
+ np_feats2 = get_folder_features(
592
+ fdir2,
593
+ feat_model,
594
+ num_workers=num_workers,
595
+ batch_size=batch_size,
596
+ device=device,
597
+ mode=mode,
598
+ description=f"KID {fbname2} : ",
599
+ verbose=verbose,
600
+ )
601
+ score = kernel_distance(np_feats1, np_feats2)
602
+ return score
603
+
604
+ # compute kid of a folder
605
+ elif fdir1 is not None and fdir2 is None:
606
+ if verbose:
607
+ print(f"compute KID of a folder with {dataset_name} statistics")
608
+ ref_feats = get_reference_statistics(
609
+ dataset_name,
610
+ dataset_res,
611
+ mode=mode,
612
+ seed=0,
613
+ split=dataset_split,
614
+ metric="KID",
615
+ )
616
+ fbname = os.path.basename(fdir1)
617
+ # get all inception features for folder images
618
+ np_feats = get_folder_features(
619
+ fdir1,
620
+ feat_model,
621
+ num_workers=num_workers,
622
+ batch_size=batch_size,
623
+ device=device,
624
+ mode=mode,
625
+ description=f"KID {fbname} : ",
626
+ verbose=verbose,
627
+ )
628
+ score = kernel_distance(ref_feats, np_feats)
629
+ return score
630
+
631
+ # compute kid for a generator, using images in fdir2
632
+ elif gen is not None and fdir2 is not None:
633
+ if verbose:
634
+ print(f"compute KID of a model, using references in fdir2")
635
+ # get all inception features for the second folder
636
+ fbname2 = os.path.basename(fdir2)
637
+ ref_feats = get_folder_features(
638
+ fdir2,
639
+ feat_model,
640
+ num_workers=num_workers,
641
+ batch_size=batch_size,
642
+ device=device,
643
+ mode=mode,
644
+ description=f"KID {fbname2} : ",
645
+ )
646
+ # Generate test features
647
+ np_feats = get_model_features(
648
+ gen,
649
+ feat_model,
650
+ mode=mode,
651
+ z_dim=z_dim,
652
+ num_gen=num_gen,
653
+ desc="KID model: ",
654
+ batch_size=batch_size,
655
+ device=device,
656
+ )
657
+ score = kernel_distance(ref_feats, np_feats)
658
+ return score
659
+
660
+ # compute fid for a generator, using reference statistics
661
+ elif gen is not None:
662
+ if verbose:
663
+ print(
664
+ f"compute KID of a model with {dataset_name}-{dataset_res} statistics"
665
+ )
666
+ ref_feats = get_reference_statistics(
667
+ dataset_name,
668
+ dataset_res,
669
+ mode=mode,
670
+ seed=0,
671
+ split=dataset_split,
672
+ metric="KID",
673
+ )
674
+ # Generate test features
675
+ np_feats = get_model_features(
676
+ gen,
677
+ feat_model,
678
+ mode=mode,
679
+ z_dim=z_dim,
680
+ num_gen=num_gen,
681
+ desc="KID model: ",
682
+ batch_size=batch_size,
683
+ device=device,
684
+ verbose=verbose,
685
+ )
686
+ score = kernel_distance(ref_feats, np_feats)
687
+ return score
688
+
689
+ else:
690
+ raise ValueError("invalid combination of directories and models entered")
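
A minimal usage sketch for compute_kid above, with placeholder folder paths (the default device is CUDA):

kid = compute_kid(
    fdir1="/path/to/real_images",       # placeholder
    fdir2="/path/to/generated_images",  # placeholder
    mode="clean",
)
print(f"KID: {kid:.5f}")
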
691
+
692
+
693
+ """
694
+ custom_image_tranform:
695
+ function that takes an np_array image as input [0,255] and
696
+ applies a custom transform such as cropping
697
+ """
698
+
699
+
700
+ def compute_fid(
701
+ fdir1=None,
702
+ fdir2=None,
703
+ gen=None,
704
+ mode="clean",
705
+ model_name="inception_v3",
706
+ num_workers=12,
707
+ batch_size=32,
708
+ device=torch.device("cuda"),
709
+ dataset_name="FFHQ",
710
+ dataset_res=1024,
711
+ dataset_split="train",
712
+ num_gen=50_000,
713
+ z_dim=512,
714
+ custom_feat_extractor=None,
715
+ verbose=True,
716
+ custom_image_tranform=None,
717
+ custom_fn_resize=None,
718
+ use_dataparallel=True,
719
+ ):
720
+ # build the feature extractor based on the mode and the model to be used
721
+ if custom_feat_extractor is None and model_name == "inception_v3":
722
+ feat_model = build_feature_extractor(
723
+ mode, device, use_dataparallel=use_dataparallel
724
+ )
725
+ elif custom_feat_extractor is None and model_name == "clip_vit_b_32":
726
+ from .clip_features import CLIP_fx, img_preprocess_clip
727
+
728
+ clip_fx = CLIP_fx("ViT-B/32", device=device)
729
+ feat_model = clip_fx
730
+ custom_fn_resize = img_preprocess_clip
731
+ else:
732
+ feat_model = custom_feat_extractor
733
+
734
+ # if both dirs are specified, compute FID between folders
735
+ if fdir1 is not None and fdir2 is not None:
736
+ score = compare_folders(
737
+ fdir1,
738
+ fdir2,
739
+ feat_model,
740
+ mode=mode,
741
+ batch_size=batch_size,
742
+ num_workers=num_workers,
743
+ device=device,
744
+ custom_image_tranform=custom_image_tranform,
745
+ custom_fn_resize=custom_fn_resize,
746
+ verbose=verbose,
747
+ )
748
+ return score
749
+
750
+ # compute fid of a folder
751
+ elif fdir1 is not None and fdir2 is None:
752
+ if verbose:
753
+ print(f"compute FID of a folder with {dataset_name} statistics")
754
+ score = fid_folder(
755
+ fdir1,
756
+ dataset_name,
757
+ dataset_res,
758
+ dataset_split,
759
+ model=feat_model,
760
+ mode=mode,
761
+ model_name=model_name,
762
+ custom_fn_resize=custom_fn_resize,
763
+ custom_image_tranform=custom_image_tranform,
764
+ num_workers=num_workers,
765
+ batch_size=batch_size,
766
+ device=device,
767
+ verbose=verbose,
768
+ )
769
+ return score
770
+
771
+ # compute fid for a generator, using images in fdir2
772
+ elif gen is not None and fdir2 is not None:
773
+ if verbose:
774
+ print(f"compute FID of a model, using references in fdir2")
775
+ # get all inception features for the second folder
776
+ fbname2 = os.path.basename(fdir2)
777
+ np_feats2 = get_folder_features(
778
+ fdir2,
779
+ feat_model,
780
+ num_workers=num_workers,
781
+ batch_size=batch_size,
782
+ device=device,
783
+ mode=mode,
784
+ description=f"FID {fbname2} : ",
785
+ verbose=verbose,
786
+ custom_fn_resize=custom_fn_resize,
787
+ custom_image_tranform=custom_image_tranform,
788
+ )
789
+ mu2 = np.mean(np_feats2, axis=0)
790
+ sigma2 = np.cov(np_feats2, rowvar=False)
791
+ # Generate test features
792
+ np_feats = get_model_features(
793
+ gen,
794
+ feat_model,
795
+ mode=mode,
796
+ z_dim=z_dim,
797
+ num_gen=num_gen,
798
+ custom_fn_resize=custom_fn_resize,
799
+ custom_image_tranform=custom_image_tranform,
800
+ batch_size=batch_size,
801
+ device=device,
802
+ verbose=verbose,
803
+ )
804
+
805
+ mu = np.mean(np_feats, axis=0)
806
+ sigma = np.cov(np_feats, rowvar=False)
807
+ fid = frechet_distance(mu, sigma, mu2, sigma2)
808
+ return fid
809
+
810
+ # compute fid for a generator, using reference statistics
811
+ elif gen is not None:
812
+ if verbose:
813
+ print(
814
+ f"compute FID of a model with {dataset_name}-{dataset_res} statistics"
815
+ )
816
+ score = fid_model(
817
+ gen,
818
+ dataset_name,
819
+ dataset_res,
820
+ dataset_split,
821
+ model=feat_model,
822
+ model_name=model_name,
823
+ z_dim=z_dim,
824
+ num_gen=num_gen,
825
+ mode=mode,
826
+ num_workers=num_workers,
827
+ batch_size=batch_size,
828
+ custom_image_tranform=custom_image_tranform,
829
+ custom_fn_resize=custom_fn_resize,
830
+ device=device,
831
+ verbose=verbose,
832
+ )
833
+ return score
834
+
835
+ else:
836
+ raise ValueError("invalid combination of directories and models entered")
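
A minimal usage sketch for compute_fid above, including a hypothetical center crop passed through custom_image_tranform (the parameter name is spelled this way throughout the library); the folder paths are placeholders and a CUDA device is assumed:

def center_crop(np_img):
    # np_img is an HxWx3 uint8 array with values in [0, 255]
    h, w = np_img.shape[:2]
    s = min(h, w)
    top, left = (h - s) // 2, (w - s) // 2
    return np_img[top : top + s, left : left + s]

score = compute_fid(
    fdir1="/path/to/real_images",
    fdir2="/path/to/generated_images",
    mode="clean",
    custom_image_tranform=center_crop,
)
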
kit/metrics/clean_fid/inception_pytorch.py ADDED
@@ -0,0 +1,329 @@
1
+ """
2
+ File from: https://github.com/mseitzer/pytorch-fid
3
+ """
4
+
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ import torchvision
10
+ import warnings
11
+ from torch.utils.model_zoo import load_url as load_state_dict_from_url
12
+
13
+ # Inception weights ported to Pytorch from
14
+ # http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
15
+ FID_WEIGHTS_URL = "https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth" # noqa: E501
16
+
17
+
18
+ class InceptionV3(nn.Module):
19
+ """Pretrained InceptionV3 network returning feature maps"""
20
+
21
+ # Index of default block of inception to return,
22
+ # corresponds to output of final average pooling
23
+ DEFAULT_BLOCK_INDEX = 3
24
+
25
+ # Maps feature dimensionality to their output blocks indices
26
+ BLOCK_INDEX_BY_DIM = {
27
+ 64: 0, # First max pooling features
28
+ 192: 1, # Second max pooling features
29
+ 768: 2, # Pre-aux classifier features
30
+ 2048: 3, # Final average pooling features
31
+ }
32
+
33
+ def __init__(
34
+ self,
35
+ output_blocks=(DEFAULT_BLOCK_INDEX,),
36
+ resize_input=True,
37
+ normalize_input=True,
38
+ requires_grad=False,
39
+ use_fid_inception=True,
40
+ ):
41
+ """Build pretrained InceptionV3
42
+ Parameters
43
+ ----------
44
+ output_blocks : list of int
45
+ Indices of blocks to return features of. Possible values are:
46
+ - 0: corresponds to output of first max pooling
47
+ - 1: corresponds to output of second max pooling
48
+ - 2: corresponds to output which is fed to aux classifier
49
+ - 3: corresponds to output of final average pooling
50
+ resize_input : bool
51
+ If true, bilinearly resizes input to width and height 299 before
52
+ feeding input to model. As the network without fully connected
53
+ layers is fully convolutional, it should be able to handle inputs
54
+ of arbitrary size, so resizing might not be strictly needed
55
+ normalize_input : bool
56
+ If true, scales the input from range (0, 1) to the range the
57
+ pretrained Inception network expects, namely (-1, 1)
58
+ requires_grad : bool
59
+ If true, parameters of the model require gradients. Possibly useful
60
+ for finetuning the network
61
+ use_fid_inception : bool
62
+ If true, uses the pretrained Inception model used in Tensorflow's
63
+ FID implementation. If false, uses the pretrained Inception model
64
+ available in torchvision. The FID Inception model has different
65
+ weights and a slightly different structure from torchvision's
66
+ Inception model. If you want to compute FID scores, you are
67
+ strongly advised to set this parameter to true to get comparable
68
+ results.
69
+ """
70
+ super(InceptionV3, self).__init__()
71
+
72
+ self.resize_input = resize_input
73
+ self.normalize_input = normalize_input
74
+ self.output_blocks = sorted(output_blocks)
75
+ self.last_needed_block = max(output_blocks)
76
+
77
+ assert self.last_needed_block <= 3, "Last possible output block index is 3"
78
+
79
+ self.blocks = nn.ModuleList()
80
+
81
+ if use_fid_inception:
82
+ inception = fid_inception_v3()
83
+ else:
84
+ inception = _inception_v3(pretrained=True)
85
+
86
+ # Block 0: input to maxpool1
87
+ block0 = [
88
+ inception.Conv2d_1a_3x3,
89
+ inception.Conv2d_2a_3x3,
90
+ inception.Conv2d_2b_3x3,
91
+ nn.MaxPool2d(kernel_size=3, stride=2),
92
+ ]
93
+ self.blocks.append(nn.Sequential(*block0))
94
+
95
+ # Block 1: maxpool1 to maxpool2
96
+ if self.last_needed_block >= 1:
97
+ block1 = [
98
+ inception.Conv2d_3b_1x1,
99
+ inception.Conv2d_4a_3x3,
100
+ nn.MaxPool2d(kernel_size=3, stride=2),
101
+ ]
102
+ self.blocks.append(nn.Sequential(*block1))
103
+
104
+ # Block 2: maxpool2 to aux classifier
105
+ if self.last_needed_block >= 2:
106
+ block2 = [
107
+ inception.Mixed_5b,
108
+ inception.Mixed_5c,
109
+ inception.Mixed_5d,
110
+ inception.Mixed_6a,
111
+ inception.Mixed_6b,
112
+ inception.Mixed_6c,
113
+ inception.Mixed_6d,
114
+ inception.Mixed_6e,
115
+ ]
116
+ self.blocks.append(nn.Sequential(*block2))
117
+
118
+ # Block 3: aux classifier to final avgpool
119
+ if self.last_needed_block >= 3:
120
+ block3 = [
121
+ inception.Mixed_7a,
122
+ inception.Mixed_7b,
123
+ inception.Mixed_7c,
124
+ nn.AdaptiveAvgPool2d(output_size=(1, 1)),
125
+ ]
126
+ self.blocks.append(nn.Sequential(*block3))
127
+
128
+ for param in self.parameters():
129
+ param.requires_grad = requires_grad
130
+
131
+ def forward(self, inp):
132
+ """Get Inception feature maps
133
+ Parameters
134
+ ----------
135
+ inp : torch.autograd.Variable
136
+ Input tensor of shape Bx3xHxW. Values are expected to be in
137
+ range (0, 1)
138
+ Returns
139
+ -------
140
+ List of torch.autograd.Variable, corresponding to the selected output
141
+ block, sorted ascending by index
142
+ """
143
+ outp = []
144
+ x = inp
145
+
146
+ if self.resize_input:
147
+ raise ValueError("should not resize here")
148
+ x = F.interpolate(x, size=(299, 299), mode="bilinear", align_corners=False)
149
+
150
+ if self.normalize_input:
151
+ x = 2 * x - 1 # Scale from range (0, 1) to range (-1, 1)
152
+
153
+ for idx, block in enumerate(self.blocks):
154
+ x = block(x)
155
+ if idx in self.output_blocks:
156
+ outp.append(x)
157
+
158
+ if idx == self.last_needed_block:
159
+ break
160
+
161
+ return outp
162
+
163
+
164
+ def _inception_v3(*args, **kwargs):
165
+ """Wraps `torchvision.models.inception_v3`
166
+ Skips default weight initialization if supported by torchvision version.
167
+ See https://github.com/mseitzer/pytorch-fid/issues/28.
168
+ """
169
+ warnings.filterwarnings("ignore")
170
+ try:
171
+ version = tuple(map(int, torchvision.__version__.split(".")[:2]))
172
+ except ValueError:
173
+ # Just a caution against weird version strings
174
+ version = (0,)
175
+
176
+ if version >= (0, 6):
177
+ kwargs["init_weights"] = False
178
+
179
+ return torchvision.models.inception_v3(*args, **kwargs)
180
+
181
+
182
+ def fid_inception_v3():
183
+ """Build pretrained Inception model for FID computation
184
+ The Inception model for FID computation uses a different set of weights
185
+ and has a slightly different structure than torchvision's Inception.
186
+ This method first constructs torchvision's Inception and then patches the
187
+ necessary parts that are different in the FID Inception model.
188
+ """
189
+ inception = _inception_v3(num_classes=1008, aux_logits=False, pretrained=False)
190
+ inception.Mixed_5b = FIDInceptionA(192, pool_features=32)
191
+ inception.Mixed_5c = FIDInceptionA(256, pool_features=64)
192
+ inception.Mixed_5d = FIDInceptionA(288, pool_features=64)
193
+ inception.Mixed_6b = FIDInceptionC(768, channels_7x7=128)
194
+ inception.Mixed_6c = FIDInceptionC(768, channels_7x7=160)
195
+ inception.Mixed_6d = FIDInceptionC(768, channels_7x7=160)
196
+ inception.Mixed_6e = FIDInceptionC(768, channels_7x7=192)
197
+ inception.Mixed_7b = FIDInceptionE_1(1280)
198
+ inception.Mixed_7c = FIDInceptionE_2(2048)
199
+
200
+ state_dict = load_state_dict_from_url(FID_WEIGHTS_URL, progress=False)
201
+ inception.load_state_dict(state_dict)
202
+ return inception
203
+
204
+
205
+ class FIDInceptionA(torchvision.models.inception.InceptionA):
206
+ """InceptionA block patched for FID computation"""
207
+
208
+ def __init__(self, in_channels, pool_features):
209
+ super(FIDInceptionA, self).__init__(in_channels, pool_features)
210
+
211
+ def forward(self, x):
212
+ branch1x1 = self.branch1x1(x)
213
+
214
+ branch5x5 = self.branch5x5_1(x)
215
+ branch5x5 = self.branch5x5_2(branch5x5)
216
+
217
+ branch3x3dbl = self.branch3x3dbl_1(x)
218
+ branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
219
+ branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
220
+
221
+ # Patch: Tensorflow's average pool does not use the padded zeros in
222
+ # its average calculation
223
+ branch_pool = F.avg_pool2d(
224
+ x, kernel_size=3, stride=1, padding=1, count_include_pad=False
225
+ )
226
+ branch_pool = self.branch_pool(branch_pool)
227
+
228
+ outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
229
+ return torch.cat(outputs, 1)
230
+
231
+
232
+ class FIDInceptionC(torchvision.models.inception.InceptionC):
233
+ """InceptionC block patched for FID computation"""
234
+
235
+ def __init__(self, in_channels, channels_7x7):
236
+ super(FIDInceptionC, self).__init__(in_channels, channels_7x7)
237
+
238
+ def forward(self, x):
239
+ branch1x1 = self.branch1x1(x)
240
+
241
+ branch7x7 = self.branch7x7_1(x)
242
+ branch7x7 = self.branch7x7_2(branch7x7)
243
+ branch7x7 = self.branch7x7_3(branch7x7)
244
+
245
+ branch7x7dbl = self.branch7x7dbl_1(x)
246
+ branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
247
+ branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
248
+ branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
249
+ branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
250
+
251
+ # Patch: Tensorflow's average pool does not use the padded zeros in
252
+ # its average calculation
253
+ branch_pool = F.avg_pool2d(
254
+ x, kernel_size=3, stride=1, padding=1, count_include_pad=False
255
+ )
256
+ branch_pool = self.branch_pool(branch_pool)
257
+
258
+ outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
259
+ return torch.cat(outputs, 1)
260
+
261
+
262
+ class FIDInceptionE_1(torchvision.models.inception.InceptionE):
263
+ """First InceptionE block patched for FID computation"""
264
+
265
+ def __init__(self, in_channels):
266
+ super(FIDInceptionE_1, self).__init__(in_channels)
267
+
268
+ def forward(self, x):
269
+ branch1x1 = self.branch1x1(x)
270
+
271
+ branch3x3 = self.branch3x3_1(x)
272
+ branch3x3 = [
273
+ self.branch3x3_2a(branch3x3),
274
+ self.branch3x3_2b(branch3x3),
275
+ ]
276
+ branch3x3 = torch.cat(branch3x3, 1)
277
+
278
+ branch3x3dbl = self.branch3x3dbl_1(x)
279
+ branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
280
+ branch3x3dbl = [
281
+ self.branch3x3dbl_3a(branch3x3dbl),
282
+ self.branch3x3dbl_3b(branch3x3dbl),
283
+ ]
284
+ branch3x3dbl = torch.cat(branch3x3dbl, 1)
285
+
286
+ # Patch: Tensorflow's average pool does not use the padded zeros in
287
+ # its average calculation
288
+ branch_pool = F.avg_pool2d(
289
+ x, kernel_size=3, stride=1, padding=1, count_include_pad=False
290
+ )
291
+ branch_pool = self.branch_pool(branch_pool)
292
+
293
+ outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
294
+ return torch.cat(outputs, 1)
295
+
296
+
297
+ class FIDInceptionE_2(torchvision.models.inception.InceptionE):
298
+ """Second InceptionE block patched for FID computation"""
299
+
300
+ def __init__(self, in_channels):
301
+ super(FIDInceptionE_2, self).__init__(in_channels)
302
+
303
+ def forward(self, x):
304
+ branch1x1 = self.branch1x1(x)
305
+
306
+ branch3x3 = self.branch3x3_1(x)
307
+ branch3x3 = [
308
+ self.branch3x3_2a(branch3x3),
309
+ self.branch3x3_2b(branch3x3),
310
+ ]
311
+ branch3x3 = torch.cat(branch3x3, 1)
312
+
313
+ branch3x3dbl = self.branch3x3dbl_1(x)
314
+ branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
315
+ branch3x3dbl = [
316
+ self.branch3x3dbl_3a(branch3x3dbl),
317
+ self.branch3x3dbl_3b(branch3x3dbl),
318
+ ]
319
+ branch3x3dbl = torch.cat(branch3x3dbl, 1)
320
+
321
+ # Patch: The FID Inception model uses max pooling instead of average
322
+ # pooling. This is likely an error in this specific Inception
323
+ # implementation, as other Inception models use average pooling here
324
+ # (which matches the description in the paper).
325
+ branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
326
+ branch_pool = self.branch_pool(branch_pool)
327
+
328
+ outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
329
+ return torch.cat(outputs, 1)
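
A minimal sketch of using the InceptionV3 wrapper above as a feature extractor; it assumes a batch already resized to 299x299 with values in [0, 1], since this patched forward() raises when resize_input is enabled (the FID Inception weights are downloaded on first use):

import torch

model = InceptionV3(output_blocks=[3], resize_input=False).eval()
x = torch.rand(4, 3, 299, 299)           # hypothetical batch in [0, 1]
with torch.no_grad():
    feats = model(x)[0]                  # shape (4, 2048, 1, 1), final average pooling
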
kit/metrics/clean_fid/inception_torchscript.py ADDED
@@ -0,0 +1,59 @@
1
+ import os
2
+ import torch
3
+ import torch.nn as nn
4
+ import contextlib
5
+ from .downloads_helper import *
6
+
7
+
8
+ @contextlib.contextmanager
9
+ def disable_gpu_fuser_on_pt19():
10
+ # On PyTorch 1.9 a CUDA fuser bug prevents the Inception JIT model to run. See
11
+ # https://github.com/GaParmar/clean-fid/issues/5
12
+ # https://github.com/pytorch/pytorch/issues/64062
13
+ if torch.__version__.startswith("1.9."):
14
+ old_val = torch._C._jit_can_fuse_on_gpu()
15
+ torch._C._jit_override_can_fuse_on_gpu(False)
16
+ yield
17
+ if torch.__version__.startswith("1.9."):
18
+ torch._C._jit_override_can_fuse_on_gpu(old_val)
19
+
20
+
21
+ class InceptionV3W(nn.Module):
22
+ """
23
+ Wrapper around Inception V3 torchscript model provided here
24
+ https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt
25
+
26
+ path: locally saved inception weights
27
+ """
28
+
29
+ def __init__(self, path, download=True, resize_inside=False):
30
+ super(InceptionV3W, self).__init__()
31
+ # download the network if it is not present at the given directory
32
+ # use the current directory by default
33
+ if download:
34
+ check_download_inception(fpath=path)
35
+ path = os.path.join(path, "inception-2015-12-05.pt")
36
+ self.base = torch.jit.load(path).eval()
37
+ self.layers = self.base.layers
38
+ self.resize_inside = resize_inside
39
+
40
+ """
41
+ Get the inception features without resizing
42
+ x: Image with values in range [0,255]
43
+ """
44
+
45
+ def forward(self, x):
46
+ with disable_gpu_fuser_on_pt19():
47
+ bs = x.shape[0]
48
+ if self.resize_inside:
49
+ features = self.base(x, return_features=True).view((bs, 2048))
50
+ else:
51
+ # make sure it is resized already
52
+ assert (x.shape[2] == 299) and (x.shape[3] == 299)
53
+ # apply normalization
54
+ x1 = x - 128
55
+ x2 = x1 / 128
56
+ features = self.layers.forward(
57
+ x2,
58
+ ).view((bs, 2048))
59
+ return features
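
A minimal sketch of the torchscript wrapper above; the weights directory is a placeholder and a CUDA device is assumed. With resize_inside=False, inputs must already be 299x299 with values in [0, 255]:

import torch

model = InceptionV3W(path="/tmp/inception_weights", download=True, resize_inside=False).to("cuda").eval()
x = torch.rand(2, 3, 299, 299, device="cuda") * 255
with torch.no_grad():
    feats = model(x)                     # shape (2, 2048)
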
kit/metrics/clean_fid/leaderboard.py ADDED
@@ -0,0 +1,58 @@
1
+ import os
2
+ import csv
3
+ import shutil
4
+ import urllib.request
5
+
6
+
7
+ def get_score(
8
+ model_name=None,
9
+ dataset_name=None,
10
+ dataset_res=None,
11
+ dataset_split=None,
12
+ task_name=None,
13
+ ):
14
+ # download the csv file from server
15
+ url = "https://www.cs.cmu.edu/~clean-fid/files/leaderboard.csv"
16
+ local_path = "/tmp/leaderboard.csv"
17
+ with urllib.request.urlopen(url) as response, open(local_path, "wb") as f:
18
+ shutil.copyfileobj(response, f)
19
+
20
+ d_field2idx = {}
21
+ l_matches = []
22
+ with open(local_path, "r") as f:
23
+ csvreader = csv.reader(f)
24
+ l_fields = next(csvreader)
25
+ for idx, val in enumerate(l_fields):
26
+ d_field2idx[val.strip()] = idx
27
+ # iterate through all rows
28
+ for row in csvreader:
29
+ # skip empty rows
30
+ if len(row) == 0:
31
+ continue
32
+ # skip if the filter doesn't match
33
+ if model_name is not None and (
34
+ row[d_field2idx["model_name"]].strip() != model_name
35
+ ):
36
+ continue
37
+ if dataset_name is not None and (
38
+ row[d_field2idx["dataset_name"]].strip() != dataset_name
39
+ ):
40
+ continue
41
+ if dataset_res is not None and (
42
+ row[d_field2idx["dataset_res"]].strip() != dataset_res
43
+ ):
44
+ continue
45
+ if dataset_split is not None and (
46
+ row[d_field2idx["dataset_split"]].strip() != dataset_split
47
+ ):
48
+ continue
49
+ if task_name is not None and (
50
+ row[d_field2idx["task_name"]].strip() != task_name
51
+ ):
52
+ continue
53
+ curr = {}
54
+ for f in l_fields:
55
+ curr[f.strip()] = row[d_field2idx[f.strip()]].strip()
56
+ l_matches.append(curr)
57
+ os.remove(local_path)
58
+ return l_matches
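
A minimal sketch of querying the hosted leaderboard with get_score above; the filter values are examples, and note that dataset_res is compared as a string:

rows = get_score(dataset_name="FFHQ", dataset_res="1024")
for row in rows:
    print(row)                           # one dict per matching leaderboard entry
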
kit/metrics/clean_fid/resize.py ADDED
@@ -0,0 +1,108 @@
1
+ """
2
+ Helpers for resizing with multiple CPU cores
3
+ """
4
+ import os
5
+ import numpy as np
6
+ import torch
7
+ from PIL import Image
8
+ import torch.nn.functional as F
9
+
10
+
11
+ def build_resizer(mode):
12
+ if mode == "clean":
13
+ return make_resizer("PIL", False, "bicubic", (299, 299))
14
+ # if using legacy tensorflow, do not manually resize outside the network
15
+ elif mode == "legacy_tensorflow":
16
+ return lambda x: x
17
+ elif mode == "legacy_pytorch":
18
+ return make_resizer("PyTorch", False, "bilinear", (299, 299))
19
+ else:
20
+ raise ValueError(f"Invalid mode {mode} specified")
21
+
22
+
23
+ """
24
+ Construct a function that resizes a numpy image based on the
25
+ flags passed in.
26
+ """
27
+
28
+
29
+ def make_resizer(library, quantize_after, filter, output_size):
30
+ if library == "PIL" and quantize_after:
31
+ name_to_filter = {
32
+ "bicubic": Image.BICUBIC,
33
+ "bilinear": Image.BILINEAR,
34
+ "nearest": Image.NEAREST,
35
+ "lanczos": Image.LANCZOS,
36
+ "box": Image.BOX,
37
+ }
38
+
39
+ def func(x):
40
+ x = Image.fromarray(x)
41
+ x = x.resize(output_size, resample=name_to_filter[filter])
42
+ x = np.asarray(x).clip(0, 255).astype(np.uint8)
43
+ return x
44
+
45
+ elif library == "PIL" and not quantize_after:
46
+ name_to_filter = {
47
+ "bicubic": Image.BICUBIC,
48
+ "bilinear": Image.BILINEAR,
49
+ "nearest": Image.NEAREST,
50
+ "lanczos": Image.LANCZOS,
51
+ "box": Image.BOX,
52
+ }
53
+ s1, s2 = output_size
54
+
55
+ def resize_single_channel(x_np):
56
+ img = Image.fromarray(x_np.astype(np.float32), mode="F")
57
+ img = img.resize(output_size, resample=name_to_filter[filter])
58
+ return np.asarray(img).clip(0, 255).reshape(s2, s1, 1)
59
+
60
+ def func(x):
61
+ x = [resize_single_channel(x[:, :, idx]) for idx in range(3)]
62
+ x = np.concatenate(x, axis=2).astype(np.float32)
63
+ return x
64
+
65
+ elif library == "PyTorch":
66
+ import warnings
67
+
68
+ # ignore the numpy warnings
69
+ warnings.filterwarnings("ignore")
70
+
71
+ def func(x):
72
+ x = torch.Tensor(x.transpose((2, 0, 1)))[None, ...]
73
+ x = F.interpolate(x, size=output_size, mode=filter, align_corners=False)
74
+ x = x[0, ...].cpu().data.numpy().transpose((1, 2, 0)).clip(0, 255)
75
+ if quantize_after:
76
+ x = x.astype(np.uint8)
77
+ return x
78
+
79
+ else:
80
+ raise NotImplementedError("library [%s] is not included" % library)
81
+ return func
82
+
83
+
84
+ class FolderResizer(torch.utils.data.Dataset):
85
+ def __init__(self, files, outpath, fn_resize, output_ext=".png"):
86
+ self.files = files
87
+ self.outpath = outpath
88
+ self.output_ext = output_ext
89
+ self.fn_resize = fn_resize
90
+
91
+ def __len__(self):
92
+ return len(self.files)
93
+
94
+ def __getitem__(self, i):
95
+ path = str(self.files[i])
96
+ img_np = np.asarray(Image.open(path))
97
+ img_resize_np = self.fn_resize(img_np)
98
+ # swap the output extension
99
+ basename = os.path.basename(path).split(".")[0] + self.output_ext
100
+ outname = os.path.join(self.outpath, basename)
101
+ if self.output_ext == ".npy":
102
+ np.save(outname, img_resize_np)
103
+ elif self.output_ext == ".png":
104
+ img_resized_pil = Image.fromarray(img_resize_np)
105
+ img_resized_pil.save(outname)
106
+ else:
107
+ raise ValueError("invalid output extension")
108
+ return 0
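
A minimal sketch of the "clean" resizer above, which uses PIL bicubic resampling to 299x299 and returns float32 without quantizing; the image path is a placeholder:

import numpy as np
from PIL import Image

fn_resize = build_resizer("clean")
img_np = np.asarray(Image.open("/path/to/example.png").convert("RGB"))
img_resized = fn_resize(img_np)          # float32 array of shape (299, 299, 3)
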
kit/metrics/clean_fid/utils.py ADDED
@@ -0,0 +1,75 @@
1
+ import numpy as np
2
+ import torch
3
+ import torchvision
4
+ from PIL import Image
5
+ import zipfile
6
+ from .resize import build_resizer
7
+
8
+
9
+ class ResizeDataset(torch.utils.data.Dataset):
10
+ """
11
+ A placeholder Dataset that enables parallelizing the resize operation
12
+ using multiple CPU cores
13
+
14
+ files: list of all files in the folder
15
+ fn_resize: function that takes an np_array as input [0,255]
16
+ """
17
+
18
+ def __init__(self, files, mode, size=(299, 299), fdir=None):
19
+ self.files = files
20
+ self.fdir = fdir
21
+ self.transforms = torchvision.transforms.ToTensor()
22
+ self.size = size
23
+ self.fn_resize = build_resizer(mode)
24
+ self.custom_image_tranform = lambda x: x
25
+ self._zipfile = None
26
+
27
+ def _get_zipfile(self):
28
+ assert self.fdir is not None and ".zip" in self.fdir
29
+ if self._zipfile is None:
30
+ self._zipfile = zipfile.ZipFile(self.fdir)
31
+ return self._zipfile
32
+
33
+ def __len__(self):
34
+ return len(self.files)
35
+
36
+ def __getitem__(self, i):
37
+ path = str(self.files[i])
38
+ if self.fdir is not None and ".zip" in self.fdir:
39
+ with self._get_zipfile().open(path, "r") as f:
40
+ img_np = np.array(Image.open(f).convert("RGB"))
41
+ elif ".npy" in path:
42
+ img_np = np.load(path)
43
+ else:
44
+ img_pil = Image.open(path).convert("RGB")
45
+ img_np = np.array(img_pil)
46
+
47
+ # apply a custom image transform before resizing the image to 299x299
48
+ img_np = self.custom_image_tranform(img_np)
49
+ # fn_resize expects a np array and returns a np array
50
+ img_resized = self.fn_resize(img_np)
51
+
52
+ # ToTensor() converts to [0,1] only if input in uint8
53
+ if img_resized.dtype == "uint8":
54
+ img_t = self.transforms(np.array(img_resized)) * 255
55
+ elif img_resized.dtype == "float32":
56
+ img_t = self.transforms(img_resized)
57
+
58
+ return img_t
59
+
60
+
61
+ EXTENSIONS = {
62
+ "bmp",
63
+ "jpg",
64
+ "jpeg",
65
+ "pgm",
66
+ "png",
67
+ "ppm",
68
+ "tif",
69
+ "tiff",
70
+ "webp",
71
+ "npy",
72
+ "JPEG",
73
+ "JPG",
74
+ "PNG",
75
+ }
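
A minimal sketch of parallel resizing with ResizeDataset above; the file paths are placeholders, and each returned tensor keeps the [0, 255] value range expected by the feature extractors:

import torch

files = ["/data/imgs/0001.png", "/data/imgs/0002.png"]   # placeholder paths
dataset = ResizeDataset(files, mode="clean")
loader = torch.utils.data.DataLoader(dataset, batch_size=2, num_workers=2)
batch = next(iter(loader))               # float tensor of shape (2, 3, 299, 299)
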
kit/metrics/clean_fid/wrappers.py ADDED
@@ -0,0 +1,111 @@
1
+ from PIL import Image
2
+ import numpy as np
3
+ import torch
4
+ from .features import build_feature_extractor, get_reference_statistics
5
+ from .fid import get_batch_features, fid_from_feats
6
+ from .resize import build_resizer
7
+
8
+
9
+ """
10
+ A helper class that allows adding images one batch at a time.
11
+ """
12
+
13
+
14
+ class CleanFID:
15
+ def __init__(self, mode="clean", model_name="inception_v3", device="cuda"):
16
+ self.real_features = []
17
+ self.gen_features = []
18
+ self.mode = mode
19
+ self.device = device
20
+ if model_name == "inception_v3":
21
+ self.feat_model = build_feature_extractor(mode, device)
22
+ self.fn_resize = build_resizer(mode)
23
+ elif model_name == "clip_vit_b_32":
24
+ from .clip_features import CLIP_fx, img_preprocess_clip
25
+
26
+ clip_fx = CLIP_fx("ViT-B/32")
27
+ self.feat_model = clip_fx
28
+ self.fn_resize = img_preprocess_clip
29
+
30
+ """
31
+ Function that takes an image (PIL.Image or np.array or torch.tensor)
32
+ and returns the corresponding feature embedding vector.
33
+ The image x is expected to be in range [0, 255]
34
+ """
35
+
36
+ def compute_features(self, x):
37
+ # if x is a PIL Image
38
+ if isinstance(x, Image.Image):
39
+ x_np = np.array(x)
40
+ x_np_resized = self.fn_resize(x_np)
41
+ x_t = torch.tensor(x_np_resized.transpose((2, 0, 1))).unsqueeze(0)
42
+ x_feat = get_batch_features(x_t, self.feat_model, self.device)
43
+ elif isinstance(x, np.ndarray):
44
+ x_np_resized = self.fn_resize(x)
45
+ x_t = (
46
+ torch.tensor(x_np_resized.transpose((2, 0, 1)))
47
+ .unsqueeze(0)
48
+ .to(self.device)
49
+ )
50
+ # normalization happens inside the self.feat_model, expected image range here is [0,255]
51
+ x_feat = get_batch_features(x_t, self.feat_model, self.device)
52
+ elif isinstance(x, torch.Tensor):
53
+ # pdb.set_trace()
54
+ # add the batch dimension if x is passed in as C,H,W
55
+ if len(x.shape) == 3:
56
+ x = x.unsqueeze(0)
57
+ b, c, h, w = x.shape
58
+ # convert back to np array and resize
59
+ l_x_np_resized = []
60
+ for _ in range(b):
61
+ x_np = x[_].cpu().numpy().transpose((1, 2, 0))
62
+ l_x_np_resized.append(self.fn_resize(x_np)[None,])
63
+ x_np_resized = np.concatenate(l_x_np_resized)
64
+ x_t = torch.tensor(x_np_resized.transpose((0, 3, 1, 2))).to(self.device)
65
+ # normalization happens inside the self.feat_model, expected image range here is [0,255]
66
+ x_feat = get_batch_features(x_t, self.feat_model, self.device)
67
+ else:
68
+ raise ValueError("image type could not be inferred")
69
+ return x_feat
70
+
71
+ """
72
+ Extract the features from x and add to the list of reference real images
73
+ """
74
+
75
+ def add_real_images(self, x):
76
+ x_feat = self.compute_features(x)
77
+ self.real_features.append(x_feat)
78
+
79
+ """
80
+ Extract the features from x and add to the list of generated images
81
+ """
82
+
83
+ def add_gen_images(self, x):
84
+ x_feat = self.compute_features(x)
85
+ self.gen_features.append(x_feat)
86
+
87
+ """
88
+ Compute FID between the real and generated images added so far
89
+ """
90
+
91
+ def calculate_fid(self, verbose=True):
92
+ feats1 = np.concatenate(self.real_features)
93
+ feats2 = np.concatenate(self.gen_features)
94
+ if verbose:
95
+ print(f"# real images = {feats1.shape[0]}")
96
+ print(f"# generated images = {feats2.shape[0]}")
97
+ return fid_from_feats(feats1, feats2)
98
+
99
+ """
100
+ Remove the real image features added so far
101
+ """
102
+
103
+ def reset_real_features(self):
104
+ self.real_features = []
105
+
106
+ """
107
+ Remove the generated image features added so far
108
+ """
109
+
110
+ def reset_gen_features(self):
111
+ self.gen_features = []
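
A minimal sketch of the incremental wrapper above, assuming a hypothetical loader that yields (B, 3, H, W) tensors with values in [0, 255] and a CUDA device:

metric = CleanFID(mode="clean", device="cuda")
for real_batch, fake_batch in loader:    # hypothetical data loader
    metric.add_real_images(real_batch)
    metric.add_gen_images(fake_batch)
fid_value = metric.calculate_fid()
metric.reset_real_features()
metric.reset_gen_features()
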
kit/metrics/clip.py ADDED
@@ -0,0 +1,32 @@
1
+ import torch
2
+ from PIL import Image
3
+ import open_clip
4
+
5
+
6
+ def load_open_clip_model_preprocess_and_tokenizer(device=torch.device("cuda")):
7
+ clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
8
+ "ViT-g-14", pretrained="laion2b_s12b_b42k", device=device
9
+ )
10
+ clip_tokenizer = open_clip.get_tokenizer("ViT-g-14")
11
+ return clip_model, clip_preprocess, clip_tokenizer
12
+
13
+
14
+ def compute_clip_score(
15
+ images,
16
+ prompts,
17
+ models,
18
+ device=torch.device("cuda"),
19
+ ):
20
+ clip_model, clip_preprocess, clip_tokenizer = models
21
+ with torch.no_grad():
22
+ tensors = [clip_preprocess(image) for image in images]
23
+ image_processed_tensor = torch.stack(tensors, 0).to(device)
24
+ image_features = clip_model.encode_image(image_processed_tensor)
25
+
26
+ encoding = clip_tokenizer(prompts).to(device)
27
+ text_features = clip_model.encode_text(encoding)
28
+
29
+ image_features /= image_features.norm(dim=-1, keepdim=True)
30
+ text_features /= text_features.norm(dim=-1, keepdim=True)
31
+
32
+ return (image_features @ text_features.T).mean(-1).cpu().numpy().tolist()
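
A minimal usage sketch for the helpers above, assuming paired lists of PIL images and prompt strings (the ViT-g-14 weights are downloaded on first use and a CUDA device is assumed):

models = load_open_clip_model_preprocess_and_tokenizer()
scores = compute_clip_score(images, prompts, models)     # images/prompts are hypothetical paired lists
print(sum(scores) / len(scores))                         # mean CLIP similarity
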
kit/metrics/distributional.py ADDED
@@ -0,0 +1,104 @@
1
+ import os
2
+ import tempfile
3
+ import torch
4
+ from PIL import Image
5
+ from tqdm.auto import tqdm
6
+ from concurrent.futures import ProcessPoolExecutor
7
+ from functools import partial
8
+ from PIL import Image
9
+ from .clean_fid import fid
10
+
11
+
12
+ def save_single_image_to_temp(i, image, temp_dir):
13
+ save_path = os.path.join(temp_dir, f"{i}.png")
14
+ image.save(save_path, "PNG")
15
+
16
+
17
+ def save_images_to_temp(images, num_workers, verbose=False):
18
+ assert isinstance(images, list) and isinstance(images[0], Image.Image)
19
+ temp_dir = tempfile.mkdtemp()
20
+
21
+ # Using ProcessPoolExecutor to save images in parallel
22
+ func = partial(save_single_image_to_temp, temp_dir=temp_dir)
23
+ with ProcessPoolExecutor(max_workers=num_workers) as executor:
24
+ tasks = executor.map(func, range(len(images)), images)
25
+ list(tasks) if not verbose else list(
26
+ tqdm(
27
+ tasks,
28
+ total=len(images),
29
+ desc="Saving images ",
30
+ )
31
+ )
32
+ return temp_dir
33
+
34
+
35
+ # Compute FID between two sets of images
36
+ def compute_fid(
37
+ images1,
38
+ images2,
39
+ mode="legacy",
40
+ device=None,
41
+ batch_size=64,
42
+ num_workers=None,
43
+ verbose=False,
44
+ ):
45
+ # Support four types of FID scores
46
+ assert mode in ["legacy", "clean", "clip"]
47
+ if mode == "legacy":
48
+ mode = "legacy_pytorch"
49
+ model_name = "inception_v3"
50
+ elif mode == "clean":
51
+ mode = "clean"
52
+ model_name = "inception_v3"
53
+ elif mode == "clip":
54
+ mode = "clean"
55
+ model_name = "clip_vit_b_32"
56
+ else:
57
+ assert False
58
+
59
+ # Set up device and num_workers
60
+ if device is None:
61
+ device = (
62
+ torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
63
+ )
64
+ if num_workers is not None:
65
+ assert 1 <= num_workers <= os.cpu_count()
66
+ else:
67
+ num_workers = max(torch.cuda.device_count() * 4, 8)
68
+
69
+ # Check images, can be paths or lists of PIL images
70
+ if not isinstance(images1, list):
71
+ assert isinstance(images1, str) and os.path.exists(images1)
72
+ assert isinstance(images2, str) and os.path.exists(images2)
73
+ path1 = images1
74
+ path2 = images2
75
+ else:
76
+ assert isinstance(images1, list) and isinstance(images1[0], Image.Image)
77
+ assert isinstance(images2, list) and isinstance(images2[0], Image.Image)
78
+ # Save images to temp dir if needed
79
+ path1 = save_images_to_temp(images1, num_workers=num_workers, verbose=verbose)
80
+ path2 = save_images_to_temp(images2, num_workers=num_workers, verbose=verbose)
81
+
82
+ # Attempt to cache statistics for path1
83
+ if not fid.test_stats_exists(name=str(os.path.abspath(path1)).replace("/", "_"), mode=mode, model_name=model_name):
84
+ fid.make_custom_stats(
85
+ name=str(os.path.abspath(path1)).replace("/", "_"),
86
+ fdir=path1,
87
+ mode=mode,
88
+ model_name=model_name,
89
+ device=device,
90
+ num_workers=num_workers,
91
+ verbose=verbose,
92
+ )
93
+ fid_score = fid.compute_fid(
94
+ path2,
95
+ dataset_name=str(os.path.abspath(path1)).replace("/", "_"),
96
+ dataset_split="custom",
97
+ mode=mode,
98
+ model_name=model_name,
99
+ device=device,
100
+ batch_size=batch_size,
101
+ num_workers=num_workers,
102
+ verbose=verbose,
103
+ )
104
+ return fid_score
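
A minimal usage sketch for this compute_fid wrapper, assuming two hypothetical lists of PIL images; they are written to temporary folders and the reference statistics are cached internally:

fid_value = compute_fid(images_real, images_gen, mode="clean", verbose=True)
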
kit/metrics/image.py ADDED
@@ -0,0 +1,112 @@
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ from PIL import Image
5
+ from skimage.metrics import (
6
+ mean_squared_error,
7
+ peak_signal_noise_ratio,
8
+ structural_similarity as structural_similarity_index_measure,
9
+ normalized_mutual_information,
10
+ )
11
+ from tqdm.auto import tqdm
12
+ from concurrent.futures import ThreadPoolExecutor
13
+
14
+
15
+ # Process images to numpy arrays
16
+ def convert_image_pair_to_numpy(image1, image2):
17
+ assert isinstance(image1, Image.Image) and isinstance(image2, Image.Image)
18
+
19
+ image1_np = np.array(image1)
20
+ image2_np = np.array(image2)
21
+ assert image1_np.shape == image2_np.shape
22
+
23
+ return image1_np, image2_np
24
+
25
+
26
+ # Compute MSE between two images
27
+ def compute_mse(image1, image2):
28
+ image1_np, image2_np = convert_image_pair_to_numpy(image1, image2)
29
+ return float(mean_squared_error(image1_np, image2_np))
30
+
31
+
32
+ # Compute PSNR between two images
33
+ def compute_psnr(image1, image2):
34
+ image1_np, image2_np = convert_image_pair_to_numpy(image1, image2)
35
+ return float(peak_signal_noise_ratio(image1_np, image2_np))
36
+
37
+
38
+ # Compute SSIM between two images
39
+ def compute_ssim(image1, image2):
40
+ image1_np, image2_np = convert_image_pair_to_numpy(image1, image2)
41
+ return float(
42
+ structural_similarity_index_measure(image1_np, image2_np, channel_axis=2)
43
+ )
44
+
45
+
46
+ # Compute NMI between two images
47
+ def compute_nmi(image1, image2):
48
+ image1_np, image2_np = convert_image_pair_to_numpy(image1, image2)
49
+ return float(normalized_mutual_information(image1_np, image2_np))
50
+
51
+
52
+ # Compute metrics
53
+ def compute_metric_repeated(
54
+ images1, images2, metric_func, num_workers=None, verbose=False
55
+ ):
56
+ # Accept list of PIL images
57
+ assert isinstance(images1, list) and isinstance(images1[0], Image.Image)
58
+ assert isinstance(images2, list) and isinstance(images2[0], Image.Image)
59
+ assert len(images1) == len(images2)
60
+
61
+ if num_workers is not None:
62
+ assert 1 <= num_workers <= os.cpu_count()
63
+ else:
64
+ num_workers = max(torch.cuda.device_count() * 4, 8)
65
+
66
+ metric_name = metric_func.__name__.split("_")[1].upper()
67
+
68
+ with ThreadPoolExecutor(max_workers=num_workers) as executor:
69
+ tasks = executor.map(metric_func, images1, images2)
70
+ values = (
71
+ list(tasks)
72
+ if not verbose
73
+ else list(
74
+ tqdm(
75
+ tasks,
76
+ total=len(images1),
77
+ desc=f"{metric_name} ",
78
+ )
79
+ )
80
+ )
81
+ return values
82
+
83
+
84
+ # Compute MSE between pairs of images
85
+ def compute_mse_repeated(images1, images2, num_workers=None, verbose=False):
86
+ return compute_metric_repeated(images1, images2, compute_mse, num_workers, verbose)
87
+
88
+
89
+ # Compute PSNR between pairs of images
90
+ def compute_psnr_repeated(images1, images2, num_workers=None, verbose=False):
91
+ return compute_metric_repeated(images1, images2, compute_psnr, num_workers, verbose)
92
+
93
+
94
+ # Compute SSIM between pairs of images
95
+ def compute_ssim_repeated(images1, images2, num_workers=None, verbose=False):
96
+ return compute_metric_repeated(images1, images2, compute_ssim, num_workers, verbose)
97
+
98
+
99
+ # Compute NMI between pairs of images
100
+ def compute_nmi_repeated(images1, images2, num_workers=None, verbose=False):
101
+ return compute_metric_repeated(images1, images2, compute_nmi, num_workers, verbose)
102
+
103
+
104
+ def compute_image_distance_repeated(
105
+ images1, images2, metric_name, num_workers=None, verbose=False
106
+ ):
107
+ metric_func = {
108
+ "psnr": compute_psnr,
109
+ "ssim": compute_ssim,
110
+ "nmi": compute_nmi,
111
+ }[metric_name]
112
+ return compute_metric_repeated(images1, images2, metric_func, num_workers, verbose)
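
A minimal usage sketch for the pairwise metrics above, assuming two equal-length lists of PIL images named images1 and images2:

psnr_values = compute_psnr_repeated(images1, images2)
ssim_values = compute_image_distance_repeated(images1, images2, metric_name="ssim")
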
kit/metrics/lpips/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """
2
+ From https://github.com/richzhang/PerceptualSimilarity
3
+ """
4
+ from .lpips import LPIPS
kit/metrics/lpips/lpips.py ADDED
@@ -0,0 +1,338 @@
1
+ from __future__ import absolute_import
2
+ import torch
3
+ import torch.nn as nn
4
+ from torch.autograd import Variable
5
+ import warnings
6
+ from . import pretrained_networks as pn
7
+ from .utils import normalize_tensor, l2, dssim, tensor2np, tensor2tensorlab, tensor2im
8
+
9
+
10
+ def spatial_average(in_tens, keepdim=True):
11
+ return in_tens.mean([2, 3], keepdim=keepdim)
12
+
13
+
14
+ def upsample(in_tens, out_HW=(64, 64)): # assumes scale factor is same for H and W
15
+ in_H, in_W = in_tens.shape[2], in_tens.shape[3]
16
+ return nn.Upsample(size=out_HW, mode="bilinear", align_corners=False)(in_tens)
17
+
18
+
19
+ # Learned perceptual metric
20
+ class LPIPS(nn.Module):
21
+ def __init__(
22
+ self,
23
+ pretrained=True,
24
+ net="alex",
25
+ version="0.1",
26
+ lpips=True,
27
+ spatial=False,
28
+ pnet_rand=False,
29
+ pnet_tune=False,
30
+ use_dropout=True,
31
+ model_path=None,
32
+ eval_mode=True,
33
+ verbose=True,
34
+ ):
35
+ """Initializes a perceptual loss torch.nn.Module
36
+
37
+ Parameters (default listed first)
38
+ ---------------------------------
39
+ lpips : bool
40
+ [True] use linear layers on top of base/trunk network
41
+ [False] means no linear layers; each layer is averaged together
42
+ pretrained : bool
43
+ This flag controls the linear layers, which are only in effect when lpips=True above
44
+ [True] means linear layers are calibrated with human perceptual judgments
45
+ [False] means linear layers are randomly initialized
46
+ pnet_rand : bool
47
+ [False] means trunk loaded with ImageNet classification weights
48
+ [True] means randomly initialized trunk
49
+ net : str
50
+ ['alex','vgg','squeeze'] are the base/trunk networks available
51
+ version : str
52
+ ['v0.1'] is the default and latest
53
+ ['v0.0'] contained a normalization bug; corresponds to old arxiv v1 (https://arxiv.org/abs/1801.03924v1)
54
+ model_path : 'str'
55
+ [None] is default and loads the pretrained weights from paper https://arxiv.org/abs/1801.03924v1
56
+
57
+ The following parameters should only be changed if training the network
58
+
59
+ eval_mode : bool
60
+ [True] is for test mode (default)
61
+ [False] is for training mode
62
+ pnet_tune
63
+ [False] keep base/trunk frozen
64
+ [True] tune the base/trunk network
65
+ use_dropout : bool
66
+ [True] to use dropout when training linear layers
67
+ [False] for no dropout when training linear layers
68
+ """
69
+
70
+ super(LPIPS, self).__init__()
71
+ warnings.filterwarnings("ignore")
72
+ if verbose:
73
+ pass
74
+ # print(
75
+ # "Setting up [%s] perceptual loss: trunk [%s], v[%s], spatial [%s]"
76
+ # % (
77
+ # "LPIPS" if lpips else "baseline",
78
+ # net,
79
+ # version,
80
+ # "on" if spatial else "off",
81
+ # )
82
+ # )
83
+
84
+ self.pnet_type = net
85
+ self.pnet_tune = pnet_tune
86
+ self.pnet_rand = pnet_rand
87
+ self.spatial = spatial
88
+ self.lpips = lpips # false means baseline of just averaging all layers
89
+ self.version = version
90
+ self.scaling_layer = ScalingLayer()
91
+
92
+ if self.pnet_type in ["vgg", "vgg16"]:
93
+ net_type = pn.vgg16
94
+ self.chns = [64, 128, 256, 512, 512]
95
+ elif self.pnet_type == "alex":
96
+ net_type = pn.alexnet
97
+ self.chns = [64, 192, 384, 256, 256]
98
+ elif self.pnet_type == "squeeze":
99
+ net_type = pn.squeezenet
100
+ self.chns = [64, 128, 256, 384, 384, 512, 512]
101
+ self.L = len(self.chns)
102
+
103
+ self.net = net_type(pretrained=not self.pnet_rand, requires_grad=self.pnet_tune)
104
+
105
+ if lpips:
106
+ self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
107
+ self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
108
+ self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
109
+ self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
110
+ self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
111
+ self.lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
112
+ if self.pnet_type == "squeeze": # 7 layers for squeezenet
113
+ self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout)
114
+ self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout)
115
+ self.lins += [self.lin5, self.lin6]
116
+ self.lins = nn.ModuleList(self.lins)
117
+
118
+ if pretrained:
119
+ if model_path is None:
120
+ import inspect
121
+ import os
122
+
123
+ model_path = os.path.abspath(
124
+ os.path.join(
125
+ inspect.getfile(self.__init__),
126
+ "..",
127
+ "weights/v%s/%s.pth" % (version, net),
128
+ )
129
+ )
130
+
131
+ if verbose:
132
+ pass
133
+ # print("Loading model from: %s" % model_path)
134
+ self.load_state_dict(
135
+ torch.load(model_path, map_location="cpu"), strict=False
136
+ )
137
+
138
+ if eval_mode:
139
+ self.eval()
140
+
141
+ def forward(self, in0, in1, retPerLayer=False, normalize=False):
142
+ if (
143
+ normalize
144
+ ): # turn on this flag if input is [0,1] so it can be adjusted to [-1, +1]
145
+ in0 = 2 * in0 - 1
146
+ in1 = 2 * in1 - 1
147
+
148
+ # v0.0 - original release had a bug, where input was not scaled
149
+ in0_input, in1_input = (
150
+ (self.scaling_layer(in0), self.scaling_layer(in1))
151
+ if self.version == "0.1"
152
+ else (in0, in1)
153
+ )
154
+ outs0, outs1 = self.net.forward(in0_input), self.net.forward(in1_input)
155
+ feats0, feats1, diffs = {}, {}, {}
156
+
157
+ for kk in range(self.L):
158
+ feats0[kk], feats1[kk] = normalize_tensor(outs0[kk]), normalize_tensor(
159
+ outs1[kk]
160
+ )
161
+ diffs[kk] = (feats0[kk] - feats1[kk]) ** 2
162
+
163
+ if self.lpips:
164
+ if self.spatial:
165
+ res = [
166
+ upsample(self.lins[kk](diffs[kk]), out_HW=in0.shape[2:])
167
+ for kk in range(self.L)
168
+ ]
169
+ else:
170
+ res = [
171
+ spatial_average(self.lins[kk](diffs[kk]), keepdim=True)
172
+ for kk in range(self.L)
173
+ ]
174
+ else:
175
+ if self.spatial:
176
+ res = [
177
+ upsample(diffs[kk].sum(dim=1, keepdim=True), out_HW=in0.shape[2:])
178
+ for kk in range(self.L)
179
+ ]
180
+ else:
181
+ res = [
182
+ spatial_average(diffs[kk].sum(dim=1, keepdim=True), keepdim=True)
183
+ for kk in range(self.L)
184
+ ]
185
+
186
+ val = 0
187
+ for l in range(self.L):
188
+ val += res[l]
189
+
190
+ if retPerLayer:
191
+ return (val, res)
192
+ else:
193
+ return val
194
+
195
+
196
+ class ScalingLayer(nn.Module):
197
+ def __init__(self):
198
+ super(ScalingLayer, self).__init__()
199
+ self.register_buffer(
200
+ "shift", torch.Tensor([-0.030, -0.088, -0.188])[None, :, None, None]
201
+ )
202
+ self.register_buffer(
203
+ "scale", torch.Tensor([0.458, 0.448, 0.450])[None, :, None, None]
204
+ )
205
+
206
+ def forward(self, inp):
207
+ return (inp - self.shift) / self.scale
208
+
209
+
210
+ class NetLinLayer(nn.Module):
211
+ """A single linear layer which does a 1x1 conv"""
212
+
213
+ def __init__(self, chn_in, chn_out=1, use_dropout=False):
214
+ super(NetLinLayer, self).__init__()
215
+
216
+ layers = (
217
+ [
218
+ nn.Dropout(),
219
+ ]
220
+ if (use_dropout)
221
+ else []
222
+ )
223
+ layers += [
224
+ nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False),
225
+ ]
226
+ self.model = nn.Sequential(*layers)
227
+
228
+ def forward(self, x):
229
+ return self.model(x)
230
+
231
+
232
+ class Dist2LogitLayer(nn.Module):
233
+ """takes 2 distances, puts through fc layers, spits out value between [0,1] (if use_sigmoid is True)"""
234
+
235
+ def __init__(self, chn_mid=32, use_sigmoid=True):
236
+ super(Dist2LogitLayer, self).__init__()
237
+
238
+ layers = [
239
+ nn.Conv2d(5, chn_mid, 1, stride=1, padding=0, bias=True),
240
+ ]
241
+ layers += [
242
+ nn.LeakyReLU(0.2, True),
243
+ ]
244
+ layers += [
245
+ nn.Conv2d(chn_mid, chn_mid, 1, stride=1, padding=0, bias=True),
246
+ ]
247
+ layers += [
248
+ nn.LeakyReLU(0.2, True),
249
+ ]
250
+ layers += [
251
+ nn.Conv2d(chn_mid, 1, 1, stride=1, padding=0, bias=True),
252
+ ]
253
+ if use_sigmoid:
254
+ layers += [
255
+ nn.Sigmoid(),
256
+ ]
257
+ self.model = nn.Sequential(*layers)
258
+
259
+ def forward(self, d0, d1, eps=0.1):
260
+ return self.model.forward(
261
+ torch.cat((d0, d1, d0 - d1, d0 / (d1 + eps), d1 / (d0 + eps)), dim=1)
262
+ )
263
+
264
+
265
+ class BCERankingLoss(nn.Module):
266
+ def __init__(self, chn_mid=32):
267
+ super(BCERankingLoss, self).__init__()
268
+ self.net = Dist2LogitLayer(chn_mid=chn_mid)
269
+ # self.parameters = list(self.net.parameters())
270
+ self.loss = torch.nn.BCELoss()
271
+
272
+ def forward(self, d0, d1, judge):
273
+ per = (judge + 1.0) / 2.0
274
+ self.logit = self.net.forward(d0, d1)
275
+ return self.loss(self.logit, per)
276
+
277
+
278
+ # L2, DSSIM metrics
279
+ class FakeNet(nn.Module):
280
+ def __init__(self, use_gpu=True, colorspace="Lab"):
281
+ super(FakeNet, self).__init__()
282
+ self.use_gpu = use_gpu
283
+ self.colorspace = colorspace
284
+
285
+
286
+ class L2(FakeNet):
287
+ def forward(self, in0, in1, retPerLayer=None):
288
+ assert in0.size()[0] == 1 # currently only supports batchSize 1
289
+
290
+ if self.colorspace == "RGB":
291
+ (N, C, X, Y) = in0.size()
292
+ value = torch.mean(
293
+ torch.mean(
294
+ torch.mean((in0 - in1) ** 2, dim=1).view(N, 1, X, Y), dim=2
295
+ ).view(N, 1, 1, Y),
296
+ dim=3,
297
+ ).view(N)
298
+ return value
299
+ elif self.colorspace == "Lab":
300
+ value = l2(
301
+ tensor2np(tensor2tensorlab(in0.data, to_norm=False)),
302
+ tensor2np(tensor2tensorlab(in1.data, to_norm=False)),
303
+ range=100.0,
304
+ ).astype("float")
305
+ ret_var = Variable(torch.Tensor((value,)))
306
+ if self.use_gpu:
307
+ ret_var = ret_var.cuda()
308
+ return ret_var
309
+
310
+
311
+ class DSSIM(FakeNet):
312
+ def forward(self, in0, in1, retPerLayer=None):
313
+ assert in0.size()[0] == 1 # currently only supports batchSize 1
314
+
315
+ if self.colorspace == "RGB":
316
+ value = dssim(
317
+ 1.0 * tensor2im(in0.data),
318
+ 1.0 * tensor2im(in1.data),
319
+ range=255.0,
320
+ ).astype("float")
321
+ elif self.colorspace == "Lab":
322
+ value = dssim(
323
+ tensor2np(tensor2tensorlab(in0.data, to_norm=False)),
324
+ tensor2np(tensor2tensorlab(in1.data, to_norm=False)),
325
+ range=100.0,
326
+ ).astype("float")
327
+ ret_var = Variable(torch.Tensor((value,)))
328
+ if self.use_gpu:
329
+ ret_var = ret_var.cuda()
330
+ return ret_var
331
+
332
+
333
+ def print_network(net):
334
+ num_params = 0
335
+ for param in net.parameters():
336
+ num_params += param.numel()
337
+ print("Network", net)
338
+ print("Total number of parameters: %d" % num_params)
kit/metrics/lpips/pretrained_networks.py ADDED
@@ -0,0 +1,188 @@
1
+ from collections import namedtuple
2
+ import torch
3
+ from torchvision import models as tv
4
+
5
+
6
+ class squeezenet(torch.nn.Module):
7
+ def __init__(self, requires_grad=False, pretrained=True):
8
+ super(squeezenet, self).__init__()
9
+ pretrained_features = tv.squeezenet1_1(pretrained=pretrained).features
10
+ self.slice1 = torch.nn.Sequential()
11
+ self.slice2 = torch.nn.Sequential()
12
+ self.slice3 = torch.nn.Sequential()
13
+ self.slice4 = torch.nn.Sequential()
14
+ self.slice5 = torch.nn.Sequential()
15
+ self.slice6 = torch.nn.Sequential()
16
+ self.slice7 = torch.nn.Sequential()
17
+ self.N_slices = 7
18
+ for x in range(2):
19
+ self.slice1.add_module(str(x), pretrained_features[x])
20
+ for x in range(2, 5):
21
+ self.slice2.add_module(str(x), pretrained_features[x])
22
+ for x in range(5, 8):
23
+ self.slice3.add_module(str(x), pretrained_features[x])
24
+ for x in range(8, 10):
25
+ self.slice4.add_module(str(x), pretrained_features[x])
26
+ for x in range(10, 11):
27
+ self.slice5.add_module(str(x), pretrained_features[x])
28
+ for x in range(11, 12):
29
+ self.slice6.add_module(str(x), pretrained_features[x])
30
+ for x in range(12, 13):
31
+ self.slice7.add_module(str(x), pretrained_features[x])
32
+ if not requires_grad:
33
+ for param in self.parameters():
34
+ param.requires_grad = False
35
+
36
+ def forward(self, X):
37
+ h = self.slice1(X)
38
+ h_relu1 = h
39
+ h = self.slice2(h)
40
+ h_relu2 = h
41
+ h = self.slice3(h)
42
+ h_relu3 = h
43
+ h = self.slice4(h)
44
+ h_relu4 = h
45
+ h = self.slice5(h)
46
+ h_relu5 = h
47
+ h = self.slice6(h)
48
+ h_relu6 = h
49
+ h = self.slice7(h)
50
+ h_relu7 = h
51
+ vgg_outputs = namedtuple(
52
+ "SqueezeOutputs",
53
+ ["relu1", "relu2", "relu3", "relu4", "relu5", "relu6", "relu7"],
54
+ )
55
+ out = vgg_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5, h_relu6, h_relu7)
56
+
57
+ return out
58
+
59
+
60
+ class alexnet(torch.nn.Module):
61
+ def __init__(self, requires_grad=False, pretrained=True):
62
+ super(alexnet, self).__init__()
63
+ alexnet_pretrained_features = tv.alexnet(pretrained=pretrained).features
64
+ self.slice1 = torch.nn.Sequential()
65
+ self.slice2 = torch.nn.Sequential()
66
+ self.slice3 = torch.nn.Sequential()
67
+ self.slice4 = torch.nn.Sequential()
68
+ self.slice5 = torch.nn.Sequential()
69
+ self.N_slices = 5
70
+ for x in range(2):
71
+ self.slice1.add_module(str(x), alexnet_pretrained_features[x])
72
+ for x in range(2, 5):
73
+ self.slice2.add_module(str(x), alexnet_pretrained_features[x])
74
+ for x in range(5, 8):
75
+ self.slice3.add_module(str(x), alexnet_pretrained_features[x])
76
+ for x in range(8, 10):
77
+ self.slice4.add_module(str(x), alexnet_pretrained_features[x])
78
+ for x in range(10, 12):
79
+ self.slice5.add_module(str(x), alexnet_pretrained_features[x])
80
+ if not requires_grad:
81
+ for param in self.parameters():
82
+ param.requires_grad = False
83
+
84
+ def forward(self, X):
85
+ h = self.slice1(X)
86
+ h_relu1 = h
87
+ h = self.slice2(h)
88
+ h_relu2 = h
89
+ h = self.slice3(h)
90
+ h_relu3 = h
91
+ h = self.slice4(h)
92
+ h_relu4 = h
93
+ h = self.slice5(h)
94
+ h_relu5 = h
95
+ alexnet_outputs = namedtuple(
96
+ "AlexnetOutputs", ["relu1", "relu2", "relu3", "relu4", "relu5"]
97
+ )
98
+ out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)
99
+
100
+ return out
101
+
102
+
103
+ class vgg16(torch.nn.Module):
104
+ def __init__(self, requires_grad=False, pretrained=True):
105
+ super(vgg16, self).__init__()
106
+ vgg_pretrained_features = tv.vgg16(pretrained=pretrained).features
107
+ self.slice1 = torch.nn.Sequential()
108
+ self.slice2 = torch.nn.Sequential()
109
+ self.slice3 = torch.nn.Sequential()
110
+ self.slice4 = torch.nn.Sequential()
111
+ self.slice5 = torch.nn.Sequential()
112
+ self.N_slices = 5
113
+ for x in range(4):
114
+ self.slice1.add_module(str(x), vgg_pretrained_features[x])
115
+ for x in range(4, 9):
116
+ self.slice2.add_module(str(x), vgg_pretrained_features[x])
117
+ for x in range(9, 16):
118
+ self.slice3.add_module(str(x), vgg_pretrained_features[x])
119
+ for x in range(16, 23):
120
+ self.slice4.add_module(str(x), vgg_pretrained_features[x])
121
+ for x in range(23, 30):
122
+ self.slice5.add_module(str(x), vgg_pretrained_features[x])
123
+ if not requires_grad:
124
+ for param in self.parameters():
125
+ param.requires_grad = False
126
+
127
+ def forward(self, X):
128
+ h = self.slice1(X)
129
+ h_relu1_2 = h
130
+ h = self.slice2(h)
131
+ h_relu2_2 = h
132
+ h = self.slice3(h)
133
+ h_relu3_3 = h
134
+ h = self.slice4(h)
135
+ h_relu4_3 = h
136
+ h = self.slice5(h)
137
+ h_relu5_3 = h
138
+ vgg_outputs = namedtuple(
139
+ "VggOutputs", ["relu1_2", "relu2_2", "relu3_3", "relu4_3", "relu5_3"]
140
+ )
141
+ out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
142
+
143
+ return out
144
+
145
+
146
+ class resnet(torch.nn.Module):
147
+ def __init__(self, requires_grad=False, pretrained=True, num=18):
148
+ super(resnet, self).__init__()
149
+ if num == 18:
150
+ self.net = tv.resnet18(pretrained=pretrained)
151
+ elif num == 34:
152
+ self.net = tv.resnet34(pretrained=pretrained)
153
+ elif num == 50:
154
+ self.net = tv.resnet50(pretrained=pretrained)
155
+ elif num == 101:
156
+ self.net = tv.resnet101(pretrained=pretrained)
157
+ elif num == 152:
158
+ self.net = tv.resnet152(pretrained=pretrained)
159
+ self.N_slices = 5
160
+
161
+ self.conv1 = self.net.conv1
162
+ self.bn1 = self.net.bn1
163
+ self.relu = self.net.relu
164
+ self.maxpool = self.net.maxpool
165
+ self.layer1 = self.net.layer1
166
+ self.layer2 = self.net.layer2
167
+ self.layer3 = self.net.layer3
168
+ self.layer4 = self.net.layer4
169
+
170
+ def forward(self, X):
171
+ h = self.conv1(X)
172
+ h = self.bn1(h)
173
+ h = self.relu(h)
174
+ h_relu1 = h
175
+ h = self.maxpool(h)
176
+ h = self.layer1(h)
177
+ h_conv2 = h
178
+ h = self.layer2(h)
179
+ h_conv3 = h
180
+ h = self.layer3(h)
181
+ h_conv4 = h
182
+ h = self.layer4(h)
183
+ h_conv5 = h
184
+
185
+ outputs = namedtuple("Outputs", ["relu1", "conv2", "conv3", "conv4", "conv5"])
186
+ out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)
187
+
188
+ return out
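
Illustrative sketch (not in the commit): the wrappers above return named tuples of intermediate activations. The snippet below lists the five VGG16 feature maps that LPIPS compares, using random weights so the shape check needs no download; the module path is an assumption.

import torch
from kit.metrics.lpips.pretrained_networks import vgg16  # assumed module path

net = vgg16(requires_grad=False, pretrained=False).eval()  # random weights: shape check only
x = torch.randn(1, 3, 64, 64)
with torch.no_grad():
    feats = net(x)
for name, f in zip(feats._fields, feats):
    print(name, tuple(f.shape))  # relu1_2 ... relu5_3, progressively smaller feature maps
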
kit/metrics/lpips/trainer.py ADDED
@@ -0,0 +1,314 @@
1
+ from __future__ import absolute_import
2
+ import numpy as np
3
+ import torch
4
+ from collections import OrderedDict
5
+ from torch.autograd import Variable
6
+ from scipy.ndimage import zoom
7
+ from tqdm import tqdm
8
+ import os
9
+ from .lpips import LPIPS, L2, DSSIM, BCERankingLoss
10
+ from .utils import tensor2im, voc_ap
11
+
12
+
13
+ class Trainer:
14
+ def name(self):
15
+ return self.model_name
16
+
17
+ def initialize(
18
+ self,
19
+ model="lpips",
20
+ net="alex",
21
+ colorspace="Lab",
22
+ pnet_rand=False,
23
+ pnet_tune=False,
24
+ model_path=None,
25
+ use_gpu=True,
26
+ printNet=False,
27
+ spatial=False,
28
+ is_train=False,
29
+ lr=0.0001,
30
+ beta1=0.5,
31
+ version="0.1",
32
+ gpu_ids=[0],
33
+ ):
34
+ """
35
+ INPUTS
36
+ model - ['lpips'] for linearly calibrated network
37
+ ['baseline'] for off-the-shelf network
38
+ ['L2'] for L2 distance in Lab colorspace
39
+ ['SSIM'] for ssim in RGB colorspace
40
+ net - ['squeeze','alex','vgg']
41
+ model_path - if None, will look in weights/[NET_NAME].pth
42
+ colorspace - ['Lab','RGB'] colorspace to use for L2 and SSIM
43
+ use_gpu - bool - whether or not to use a GPU
44
+ printNet - bool - whether or not to print network architecture out
45
+ spatial - bool - whether to output an array containing varying distances across spatial dimensions
46
+ is_train - bool - [True] for training mode
47
+ lr - float - initial learning rate
48
+ beta1 - float - initial momentum term for adam
49
+ version - 0.1 for latest, 0.0 was original (with a bug)
50
+ gpu_ids - int array - [0] by default, gpus to use
51
+ """
52
+ self.use_gpu = use_gpu
53
+ self.gpu_ids = gpu_ids
54
+ self.model = model
55
+ self.net = net
56
+ self.is_train = is_train
57
+ self.spatial = spatial
58
+ self.model_name = "%s [%s]" % (model, net)
59
+
60
+ if self.model == "lpips": # pretrained net + linear layer
61
+ self.net = LPIPS(
62
+ pretrained=not is_train,
63
+ net=net,
64
+ version=version,
65
+ lpips=True,
66
+ spatial=spatial,
67
+ pnet_rand=pnet_rand,
68
+ pnet_tune=pnet_tune,
69
+ use_dropout=True,
70
+ model_path=model_path,
71
+ eval_mode=False,
72
+ )
73
+ elif self.model == "baseline": # pretrained network
74
+ self.net = LPIPS(pnet_rand=pnet_rand, net=net, lpips=False)
75
+ elif self.model in ["L2", "l2"]:
76
+ self.net = L2(
77
+ use_gpu=use_gpu, colorspace=colorspace
78
+ ) # not really a network, only for testing
79
+ self.model_name = "L2"
80
+ elif self.model in ["DSSIM", "dssim", "SSIM", "ssim"]:
81
+ self.net = DSSIM(use_gpu=use_gpu, colorspace=colorspace)
82
+ self.model_name = "SSIM"
83
+ else:
84
+ raise ValueError("Model [%s] not recognized." % self.model)
85
+
86
+ self.parameters = list(self.net.parameters())
87
+
88
+ if self.is_train: # training mode
89
+ # extra network on top to go from distances (d0,d1) => predicted human judgment (h*)
90
+ self.rankLoss = BCERankingLoss()
91
+ self.parameters += list(self.rankLoss.net.parameters())
92
+ self.lr = lr
93
+ self.old_lr = lr
94
+ self.optimizer_net = torch.optim.Adam(
95
+ self.parameters, lr=lr, betas=(beta1, 0.999)
96
+ )
97
+ else: # test mode
98
+ self.net.eval()
99
+
100
+ if use_gpu:
101
+ self.net.to(gpu_ids[0])
102
+ self.net = torch.nn.DataParallel(self.net, device_ids=gpu_ids)
103
+ if self.is_train:
104
+ self.rankLoss = self.rankLoss.to(
105
+ device=gpu_ids[0]
106
+ ) # just put this on GPU0
107
+
108
+ if printNet:
109
+ pass
110
+
111
+ def forward(self, in0, in1, retPerLayer=False):
112
+ """Function computes the distance between image patches in0 and in1
113
+ INPUTS
114
+ in0, in1 - torch.Tensor object of shape Nx3xXxY - image patch scaled to [-1,1]
115
+ OUTPUT
116
+ computed distances between in0 and in1
117
+ """
118
+
119
+ return self.net.forward(in0, in1, retPerLayer=retPerLayer)
120
+
121
+ # ***** TRAINING FUNCTIONS *****
122
+ def optimize_parameters(self):
123
+ self.forward_train()
124
+ self.optimizer_net.zero_grad()
125
+ self.backward_train()
126
+ self.optimizer_net.step()
127
+ self.clamp_weights()
128
+
129
+ def clamp_weights(self):
130
+ for module in self.net.modules():
131
+ if hasattr(module, "weight") and module.kernel_size == (1, 1):
132
+ module.weight.data = torch.clamp(module.weight.data, min=0)
133
+
134
+ def set_input(self, data):
135
+ self.input_ref = data["ref"]
136
+ self.input_p0 = data["p0"]
137
+ self.input_p1 = data["p1"]
138
+ self.input_judge = data["judge"]
139
+
140
+ if self.use_gpu:
141
+ self.input_ref = self.input_ref.to(device=self.gpu_ids[0])
142
+ self.input_p0 = self.input_p0.to(device=self.gpu_ids[0])
143
+ self.input_p1 = self.input_p1.to(device=self.gpu_ids[0])
144
+ self.input_judge = self.input_judge.to(device=self.gpu_ids[0])
145
+
146
+ self.var_ref = Variable(self.input_ref, requires_grad=True)
147
+ self.var_p0 = Variable(self.input_p0, requires_grad=True)
148
+ self.var_p1 = Variable(self.input_p1, requires_grad=True)
149
+
150
+ def forward_train(self): # run forward pass
151
+ self.d0 = self.forward(self.var_ref, self.var_p0)
152
+ self.d1 = self.forward(self.var_ref, self.var_p1)
153
+ self.acc_r = self.compute_accuracy(self.d0, self.d1, self.input_judge)
154
+
155
+ self.var_judge = Variable(1.0 * self.input_judge).view(self.d0.size())
156
+
157
+ self.loss_total = self.rankLoss.forward(
158
+ self.d0, self.d1, self.var_judge * 2.0 - 1.0
159
+ )
160
+
161
+ return self.loss_total
162
+
163
+ def backward_train(self):
164
+ torch.mean(self.loss_total).backward()
165
+
166
+ def compute_accuracy(self, d0, d1, judge):
167
+ """d0, d1 are Variables, judge is a Tensor"""
168
+ d1_lt_d0 = (d1 < d0).cpu().data.numpy().flatten()
169
+ judge_per = judge.cpu().numpy().flatten()
170
+ return d1_lt_d0 * judge_per + (1 - d1_lt_d0) * (1 - judge_per)
171
+
172
+ def get_current_errors(self):
173
+ retDict = OrderedDict(
174
+ [("loss_total", self.loss_total.data.cpu().numpy()), ("acc_r", self.acc_r)]
175
+ )
176
+
177
+ for key in retDict.keys():
178
+ retDict[key] = np.mean(retDict[key])
179
+
180
+ return retDict
181
+
182
+ def get_current_visuals(self):
183
+ zoom_factor = 256 / self.var_ref.data.size()[2]
184
+
185
+ ref_img = tensor2im(self.var_ref.data)
186
+ p0_img = tensor2im(self.var_p0.data)
187
+ p1_img = tensor2im(self.var_p1.data)
188
+
189
+ ref_img_vis = zoom(ref_img, [zoom_factor, zoom_factor, 1], order=0)
190
+ p0_img_vis = zoom(p0_img, [zoom_factor, zoom_factor, 1], order=0)
191
+ p1_img_vis = zoom(p1_img, [zoom_factor, zoom_factor, 1], order=0)
192
+
193
+ return OrderedDict(
194
+ [("ref", ref_img_vis), ("p0", p0_img_vis), ("p1", p1_img_vis)]
195
+ )
196
+
197
+ def save(self, path, label):
198
+ if self.use_gpu:
199
+ self.save_network(self.net.module, path, "", label)
200
+ else:
201
+ self.save_network(self.net, path, "", label)
202
+ self.save_network(self.rankLoss.net, path, "rank", label)
203
+
204
+ # helper saving function that can be used by subclasses
205
+ def save_network(self, network, path, network_label, epoch_label):
206
+ save_filename = "%s_net_%s.pth" % (epoch_label, network_label)
207
+ save_path = os.path.join(path, save_filename)
208
+ torch.save(network.state_dict(), save_path)
209
+
210
+ # helper loading function that can be used by subclasses
211
+ def load_network(self, network, network_label, epoch_label):
212
+ save_filename = "%s_net_%s.pth" % (epoch_label, network_label)
213
+ save_path = os.path.join(self.save_dir, save_filename)
214
+ print("Loading network from %s" % save_path)
215
+ network.load_state_dict(torch.load(save_path))
216
+
217
+ def update_learning_rate(self, nepoch_decay):
218
+ lrd = self.lr / nepoch_decay
219
+ lr = self.old_lr - lrd
220
+
221
+ for param_group in self.optimizer_net.param_groups:
222
+ param_group["lr"] = lr
223
+
224
+ print("update lr [%s] decay: %f -> %f" % (type, self.old_lr, lr))
225
+ self.old_lr = lr
226
+
227
+ def get_image_paths(self):
228
+ return self.image_paths
229
+
230
+ def save_done(self, flag=False):
231
+ np.save(os.path.join(self.save_dir, "done_flag"), flag)
232
+ np.savetxt(
233
+ os.path.join(self.save_dir, "done_flag"),
234
+ [
235
+ flag,
236
+ ],
237
+ fmt="%i",
238
+ )
239
+
240
+
241
+ def score_2afc_dataset(data_loader, func, name=""):
242
+ """Function computes Two Alternative Forced Choice (2AFC) score using
243
+ distance function 'func' in dataset 'data_loader'
244
+ INPUTS
245
+ data_loader - CustomDatasetDataLoader object - contains a TwoAFCDataset inside
246
+ func - callable distance function - calling d=func(in0,in1) should take 2
247
+ pytorch tensors with shape Nx3xXxY, and return numpy array of length N
248
+ OUTPUTS
249
+ [0] - 2AFC score in [0,1], fraction of time func agrees with human evaluators
250
+ [1] - dictionary with following elements
251
+ d0s,d1s - N arrays containing distances between reference patch to perturbed patches
252
+ gts - N array in [0,1], preferred patch selected by human evaluators
253
+ (closer to "0" for left patch p0, "1" for right patch p1,
254
+ "0.6" means 60pct people preferred right patch, 40pct preferred left)
255
+ scores - N array in [0,1], corresponding to what percentage function agreed with humans
256
+ CONSTS
257
+ N - number of test triplets in data_loader
258
+ """
259
+
260
+ d0s = []
261
+ d1s = []
262
+ gts = []
263
+
264
+ for data in tqdm(data_loader.load_data(), desc=name):
265
+ d0s += func(data["ref"], data["p0"]).data.cpu().numpy().flatten().tolist()
266
+ d1s += func(data["ref"], data["p1"]).data.cpu().numpy().flatten().tolist()
267
+ gts += data["judge"].cpu().numpy().flatten().tolist()
268
+
269
+ d0s = np.array(d0s)
270
+ d1s = np.array(d1s)
271
+ gts = np.array(gts)
272
+ scores = (d0s < d1s) * (1.0 - gts) + (d1s < d0s) * gts + (d1s == d0s) * 0.5
273
+
274
+ return (np.mean(scores), dict(d0s=d0s, d1s=d1s, gts=gts, scores=scores))
275
+
276
+
277
+ def score_jnd_dataset(data_loader, func, name=""):
278
+ """Function computes JND score using distance function 'func' in dataset 'data_loader'
279
+ INPUTS
280
+ data_loader - CustomDatasetDataLoader object - contains a JNDDataset inside
281
+ func - callable distance function - calling d=func(in0,in1) should take 2
282
+ pytorch tensors with shape Nx3xXxY, and return pytorch array of length N
283
+ OUTPUTS
284
+ [0] - JND score in [0,1], mAP score (area under precision-recall curve)
285
+ [1] - dictionary with following elements
286
+ ds - N array containing distances between two patches shown to human evaluator
287
+ sames - N array containing fraction of people who thought the two patches were identical
288
+ CONSTS
289
+ N - number of test triplets in data_loader
290
+ """
291
+
292
+ ds = []
293
+ gts = []
294
+
295
+ for data in tqdm(data_loader.load_data(), desc=name):
296
+ ds += func(data["p0"], data["p1"]).data.cpu().numpy().tolist()
297
+ gts += data["same"].cpu().numpy().flatten().tolist()
298
+
299
+ sames = np.array(gts)
300
+ ds = np.array(ds)
301
+
302
+ sorted_inds = np.argsort(ds)
303
+ ds_sorted = ds[sorted_inds]
304
+ sames_sorted = sames[sorted_inds]
305
+
306
+ TPs = np.cumsum(sames_sorted)
307
+ FPs = np.cumsum(1 - sames_sorted)
308
+ FNs = np.sum(sames_sorted) - TPs
309
+
310
+ precs = TPs / (TPs + FPs)
311
+ recs = TPs / (TPs + FNs)
312
+ score = voc_ap(recs, precs)
313
+
314
+ return (score, dict(ds=ds, sames=sames))
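
Usage sketch (not in the commit): score_2afc_dataset only needs an object with a load_data() iterable and a distance callable, so a toy in-memory loader with a plain squared-error distance is enough to exercise it. The stub class and random data below are made up for illustration; the module path is assumed.

import torch
from kit.metrics.lpips.trainer import score_2afc_dataset  # assumed module path

class TinyLoader:                    # hypothetical stand-in for CustomDatasetDataLoader
    def __init__(self, batches):
        self.batches = batches
    def load_data(self):
        return self.batches

def l2_dist(x, y):                   # per-sample mean squared difference, shape (N,)
    return ((x - y) ** 2).flatten(1).mean(dim=1)

batch = {
    "ref": torch.rand(2, 3, 8, 8),
    "p0": torch.rand(2, 3, 8, 8),
    "p1": torch.rand(2, 3, 8, 8),
    "judge": torch.rand(2, 1),
}
score, details = score_2afc_dataset(TinyLoader([batch]), l2_dist, name="toy")
print(score, details["scores"].shape)
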
kit/metrics/lpips/utils.py ADDED
@@ -0,0 +1,137 @@
1
+ from __future__ import absolute_import
2
+ from __future__ import division
3
+ from __future__ import print_function
4
+ import numpy as np
5
+ import torch
6
+
7
+
8
+ def normalize_tensor(in_feat, eps=1e-10):
9
+ norm_factor = torch.sqrt(torch.sum(in_feat**2, dim=1, keepdim=True))
10
+ return in_feat / (norm_factor + eps)
11
+
12
+
13
+ def l2(p0, p1, range=255.0):
14
+ return 0.5 * np.mean((p0 / range - p1 / range) ** 2)
15
+
16
+
17
+ def psnr(p0, p1, peak=255.0):
18
+ return 10 * np.log10(peak**2 / np.mean((1.0 * p0 - 1.0 * p1) ** 2))
19
+
20
+
21
+ def dssim(p0, p1, range=255.0):
22
+ from skimage.metrics import structural_similarity as compare_ssim  # moved from skimage.measure in scikit-image >= 0.18
23
+
24
+ return (1 - compare_ssim(p0, p1, data_range=range, channel_axis=2)) / 2.0  # channel_axis replaces the removed multichannel flag
25
+
26
+
27
+ def tensor2np(tensor_obj):
28
+ # change dimension of a tensor object into a numpy array
29
+ return tensor_obj[0].cpu().float().numpy().transpose((1, 2, 0))
30
+
31
+
32
+ def np2tensor(np_obj):
33
+ # change dimension of np array into tensor array
34
+ return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
35
+
36
+
37
+ def tensor2tensorlab(image_tensor, to_norm=True, mc_only=False):
38
+ # image tensor to lab tensor
39
+ from skimage import color
40
+
41
+ img = tensor2im(image_tensor)
42
+ img_lab = color.rgb2lab(img)
43
+ if mc_only:
44
+ img_lab[:, :, 0] = img_lab[:, :, 0] - 50
45
+ if to_norm and not mc_only:
46
+ img_lab[:, :, 0] = img_lab[:, :, 0] - 50
47
+ img_lab = img_lab / 100.0
48
+
49
+ return np2tensor(img_lab)
50
+
51
+
52
+ def tensorlab2tensor(lab_tensor, return_inbnd=False):
53
+ from skimage import color
54
+ import warnings
55
+
56
+ warnings.filterwarnings("ignore")
57
+
58
+ lab = tensor2np(lab_tensor) * 100.0
59
+ lab[:, :, 0] = lab[:, :, 0] + 50
60
+
61
+ rgb_back = 255.0 * np.clip(color.lab2rgb(lab.astype("float")), 0, 1)
62
+ if return_inbnd:
63
+ # convert back to lab, see if we match
64
+ lab_back = color.rgb2lab(rgb_back.astype("uint8"))
65
+ mask = 1.0 * np.isclose(lab_back, lab, atol=2.0)
66
+ mask = np2tensor(np.prod(mask, axis=2)[:, :, np.newaxis])
67
+ return (im2tensor(rgb_back), mask)
68
+ else:
69
+ return im2tensor(rgb_back)
70
+
71
+
72
+ def load_image(path):
73
+ if (
74
+ path[-3:] == "bmp"
75
+ or path[-3:] == "jpg"
76
+ or path[-3:] == "png"
77
+ or path[-4:] == "jpeg"
78
+ ):
79
+ import cv2
80
+
81
+ return cv2.imread(path)[:, :, ::-1]
82
+ else:
83
+ import matplotlib.pyplot as plt
84
+
85
+ img = (255 * plt.imread(path)[:, :, :3]).astype("uint8")
86
+
87
+ return img
88
+
89
+
90
+ def tensor2im(image_tensor, imtype=np.uint8, cent=1.0, factor=255.0 / 2.0):
91
+ image_numpy = image_tensor[0].cpu().float().numpy()
92
+ image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
93
+ return image_numpy.astype(imtype)
94
+
95
+
96
+ def im2tensor(image, imtype=np.uint8, cent=1.0, factor=255.0 / 2.0):
97
+ return torch.Tensor(
98
+ (image / factor - cent)[:, :, :, np.newaxis].transpose((3, 2, 0, 1))
99
+ )
100
+
101
+
102
+ def tensor2vec(vector_tensor):
103
+ return vector_tensor.data.cpu().numpy()[:, :, 0, 0]
104
+
105
+
106
+ def voc_ap(rec, prec, use_07_metric=False):
107
+ """ap = voc_ap(rec, prec, [use_07_metric])
108
+ Compute VOC AP given precision and recall.
109
+ If use_07_metric is true, uses the
110
+ VOC 07 11 point method (default:False).
111
+ """
112
+ if use_07_metric:
113
+ # 11 point metric
114
+ ap = 0.0
115
+ for t in np.arange(0.0, 1.1, 0.1):
116
+ if np.sum(rec >= t) == 0:
117
+ p = 0
118
+ else:
119
+ p = np.max(prec[rec >= t])
120
+ ap = ap + p / 11.0
121
+ else:
122
+ # correct AP calculation
123
+ # first append sentinel values at the end
124
+ mrec = np.concatenate(([0.0], rec, [1.0]))
125
+ mpre = np.concatenate(([0.0], prec, [0.0]))
126
+
127
+ # compute the precision envelope
128
+ for i in range(mpre.size - 1, 0, -1):
129
+ mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
130
+
131
+ # to calculate area under PR curve, look for points
132
+ # where X axis (recall) changes value
133
+ i = np.where(mrec[1:] != mrec[:-1])[0]
134
+
135
+ # and sum (\Delta recall) * prec
136
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
137
+ return ap
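
Illustrative sketch (not in the commit): voc_ap applied to a tiny hand-written precision/recall curve, comparing the exact area computation with the 11-point VOC 2007 approximation; the module path is assumed.

import numpy as np
from kit.metrics.lpips.utils import voc_ap  # assumed module path

rec = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
prec = np.array([1.0, 1.0, 0.8, 0.6, 0.5])
print(voc_ap(rec, prec))                      # exact area under the interpolated PR curve
print(voc_ap(rec, prec, use_07_metric=True))  # coarser 11-point approximation
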
kit/metrics/lpips/weights/v0.0/alex.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18720f55913d0af89042f13faa7e536a6ce1444a0914e6db9461355ece1e8cd5
3
+ size 5455
kit/metrics/lpips/weights/v0.0/squeeze.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27abd3a0145541baa50990817df58d3759c3f8154949f42af3b59b4e042d0bf
3
+ size 10057
kit/metrics/lpips/weights/v0.0/vgg.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e4236260c3dd988fc79d2a48d645d885afcbb21f9fd595e6744cf7419b582c
3
+ size 6735
kit/metrics/lpips/weights/v0.1/alex.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df73285e35b22355a2df87cdb6b70b343713b667eddbda73e1977e0c860835c0
3
+ size 6009
kit/metrics/lpips/weights/v0.1/squeeze.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a5350f23600cb79923ce65bb07cbf57dca461329894153e05a1346bd531cf76
3
+ size 10811
kit/metrics/lpips/weights/v0.1/vgg.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a78928a0af1e5f0fcb1f3b9e8f8c3a2a5a3de244d830ad5c1feddc79b8432868
3
+ size 7289
kit/metrics/perceptual.py ADDED
@@ -0,0 +1,93 @@
1
+ import torch
2
+ from PIL import Image
3
+ from torchvision import transforms
4
+ from .lpips import LPIPS
5
+
6
+
7
+ # Normalize image tensors
8
+ def normalize_tensor(images, norm_type):
9
+ assert norm_type in ["imagenet", "naive"]
10
+ # Two possible normalization conventions
11
+ if norm_type == "imagenet":
12
+ mean = [0.485, 0.456, 0.406]
13
+ std = [0.229, 0.224, 0.225]
14
+ normalize = transforms.Normalize(mean, std)
15
+ elif norm_type == "naive":
16
+ mean = [0.5, 0.5, 0.5]
17
+ std = [0.5, 0.5, 0.5]
18
+ normalize = transforms.Normalize(mean, std)
19
+ else:
20
+ assert False
21
+ return torch.stack([normalize(image) for image in images])
22
+
23
+
24
+ def to_tensor(images, norm_type="naive"):
25
+ assert isinstance(images, list) and all(
26
+ [isinstance(image, Image.Image) for image in images]
27
+ )
28
+ images = torch.stack([transforms.ToTensor()(image) for image in images])
29
+ if norm_type is not None:
30
+ images = normalize_tensor(images, norm_type)
31
+ return images
32
+
33
+
34
+ def load_perceptual_models(metric_name, mode, device=torch.device("cuda")):
35
+ assert metric_name in ["lpips"]
36
+ if metric_name == "lpips":
37
+ assert mode in ["vgg", "alex"]
38
+ perceptual_model = LPIPS(net=mode).to(device)
39
+ else:
40
+ assert False
41
+ return perceptual_model
42
+
43
+
44
+ # Compute metric between two images
45
+ def compute_metric(image1, image2, perceptual_model, device=torch.device("cuda")):
46
+ assert isinstance(image1, Image.Image) and isinstance(image2, Image.Image)
47
+ image1_tensor = to_tensor([image1]).to(device)
48
+ image2_tensor = to_tensor([image2]).to(device)
49
+ return perceptual_model(image1_tensor, image2_tensor).cpu().item()
50
+
51
+
52
+ # Compute LPIPS distance between two images
53
+ def compute_lpips(image1, image2, mode="alex", device=torch.device("cuda")):
54
+ perceptual_model = load_perceptual_models("lpips", mode, device)
55
+ return compute_metric(image1, image2, perceptual_model, device)
56
+
57
+
58
+ # Compute metrics between pairs of images
59
+ def compute_perceptual_metric_repeated(
60
+ images1,
61
+ images2,
62
+ metric_name,
63
+ mode,
64
+ model,
65
+ device,
66
+ ):
67
+ # Accept list of PIL images
68
+ assert isinstance(images1, list) and isinstance(images1[0], Image.Image)
69
+ assert isinstance(images2, list) and isinstance(images2[0], Image.Image)
70
+ assert len(images1) == len(images2)
71
+ if model is None:
72
+ model = load_perceptual_models(metric_name, mode).to(device)
73
+ return (
74
+ model(to_tensor(images1).to(device), to_tensor(images2).to(device))
75
+ .detach()
76
+ .cpu()
77
+ .numpy()
78
+ .flatten()
79
+ .tolist()
80
+ )
81
+
82
+
83
+ # Compute LPIPS distance between pairs of images
84
+ def compute_lpips_repeated(
85
+ images1,
86
+ images2,
87
+ mode="alex",
88
+ model=None,
89
+ device=torch.device("cuda"),
90
+ ):
91
+ return compute_perceptual_metric_repeated(
92
+ images1, images2, "lpips", mode, model, device
93
+ )
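
Usage sketch (not in the commit): computing an LPIPS distance between two PIL images with the helpers above. The solid-colour test images are placeholders, and the explicit device fallback is added only because the module defaults assume CUDA.

import torch
from PIL import Image
from kit.metrics.perceptual import compute_lpips  # assumed module path

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
img_a = Image.new("RGB", (256, 256), color=(200, 30, 30))
img_b = Image.new("RGB", (256, 256), color=(195, 35, 35))
print(compute_lpips(img_a, img_b, mode="alex", device=device))  # near 0 for near-identical images
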
kit/metrics/prompt.py ADDED
@@ -0,0 +1,39 @@
1
+ import torch
2
+ from transformers import GPT2LMHeadModel, GPT2TokenizerFast
3
+
4
+
5
+ # Load GPT-2 large model and tokenizer
6
+ def load_perplexity_model_and_tokenizer():
7
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
8
+ ppl_model = GPT2LMHeadModel.from_pretrained("gpt2-large").to(device)
9
+ ppl_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2-large")
10
+ return ppl_model, ppl_tokenizer
11
+
12
+
13
+ # Compute perplexity for a single prompt
14
+ def compute_prompt_perplexity(prompt, models, stride=512):
15
+ assert isinstance(prompt, str)
16
+ assert isinstance(models, tuple) and len(models) == 2
17
+ ppl_model, ppl_tokenizer = models
18
+ encodings = ppl_tokenizer(prompt, return_tensors="pt")
19
+ max_length = ppl_model.config.n_positions
20
+ seq_len = encodings.input_ids.size(1)
21
+ nlls = []
22
+ prev_end_loc = 0
23
+ for begin_loc in range(0, seq_len, stride):
24
+ end_loc = min(begin_loc + max_length, seq_len)
25
+ trg_len = end_loc - prev_end_loc # may be different from stride on last loop
26
+ input_ids = encodings.input_ids[:, begin_loc:end_loc].to(
27
+ next(ppl_model.parameters()).device
28
+ )
29
+ target_ids = input_ids.clone()
30
+ target_ids[:, :-trg_len] = -100
31
+ with torch.no_grad():
32
+ outputs = ppl_model(input_ids, labels=target_ids)
33
+ neg_log_likelihood = outputs.loss
34
+ nlls.append(neg_log_likelihood)
35
+ prev_end_loc = end_loc
36
+ if end_loc == seq_len:
37
+ break
38
+ ppl = torch.exp(torch.stack(nlls).mean()).item()
39
+ return ppl
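
Usage sketch (not in the commit): scoring a single text-to-image prompt with the sliding-window perplexity above. gpt2-large is downloaded on first use; the example prompt is arbitrary and the module path is assumed.

from kit.metrics.prompt import (  # assumed module path
    load_perplexity_model_and_tokenizer,
    compute_prompt_perplexity,
)

models = load_perplexity_model_and_tokenizer()
ppl = compute_prompt_perplexity("a watercolor painting of a fox in the snow", models)
print(ppl)  # lower perplexity indicates a more natural-sounding prompt
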