Spaces: Build error

committed on
Commit · 783053f
Parent(s): 036d892

adding app to hugging face

Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .vscode/settings.json +14 -0
- app.py +122 -0
- data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/config.json +32 -0
- data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/optimizer.pt +3 -0
- data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/pytorch_model.bin +3 -0
- data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/rng_state.pth +3 -0
- data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scaler.pt +3 -0
- data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scheduler.pt +3 -0
- data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/trainer_state.json +64 -0
- data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/training_args.bin +3 -0
- fake-face-detection/fake_face_detection.egg-info/PKG-INFO +6 -0
- fake-face-detection/fake_face_detection.egg-info/SOURCES.txt +12 -0
- fake-face-detection/fake_face_detection.egg-info/dependency_links.txt +1 -0
- fake-face-detection/fake_face_detection.egg-info/top_level.txt +1 -0
- fake-face-detection/fake_face_detection/__init__.py +0 -0
- fake-face-detection/fake_face_detection/__pycache__/__init__.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/data/__init__.py +0 -0
- fake-face-detection/fake_face_detection/data/__pycache__/__init__.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/data/__pycache__/collator.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/data/__pycache__/fake_face_dataset.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/data/collator.py +33 -0
- fake-face-detection/fake_face_detection/data/fake_face_dataset.py +62 -0
- fake-face-detection/fake_face_detection/metrics/__init__.py +0 -0
- fake-face-detection/fake_face_detection/metrics/__pycache__/__init__.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/metrics/__pycache__/compute_metrics.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/metrics/__pycache__/make_predictions.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/metrics/compute_metrics.py +33 -0
- fake-face-detection/fake_face_detection/metrics/make_predictions.py +147 -0
- fake-face-detection/fake_face_detection/optimization/__init__.py +0 -0
- fake-face-detection/fake_face_detection/optimization/__pycache__/__init__.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/optimization/__pycache__/bayesian_optimization.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/optimization/__pycache__/fake_face_bayesian_optimization.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/optimization/bayesian_optimization.py +78 -0
- fake-face-detection/fake_face_detection/optimization/fake_face_bayesian_optimization.py +165 -0
- fake-face-detection/fake_face_detection/trainers/__init__.py +0 -0
- fake-face-detection/fake_face_detection/trainers/__pycache__/__init__.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/trainers/__pycache__/custom_trainer.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/trainers/custom_trainer.py +27 -0
- fake-face-detection/fake_face_detection/trainers/custom_trainer_v1.txt +0 -0
- fake-face-detection/fake_face_detection/trainers/search_train.py +73 -0
- fake-face-detection/fake_face_detection/utils/__pycache__/acquisitions.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/utils/__pycache__/compute_weights.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/utils/__pycache__/generation.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/utils/__pycache__/sampling.cpython-310.pyc +0 -0
- fake-face-detection/fake_face_detection/utils/acquisitions.py +36 -0
- fake-face-detection/fake_face_detection/utils/compute_weights.py +21 -0
- fake-face-detection/fake_face_detection/utils/display_pil.py +43 -0
- fake-face-detection/fake_face_detection/utils/downscale_image.py +46 -0
- fake-face-detection/fake_face_detection/utils/generation.py +28 -0
- fake-face-detection/fake_face_detection/utils/get_patches.py +35 -0
.vscode/settings.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "python.defaultInterpreterPath": "C:\\Users\\Oumar Kane\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\pytorch1-HleOW5am-py3.10\\Scripts\\python.exe",
+    "python.terminal.activateEnvironment": true,
+    "terminal.integrated.defaultProfile.windows": "Command Prompt",
+    "files.autoSave": "afterDelay",
+    "files.autoSaveDelay": 1000,
+    "cSpell.words": [
+        "hyperparameters",
+        "photoshed",
+        "photoshep",
+        "photoshop",
+        "photoshopped"
+    ]
+}
app.py
ADDED
@@ -0,0 +1,122 @@
+from transformers import ViTForImageClassification, ViTFeatureExtractor
+from fake_face_detection.metrics.make_predictions import get_attention
+from torchvision import transforms
+import streamlit as st
+from PIL import Image
+import numpy as np
+import pickle
+import torch
+import cv2
+
+# set the color of the header
+def header(text):
+    st.markdown(f"<h1 style = 'color: #4B4453; text-align: center'>{text}</h1>", unsafe_allow_html=True)
+    st.markdown("""---""")
+
+# initialize the size
+size = (224, 224)
+
+# let us add a header
+header("FAKE AND REAL FACE DETECTION")
+
+# let us add an expander to write some description of the application
+expander = st.expander('Description', expanded=True)
+
+with expander:
+    st.write('''This is a long text lorem ipsum dolor''')
+
+# let us initialize three columns
+left, mid, right = st.columns(3)
+
+# the following function will load the model (must be in cache)
+@st.cache_resource
+def get_model():
+
+    # let us load the image characteristics
+    with open('data/extractions/fake_real_dict.txt', 'rb') as f:
+
+        depick = pickle.Unpickler(f)
+
+        characs = depick.load()
+
+    # define the model name
+    model_name = 'google/vit-base-patch16-224-in21k'
+
+    # recuperate the model (forward slashes so the checkpoint path also resolves on Linux)
+    model = ViTForImageClassification.from_pretrained(
+        'data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK',
+        num_labels = len(characs['ids']),
+        id2label = {name: key for key, name in characs['ids'].items()},
+        label2id = characs['ids']
+    )
+
+    # recuperate the feature extractor
+    feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
+
+    return model, feature_extractor
+
+# let us add a file uploader
+st.subheader("Choose an image to inspect")
+file = st.file_uploader("", type='jpg')
+
+# if the file is correctly uploaded make the next processes
+if file is not None:
+
+    # convert the file to an opencv image
+    file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
+
+    opencv_image = cv2.imdecode(file_bytes, 1)
+
+    # resize the image
+    opencv_image = cv2.resize(opencv_image, size)
+
+    # let us display the image
+    left.header("Loaded image")
+
+    left.image(opencv_image, channels='BGR')
+
+    left.markdown("""---""")
+
+    if left.button("SUBMIT"):
+
+        # let us convert the image format to 'RGB'
+        image = cv2.cvtColor(opencv_image, cv2.COLOR_BGR2RGB)
+
+        # let us convert from opencv image to pil image
+        image = Image.fromarray(image)
+
+        with torch.no_grad():
+
+            # recuperate the model and the feature extractor
+            model, feature_extractor = get_model()
+
+            # change to evaluation mode
+            _ = model.eval()
+
+            # apply transformation on the image
+            image_ = feature_extractor(image, return_tensors = 'pt')
+
+            # recuperate output from the model
+            outputs = model(image_['pixel_values'], output_attentions = True)
+
+            # recuperate the predictions
+            predictions = torch.argmax(outputs.logits, axis = -1)
+
+            # write the prediction to the middle
+            mid.markdown(f"<h2 style='text-align: center; padding: 2cm; color: black; background-color: orange; border: darkorange solid 0.3px; box-shadow: 0.2px 0.2px 0.6px 0.1px gray'>{model.config.id2label[predictions[0].item()]}</h2>", unsafe_allow_html=True)
+
+            # let us recuperate the attention
+            attention = outputs.attentions[-1][0]
+
+            # let us recuperate the attention image
+            attention_image = get_attention(image, attention, size = (224, 224), patch_size = (14, 14))
+
+            # let us transform the attention image to an opencv image
+            attention_image = cv2.cvtColor(attention_image.astype('float32'), cv2.COLOR_RGB2BGR)
+
+            # let us display the attention image
+            right.header("Attention")
+
+            right.image(attention_image, channels='BGR')
+
+            right.markdown("""---""")
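For context: this Streamlit interface is typically served with `streamlit run app.py` from the repository root, and it assumes that the checkpoint directory referenced above and data/extractions/fake_real_dict.txt ship with the Space.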
data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/config.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "_name_or_path": "google/vit-base-patch16-224-in21k",
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "encoder_stride": 16,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "fake",
+    "1": "real"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "fake": 0,
+    "real": 1
+  },
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.1"
+}
data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2db8a17d7b7b6fb8c5b9ca808dfbd68977ee3eb53721a36287e69b2b0ca9a600
+size 686518917
data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f62ac6440dfa6ddd0737da7788da4569a54b1d0d8cc027ce7f623e6db55b05b
+size 343268717
data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6f8978ea188121d9fecb3f115bfdfa2c5cad2a9b0c1d0a104dddd3e07af89f6
+size 14575
data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scaler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cefe4c84c26da8b0778281baf520dd6d4b41a18ea28fd317c86c1f2b76d30fb
+size 557
data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea1d89fe668b9776e02fd071e17549ee7882e574b4efb0629a6d930572aab462
+size 627
data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/trainer_state.json
ADDED
@@ -0,0 +1,64 @@
+{
+  "best_metric": 0.6927365064620972,
+  "best_model_checkpoint": "data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK\\checkpoint-1500",
+  "epoch": 1.710376282782212,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.00012064414686134504,
+      "loss": 0.6945,
+      "step": 500
+    },
+    {
+      "epoch": 0.57,
+      "eval_accuracy": 0.5081081081081081,
+      "eval_f1": 0.38095238095238093,
+      "eval_loss": 0.6931825280189514,
+      "eval_runtime": 6.1462,
+      "eval_samples_per_second": 30.1,
+      "eval_steps_per_second": 3.905,
+      "step": 500
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 0.00010514828346782865,
+      "loss": 0.6937,
+      "step": 1000
+    },
+    {
+      "epoch": 1.14,
+      "eval_accuracy": 0.4702702702702703,
+      "eval_f1": 0.0,
+      "eval_loss": 0.6942673325538635,
+      "eval_runtime": 11.0225,
+      "eval_samples_per_second": 16.784,
+      "eval_steps_per_second": 2.177,
+      "step": 1000
+    },
+    {
+      "epoch": 1.71,
+      "learning_rate": 8.962136623985633e-05,
+      "loss": 0.6936,
+      "step": 1500
+    },
+    {
+      "epoch": 1.71,
+      "eval_accuracy": 0.5297297297297298,
+      "eval_f1": 0.6925795053003534,
+      "eval_loss": 0.6927365064620972,
+      "eval_runtime": 6.7463,
+      "eval_samples_per_second": 27.423,
+      "eval_steps_per_second": 3.558,
+      "step": 1500
+    }
+  ],
+  "max_steps": 4385,
+  "num_train_epochs": 5,
+  "total_flos": 2.323984768541614e+17,
+  "trial_name": null,
+  "trial_params": null
+}
data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f595feb7a87c355b9cc6060e25a6fe727190de55b96c20541d829a9c3b0fe7af
+size 3643
fake-face-detection/fake_face_detection.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,6 @@
+Metadata-Version: 2.1
+Name: fake-face-detection
+Version: 0.0.1
+Summary: This package contains the main functions and classes used to achieve the project of detecting if a face's image is true or not.
+Author: Oumar Kane
+Author-email: [email protected]
fake-face-detection/fake_face_detection.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,12 @@
+setup.py
+fake_face_detection/__init__.py
+fake_face_detection.egg-info/PKG-INFO
+fake_face_detection.egg-info/SOURCES.txt
+fake_face_detection.egg-info/dependency_links.txt
+fake_face_detection.egg-info/top_level.txt
+fake_face_detection/optimization/__init__.py
+fake_face_detection/optimization/bayesian_optimization.py
+fake_face_detection/utils/acquisitions.py
+fake_face_detection/utils/compute_weights.py
+fake_face_detection/utils/generation.py
+fake_face_detection/utils/sampling.py
fake-face-detection/fake_face_detection.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
+
fake-face-detection/fake_face_detection.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+fake_face_detection
fake-face-detection/fake_face_detection/__init__.py
ADDED
File without changes
fake-face-detection/fake_face_detection/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (262 Bytes)
fake-face-detection/fake_face_detection/data/__init__.py
ADDED
File without changes
fake-face-detection/fake_face_detection/data/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (267 Bytes)
fake-face-detection/fake_face_detection/data/__pycache__/collator.cpython-310.pyc
ADDED
Binary file (939 Bytes)
fake-face-detection/fake_face_detection/data/__pycache__/fake_face_dataset.cpython-310.pyc
ADDED
Binary file (1.7 kB)
fake-face-detection/fake_face_detection/data/collator.py
ADDED
@@ -0,0 +1,33 @@
+
+import torch
+import numpy as np
+
+def fake_face_collator(batch):
+    """The data collator for training vision transformer models on the fake and real face dataset
+
+    Args:
+        batch (list): A list of dictionaries containing the pixel values and the labels
+
+    Returns:
+        dict: The final dictionary
+    """
+
+    new_batch = {
+        'pixel_values': [],
+        'labels': []
+    }
+
+    for x in batch:
+
+        pixel_values = torch.from_numpy(x['pixel_values'][0]) if isinstance(x['pixel_values'][0], np.ndarray) \
+            else x['pixel_values'][0]
+
+        new_batch['pixel_values'].append(pixel_values)
+
+        new_batch['labels'].append(torch.tensor(x['labels']))
+
+    new_batch['pixel_values'] = torch.stack(new_batch['pixel_values'])
+
+    new_batch['labels'] = torch.stack(new_batch['labels'])
+
+    return new_batch
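As a quick sanity check, here is a minimal sketch of feeding this collator a hand-built batch; the shapes mimic what a ViT feature extractor returns with return_tensors='np':

import numpy as np
import torch

# two synthetic samples shaped like feature-extractor outputs
batch = [
    {'pixel_values': [np.random.rand(3, 224, 224).astype('float32')], 'labels': 0},
    {'pixel_values': [np.random.rand(3, 224, 224).astype('float32')], 'labels': 1},
]

out = fake_face_collator(batch)
print(out['pixel_values'].shape)  # torch.Size([2, 3, 224, 224])
print(out['labels'])              # tensor([0, 1])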
fake-face-detection/fake_face_detection/data/fake_face_dataset.py
ADDED
@@ -0,0 +1,62 @@
+
+from fake_face_detection.utils.compute_weights import compute_weights
+from torch.utils.data import Dataset
+from PIL import Image
+from glob import glob
+import torch
+import os
+
+class FakeFaceDetectionDataset(Dataset):
+
+    def __init__(self, fake_path: str, real_path: str, id_map: dict, transformer, **transformer_kwargs):
+
+        # let us load the images
+        self.fake_images = glob(os.path.join(fake_path, "*"))
+
+        self.real_images = glob(os.path.join(real_path, "*"))
+
+        self.images = self.fake_images + self.real_images
+
+        # let us recuperate the labels
+        self.fake_labels = [int(id_map['fake'])] * len(self.fake_images)
+
+        self.real_labels = [int(id_map['real'])] * len(self.real_images)
+
+        self.labels = self.fake_labels + self.real_labels
+
+        # let us recuperate the weights
+        self.weights = torch.from_numpy(compute_weights(self.labels))
+
+        # let us recuperate the transformer
+        self.transformer = transformer
+
+        # let us recuperate the length
+        self.length = len(self.labels)
+
+        # let us recuperate the transformer kwargs
+        self.transformer_kwargs = transformer_kwargs
+
+    def __getitem__(self, index):
+
+        # let us recuperate an image
+        image = self.images[index]
+
+        with Image.open(image) as img:
+
+            # let us recuperate a label
+            label = self.labels[index]
+
+            # let us add a transformation on the images
+            if self.transformer:
+
+                image = self.transformer(img, **self.transformer_kwargs)
+
+            # let us add the label inside the obtained dictionary
+            image['labels'] = label
+
+        return image
+
+    def __len__(self):
+
+        return self.length
+
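A hedged construction sketch; the folder paths are placeholders and the transformer is assumed to be a Hugging Face feature extractor whose keyword arguments are forwarded through **transformer_kwargs:

from transformers import ViTFeatureExtractor

extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

dataset = FakeFaceDetectionDataset(
    fake_path='data/fake',            # placeholder folder of fake faces
    real_path='data/real',            # placeholder folder of real faces
    id_map={'fake': 0, 'real': 1},
    transformer=extractor,
    return_tensors='np',              # forwarded as **transformer_kwargs
)

sample = dataset[0]                   # dict with 'pixel_values' and 'labels'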
fake-face-detection/fake_face_detection/metrics/__init__.py
ADDED
File without changes
fake-face-detection/fake_face_detection/metrics/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (270 Bytes)
fake-face-detection/fake_face_detection/metrics/__pycache__/compute_metrics.cpython-310.pyc
ADDED
Binary file (825 Bytes)
fake-face-detection/fake_face_detection/metrics/__pycache__/make_predictions.cpython-310.pyc
ADDED
Binary file (3.35 kB)
fake-face-detection/fake_face_detection/metrics/compute_metrics.py
ADDED
@@ -0,0 +1,33 @@
+
+import numpy as np
+import evaluate
+
+metrics = {
+    'f1': evaluate.load('f1'),
+    'accuracy': evaluate.load('accuracy'),
+    'roc_auc': evaluate.load('roc_auc', 'multiclass')
+}
+
+def compute_metrics(p): # partly adapted from https://huggingface.co/blog/fine-tune-vit
+
+    predictions, label_ids = p
+
+    metric = metrics['accuracy'].compute(predictions = np.argmax(predictions, axis = 1), references = label_ids)
+
+    f1_score = metrics['f1'].compute(predictions = np.argmax(predictions, axis = 1), references = label_ids)
+
+    metric.update(f1_score)
+
+    try:
+
+        auc = metrics['roc_auc'].compute(prediction_scores = predictions, references = label_ids)
+
+        metric.update(auc)
+
+    except Exception:
+
+        pass
+
+    return metric
+
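For illustration, a toy call with hand-made logits; the exact roc_auc behaviour depends on the evaluate version, which is why the function guards it with try/except:

import numpy as np

logits = np.array([[2.0, 0.1], [0.2, 1.5], [1.2, 0.3], [0.1, 2.2]])
labels = np.array([0, 1, 0, 1])

print(compute_metrics((logits, labels)))  # e.g. {'accuracy': 1.0, 'f1': 1.0, ...}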
fake-face-detection/fake_face_detection/metrics/make_predictions.py
ADDED
@@ -0,0 +1,147 @@
+
+from fake_face_detection.data.fake_face_dataset import FakeFaceDetectionDataset
+from fake_face_detection.metrics.compute_metrics import compute_metrics
+from torch.utils.tensorboard import SummaryWriter
+from PIL.JpegImagePlugin import JpegImageFile
+from torch.utils.data import DataLoader
+from torch.nn import functional as F
+from torchvision import transforms
+import matplotlib.pyplot as plt
+from glob import glob
+from PIL import Image
+from typing import *
+import pandas as pd
+from math import *
+import numpy as np
+import torch
+import os
+
+def get_attention(image: Union[str, JpegImageFile], attention: torch.Tensor, size: tuple, patch_size: tuple):
+
+    # recuperate the image as a numpy array
+    if isinstance(image, str):
+
+        with Image.open(image) as img:
+
+            img = np.array(transforms.Resize(size)(img))
+
+    else:
+
+        img = np.array(transforms.Resize(size)(image))
+
+    # recuperate the attention provided by the last patch (notice that we eliminate 1 because of the +1 token added before the patch embeddings)
+    attention = attention[:, -1, 1:]
+
+    # calculate the mean attention over the heads
+    attention = attention.mean(axis = 0)
+
+    # calculate the scale factor
+    scale_factor = size[0] * size[1] / (patch_size[0] * patch_size[1])
+
+    # rescale the attention with the nearest scaler
+    attention = F.interpolate(attention.reshape(1, 1, -1), scale_factor=scale_factor,
+                              mode='nearest')
+
+    # let us reshape the attention to the right size
+    attention = attention.reshape(size[0], size[1], 1)
+
+    # recuperate the result
+    attention_image = img / 255 * attention.numpy()
+
+    return attention_image
+
+
+def make_predictions(test_dataset: FakeFaceDetectionDataset,
+                     model,
+                     log_dir: str = "fake_face_logs",
+                     tag: str = "Attentions",
+                     batch_size: int = 3,
+                     size: tuple = (224, 224),
+                     patch_size: tuple = (14, 14),
+                     figsize: tuple = (24, 24)):
+
+    with torch.no_grad():
+
+        _ = model.eval()
+
+        # initialize the logger
+        writer = SummaryWriter(os.path.join(log_dir, "attentions"))
+
+        # let us recuperate the images and labels
+        images = test_dataset.images
+
+        labels = test_dataset.labels
+
+        # let us initialize the predictions
+        predictions = {'attentions': [], 'predictions': [], 'true_labels': labels, 'predicted_labels': []}
+
+        # let us initialize the dataloader
+        test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
+
+        # get the loss
+        loss = 0
+
+        for data in test_dataloader:
+
+            # recuperate the pixel values
+            pixel_values = data['pixel_values'][0].cuda()
+
+            # recuperate the labels
+            labels_ = data['labels'].cuda()
+
+            # recuperate the outputs
+            outputs = model(pixel_values, labels = labels_, output_attentions = True)
+
+            # recuperate the predictions
+            predictions['predictions'].append(torch.softmax(outputs.logits.detach().cpu(), axis = -1).numpy())
+
+            # recuperate the attentions of the last encoder layer
+            predictions['attentions'].append(outputs.attentions[-1].detach().cpu())
+
+            # add the loss
+            loss += outputs.loss.detach().cpu().item()
+
+        predictions['predictions'] = np.concatenate(predictions['predictions'], axis = 0)
+
+        predictions['attentions'] = torch.concatenate(predictions['attentions'], axis = 0)
+
+        predictions['predicted_labels'] = np.argmax(predictions['predictions'], axis = -1).tolist()
+
+        # let us calculate the metrics
+        metrics = compute_metrics((predictions['predictions'], np.array(predictions['true_labels'])))
+        metrics['loss'] = loss / len(test_dataloader)
+
+        # for each image we will visualize its attention
+        nrows = ceil(sqrt(len(images)))
+
+        fig, axes = plt.subplots(nrows=nrows, ncols=nrows, figsize = figsize)
+
+        axes = axes.flat
+
+        for i in range(len(images)):
+
+            attention_image = get_attention(images[i], predictions['attentions'][i], size, patch_size)
+
+            axes[i].imshow(attention_image)
+
+            axes[i].set_title(f'Image {i + 1}')
+
+            axes[i].axis('off')
+
+        fig.tight_layout()
+
+        [fig.delaxes(axes[i]) for i in range(len(images), nrows * nrows)]
+
+        writer.add_figure(tag, fig)
+
+        # let us remove the predictions and the attentions
+        del predictions['predictions']
+
+        del predictions['attentions']
+
+        # let us recuperate the metrics and the predictions
+        return pd.DataFrame(predictions), metrics
+
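A minimal end-to-end sketch of get_attention on a single image, assuming a local 'face.jpg' and the same 224/16 ViT geometry as the checkpoint above (a 14x14 patch grid):

from transformers import ViTForImageClassification, ViTFeatureExtractor
from PIL import Image
import torch

name = 'google/vit-base-patch16-224-in21k'
model = ViTForImageClassification.from_pretrained(name)
extractor = ViTFeatureExtractor.from_pretrained(name)

image = Image.open('face.jpg')                      # placeholder path
inputs = extractor(image, return_tensors='pt')

with torch.no_grad():
    outputs = model(inputs['pixel_values'], output_attentions=True)

# last layer, first sample: (num_heads, tokens, tokens)
overlay = get_attention(image, outputs.attentions[-1][0],
                        size=(224, 224), patch_size=(14, 14))
print(overlay.shape)                                # (224, 224, 3)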
fake-face-detection/fake_face_detection/optimization/__init__.py
ADDED
File without changes
fake-face-detection/fake_face_detection/optimization/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (275 Bytes)
fake-face-detection/fake_face_detection/optimization/__pycache__/bayesian_optimization.cpython-310.pyc
ADDED
Binary file (2.78 kB)
fake-face-detection/fake_face_detection/optimization/__pycache__/fake_face_bayesian_optimization.cpython-310.pyc
ADDED
Binary file (4.49 kB)
fake-face-detection/fake_face_detection/optimization/bayesian_optimization.py
ADDED
@@ -0,0 +1,78 @@
+from fake_face_detection.utils.generation import PI_generate_sample as generate_sample
+from fake_face_detection.utils.acquisitions import PI_acquisition as acquisition
+from fake_face_detection.utils.sampling import get_random_samples
+from sklearn.gaussian_process import GaussianProcessRegressor
+from typing import *
+import pandas as pd
+import numpy as np
+
+class SimpleBayesianOptimization:
+
+    def __init__(self, objective: Callable, search_spaces: dict, maximize: bool = True):
+
+        # recuperate the optimization strategy
+        self.maximize = maximize
+
+        # recuperate a random sample
+        sample = get_random_samples(search_spaces)
+
+        # initialize the search spaces
+        self.search_spaces = search_spaces
+
+        # initialize the objective function
+        self.objective = objective
+
+        # calculate the first score
+        score = objective(sample)
+
+        # initialize the model
+        self.model = GaussianProcessRegressor()
+
+        # initialize the input data
+        self.data = [list(sample.values())]
+
+        # initialize the scores
+        self.scores = [[score]]
+
+        # fit the model with the input data and the target
+        self.model.fit(self.data, self.scores)
+
+    def optimize(self, n_trials: int = 50, n_tests: int = 100):
+        """Finding the best hyperparameters with Bayesian Optimization
+
+        Args:
+            n_trials (int, optional): The number of trials. Defaults to 50.
+            n_tests (int, optional): The number of random samples to test for each trial. Defaults to 100.
+        """
+        # let us make multiple trials in order to find the best params
+        for _ in range(n_trials):
+
+            # let us generate a new sample with the acquisition and the surrogate functions
+            new_sample = generate_sample(self.data, self.model, self.search_spaces, n_tests, maximize = self.maximize)
+            sample = {key: new_sample[i] for i, key in enumerate(self.search_spaces)}
+
+            # let us recuperate a new score from the new sample
+            new_score = self.objective(sample)
+
+            # let us add the new sample and its score to their lists
+            self.data.append(new_sample)
+
+            self.scores.append([new_score])
+
+            # let us train the model again
+            self.model.fit(self.data, self.scores)
+
+    def get_results(self):
+        """Recuperate the generated samples and the scores
+
+        Returns:
+            pd.DataFrame: A data frame containing the results
+        """
+        # let us return the results as a data frame
+        data = {key: np.array(self.data, dtype = object)[:, i] for i, key in enumerate(self.search_spaces)}
+
+        data.update({'score': np.array(self.scores)[:, 0]})
+
+        return pd.DataFrame(data)
+
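A toy optimization run for orientation; note that the search-space schema is defined by get_random_samples in utils/sampling.py, which is not part of this diff, so the dict below is only an assumed shape:

# assumed search-space shape; see utils/sampling.py for the real schema
search_spaces = {'x': ('uniform', 0.0, 1.0)}

def objective(sample):
    return -(sample['x'] - 0.3) ** 2   # maximum at x = 0.3

opt = SimpleBayesianOptimization(objective, search_spaces, maximize=True)
opt.optimize(n_trials=20, n_tests=50)
print(opt.get_results().sort_values('score', ascending=False).head())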
fake-face-detection/fake_face_detection/optimization/fake_face_bayesian_optimization.py
ADDED
@@ -0,0 +1,165 @@
+from fake_face_detection.utils.generation import PI_generate_sample as generate_sample
+from fake_face_detection.utils.acquisitions import PI_acquisition as acquisition
+from fake_face_detection.utils.sampling import get_random_samples
+from sklearn.gaussian_process import GaussianProcessRegressor
+from functools import partial
+from typing import *
+import pandas as pd
+import numpy as np
+import string
+import random
+import pickle
+import os
+
+letters = string.ascii_letters + string.digits
+
+class SimpleBayesianOptimizationForFakeReal:
+
+    def __init__(self, objective: Callable, search_spaces: dict, maximize: bool = True, random_kwargs: dict = {}, kwargs: dict = {}, checkpoint: str = "data/trials/checkpoint.txt"):
+
+        # recuperate the optimization strategy
+        self.maximize = maximize
+
+        # checkpoint where the data and scores will be saved
+        self.checkpoint = checkpoint
+
+        # initialize the search spaces
+        self.search_spaces = search_spaces
+
+        # recuperate the random kwargs
+        self.random_kwargs = random_kwargs
+
+        # initialize the objective function
+        self.objective = objective
+
+        # initialize the kwargs
+        self.kwargs = kwargs
+
+        # initialize the model
+        self.model = GaussianProcessRegressor()
+
+        # initialize the random kwargs with random values
+        random_kwargs = {key: value + ''.join(random.choice(letters) for i in range(7)) for key, value in self.random_kwargs.items()}
+
+        # add the random kwargs to the kwargs
+        self.kwargs.update(random_kwargs)
+
+        # recuperate a random sample
+        config = get_random_samples(search_spaces)
+
+        if os.path.exists(self.checkpoint):
+
+            with open(self.checkpoint, 'rb') as f:
+
+                pickler = pickle.Unpickler(f)
+
+                checkpoint = pickler.load()  # keep self.checkpoint as the file path
+
+                self.data = checkpoint['data']
+
+                self.scores = checkpoint['scores']
+
+                self.model = checkpoint['model']
+
+                self.current_trial = checkpoint['trial']
+
+                print(f"Checkpoint loaded at trial {self.current_trial}")
+
+        else:
+
+            # add the config to the kwargs
+            self.kwargs['config'] = config
+
+            # calculate the first score
+            score = self.objective(**self.kwargs)
+
+            # initialize the input data
+            self.data = [list(config.values())]
+
+            # initialize the scores
+            self.scores = [[score]]
+
+            # fit the model with the input data and the target
+            self.model.fit(self.data, self.scores)
+
+            # initialize the number of trials to zero
+            self.current_trial = 0
+
+            with open(self.checkpoint, 'wb') as f:
+
+                pickler = pickle.Pickler(f)
+
+                checkpoint = {
+                    'data': self.data,
+                    'scores': self.scores,
+                    'model': self.model,
+                    'trial': self.current_trial
+                }
+
+                pickler.dump(checkpoint)
+
+    def optimize(self, n_trials: int = 50, n_tests: int = 100):
+        """Finding the best hyperparameters with Bayesian Optimization
+
+        Args:
+            n_trials (int, optional): The number of trials. Defaults to 50.
+            n_tests (int, optional): The number of random samples to test for each trial. Defaults to 100.
+        """
+
+        # let us make multiple trials in order to find the best params
+        for trial in range(self.current_trial + 1, self.current_trial + n_trials + 1):
+
+            # let us generate a new sample with the acquisition and the surrogate functions
+            new_sample = generate_sample(self.data, self.model, self.search_spaces, n_tests, maximize = self.maximize)
+            config = {key: new_sample[i] for i, key in enumerate(self.search_spaces)}
+
+            # initialize the random kwargs with random values
+            random_kwargs = {key: value + ''.join(random.choice(letters) for i in range(7)) for key, value in self.random_kwargs.items()}
+
+            # add the random kwargs to the kwargs
+            self.kwargs.update(random_kwargs)
+
+            # add the config to the kwargs
+            self.kwargs['config'] = config
+
+            # calculate the new score
+            new_score = self.objective(**self.kwargs)
+
+            # let us add the new sample and its score to their lists
+            self.data.append(new_sample)
+
+            self.scores.append([new_score])
+
+            # let us train the model again
+            self.model.fit(self.data, self.scores)
+
+            # recuperate the current trial
+            self.current_trial = trial
+
+            with open(self.checkpoint, 'wb') as f:
+
+                pickler = pickle.Pickler(f)
+
+                checkpoint = {
+                    'data': self.data,
+                    'scores': self.scores,
+                    'model': self.model,
+                    'trial': self.current_trial
+                }
+
+                pickler.dump(checkpoint)
+
+    def get_results(self):
+        """Recuperate the generated samples and the scores
+
+        Returns:
+            pd.DataFrame: A data frame containing the results
+        """
+        # let us return the results as a data frame
+        data = {key: np.array(self.data, dtype = object)[:, i] for i, key in enumerate(self.search_spaces)}
+
+        data.update({'score': np.array(self.scores)[:, 0]})
+
+        return pd.DataFrame(data)
+
fake-face-detection/fake_face_detection/trainers/__init__.py
ADDED
File without changes
fake-face-detection/fake_face_detection/trainers/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (271 Bytes)
fake-face-detection/fake_face_detection/trainers/__pycache__/custom_trainer.cpython-310.pyc
ADDED
Binary file (1.11 kB)
fake-face-detection/fake_face_detection/trainers/custom_trainer.py
ADDED
@@ -0,0 +1,27 @@
+
+from transformers import Trainer
+from torch import nn
+import torch
+
+def get_custom_trainer(weights: torch.Tensor):
+
+    class CustomTrainer(Trainer): # adapted from https://huggingface.co/docs/transformers/main_classes/trainer
+
+        def compute_loss(self, model, inputs, return_outputs=False):
+
+            # recuperate the labels
+            labels = inputs.get("labels")
+
+            # forward pass
+            outputs = model(**inputs)
+
+            # recuperate the logits
+            logits = outputs.get("logits")
+
+            # compute the custom loss (passing the weights)
+            loss_fct = nn.CrossEntropyLoss(weight=weights)
+
+            loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
+
+            return (loss, outputs) if return_outputs else loss
+
+    return CustomTrainer
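A short sketch of how this factory is meant to be combined with the dataset's class weights; train_dataset is assumed to be a FakeFaceDetectionDataset instance:

# class weights computed by the dataset with the 'balanced' method
weights = train_dataset.weights.float().to('cuda')  # assuming a GPU, as make_predictions does

# WeightedTrainer can then be passed anywhere a transformers.Trainer class is
# expected, e.g. as the `trainer` argument of search_train.train below
WeightedTrainer = get_custom_trainer(weights)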
fake-face-detection/fake_face_detection/trainers/custom_trainer_v1.txt
ADDED
File without changes
fake-face-detection/fake_face_detection/trainers/search_train.py
ADDED
@@ -0,0 +1,73 @@
+
+from fake_face_detection.metrics.compute_metrics import compute_metrics
+from fake_face_detection.data.collator import fake_face_collator
+from transformers import Trainer, TrainingArguments
+from torch.utils.tensorboard import SummaryWriter
+from torch import nn
+from typing import *
+import numpy as np
+import json
+import os
+
+def train(epochs: int, output_dir: str, config: dict, model: nn.Module, trainer, get_datasets: Callable, log_dir: str = "fake_face_logs", metric = 'accuracy'):
+
+    print("------------------------- Beginning of training")
+
+    # reformat the config integer types
+    for key, value in config.items():
+
+        if isinstance(value, np.int32): config[key] = int(value)
+
+    pretty = json.dumps(config, indent = 4)
+
+    print(f"Current Config: \n {pretty}")
+
+    # recuperate the datasets
+    train_dataset, test_dataset = get_datasets(config['h_flip_p'], config['v_flip_p'], config['gray_scale_p'], config['rotation'])
+
+    # initialize the arguments of the training
+    training_args = TrainingArguments(output_dir,
+                                      per_device_train_batch_size=config['batch_size'],
+                                      evaluation_strategy='steps',
+                                      save_strategy='steps',
+                                      logging_strategy='steps',
+                                      num_train_epochs=epochs,
+                                      fp16=True,
+                                      save_total_limit=2,
+                                      remove_unused_columns=True,
+                                      push_to_hub=False,
+                                      logging_dir=os.path.join(log_dir, os.path.basename(output_dir)),
+                                      load_best_model_at_end=True,
+                                      learning_rate=config['lr'],
+                                      weight_decay=config['weight_decay']
+                                      )
+
+    # initialize the trainer
+    trainer_ = trainer(
+        model = model,
+        args = training_args,
+        data_collator = fake_face_collator,
+        compute_metrics = compute_metrics,
+        train_dataset = train_dataset,
+        eval_dataset = test_dataset
+    )
+
+    # train the model
+    trainer_.train()
+
+    # evaluate the model and recuperate the metrics
+    metrics = trainer_.evaluate(test_dataset)
+
+    # add the metrics and the config to the hyperparameter panel of tensorboard
+    with SummaryWriter(os.path.join(log_dir, 'hparams')) as logger:
+
+        logger.add_hparams(
+            config, metrics
+        )
+
+    print(metrics)
+
+    print("------------------------- End of training")
+
+    # recuperate the metric to evaluate
+    return metrics[f'eval_{metric}']
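A hedged call sketch; model, a trainer class, and a get_datasets factory that builds the two augmented datasets from the four augmentation probabilities are all assumed to exist:

config = {'batch_size': 8, 'lr': 1e-4, 'weight_decay': 0.01,
          'h_flip_p': 0.5, 'v_flip_p': 0.0, 'gray_scale_p': 0.1, 'rotation': 10}

score = train(epochs=2,
              output_dir='data/checkpoints/demo',   # placeholder
              config=config,
              model=model,                          # assumed ViT model
              trainer=Trainer,                      # or a get_custom_trainer class
              get_datasets=get_datasets)            # assumed dataset factory
print(score)                                        # eval_accuracy by default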
fake-face-detection/fake_face_detection/utils/__pycache__/acquisitions.cpython-310.pyc
ADDED
Binary file (1.21 kB)
fake-face-detection/fake_face_detection/utils/__pycache__/compute_weights.cpython-310.pyc
ADDED
Binary file (815 Bytes)
fake-face-detection/fake_face_detection/utils/__pycache__/generation.cpython-310.pyc
ADDED
Binary file (1.55 kB)
fake-face-detection/fake_face_detection/utils/__pycache__/sampling.cpython-310.pyc
ADDED
Binary file (1.99 kB)
fake-face-detection/fake_face_detection/utils/acquisitions.py
ADDED
@@ -0,0 +1,36 @@
+from sklearn.gaussian_process import GaussianProcessRegressor
+from scipy.stats import norm
+from typing import *
+
+def PI_acquisition(X: List, X_prime: List, model: GaussianProcessRegressor, maximize: bool = True):
+    """Acquisition function for bayesian optimization using the probability of improvement
+
+    Args:
+        X (List): A list containing the input data
+        X_prime (List): A list containing the generated samples
+        model (GaussianProcessRegressor): The gaussian model to use
+        maximize (bool, optional): A boolean value indicating the optimization objective. Defaults to True.
+
+    Returns:
+        List: A list containing the probabilities
+    """
+
+    # let us predict the means for the input data
+    mu = model.predict(X)
+
+    # let us calculate the means and standard deviations for the random samples
+    mu_e, std_e = model.predict(X_prime, return_std=True)
+
+    if not maximize:
+
+        mu = -mu
+
+        mu_e = -mu_e
+
+    # let us take the best mean
+    mu_best = max(mu)
+
+    # let us calculate the probability of improvement
+    probs = norm.cdf((mu_e - mu_best) / std_e)
+
+    return probs
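In the notation of this function, the score being computed is the classic probability-of-improvement criterion: with mu_best = max_i mu(x_i) taken over the already-observed inputs, PI(x') = Phi((mu_e(x') - mu_best) / sigma_e(x')), where Phi is the standard normal CDF. That is exactly the norm.cdf call above, with the sign flip handling minimization.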
fake-face-detection/fake_face_detection/utils/compute_weights.py
ADDED
@@ -0,0 +1,21 @@
+import sklearn.utils as skl
+from typing import *
+import numpy as np
+
+def compute_weights(samples: List[int]):
+    """Compute the weights with the 'balanced' method
+
+    Args:
+        samples (List[int]): The samples: a list of integers
+
+    Returns:
+        numpy.ndarray: An array containing the weights
+    """
+
+    # get the unique classes
+    classes = np.unique(samples)
+
+    # calculate the weights with the balanced method
+    weights = skl.class_weight.compute_class_weight('balanced', classes=classes, y = samples)
+
+    return weights
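A quick check of scikit-learn's 'balanced' convention, weight = n_samples / (n_classes * class_count): with three 0s and one 1, class 0 gets 4 / (2 * 3) and class 1 gets 4 / (2 * 1):

print(compute_weights([0, 0, 0, 1]))  # -> [0.66666667 2.        ]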
fake-face-detection/fake_face_detection/utils/display_pil.py
ADDED
@@ -0,0 +1,43 @@
+from PIL.JpegImagePlugin import JpegImageFile
+from PIL import ImageDraw
+from PIL import Image
+from typing import *
+
+def display(images: List[JpegImageFile], labels: List[str], w: int = 300, h: int = 200, left_color: str = "white", right_color: str = "white"):
+    """Display a dual image
+
+    Args:
+        images (List[JpegImageFile]): A list containing two images
+        labels (List[str]): The labels of the images
+        w (int, optional): The width. Defaults to 300.
+        h (int, optional): The height. Defaults to 200.
+        left_color (str, optional): The color of the left label. Defaults to "white".
+        right_color (str, optional): The color of the right label. Defaults to "white".
+
+    Returns:
+        PIL.Image: A pillow image
+    """
+
+    # define a grid
+    grid = Image.new('RGB', size=(w, h))
+
+    # draw the grid
+    draw = ImageDraw.Draw(grid, mode='RGB')
+
+    # define the second box
+    box = (w // 2, 0)
+
+    # define the size of the images
+    size = (w // 2, h)
+
+    # add the images to the grid
+    grid.paste(images[0].resize(size))
+
+    grid.paste(images[1].resize(size), box = box)
+
+    # draw the labels
+    draw.text((0, 0), labels[0], fill=left_color)
+
+    draw.text(box, labels[1], fill=right_color)
+
+    return grid
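Typical usage, with placeholder image paths, to eyeball a fake/real pair side by side:

from PIL import Image

fake = Image.open('fake.jpg')   # placeholder paths
real = Image.open('real.jpg')

grid = display([fake, real], ['fake', 'real'], w=400, h=200)
grid.save('comparison.jpg')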
fake-face-detection/fake_face_detection/utils/downscale_image.py
ADDED
@@ -0,0 +1,46 @@
+
+from PIL.JpegImagePlugin import JpegImageFile
+from PIL import Image
+from math import *
+import numpy as np
+import itertools
+
+def downscale_image(image: JpegImageFile, size: tuple = (224, 224)):
+
+    assert image.size[0] % size[0] == 0 and image.size[1] % size[1] == 0
+
+    # get the box size
+    height, width = int(image.size[0] / size[0]), int(image.size[1] / size[1])
+
+    print(f"Height and width of each box: {(height, width)}")
+
+    # we will concatenate the patches over the height axis (axis 0)
+    patches = []
+
+    for j in range(0, size[1] * width, width):
+
+        # we must recuperate each width division in order to concatenate the results (on axis 1)
+        h_div = []
+
+        for i in range(0, size[0] * height, height):
+
+            box = (j, i, j + width, i + height)
+
+            current_box = image.crop(box)
+
+            # let us convert the box to a numpy array and calculate the mean
+            current_box = np.array(current_box).mean(axis = (0, 1))[np.newaxis, np.newaxis, :]
+
+            # add it to h_div
+            h_div.append(current_box)
+
+        # concatenate over the width axis
+        patches.append(np.concatenate(h_div, axis = 0))
+
+    # concatenate over the height axis and transform to a pillow image
+    image = Image.fromarray(np.uint8(np.concatenate(patches, axis = 1)))
+
+    return image
+
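An example with a size that satisfies the divisibility assert: a 448x448 input averaged down to 224x224, so each output pixel is the mean of a 2x2 box:

from PIL import Image

img = Image.open('face.jpg').resize((448, 448))   # placeholder path
small = downscale_image(img, size=(224, 224))
print(small.size)                                 # (224, 224)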
fake-face-detection/fake_face_detection/utils/generation.py
ADDED
@@ -0,0 +1,28 @@
+from fake_face_detection.utils.acquisitions import PI_acquisition
+from fake_face_detection.utils.sampling import get_random_samples
+from sklearn.gaussian_process import GaussianProcessRegressor
+from typing import *
+import numpy as np
+
+def PI_generate_sample(X: Iterable, model: GaussianProcessRegressor, search_spaces: dict, n_tests: int = 100, maximize: bool = True):
+    """Generate new samples with the probability of improvement
+
+    Args:
+        X (Iterable): The list of input data
+        model (GaussianProcessRegressor): The model to train
+        search_spaces (dict): The search spaces
+        n_tests (int, optional): The number of random samples to test. Defaults to 100.
+        maximize (bool, optional): The optimization strategy. If maximize == True -> maximize, else -> minimize. Defaults to True.
+
+    Returns:
+        List: The new sample
+    """
+
+    # let us create random samples
+    X_prime = [list(get_random_samples(search_spaces).values()) for i in range(n_tests)]
+
+    # let us recuperate the probabilities from the acquisition function
+    probs = PI_acquisition(X, X_prime, model, maximize = maximize)
+
+    # let us return the best sample
+    return X_prime[np.argmax(probs)]
fake-face-detection/fake_face_detection/utils/get_patches.py
ADDED
@@ -0,0 +1,35 @@
+
+from PIL.JpegImagePlugin import JpegImageFile
+from math import *
+import itertools
+
+def get_patches(image: JpegImageFile, n_patches: int):
+
+    # get the height and width of the image
+    height, width = image.size
+
+    # let us calculate the number of divisions to make to the width and height of the image
+    n_patch = int(sqrt(n_patches))
+
+    patch_h = int(height / n_patch) # notice that the height must be divisible by the number of divisions
+
+    patch_w = int(width / n_patch) # notice that the width must be divisible by the number of divisions
+
+    print(f"Height and width of each patch: {(patch_h, patch_w)}")
+
+    # we will find the first coordinates of the boxes with the product function of itertools
+    first_coordinates = list(itertools.product(range(0, patch_h * n_patch, patch_h),
+                                               range(0, patch_w * n_patch, patch_w)))
+
+    patches = []
+
+    for pos1, pos2 in first_coordinates:
+
+        box = (pos2, pos1, pos2 + patch_w, pos1 + patch_h)
+
+        patches.append(image.crop(box))
+
+    return patches
+
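For example, a 224x224 image cut into 256 patches yields a 16x16 grid of 14x14 crops, matching the patch grid used by get_attention above:

from PIL import Image

img = Image.open('face.jpg').resize((224, 224))   # placeholder path
patches = get_patches(img, n_patches=256)
print(len(patches), patches[0].size)              # 256 (14, 14)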