diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000000000000000000000000000000000000..418ee5ad96059344bd6a28834857fc98cf427e2b
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,14 @@
+{
+ "python.defaultInterpreterPath": "C:\\Users\\Oumar Kane\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\pytorch1-HleOW5am-py3.10\\Scripts\\python.exe",
+ "python.terminal.activateEnvironment": true,
+ "terminal.integrated.defaultProfile.windows": "Command Prompt",
+ "files.autoSave": "afterDelay",
+ "files.autoSaveDelay": 1000,
+ "cSpell.words": [
+ "hyperparameters",
+ "photoshed",
+ "photoshep",
+ "photoshop",
+ "photoshopped"
+ ]
+}
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..6baf62a9a72ad5d003ed022fe8e8e4375de3839b
--- /dev/null
+++ b/app.py
@@ -0,0 +1,122 @@
+from transformers import ViTForImageClassification, ViTFeatureExtractor
+from fake_face_detection.metrics.make_predictions import get_attention
+from torchvision import transforms
+import streamlit as st
+from PIL import Image
+import numpy as np
+import pickle
+import torch
+import cv2
+
+# set the color of the header: the text is written with st.markdown (raw HTML allowed), followed by a '---' separator
+def header(text):
+ st.markdown(f"
{text}
", unsafe_allow_html=True)
+ st.markdown("""---""")
+
+# initialize the size to which the uploaded image is resized
+size = (224, 224)
+
+# let us add a header
+header("FAKE AND REAL FACE DETECTION")
+
+# let us add an expander to write some description of the application
+expander = st.expander('Description', expanded=True)
+
+with expander:
+ st.write('''This is a long text lorem ipsum dolor''')
+
+# let us initialize three columns: the loaded image, the prediction and the attention
+left, mid, right = st.columns(3)
+
+# the following function will load the model (must be in cache)
+@st.cache_resource
+def get_model():
+    """Load the ViT classifier saved in the checkpoints and its feature extractor
+
+    Returns:
+        tuple: The classification model and the feature extractor
+    """
+    # NOTE: unpickling can execute arbitrary code, this file must come from a trusted source
+    with open('data/extractions/fake_real_dict.txt', 'rb') as f:
+        characs = pickle.Unpickler(f).load()
+
+    # the ids are used as label2id, so they are inverted to obtain id2label
+    label2id = characs['ids']
+    # recuperate the model (the forward slashes make the path valid on every system)
+    model = ViTForImageClassification.from_pretrained(
+        'data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK',
+        num_labels = len(label2id),
+        id2label = {index: label for label, index in label2id.items()},
+        label2id = label2id
+    )
+
+    # recuperate the feature_extractor from the configuration of the base checkpoint
+    feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
+
+    return model, feature_extractor
+
+# let us add a file uploader (only 'jpg' files are accepted)
+st.subheader("Choose an image to inspect")
+file = st.file_uploader("", type='jpg')
+
+# if a file was uploaded, run the next steps
+if file is not None:
+
+ # read the bytes of the file into a uint8 array
+ file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
+ # decode the bytes to a color image (flag 1), displayed below with the BGR channel order
+ opencv_image = cv2.imdecode(file_bytes, 1)
+
+ # resize the image to `size`
+ opencv_image = cv2.resize(opencv_image, size)
+
+ # Let us display the resized image in the left column
+ left.header("Loaded image")
+
+ left.image(opencv_image, channels='BGR')
+
+ left.markdown("""---""")
+
+ if left.button("SUBMIT"):
+
+ # Let us convert the image format to 'RGB'
+ image = cv2.cvtColor(opencv_image, cv2.COLOR_BGR2RGB)
+
+ # Let us convert from opencv image to pil image
+ image = Image.fromarray(image)
+
+ with torch.no_grad():
+
+ # Recuperate the model and the feature extractor (cached by `get_model`)
+ model, feature_extractor = get_model()
+
+ # Change to evaluation mode
+ _ = model.eval()
+
+ # Apply transformation on the image
+ image_ = feature_extractor(image, return_tensors = 'pt')
+
+ # Recuperate the outputs of the model together with the attentions
+ outputs = model(image_['pixel_values'], output_attentions = True)
+
+ # Recuperate the predictions (index of the highest logit)
+ predictions = torch.argmax(outputs.logits, axis = -1)
+
+ # Write the name of the predicted label in the middle column
+ mid.markdown(f"{model.config.id2label[predictions[0].item()]}
", unsafe_allow_html=True)
+
+ # Let us recuperate the last attention of the outputs for the first image of the batch
+ attention = outputs.attentions[-1][0]
+
+ # Let us recuperate the attention image
+ attention_image = get_attention(image, attention, size = (224, 224), patch_size = (14, 14))
+
+ # Let us convert the attention image to float32 and to the BGR channel order
+ attention_image = cv2.cvtColor(attention_image.astype('float32'), cv2.COLOR_RGB2BGR)
+
+ # Let us display the attention image in the right column
+ right.header("Attention")
+
+ right.image(attention_image, channels='BGR')
+
+ right.markdown("""---""")
diff --git a/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/config.json b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3557fd99d8d061edafc566a35a11118221e39e0c
--- /dev/null
+++ b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/config.json
@@ -0,0 +1,32 @@
+{
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
+ "architectures": [
+ "ViTForImageClassification"
+ ],
+ "attention_probs_dropout_prob": 0.0,
+ "encoder_stride": 16,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.0,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "fake",
+ "1": "real"
+ },
+ "image_size": 224,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "fake": 0,
+ "real": 1
+ },
+ "layer_norm_eps": 1e-12,
+ "model_type": "vit",
+ "num_attention_heads": 12,
+ "num_channels": 3,
+ "num_hidden_layers": 12,
+ "patch_size": 16,
+ "problem_type": "single_label_classification",
+ "qkv_bias": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.28.1"
+}
diff --git a/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/optimizer.pt b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5877e219829151e89e93064c81635534a8e4ec31
--- /dev/null
+++ b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2db8a17d7b7b6fb8c5b9ca808dfbd68977ee3eb53721a36287e69b2b0ca9a600
+size 686518917
diff --git a/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/pytorch_model.bin b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8e9fabded4b6b258fd271db0b30e216b6ab0be2
--- /dev/null
+++ b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f62ac6440dfa6ddd0737da7788da4569a54b1d0d8cc027ce7f623e6db55b05b
+size 343268717
diff --git a/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/rng_state.pth b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2d6af45510cc94930f69df5e75d7ba697e4b3f4f
--- /dev/null
+++ b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6f8978ea188121d9fecb3f115bfdfa2c5cad2a9b0c1d0a104dddd3e07af89f6
+size 14575
diff --git a/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scaler.pt b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b4e1696fe03642c4134a3216bdef2336343ca69
--- /dev/null
+++ b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cefe4c84c26da8b0778281baf520dd6d4b41a18ea28fd317c86c1f2b76d30fb
+size 557
diff --git a/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scheduler.pt b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..78537900a72e00c70213051ed0c63622f9f85cda
--- /dev/null
+++ b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea1d89fe668b9776e02fd071e17549ee7882e574b4efb0629a6d930572aab462
+size 627
diff --git a/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/trainer_state.json b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f05665bbd2fb7526c2ef5b6b19c6a2ea6759fd74
--- /dev/null
+++ b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/trainer_state.json
@@ -0,0 +1,64 @@
+{
+ "best_metric": 0.6927365064620972,
+ "best_model_checkpoint": "data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK\\checkpoint-1500",
+ "epoch": 1.710376282782212,
+ "global_step": 1500,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00012064414686134504,
+ "loss": 0.6945,
+ "step": 500
+ },
+ {
+ "epoch": 0.57,
+ "eval_accuracy": 0.5081081081081081,
+ "eval_f1": 0.38095238095238093,
+ "eval_loss": 0.6931825280189514,
+ "eval_runtime": 6.1462,
+ "eval_samples_per_second": 30.1,
+ "eval_steps_per_second": 3.905,
+ "step": 500
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00010514828346782865,
+ "loss": 0.6937,
+ "step": 1000
+ },
+ {
+ "epoch": 1.14,
+ "eval_accuracy": 0.4702702702702703,
+ "eval_f1": 0.0,
+ "eval_loss": 0.6942673325538635,
+ "eval_runtime": 11.0225,
+ "eval_samples_per_second": 16.784,
+ "eval_steps_per_second": 2.177,
+ "step": 1000
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 8.962136623985633e-05,
+ "loss": 0.6936,
+ "step": 1500
+ },
+ {
+ "epoch": 1.71,
+ "eval_accuracy": 0.5297297297297298,
+ "eval_f1": 0.6925795053003534,
+ "eval_loss": 0.6927365064620972,
+ "eval_runtime": 6.7463,
+ "eval_samples_per_second": 27.423,
+ "eval_steps_per_second": 3.558,
+ "step": 1500
+ }
+ ],
+ "max_steps": 4385,
+ "num_train_epochs": 5,
+ "total_flos": 2.323984768541614e+17,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/training_args.bin b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..19715330d7fc75b96e5da58f2b4d71da87f3c484
--- /dev/null
+++ b/data/checkpoints/model_2yW4AcqNIb6zLKNIb6zLK/checkpoint-1500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f595feb7a87c355b9cc6060e25a6fe727190de55b96c20541d829a9c3b0fe7af
+size 3643
diff --git a/fake-face-detection/fake_face_detection.egg-info/PKG-INFO b/fake-face-detection/fake_face_detection.egg-info/PKG-INFO
new file mode 100644
index 0000000000000000000000000000000000000000..c30565f4c9d69aeaf89535f2f2103e9f05914c19
--- /dev/null
+++ b/fake-face-detection/fake_face_detection.egg-info/PKG-INFO
@@ -0,0 +1,6 @@
+Metadata-Version: 2.1
+Name: fake-face-detection
+Version: 0.0.1
+Summary: This package contains the main functions and classes used to achieve the project of detecting if a face's image is true or not.
+Author: Oumar Kane
+Author-email: oumar.kane@univ-thies.sn
diff --git a/fake-face-detection/fake_face_detection.egg-info/SOURCES.txt b/fake-face-detection/fake_face_detection.egg-info/SOURCES.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2587114ba68209415d51647abdb76ad899f427a8
--- /dev/null
+++ b/fake-face-detection/fake_face_detection.egg-info/SOURCES.txt
@@ -0,0 +1,12 @@
+setup.py
+fake_face_detection/__init__.py
+fake_face_detection.egg-info/PKG-INFO
+fake_face_detection.egg-info/SOURCES.txt
+fake_face_detection.egg-info/dependency_links.txt
+fake_face_detection.egg-info/top_level.txt
+fake_face_detection/optimization/__init__.py
+fake_face_detection/optimization/bayesian_optimization.py
+fake_face_detection/utils/acquisitions.py
+fake_face_detection/utils/compute_weights.py
+fake_face_detection/utils/generation.py
+fake_face_detection/utils/sampling.py
\ No newline at end of file
diff --git a/fake-face-detection/fake_face_detection.egg-info/dependency_links.txt b/fake-face-detection/fake_face_detection.egg-info/dependency_links.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/fake-face-detection/fake_face_detection.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/fake-face-detection/fake_face_detection.egg-info/top_level.txt b/fake-face-detection/fake_face_detection.egg-info/top_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7a372451ce3e5042daae8a98f034e198ec14c258
--- /dev/null
+++ b/fake-face-detection/fake_face_detection.egg-info/top_level.txt
@@ -0,0 +1 @@
+fake_face_detection
diff --git a/fake-face-detection/fake_face_detection/__init__.py b/fake-face-detection/fake_face_detection/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/fake-face-detection/fake_face_detection/__pycache__/__init__.cpython-310.pyc b/fake-face-detection/fake_face_detection/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..19070272c4623d7707f014c13629c133b4936f5a
Binary files /dev/null and b/fake-face-detection/fake_face_detection/__pycache__/__init__.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/data/__init__.py b/fake-face-detection/fake_face_detection/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/fake-face-detection/fake_face_detection/data/__pycache__/__init__.cpython-310.pyc b/fake-face-detection/fake_face_detection/data/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8796bfd0df1a38463456a30561e640f5b422110c
Binary files /dev/null and b/fake-face-detection/fake_face_detection/data/__pycache__/__init__.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/data/__pycache__/collator.cpython-310.pyc b/fake-face-detection/fake_face_detection/data/__pycache__/collator.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0605035bfde0ca92e160fc40a0c44d20255511a6
Binary files /dev/null and b/fake-face-detection/fake_face_detection/data/__pycache__/collator.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/data/__pycache__/fake_face_dataset.cpython-310.pyc b/fake-face-detection/fake_face_detection/data/__pycache__/fake_face_dataset.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cc0c86710cde2eb417473fa554fa373b25a9ba91
Binary files /dev/null and b/fake-face-detection/fake_face_detection/data/__pycache__/fake_face_dataset.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/data/collator.py b/fake-face-detection/fake_face_detection/data/collator.py
new file mode 100644
index 0000000000000000000000000000000000000000..44780306c69e3d5d054d06d03d08f3e2e6ade517
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/data/collator.py
@@ -0,0 +1,33 @@
+
+import torch
+import numpy as np
+
+def fake_face_collator(batch):
+    """Collate fake and real face samples into a single batch for a vision transformer
+
+    Args:
+        batch (list): The samples; each one holds its 'pixel_values' and its 'labels'
+
+    Returns:
+        dict: The stacked pixel values and the stacked labels
+    """
+
+    def to_tensor(values):
+
+        # numpy arrays are converted to tensors, other values are kept unchanged
+        if isinstance(values, np.ndarray):
+
+            return torch.from_numpy(values)
+
+        return values
+
+    # only the first element of each 'pixel_values' entry is used
+    pixel_values = [to_tensor(sample['pixel_values'][0]) for sample in batch]
+
+    labels = [torch.tensor(sample['labels']) for sample in batch]
+
+    # stack the tensors of the samples along a new first dimension
+    return {
+        'pixel_values': torch.stack(pixel_values),
+        'labels': torch.stack(labels)
+    }
diff --git a/fake-face-detection/fake_face_detection/data/fake_face_dataset.py b/fake-face-detection/fake_face_detection/data/fake_face_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..7431477ec62e18884f17863ddd442a58c50c4c71
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/data/fake_face_dataset.py
@@ -0,0 +1,62 @@
+
+from fake_face_detection.utils.compute_weights import compute_weights
+from torch.utils.data import Dataset
+from PIL import Image
+from glob import glob
+import torch
+import os
+
+class FakeFaceDetectionDataset(Dataset):
+    """Dataset of fake and real face images listed from two directories"""
+
+    def __init__(self, fake_path: str, real_path: str, id_map: dict, transformer, **transformer_kwargs):
+        """Initialize the paths of the images, the labels and the weights
+
+        Args:
+            fake_path (str): The directory of the fake images
+            real_path (str): The directory of the real images
+            id_map (dict): The ids of the labels, read at the 'fake' and 'real' keys
+            transformer: The callable applied to each opened image (it can be None)
+            **transformer_kwargs: The keyword arguments passed to the transformer
+        """
+        # let us load the images
+        self.fake_images = glob(os.path.join(fake_path, "*"))
+        self.real_images = glob(os.path.join(real_path, "*"))
+        self.images = self.fake_images + self.real_images
+
+        # let us recuperate the labels
+        self.fake_labels = [int(id_map['fake'])] * len(self.fake_images)
+        self.real_labels = [int(id_map['real'])] * len(self.real_images)
+        self.labels = self.fake_labels + self.real_labels
+
+        # let us recuperate the weights
+        self.weights = torch.from_numpy(compute_weights(self.labels))
+
+        # let us recuperate the transformer and its kwargs
+        self.transformer = transformer
+        self.transformer_kwargs = transformer_kwargs
+
+        # let us recuperate the length
+        self.length = len(self.labels)
+
+    def __getitem__(self, index):
+        """Recuperate the image at the given index with its label in the 'labels' entry"""
+        with Image.open(self.images[index]) as img:
+            if self.transformer:
+                # let us add a transformation on the image
+                sample = self.transformer(img, **self.transformer_kwargs)
+
+            else:
+                # no transformer: return a copy of the image (the file is closed after the block)
+                # instead of failing by assigning the label to the path of the image
+                sample = {'image': img.copy()}
+
+        # let us add the label inside the obtained dictionary
+        sample['labels'] = self.labels[index]
+
+        return sample
+
+    def __len__(self):
+        """Recuperate the number of images"""
+        return self.length
+
diff --git a/fake-face-detection/fake_face_detection/metrics/__init__.py b/fake-face-detection/fake_face_detection/metrics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/fake-face-detection/fake_face_detection/metrics/__pycache__/__init__.cpython-310.pyc b/fake-face-detection/fake_face_detection/metrics/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..74d75b2e2fa4078d3162184b51c22495db66ff04
Binary files /dev/null and b/fake-face-detection/fake_face_detection/metrics/__pycache__/__init__.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/metrics/__pycache__/compute_metrics.cpython-310.pyc b/fake-face-detection/fake_face_detection/metrics/__pycache__/compute_metrics.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..755acbfd969055343c6728ffd651d234ed137be5
Binary files /dev/null and b/fake-face-detection/fake_face_detection/metrics/__pycache__/compute_metrics.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/metrics/__pycache__/make_predictions.cpython-310.pyc b/fake-face-detection/fake_face_detection/metrics/__pycache__/make_predictions.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f2a9200372759e38a6dc23d518800ac8054b5589
Binary files /dev/null and b/fake-face-detection/fake_face_detection/metrics/__pycache__/make_predictions.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/metrics/compute_metrics.py b/fake-face-detection/fake_face_detection/metrics/compute_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..65e01c32ebbdcca4d02f25602a0efeb274154e66
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/metrics/compute_metrics.py
@@ -0,0 +1,33 @@
+
+import numpy as np
+import evaluate
+
+metrics = {
+    'f1': evaluate.load('f1'),
+    'accuracy': evaluate.load('accuracy'),
+    'roc_auc': evaluate.load('roc_auc', 'multiclass')
+}
+
+def compute_metrics(p): # some part was got from https://huggingface.co/blog/fine-tune-vit
+    """Compute the accuracy, the f1 score and, when it can be calculated, the roc auc
+
+    Args:
+        p (tuple): The prediction scores (one column per class) and the label ids
+
+    Returns:
+        dict: The results of the metrics merged in one dictionary
+    """
+    import warnings
+    predictions, label_ids = p
+    predicted_ids = np.argmax(predictions, axis = 1)
+    metric = metrics['accuracy'].compute(predictions = predicted_ids, references = label_ids)
+    metric.update(metrics['f1'].compute(predictions = predicted_ids, references = label_ids))
+
+    try:
+        metric.update(metrics['roc_auc'].compute(prediction_scores = predictions, references = label_ids))
+    except Exception as error: # best effort: the failure is reported instead of being hidden
+        warnings.warn(f"The roc auc was not computed: {error}")
+
+    return metric
+
+
diff --git a/fake-face-detection/fake_face_detection/metrics/make_predictions.py b/fake-face-detection/fake_face_detection/metrics/make_predictions.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4a9014f73f4ddc731a7204860d0605749020a20
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/metrics/make_predictions.py
@@ -0,0 +1,147 @@
+
+from fake_face_detection.data.fake_face_dataset import FakeFaceDetectionDataset
+from fake_face_detection.metrics.compute_metrics import compute_metrics
+from torch.utils.tensorboard import SummaryWriter
+from PIL.JpegImagePlugin import JpegImageFile
+from torch.utils.data import DataLoader
+from torch.nn import functional as F
+from torchvision import transforms
+import matplotlib.pyplot as plt
+from glob import glob
+from PIL import Image
+from typing import *
+import pandas as pd
+from math import *
+import numpy as np
+import torch
+import os
+
+def get_attention(image: Union[str, Image.Image], attention: torch.Tensor, size: tuple, patch_size: tuple):
+    """Weight the pixels of an image with the attention given to its patches
+
+    Args:
+        image (Union[str, Image.Image]): The path of the image or the loaded image
+        attention (torch.Tensor): The attention of one image, expected as (heads, tokens, tokens)
+        size (tuple): The (height, width) to which the image and the attention are resized
+        patch_size (tuple): The number of patches along the height and the width of the image
+
+    Returns:
+        np.ndarray: The resized image divided by 255 and multiplied by the attention map
+    """
+    # recuperate the image as a numpy array
+    if isinstance(image, str):
+
+        with Image.open(image) as img:
+
+            img = np.array(transforms.Resize(size)(img))
+
+    else:
+
+        img = np.array(transforms.Resize(size)(image))
+
+    # recuperate the attention provided by the last token to the patches (the first token,
+    # presumably the classification token, is skipped) and calculate the mean over the heads
+    attention = attention[:, -1, 1:].mean(axis = 0)
+
+    # place the attention back on the grid of patches before rescaling: interpolating the
+    # flattened sequence spreads each patch over consecutive pixels of a row, not over a square
+    attention = attention.reshape(1, 1, patch_size[0], patch_size[1])
+    # rescale the grid to the size of the image with the nearest scaler
+    attention = F.interpolate(attention, size = tuple(size), mode = 'nearest')
+
+    # recuperate the result (the trailing axis broadcasts the attention over the channels)
+    return img / 255 * attention.reshape(size[0], size[1], 1).numpy()
+
+
+def make_predictions(test_dataset: FakeFaceDetectionDataset,
+                     model,
+                     log_dir: str = "fake_face_logs",
+                     tag: str = "Attentions",
+                     batch_size: int = 3,
+                     size: tuple = (224, 224),
+                     patch_size: tuple = (14, 14),
+                     figsize: tuple = (24, 24)):
+    """Evaluate a model on a test dataset and log the attention of each image to tensorboard
+
+    Args:
+        test_dataset (FakeFaceDetectionDataset): The dataset providing the images and the labels
+        model: The classification model, called with `labels` and `output_attentions`
+        log_dir (str, optional): The directory of the logs. Defaults to "fake_face_logs".
+        tag (str, optional): The tag of the logged figure. Defaults to "Attentions".
+        batch_size (int, optional): The size of the batches. Defaults to 3.
+        size (tuple, optional): The size passed to `get_attention`. Defaults to (224, 224).
+        patch_size (tuple, optional): The patch size passed to `get_attention`. Defaults to (14, 14).
+        figsize (tuple, optional): The size of the figure. Defaults to (24, 24).
+
+    Returns:
+        tuple: The data frame of the true and predicted labels, and the metrics with the mean loss
+    """
+    # the batches are moved to the device of the model instead of always requiring a GPU
+    device = next(model.parameters()).device
+
+    with torch.no_grad():
+        _ = model.eval()
+        # initialize the logger
+        writer = SummaryWriter(os.path.join(log_dir, "attentions"))
+
+        # let us recuperate the images and labels
+        images = test_dataset.images
+        labels = test_dataset.labels
+
+        # let us initialize the predictions
+        predictions = {'attentions': [], 'predictions': [], 'true_labels': labels, 'predicted_labels': []}
+
+        # let us initialize the dataloader
+        test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
+
+        # get the loss
+        loss = 0
+
+        for data in test_dataloader:
+            # recuperate the pixel values (first element of the collated 'pixel_values' entry)
+            pixel_values = data['pixel_values'][0].to(device)
+            # recuperate the labels
+            labels_ = data['labels'].to(device)
+            # recuperate the outputs
+            outputs = model(pixel_values, labels = labels_, output_attentions = True)
+            # recuperate the predictions
+            predictions['predictions'].append(torch.softmax(outputs.logits.detach().cpu(), axis = -1).numpy())
+            # recuperate the attentions of the last encoder layer
+            predictions['attentions'].append(outputs.attentions[-1].detach().cpu())
+            # add the loss
+            loss += outputs.loss.detach().cpu().item()
+
+        predictions['predictions'] = np.concatenate(predictions['predictions'], axis = 0)
+        predictions['attentions'] = torch.cat(predictions['attentions'], dim = 0)
+        predictions['predicted_labels'] = np.argmax(predictions['predictions'], axis = -1).tolist()
+
+        # let us calculate the metrics
+        metrics = compute_metrics((predictions['predictions'], np.array(predictions['true_labels'])))
+        metrics['loss'] = loss / len(test_dataloader)
+
+        # for each image we will visualize his attention (squeeze = False keeps an array of axes for one image)
+        nrows = ceil(sqrt(len(images)))
+        fig, axes = plt.subplots(nrows=nrows, ncols=nrows, figsize = figsize, squeeze = False)
+        axes = axes.flat
+
+        for i, image in enumerate(images):
+            attention_image = get_attention(image, predictions['attentions'][i], size, patch_size)
+            axes[i].imshow(attention_image)
+            axes[i].set_title(f'Image {i + 1}')
+            axes[i].axis('off')
+
+        fig.tight_layout()
+        # remove the axes which do not show any image
+        for i in range(len(images), nrows * nrows):
+            fig.delaxes(axes[i])
+        # log the figure and close the logger to flush the events
+        writer.add_figure(tag, fig)
+        writer.close()
+        # let us remove the predictions and the attentions
+        del predictions['predictions']
+        del predictions['attentions']
+        # let us recuperate the metrics and the predictions
+        return pd.DataFrame(predictions), metrics
+
+
+
diff --git a/fake-face-detection/fake_face_detection/optimization/__init__.py b/fake-face-detection/fake_face_detection/optimization/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/fake-face-detection/fake_face_detection/optimization/__pycache__/__init__.cpython-310.pyc b/fake-face-detection/fake_face_detection/optimization/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e834d9b389d7e42358210eb281676958b646afbe
Binary files /dev/null and b/fake-face-detection/fake_face_detection/optimization/__pycache__/__init__.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/optimization/__pycache__/bayesian_optimization.cpython-310.pyc b/fake-face-detection/fake_face_detection/optimization/__pycache__/bayesian_optimization.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..51cb06b8ed97ba322bae8dae124108c94351ef99
Binary files /dev/null and b/fake-face-detection/fake_face_detection/optimization/__pycache__/bayesian_optimization.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/optimization/__pycache__/fake_face_bayesian_optimization.cpython-310.pyc b/fake-face-detection/fake_face_detection/optimization/__pycache__/fake_face_bayesian_optimization.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..961735b6bba1ee674eab8323e44f5cb0809a3cdf
Binary files /dev/null and b/fake-face-detection/fake_face_detection/optimization/__pycache__/fake_face_bayesian_optimization.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/optimization/bayesian_optimization.py b/fake-face-detection/fake_face_detection/optimization/bayesian_optimization.py
new file mode 100644
index 0000000000000000000000000000000000000000..957281c615273418c382e951bc38ceb9c036d37b
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/optimization/bayesian_optimization.py
@@ -0,0 +1,78 @@
+from fake_face_detection.utils.generation import PI_generate_sample as generate_sample
+from fake_face_detection.utils.acquisitions import PI_acquisition as acquisition
+from fake_face_detection.utils.sampling import get_random_samples
+from sklearn.gaussian_process import GaussianProcessRegressor
+from typing import *
+import pandas as pd
+import numpy as np
+
+class SimpleBayesianOptimization:
+    """Search of the best parameters with a gaussian process fitted on the evaluated samples"""
+
+    def __init__(self, objective: Callable, search_spaces: dict, maximize: bool = True):
+        """Evaluate a first random sample and fit the model on it
+
+        Args:
+            objective (Callable): The function receiving a sample (dict) and returning its score
+            search_spaces (dict): The search spaces from which the samples are drawn
+            maximize (bool, optional): The strategy passed to the sample generator. Defaults to True.
+        """
+        # keep the optimization strategy, the search spaces and the objective function
+        self.maximize = maximize
+        self.search_spaces = search_spaces
+        self.objective = objective
+
+        # draw a random sample and calculate the first score
+        first_sample = get_random_samples(search_spaces)
+        first_score = objective(first_sample)
+
+        # initialize the input data and the scores
+        self.data = [list(first_sample.values())]
+        self.scores = [[first_score]]
+
+        # initialize the model and fit it with the input data and the target
+        self.model = GaussianProcessRegressor()
+        self.model.fit(self.data, self.scores)
+
+    def optimize(self, n_trials: int = 50, n_tests: int = 100):
+        """Finding the best hyperparameters with the Bayesian Optimization
+
+        Args:
+            n_trials (int, optional): The number of trials. Defaults to 50.
+            n_tests (int, optional): The number of random samples to test for each trial. Defaults to 100.
+        """
+        # each trial adds one evaluated sample and fits the model again
+        for _ in range(n_trials):
+            # generate a new sample with the acquisition and the surrogate functions
+            candidate = generate_sample(self.data, self.model, self.search_spaces, n_tests, maximize = self.maximize)
+
+            # give to each value the name of its search space
+            sample = {}
+            for position, name in enumerate(self.search_spaces):
+                sample[name] = candidate[position]
+
+            # recuperate the score before storing the sample and the score
+            score = self.objective(sample)
+            self.data.append(candidate)
+            self.scores.append([score])
+
+            # let us train again the model
+            self.model.fit(self.data, self.scores)
+
+    def get_results(self):
+        """Recuperate the generated samples and the scores
+
+        Returns:
+            pd.DataFrame: One column for each search space and a 'score' column
+        """
+        # the samples are stored as rows, so each search space is a column
+        samples = np.array(self.data, dtype = object)
+
+        results = {}
+        for position, name in enumerate(self.search_spaces):
+            results[name] = samples[:, position]
+
+        results['score'] = np.array(self.scores)[:, 0]
+        return pd.DataFrame(results)
+
+
diff --git a/fake-face-detection/fake_face_detection/optimization/fake_face_bayesian_optimization.py b/fake-face-detection/fake_face_detection/optimization/fake_face_bayesian_optimization.py
new file mode 100644
index 0000000000000000000000000000000000000000..f71707301e3b6a398d29a48f87bb67e904a86946
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/optimization/fake_face_bayesian_optimization.py
@@ -0,0 +1,165 @@
+from fake_face_detection.utils.generation import PI_generate_sample as generate_sample
+from fake_face_detection.utils.acquisitions import PI_acquisition as acquisition
+from fake_face_detection.utils.sampling import get_random_samples
+from sklearn.gaussian_process import GaussianProcessRegressor
+from functools import partial
+from typing import *
+import pandas as pd
+import numpy as np
+import string
+import random
+import pickle
+import os
+
+# characters (ascii letters and digits) from which the random suffixes of the random kwargs are drawn
+letters = string.ascii_letters + string.digits
+
+class SimpleBayesianOptimizationForFakeReal:
+    """Simple bayesian optimization with a gaussian process surrogate and a checkpoint file
+
+    The evaluated samples, their scores, the surrogate model and the index of the
+    last trial are pickled to the checkpoint path. They are loaded back from it
+    when the file already exists at initialization.
+    """
+
+    def __init__(self, objective: Callable, search_spaces: dict, maximize: bool = True, random_kwargs: Optional[dict] = None, kwargs: Optional[dict] = None, checkpoint: str = "data/trials/checkpoint.txt"):
+        """Initialize the optimization or resume it from an existing checkpoint
+
+        Args:
+            objective (Callable): The function to optimize. It is called with the keyword arguments of `kwargs`, the randomized `random_kwargs` and a `config` argument containing the sampled hyperparameters. Its return value is used as the score.
+            search_spaces (dict): A dictionary where the keys are the hyperparameter names and the values are the search spaces
+            maximize (bool, optional): The optimization strategy. If maximize == True -> maximize, else -> minimize. Defaults to True.
+            random_kwargs (dict, optional): Keyword arguments of the objective function with string values. A new random suffix of 7 characters is appended to each value before every call. Defaults to None.
+            kwargs (dict, optional): The other keyword arguments of the objective function. Defaults to None.
+            checkpoint (str, optional): The path of the checkpoint file. Defaults to "data/trials/checkpoint.txt".
+        """
+
+        # recuperate the optimization strategy
+        self.maximize = maximize
+
+        # path where the data and scores are saved (it must stay a path: the state is written to it again later)
+        self.checkpoint = checkpoint
+
+        # initialize the search spaces
+        self.search_spaces = search_spaces
+
+        # copy the dictionaries: neither the caller's objects nor a shared default value are modified
+        self.random_kwargs = dict(random_kwargs) if random_kwargs else {}
+
+        self.kwargs = dict(kwargs) if kwargs else {}
+
+        # initialize the objective function
+        self.objective = objective
+
+        # initialize the model
+        self.model = GaussianProcessRegressor()
+
+        if os.path.exists(self.checkpoint):
+
+            # NOTE: unpickling can execute arbitrary code, only load checkpoint files that you trust
+            with open(self.checkpoint, 'rb') as f:
+
+                state = pickle.Unpickler(f).load()
+
+            self.data = state['data']
+
+            self.scores = state['scores']
+
+            self.model = state['model']
+
+            self.current_trial = state['trial']
+
+            print(f"Checkpoint loaded at trial {self.current_trial}")
+
+        else:
+
+            # recuperate random sample
+            config = get_random_samples(search_spaces)
+
+            # calculate the first score
+            score = self._evaluate(config)
+
+            # initialize the input data
+            self.data = [list(config.values())]
+
+            # initialize the scores
+            self.scores = [[score]]
+
+            # fit the model with the input data and the target
+            self.model.fit(self.data, self.scores)
+
+            # initialize the number of trials to zero
+            self.current_trial = 0
+
+            self._save_checkpoint()
+
+    def _evaluate(self, config: dict):
+        """Call the objective function on a configuration with newly randomized keyword arguments"""
+
+        # initialize the random kwargs with random values
+        random_kwargs = {key: value + ''.join(random.choice(letters) for _ in range(7)) for key, value in self.random_kwargs.items()}
+
+        # add random kwargs to the kwargs
+        self.kwargs.update(random_kwargs)
+
+        # add config to kwargs
+        self.kwargs['config'] = config
+
+        return self.objective(**self.kwargs)
+
+    def _save_checkpoint(self):
+        """Pickle the data, the scores, the model and the current trial to the checkpoint path"""
+
+        # the directory of the checkpoint is created if necessary
+        directory = os.path.dirname(self.checkpoint)
+
+        if directory: os.makedirs(directory, exist_ok = True)
+
+        state = {
+            'data': self.data,
+            'scores': self.scores,
+            'model': self.model,
+            'trial': self.current_trial
+        }
+
+        with open(self.checkpoint, 'wb') as f:
+
+            pickle.Pickler(f).dump(state)
+
+    def optimize(self, n_trials: int = 50, n_tests: int = 100):
+        """Finding the best hyperparameters with the Bayesian Optimization
+
+        The checkpoint is saved after each trial and the trials are numbered from the last saved one.
+
+        Args:
+            n_trials (int, optional): The number of trials. Defaults to 50.
+            n_tests (int, optional): The number of random samples to test for each trial. Defaults to 100.
+        """
+
+        # let us make multiple trials in order to find the best params
+        for trial in range(self.current_trial + 1, self.current_trial + n_trials + 1):
+
+            # let us generate new samples with the acquisition and the surrogate functions
+            new_sample = generate_sample(self.data, self.model, self.search_spaces, n_tests, maximize = self.maximize)
+            config = {key: new_sample[i] for i, key in enumerate(self.search_spaces)}
+
+            # calculate the score of the new sample
+            new_score = self._evaluate(config)
+
+            # let us add the new sample and score to their lists
+            self.data.append(new_sample)
+
+            self.scores.append([new_score])
+
+            # let us train again the model
+            self.model.fit(self.data, self.scores)
+
+            # recuperate the current trial
+            self.current_trial = trial
+
+            self._save_checkpoint()
+
+    def get_results(self):
+        """Recuperate the generated samples and the scores
+
+        Returns:
+            pd.DataFrame: A data frame containing the results
+        """
+        # let us return the results as a data frame
+        samples = np.array(self.data, dtype = object)
+
+        data = {key: samples[:, i] for i, key in enumerate(self.search_spaces)}
+
+        data.update({'score': np.array(self.scores)[:, 0]})
+
+        return pd.DataFrame(data)
+
+
diff --git a/fake-face-detection/fake_face_detection/trainers/__init__.py b/fake-face-detection/fake_face_detection/trainers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/fake-face-detection/fake_face_detection/trainers/__pycache__/__init__.cpython-310.pyc b/fake-face-detection/fake_face_detection/trainers/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a18c911b9481c214098a8a807c50522d7fe9502c
Binary files /dev/null and b/fake-face-detection/fake_face_detection/trainers/__pycache__/__init__.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/trainers/__pycache__/custom_trainer.cpython-310.pyc b/fake-face-detection/fake_face_detection/trainers/__pycache__/custom_trainer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a7e554d6d0ffd29c64a2b471efd52a1ca616f2bc
Binary files /dev/null and b/fake-face-detection/fake_face_detection/trainers/__pycache__/custom_trainer.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/trainers/custom_trainer.py b/fake-face-detection/fake_face_detection/trainers/custom_trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..6575eef80157f93143fe3e15d00c137f9fa2c7c3
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/trainers/custom_trainer.py
@@ -0,0 +1,27 @@
+
+from transformers import Trainer
+import torch
+
+def get_custom_trainer(weights: torch.Tensor):
+    """Create a trainer class using a cross entropy loss weighted by class
+
+    Args:
+        weights (torch.Tensor): The weights given to the `weight` argument of the cross entropy loss
+
+    Returns:
+        type: The `CustomTrainer` class (a subclass of `transformers.Trainer`), not an instance
+    """
+
+    class CustomTrainer(Trainer): # got from https://huggingface.co/docs/transformers/main_classes/trainer
+
+        def compute_loss(self, model, inputs, return_outputs=False):
+
+            # recuperate labels
+            labels = inputs.get("labels")
+
+            # forward pass
+            outputs = model(**inputs)
+
+            # recuperate logits
+            logits = outputs.get("logits")
+
+            # compute custom loss (passing the weights)
+            # `nn` is not imported in this module, so the loss is reached through `torch.nn`
+            # the weights are placed on the device (and cast to the type) of the logits
+            loss_fct = torch.nn.CrossEntropyLoss(weight=weights.to(device=logits.device, dtype=logits.dtype))
+
+            loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
+
+            return (loss, outputs) if return_outputs else loss
+
+    return CustomTrainer
diff --git a/fake-face-detection/fake_face_detection/trainers/custom_trainer_v1.txt b/fake-face-detection/fake_face_detection/trainers/custom_trainer_v1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/fake-face-detection/fake_face_detection/trainers/search_train.py b/fake-face-detection/fake_face_detection/trainers/search_train.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3ecd38018ecfec3fd9ca81add28ec3b106f6814
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/trainers/search_train.py
@@ -0,0 +1,73 @@
+
+from fake_face_detection.metrics.compute_metrics import compute_metrics
+from fake_face_detection.data.collator import fake_face_collator
+from transformers import Trainer, TrainingArguments
+from torch.utils.tensorboard import SummaryWriter
+from torch import nn
+from typing import *
+import numpy as np
+import json
+import os
+
+def train(epochs: int, output_dir: str, config: dict, model: nn.Module, trainer, get_datasets: Callable, log_dir: str = "fake_face_logs", metric = 'accuracy'):
+    """Train a model with a configuration of hyperparameters and return an evaluation metric
+
+    Args:
+        epochs (int): The number of training epochs
+        output_dir (str): The output directory given to the training arguments
+        config (dict): The hyperparameters. The keys 'h_flip_p', 'v_flip_p', 'gray_scale_p', 'rotation', 'batch_size', 'lr' and 'weight_decay' are read. The numpy scalars that it contains are replaced in place by python values.
+        model (nn.Module): The model to train
+        trainer: The trainer class. It is instantiated with the keyword arguments model, args, data_collator, compute_metrics, train_dataset and eval_dataset.
+        get_datasets (Callable): A function called with the values of 'h_flip_p', 'v_flip_p', 'gray_scale_p' and 'rotation' and returning the training and the test datasets
+        log_dir (str, optional): The directory of the tensorboard logs. Defaults to "fake_face_logs".
+        metric (str, optional): The name of the metric to return, without the 'eval_' prefix. Defaults to 'accuracy'.
+
+    Returns:
+        The value of the metric `eval_{metric}` calculated on the test dataset
+    """
+
+    print("------------------------- Beginning of training")
+
+    # reformat the numpy scalars of the config (np.int32, np.int64, np.float32, ...) to python types
+    # json and the tensorboard hyperparameter panel do not accept every numpy scalar type
+    for key, value in config.items():
+
+        if isinstance(value, np.generic): config[key] = value.item()
+
+    pretty = json.dumps(config, indent = 4)
+
+    print(f"Current Config: \n {pretty}")
+
+    # recuperate the dataset
+    train_dataset, test_dataset = get_datasets(config['h_flip_p'], config['v_flip_p'], config['gray_scale_p'], config['rotation'])
+
+    # initialize the arguments of the training
+    training_args = TrainingArguments(output_dir,
+                                      per_device_train_batch_size=config['batch_size'],
+                                      evaluation_strategy='steps',
+                                      save_strategy='steps',
+                                      logging_strategy='steps',
+                                      num_train_epochs=epochs,
+                                      fp16=True,
+                                      save_total_limit=2,
+                                      remove_unused_columns=True,
+                                      push_to_hub=False,
+                                      logging_dir=os.path.join(log_dir, os.path.basename(output_dir)),
+                                      load_best_model_at_end=True,
+                                      learning_rate=config['lr'],
+                                      weight_decay=config['weight_decay']
+                                      )
+
+    # initialize the trainer
+    trainer_ = trainer(
+        model = model,
+        args = training_args,
+        data_collator = fake_face_collator,
+        compute_metrics = compute_metrics,
+        train_dataset = train_dataset,
+        eval_dataset = test_dataset
+    )
+
+    # train the model
+    trainer_.train()
+
+    # evaluate the model and recuperate metrics
+    metrics = trainer_.evaluate(test_dataset)
+
+    # add metrics and config to the hyperparameter panel of tensorboard
+    with SummaryWriter(os.path.join(log_dir, 'hparams')) as logger:
+
+        logger.add_hparams(
+            config, metrics
+        )
+
+    print(metrics)
+
+    print("------------------------- End of training")
+    # recuperate the metric to evaluate
+    return metrics[f'eval_{metric}']
+
diff --git a/fake-face-detection/fake_face_detection/utils/__pycache__/acquisitions.cpython-310.pyc b/fake-face-detection/fake_face_detection/utils/__pycache__/acquisitions.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dd2f28a21735e4f25fd43177316316665108be3c
Binary files /dev/null and b/fake-face-detection/fake_face_detection/utils/__pycache__/acquisitions.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/utils/__pycache__/compute_weights.cpython-310.pyc b/fake-face-detection/fake_face_detection/utils/__pycache__/compute_weights.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e1433b6ad11b770dd7cf7760f744f3b72d6c0dfc
Binary files /dev/null and b/fake-face-detection/fake_face_detection/utils/__pycache__/compute_weights.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/utils/__pycache__/generation.cpython-310.pyc b/fake-face-detection/fake_face_detection/utils/__pycache__/generation.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7db3cb01c308aead5da15d0917f113e4f30e9464
Binary files /dev/null and b/fake-face-detection/fake_face_detection/utils/__pycache__/generation.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/utils/__pycache__/sampling.cpython-310.pyc b/fake-face-detection/fake_face_detection/utils/__pycache__/sampling.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fa050c4342b8a2cbbf87f30af96d0d99d5905341
Binary files /dev/null and b/fake-face-detection/fake_face_detection/utils/__pycache__/sampling.cpython-310.pyc differ
diff --git a/fake-face-detection/fake_face_detection/utils/acquisitions.py b/fake-face-detection/fake_face_detection/utils/acquisitions.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfb966e6bcbc37a914a4ae22f20a6dabc28ac36d
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/acquisitions.py
@@ -0,0 +1,36 @@
+from sklearn.gaussian_process import GaussianProcessRegressor
+from scipy.stats import norm
+from typing import *
+
+def PI_acquisition(X: List, X_prime: List, model: GaussianProcessRegressor, maximize: bool = True):
+    """Acquisition function for bayesian optimization using probability of improvement
+
+    Args:
+        X (List): A list containing the input data
+        X_prime (List): A list containing the generated samples
+        model (GaussianProcessRegressor): The gaussian model to use
+        maximize (bool, optional): A boolean value indicating the optimization objective. Defaults to True.
+
+    Returns:
+        numpy.ndarray: The probability of improvement of each generated sample
+    """
+
+    # let us predict the means for the input data
+    mu = model.predict(X)
+
+    # let us calculate the means and standard deviation for the random samples
+    mu_e, std_e = model.predict(X_prime, return_std=True)
+
+    if not maximize:
+
+        # minimizing is maximizing the opposite of the means
+        mu = -mu
+
+        mu_e = -mu_e
+
+    # let us take the best mean
+    mu_best = max(mu)
+
+    # let us calculate the probability of improvement
+    # a small constant is added to the standard deviation: a deviation of zero
+    # would otherwise produce nan or infinite values in the division
+    probs = norm.cdf((mu_e - mu_best) / (std_e + 1e-9))
+
+    return probs
diff --git a/fake-face-detection/fake_face_detection/utils/compute_weights.py b/fake-face-detection/fake_face_detection/utils/compute_weights.py
new file mode 100644
index 0000000000000000000000000000000000000000..b90b26fa9667273bc05ba7d778cc255eca36d6c1
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/compute_weights.py
@@ -0,0 +1,21 @@
+import sklearn.utils as skl
+from typing import *
+import numpy as np
+
+def compute_weights(samples: List[int]):
+    """Calculate the class weights with the 'balanced' method of scikit-learn
+
+    Args:
+        samples (List[int]): The class of each sample
+
+    Returns:
+        numpy.ndarray: An array containing one weight for each unique class
+    """
+
+    # the unique classes and their balanced weights are calculated in a single expression
+    return skl.class_weight.compute_class_weight('balanced', classes=np.unique(samples), y = samples)
diff --git a/fake-face-detection/fake_face_detection/utils/display_pil.py b/fake-face-detection/fake_face_detection/utils/display_pil.py
new file mode 100644
index 0000000000000000000000000000000000000000..a29b76f8c7502fb6b866f39ad4f9b13a9328f3c8
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/display_pil.py
@@ -0,0 +1,43 @@
+from PIL.JpegImagePlugin import JpegImageFile
+from PIL import ImageDraw
+from PIL import Image
+from typing import *
+
+def display(images: List[JpegImageFile], labels: List[str], w: int = 300, h: int = 200, left_color: str = "white", right_color: str = "white"):
+    """Place two images side by side with a label on each one
+
+    Args:
+        images (List[JpegImageFile]): The two images (left then right)
+        labels (List[str]): The two labels (left then right)
+        w (int, optional): The width of the resulting image. Defaults to 300.
+        h (int, optional): The height of the resulting image. Defaults to 200.
+        left_color (str, optional): The color of the left label. Defaults to "white".
+        right_color (str, optional): The color of the right label. Defaults to "white".
+
+    Returns:
+        PIL.Image: The image containing the two resized images and their labels
+    """
+
+    # each image takes one half of the width
+    half_w = w // 2
+
+    # the canvas receiving the two images and the object used to write on it
+    canvas = Image.new('RGB', size=(w, h))
+
+    pen = ImageDraw.Draw(canvas, mode='RGB')
+
+    # top left corner of the right image and size of both images
+    right_origin = (half_w, 0)
+
+    panel_size = (half_w, h)
+
+    # left image: pasted at the top left corner of the canvas
+    left_panel = images[0].resize(panel_size)
+
+    canvas.paste(left_panel)
+
+    # right image: pasted at the middle of the canvas
+    right_panel = images[1].resize(panel_size)
+
+    canvas.paste(right_panel, box = right_origin)
+
+    # write each label on the top left corner of its image
+    pen.text((0, 0), labels[0], fill=left_color)
+
+    pen.text(right_origin, labels[1], fill=right_color)
+
+    return canvas
diff --git a/fake-face-detection/fake_face_detection/utils/downscale_image.py b/fake-face-detection/fake_face_detection/utils/downscale_image.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dd49666b14f7faabdeb015420d5640b22fc6a42
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/downscale_image.py
@@ -0,0 +1,46 @@
+
+from PIL.JpegImagePlugin import JpegImageFile
+from PIL import Image
+from math import *
+import numpy as np
+import itertools
+
+def downscale_image(image: JpegImageFile, size: tuple = (224, 224)):
+    """Downscale an image by replacing each box of pixels by its mean
+
+    Args:
+        image (JpegImageFile): The image to downscale. Its width and height must be multiples of the new width and height.
+        size (tuple, optional): The (width, height) of the downscaled image. Defaults to (224, 224).
+
+    Returns:
+        PIL.Image.Image: The downscaled image
+    """
+
+    # pillow sizes are (width, height)
+    img_w, img_h = image.size
+
+    new_w, new_h = size
+
+    assert img_w % new_w == 0 and img_h % new_h == 0
+
+    # get box size
+    box_w, box_h = img_w // new_w, img_h // new_h
+
+    print(f"Height and width of each box: {(box_h, box_w)}")
+
+    # the array of pixels has the shape (height, width) or (height, width, channels)
+    pixels = np.array(image)
+
+    # an images with only one channel has no channel axis: let us add it temporarily
+    single_channel = pixels.ndim == 2
+
+    if single_channel: pixels = pixels[:, :, np.newaxis]
+
+    # split the height and width axes into (number of boxes, size of a box) and calculate the mean of each box
+    boxes = pixels.reshape(new_h, box_h, new_w, box_w, pixels.shape[2])
+
+    means = boxes.mean(axis = (1, 3))
+
+    if single_channel: means = means[:, :, 0]
+
+    # transform to a pillow image
+    image = Image.fromarray(np.uint8(means))
+
+    return image
+
+
+
diff --git a/fake-face-detection/fake_face_detection/utils/generation.py b/fake-face-detection/fake_face_detection/utils/generation.py
new file mode 100644
index 0000000000000000000000000000000000000000..242fe7a4a034abe348e86f4a802938afc055c96a
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/generation.py
@@ -0,0 +1,28 @@
+from fake_face_detection.utils.acquisitions import PI_acquisition
+from fake_face_detection.utils.sampling import get_random_samples
+from sklearn.gaussian_process import GaussianProcessRegressor
+from typing import *
+import numpy as np
+
+def PI_generate_sample(X: Iterable, model: GaussianProcessRegressor, search_spaces: dict, n_tests: int = 100, maximize: bool = True):
+    """Propose the random sample with the highest probability of improvement
+
+    Args:
+        X (Iterable): The samples that were already evaluated
+        model (GaussianProcessRegressor): The surrogate model given to the acquisition function
+        search_spaces (dict): The search spaces from which the random samples are drawn
+        n_tests (int, optional): The number of random samples to compare. Defaults to 100.
+        maximize (bool, optional): The optimization strategy given to the acquisition function. Defaults to True.
+
+    Returns:
+        List: The values of the selected random sample
+    """
+
+    # draw the random candidates (only the values are kept)
+    candidates = []
+
+    for _ in range(n_tests):
+
+        drawn = get_random_samples(search_spaces)
+
+        candidates.append(list(drawn.values()))
+
+    # score every candidate with the acquisition function
+    scores = PI_acquisition(X, candidates, model, maximize = maximize)
+
+    # the candidate with the highest score is returned
+    best = np.argmax(scores)
+
+    return candidates[best]
diff --git a/fake-face-detection/fake_face_detection/utils/get_patches.py b/fake-face-detection/fake_face_detection/utils/get_patches.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ba64a000e7e693faff1907410322d3e7fa79cb
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/get_patches.py
@@ -0,0 +1,35 @@
+
+from PIL.JpegImagePlugin import JpegImageFile
+from math import *
+import itertools
+
+def get_patches(image: JpegImageFile, n_patches: int):
+    """Split an image into a square grid of patches
+
+    Args:
+        image (JpegImageFile): The image to split
+        n_patches (int): The number of patches. The grid contains int(sqrt(n_patches)) rows and columns, so a perfect square is expected.
+
+    Raises:
+        ValueError: The number of patches is lower than 1
+
+    Returns:
+        List: The patches ordered from left to right then from top to bottom
+    """
+
+    if n_patches < 1:
+
+        raise ValueError("The number of patches must be at least 1!")
+
+    # get width and height of the image (pillow sizes are (width, height))
+    width, height = image.size
+
+    # let us calculate the number of divisions to make to the width and height of the image
+    n_patch = int(sqrt(n_patches))
+
+    patch_h = height // n_patch # notice that the height must be divisible by the number of divisions
+
+    patch_w = width // n_patch # notice that the width must be divisible by the number of divisions
+
+    print(f"Height and width of each patch: {(patch_h, patch_w)}")
+
+    # we will find the first coordinates (upper, left) of the boxes with product function of itertools
+    first_coordinates = itertools.product(range(0, patch_h * n_patch, patch_h),
+                                          range(0, patch_w * n_patch, patch_w))
+
+    # a pillow box is (left, upper, right, lower)
+    return [image.crop((left, upper, left + patch_w, upper + patch_h)) for upper, left in first_coordinates]
+
+
+
diff --git a/fake-face-detection/fake_face_detection/utils/sampling.py b/fake-face-detection/fake_face_detection/utils/sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1714ee6a21633eefaacdcdcb25cd6991063abe6
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/sampling.py
@@ -0,0 +1,70 @@
+from typing import *
+import numpy as np
+import random
+
+def get_random_sample(search_space: dict, p: Union[List[float], None] = None):
+    """Recuperate a random sample
+
+    Args:
+        search_space (dict): A dictionary defining the search space. Its keys are {'min', 'max'} (random number between the two bounds, both included for integers), {'value'} (constant), {'values'} or {'values', 'p'} (random choice)
+        p (Union[List[float], None], optional): The probabilities of the 'values'. It is only used when the search space has no 'p' key. Defaults to None.
+
+    Raises:
+        ValueError: 'min' and 'max' can only be numbers
+        KeyError: Only the following keys can be provided {'min', 'max'}, {'value'}, {'values'} or {'values', 'p'}
+
+    Returns:
+        Union[int, float, str]: The random sample
+    """
+
+    keys = set(search_space)
+
+    if keys == {'min', 'max'}:
+
+        low, high = search_space['min'], search_space['max']
+
+        assert low < high
+
+        if isinstance(low, int) and isinstance(high, int):
+
+            return random.randint(low, high)
+
+        elif isinstance(low, (int, float)) and isinstance(high, (int, float)):
+
+            # at least one of the bounds is a float
+            return random.uniform(low, high)
+
+        else:
+
+            raise ValueError("You can only provide int or float values with min max!")
+
+    elif keys == {'value'}:
+
+        return search_space['value']
+
+    elif keys == {'values'} or keys == {'values', 'p'}:
+
+        # the probabilities of the search space have the priority over the argument
+        p = search_space.get('p', p)
+
+        return np.random.choice(search_space['values'], size = (1), p = p)[0]
+
+    else:
+
+        raise KeyError("You didn't provide right keys! Try between: {'min', 'max'}, {'value'}, {'values'} or {'values', 'p'}")
+
+
+def get_random_samples(search_spaces: dict):
+    """Draw one random value for each hyperparameter of the search spaces
+
+    Args:
+        search_spaces (dict): The search space of each hyperparameter, indexed by the name of the hyperparameter
+
+    Returns:
+        dict: The sampled value of each hyperparameter, indexed by the name of the hyperparameter
+    """
+
+    # the values are sampled in the order of the search spaces
+    return {name: get_random_sample(space) for name, space in search_spaces.items()}
diff --git a/fake-face-detection/fake_face_detection/utils/split_data.py b/fake-face-detection/fake_face_detection/utils/split_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..b79b0028087968d78637b74f81235dc2a2c3096c
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/split_data.py
@@ -0,0 +1,54 @@
+
+from sklearn.model_selection import train_test_split
+from glob import glob
+import shutil
+import os
+
+def split_data_from_dir(path: str, new_path: str, test_size: float = 0.2, valid_size: float = 0.2, force_placement: bool = True):
+    """Split the files of each class directory between training, validation and test directories
+
+    The files of `path/<class>` are copied to `new_path/train/<class>`, `new_path/valid/<class>` and `new_path/test/<class>`.
+
+    Args:
+        path (str): The directory containing one sub directory for each class
+        new_path (str): An existing directory in which the 'train', 'test' and 'valid' directories are created
+        test_size (float, optional): The proportion of the files of a class placed in the test set. Defaults to 0.2.
+        valid_size (float, optional): The proportion of the remaining files (after removing the test set) placed in the validation set. The validation set is empty if it is equal to 0. Defaults to 0.2.
+        force_placement (bool, optional): Indicates if already existing class directories can be used. Defaults to True.
+
+    Raises:
+        OSError: A training, validation or test directory of a class already exists and force_placement is False
+    """
+
+    assert test_size > 0 and test_size < 0.5 and valid_size >= 0 and valid_size < 0.5
+
+    assert os.path.exists(path) and os.path.isdir(path)
+
+    assert os.path.exists(new_path) and os.path.isdir(new_path)
+
+    # let us recuperate the image of each directory and split the images before making them in new directories
+    for dir_ in os.listdir(path):
+
+        # let us recuperate the path of the directory
+        dir_path = os.path.join(path, dir_)
+
+        # let us verify if it is truly a directory before making the following processes
+        if not os.path.isdir(dir_path): continue
+
+        # let us recuperate the files' paths in it
+        images = os.listdir(dir_path)
+
+        # let us split the data between training and test set
+        train_set, test_set = train_test_split(images, test_size = test_size)
+
+        # let us split the training set between training and validation set
+        # train_test_split does not accept a proportion of zero: no validation set in that case
+        if valid_size > 0:
+
+            train_set, valid_set = train_test_split(train_set, test_size = valid_size)
+
+        else:
+
+            valid_set = []
+
+        # the destination directory of each set
+        targets = {set_: os.path.join(new_path, set_, dir_) for set_ in ['train', 'test', 'valid']}
+
+        if not force_placement and any(os.path.exists(target) for target in targets.values()):
+
+            raise OSError(f"One of the training, validation or testing directory for the class {dir_} already exists! Enable the force_placement argument if you want to use already created directories.")
+
+        # let us create the train test and valid directories (some of them can already exist)
+        for target in targets.values():
+
+            os.makedirs(target, exist_ok = True)
+
+        # let us place the sets in their locations
+        for set_, files in (('train', train_set), ('test', test_set), ('valid', valid_set)):
+
+            for image in files:
+
+                shutil.copyfile(os.path.join(dir_path, image), os.path.join(targets[set_], image))
+
+    print(f"All the file in {path} was copied in {new_path} successfully!")
+
+
diff --git a/fake-face-detection/fake_face_detection/utils/visualize_images.py b/fake-face-detection/fake_face_detection/utils/visualize_images.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab97a5fe330399d647ad0f6588e58243121bf977
--- /dev/null
+++ b/fake-face-detection/fake_face_detection/utils/visualize_images.py
@@ -0,0 +1,79 @@
+from torch.utils.tensorboard import SummaryWriter
+from PIL.JpegImagePlugin import JpegImageFile
+import matplotlib.pyplot as plt
+from typing import *
+from math import *
+import numpy as np
+import random
+import torch
+import os
+
+# use a style with no grid
+plt.style.use("_mpl-gallery-nogrid")
+
+def visualize_images(images_dict: Dict[str, Iterable[Union[JpegImageFile, torch.Tensor, np.ndarray]]],
+                     log_directory: str = "fake_face_logs",
+                     n_images: int = 40,
+                     figsize = (15, 15),
+                     seed: Union[int, None] = None):
+    """Trace randomly chosen images on a square grid and add the figure to tensorboard
+
+    Args:
+        images_dict (Dict[str, Iterable]): The images of each tag. The tags are used as titles when there is more than one tag.
+        log_directory (str, optional): The directory of the tensorboard logs (the figure is written in its "images" sub directory). Defaults to "fake_face_logs".
+        n_images (int, optional): The maximum number of images to trace. Defaults to 40.
+        figsize (tuple, optional): The size of the figure. Defaults to (15, 15).
+        seed (Union[int, None], optional): The seed given to the `random` module. Defaults to None.
+
+    Raises:
+        ValueError: There is no image to trace
+    """
+
+    assert len(images_dict) > 0
+
+    assert isinstance(images_dict, dict)
+
+    # add seed
+    random.seed(seed)
+
+    # verify if we must add a title for each image
+    add_titles = len(images_dict) > 1
+
+    images_ = []
+
+    # modify the dictionary to obtain a tuple of images with their corresponding tags
+    for key in images_dict:
+
+        for image in images_dict[key]:
+
+            images_.append((key, image))
+
+    # we take the number of images in the list if n_images is larger
+    n_images = min(n_images, len(images_))
+
+    if n_images < 1:
+
+        raise ValueError("There is no image to visualize!")
+
+    # choose random images (without replacement: an image is not traced twice)
+    images = random.sample(images_, k = n_images)
+
+    # convert the pillow images to arrays (the tags are kept with the images)
+    images = [(key, np.array(image) if isinstance(image, JpegImageFile) else image) for key, image in images]
+
+    # calculate the number of rows and columns
+    n_rows = ceil(sqrt(n_images))
+
+    # squeeze = False: an array of axes is returned even for a grid of one image
+    fig, axs = plt.subplots(nrows=n_rows, ncols=n_rows, figsize = figsize, squeeze = False)
+
+    # flat the axes
+    axs = axs.flat
+
+    # trace images
+    for i in range(n_images):
+
+        axs[i].imshow(images[i][1], interpolation = "nearest")
+
+        if add_titles: axs[i].set_title(images[i][0])
+
+        axs[i].axis('off')
+
+    # add padding to the figure
+    fig.tight_layout()
+
+    # deleting no necessary plots
+    for i in range(n_images, n_rows * n_rows):
+
+        fig.delaxes(axs[i])
+
+    # add figure to tensorboard
+    with SummaryWriter(os.path.join(log_directory, "images")) as writer:
+
+        # identify the tag
+        tag = "_".join(list(images_dict)) if add_titles else list(images_dict.keys())[0]
+
+        writer.add_figure(tag = tag, figure = fig)
+
diff --git a/fake-face-detection/setup.py b/fake-face-detection/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..5879c22b146a5a832b38ffb59ed8962af084a799
--- /dev/null
+++ b/fake-face-detection/setup.py
@@ -0,0 +1,9 @@
+# packaging script of the `fake_face_detection` distribution
+# NOTE(review): no `packages` argument is given to `setup` — confirm that the installed setuptools discovers the package automatically
+from setuptools import setup
+
+setup(
+ name="fake_face_detection",
+ version="0.0.1",
+ author="Oumar Kane",
+ author_email="oumar.kane@univ-thies.sn",
+ description="This package contains the main functions and classes used to achieve the project of detecting if a face's image is true or not.",
+)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b78b35a8596c550abf1e1f6bf1a10e9af9ea3c49
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,327 @@
+absl-py==1.3.0
+accessible-pygments==0.0.4
+aiohttp==3.8.3
+aiosignal==1.3.1
+alabaster==0.7.13
+alembic==1.10.4
+altair==4.2.2
+appdirs==1.4.4
+argcomplete==1.10.3
+asttokens==2.1.0
+async-timeout==4.0.2
+atomicwrites==1.4.1
+attrs==21.4.0
+audioread==3.0.0
+Babel==2.12.1
+backcall==0.2.0
+beautifulsoup4==4.8.2
+black==22.12.0
+bleach==6.0.0
+blinker==1.6.2
+blis==0.7.9
+boto3==1.26.118
+botocore==1.29.118
+cachelib==0.9.0
+cachetools==5.2.0
+catalogue==2.0.8
+certifi==2022.9.24
+cffi==1.15.1
+chardet==3.0.4
+charset-normalizer==2.0.12
+click==8.1.3
+click-default-group==1.2.2
+cloup==0.13.1
+cmaes==0.9.1
+colorama==0.4.6
+colorlog==6.7.0
+colour==0.1.5
+compressed-rtf==1.0.6
+confection==0.0.4
+contourpy==1.0.6
+cryptography==40.0.1
+cssselect==1.2.0
+cssselect2==0.7.0
+cvxpy==1.2.2
+cycler==0.11.0
+cymem==2.0.7
+Cython==0.29.28
+datasets==2.11.0
+debugpy==1.6.3
+decorator==5.1.1
+defusedxml==0.7.1
+dill==0.3.6
+distlib==0.3.6
+docker-pycreds==0.4.0
+docutils==0.19
+docx2txt==0.8
+earthengine-api==0.1.331
+ebcdic==1.1.1
+ecos==2.0.10
+efficientnet-pytorch==0.7.1
+entrypoints==0.4
+evaluate==0.4.0
+executing==1.2.0
+extract-msg==0.28.7
+fastjsonschema==2.16.3
+filelock==3.12.0
+fire==0.4.0
+Flask==2.2.2
+Flask-Session==0.4.0
+Flask-SQLAlchemy==3.0.2
+fonttools==4.38.0
+fr-core-news-lg @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_lg-3.5.0/fr_core_news_lg-3.5.0-py3-none-any.whl
+fr-core-news-md @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.5.0/fr_core_news_md-3.5.0-py3-none-any.whl
+frozenlist==1.3.3
+fsspec==2022.11.0
+furo==2023.5.20
+future==0.18.2
+gdown==4.7.1
+gensim==4.2.0
+gitdb==4.0.10
+GitPython==3.1.31
+glcontext==2.3.7
+google-api-core==2.10.2
+google-api-python-client==2.65.0
+google-auth==2.14.1
+google-auth-httplib2==0.1.0
+google-auth-oauthlib==0.4.6
+google-cloud-core==2.3.2
+google-cloud-storage==2.6.0
+google-crc32c==1.5.0
+google-resumable-media==2.4.0
+googleapis-common-protos==1.56.4
+greenlet==2.0.1
+groundwork-sphinx-theme==1.1.1
+grpcio==1.50.0
+httplib2==0.21.0
+huggingface-hub==0.12.0
+idna==3.4
+imagesize==1.4.1
+IMAPClient==2.1.0
+importlib-metadata==6.6.0
+ipykernel==6.17.1
+ipython==8.6.0
+isosurfaces==0.1.0
+itsdangerous==2.1.2
+jedi==0.18.1
+Jinja2==3.1.2
+jmespath==1.0.1
+joblib==1.2.0
+jsonschema==4.17.3
+jupyter_client==7.4.5
+jupyter_core==5.3.0
+jupyterlab-pygments==0.2.2
+kiwisolver==1.4.4
+langcodes==3.3.0
+librosa==0.9.2
+lightning-utilities==0.3.0
+livereload==2.6.3
+llvmlite==0.39.1
+lxml==4.9.2
+Mako==1.2.4
+manim==0.17.2
+ManimPango==0.4.3
+mapbox-earcut==1.0.1
+mapping==0.1.6
+Markdown==3.4.1
+markdown-it-py==1.1.0
+MarkupSafe==2.1.1
+matplotlib==3.6.2
+matplotlib-inline==0.1.6
+mdformat==0.7.16
+mdformat_deflist==0.1.2
+mdformat_frontmatter==2.0.1
+mdformat_myst==0.1.5
+mdformat_tables==0.4.1
+mdit-py-plugins==0.3.5
+mdurl==0.1.2
+mistune==2.0.5
+moderngl==5.7.4
+moderngl-window==2.4.2
+more-itertools==8.13.0
+msgpack==1.0.5
+multidict==6.0.2
+multipledispatch==0.6.0
+multiprocess==0.70.14
+munch==2.5.0
+murmurhash==1.0.9
+mypy-extensions==0.4.3
+myst-parser==1.0.0
+nbclient==0.8.0
+nbconvert==7.4.0
+nbformat==5.8.0
+nbsphinx==0.9.2
+nest-asyncio==1.5.6
+networkx==2.8.8
+nlpaug==1.1.11
+nltk==3.7
+numba==0.56.4
+numpy==1.23.4
+oauthlib==3.2.2
+olefile==0.46
+opencv-python==4.7.0.68
+optuna==3.1.1
+osqp==0.6.2.post5
+packaging==21.3
+pandarallel==1.6.3
+pandas==1.5.1
+pandocfilters==1.5.0
+parso==0.8.3
+pathspec==0.10.3
+pathtools==0.1.2
+pathy==0.10.1
+pdf2image==1.16.3
+pdfminer.six==20191110
+pdfquery==0.4.3
+pickleshare==0.7.5
+Pillow==9.3.0
+piqa==1.2.2
+platformdirs==3.2.0
+plotly==5.14.1
+pluggy==0.13.1
+pooch==1.6.0
+portalocker==2.7.0
+preshed==3.0.8
+pretrainedmodels==0.7.4
+prompt-toolkit==3.0.32
+protobuf==3.20.3
+psutil==5.9.4
+pure-eval==0.2.2
+py==1.11.0
+pyarrow==11.0.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycairo==1.23.0
+pycparser==2.21
+pycryptodome==3.17
+pydantic==1.10.7
+pydata-sphinx-theme==0.13.3
+pydeck==0.8.1b0
+pydub==0.25.1
+pyglet==2.0.4
+Pygments==2.14.0
+Pympler==1.0.1
+pyparsing==3.0.9
+PyPDF2==3.0.1
+pyquery==2.0.0
+pyrr==0.10.3
+pyrsistent==0.19.3
+PySocks==1.7.1
+pytesseract==0.3.10
+pytest==5.4.3
+python-dateutil==2.8.2
+python-json-logger==2.0.7
+python-pptx==0.6.21
+pytorch-lightning==1.8.1
+pytorch-pretrained-bert==0.6.2
+pytz==2022.6
+pytz-deprecation-shim==0.1.0.post0
+pywin32==305
+PyYAML==6.0
+pyzmq==24.0.1
+qdldl==0.1.5.post2
+ray==2.3.1
+regex==2022.10.31
+reportlab==3.6.12
+requests==2.27.1
+requests-oauthlib==1.3.1
+resampy==0.4.2
+responses==0.18.0
+rich==13.3.1
+roman==4.0
+rsa==4.9
+rst-to-myst==0.3.4
+ruamel.yaml==0.17.28
+ruamel.yaml.clib==0.2.7
+s3transfer==0.6.0
+sacrebleu==2.3.1
+scikit-learn==1.1.3
+scipy==1.9.3
+screeninfo==0.8.1
+scs==3.2.2
+seaborn==0.12.1
+segmentation-models-pytorch==0.3.2
+sentry-sdk==1.21.0
+setproctitle==1.3.2
+setuptools-scm==7.0.5
+six==1.12.0
+skia-pathops==0.7.4
+smart-open==6.2.0
+smmap==5.0.0
+snowballstemmer==2.2.0
+sortedcontainers==2.4.0
+soundfile==0.11.0
+soupsieve==2.4
+spacy==3.5.1
+spacy-legacy==3.0.12
+spacy-loggers==1.0.4
+SpeechRecognition==3.8.1
+Sphinx==6.2.1
+sphinx-autobuild==2021.3.14
+sphinx-basic-ng==1.0.0b1
+sphinx-book-theme==1.0.1
+sphinx-proof==0.1.3
+sphinx-rtd-theme==1.2.1
+sphinxcontrib-applehelp==1.0.4
+sphinxcontrib-devhelp==1.0.2
+sphinxcontrib-htmlhelp==2.0.1
+sphinxcontrib-jquery==4.1
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.3
+sphinxcontrib-serializinghtml==1.1.5
+sphynx==0.0.3
+SQLAlchemy==1.4.44
+srsly==2.4.6
+srt==3.5.2
+stack-data==0.6.1
+streamlit==1.22.0
+structlog==21.5.0
+svgelements==1.9.0
+svglib==1.5.1
+tabulate==0.9.0
+tenacity==8.2.2
+tensorboard==2.11.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.1
+tensorboardX==2.6
+termcolor==2.1.0
+textract==1.6.5
+thinc==8.1.9
+threadpoolctl==3.1.0
+timm==0.6.12
+tinycss2==1.2.1
+tokenize-rt==5.0.0
+tokenizers==0.13.3
+toml==0.10.2
+tomli==2.0.1
+toolz==0.12.0
+torch==1.13.0+cu116
+torch-lr-finder==0.2.1
+torchaudio==0.13.0+cu116
+torchmetrics==0.10.2
+torchsummary==1.5.1
+torchvision==0.14.0+cu116
+tornado==6.2
+tqdm==4.64.1
+traitlets==5.5.0
+transformers==4.28.1
+typer==0.7.0
+typing_extensions==4.4.0
+tzdata==2023.3
+tzlocal==4.3
+uritemplate==4.1.1
+urllib3==1.26.15
+validators==0.20.0
+virtualenv==20.22.0
+wandb==0.15.2
+wasabi==1.1.1
+watchdog==2.2.1
+wcwidth==0.2.5
+webencodings==0.5.1
+Werkzeug==2.2.2
+wordcloud==1.8.2.2
+xlrd==1.2.0
+XlsxWriter==3.0.9
+xxhash==3.2.0
+yarl==1.8.1
+zipp==3.15.0