{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\debonair\\anaconda3\\lib\\site-packages\\numpy\\_distributor_init.py:30: UserWarning: loaded more than 1 DLL from .libs:\n", "c:\\Users\\debonair\\anaconda3\\lib\\site-packages\\numpy\\.libs\\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll\n", "c:\\Users\\debonair\\anaconda3\\lib\\site-packages\\numpy\\.libs\\libopenblas64__v0.3.23-gcc_10_3_0.dll\n", " warnings.warn(\"loaded more than 1 DLL from .libs:\"\n", "c:\\Users\\debonair\\anaconda3\\lib\\site-packages\\tensorflow_addons\\utils\\tfa_eol_msg.py:23: UserWarning: \n", "\n", "TensorFlow Addons (TFA) has ended development and introduction of new features.\n", "TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.\n", "Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). \n", "\n", "For more information see: https://github.com/tensorflow/addons/issues/2807 \n", "\n", " warnings.warn(\n" ] } ], "source": [ "import cv2\n", "import numpy as np\n", "from PIL import Image\n", "import tensorflow as tf\n", "import tensorflow_addons\n", "import moviepy.editor as mp\n", "from facenet_pytorch import MTCNN" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "OSError", "evalue": "No file or directory found at FINAL-EFFICIENTNETV2-B0", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_25172\\3936866724.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mmtcnn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mMTCNN\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmargin\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m14\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeep_all\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfactor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.7\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdevice\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'cpu'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m#Load model.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkeras\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodels\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"FINAL-EFFICIENTNETV2-B0\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\keras\\utils\\traceback_utils.py\u001b[0m in \u001b[0;36merror_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[1;31m# To get the full stack trace, call:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 69\u001b[0m \u001b[1;31m# `tf.debugging.disable_traceback_filtering()`\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 70\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfiltered_tb\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 71\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;32mdel\u001b[0m \u001b[0mfiltered_tb\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\keras\\saving\\save.py\u001b[0m in \u001b[0;36mload_model\u001b[1;34m(filepath, custom_objects, compile, options)\u001b[0m\n\u001b[0;32m 224\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 225\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgfile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_str\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 226\u001b[1;33m raise IOError(\n\u001b[0m\u001b[0;32m 227\u001b[0m \u001b[1;34mf\"No file or directory found at {filepath_str}\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 228\u001b[0m )\n", "\u001b[1;31mOSError\u001b[0m: No file or directory found at FINAL-EFFICIENTNETV2-B0" ] } ], "source": [ "# Load face detector\n", "mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')\n", "#Load model.\n", "model = tf.keras.models.load_model(\"FINAL-EFFICIENTNETV2-B0\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Face Detection function, Reference: (Timesler, 2020);\n", "class DetectionPipeline:\n", " \"\"\"Pipeline class for detecting faces in the frames of a video file.\"\"\"\n", "\n", " def __init__(self, detector, n_frames=None, batch_size=60, resize=None, input_modality = 'video'):\n", " \"\"\"Constructor for DetectionPipeline class.\n", "\n", " Keyword Arguments:\n", " n_frames {int} -- Total number of frames to load. These will be evenly spaced\n", " throughout the video. If not specified (i.e., None), all frames will be loaded.\n", " (default: {None})\n", " batch_size {int} -- Batch size to use with MTCNN face detector. (default: {32})\n", " resize {float} -- Fraction by which to resize frames from original prior to face\n", " detection. A value less than 1 results in downsampling and a value greater than\n", " 1 result in upsampling. (default: {None})\n", " \"\"\"\n", " self.detector = detector\n", " self.n_frames = n_frames\n", " self.batch_size = batch_size\n", " self.resize = resize\n", " self.input_modality = input_modality\n", "\n", " def __call__(self, filename):\n", " \"\"\"Load frames from an MP4 video and detect faces.\n", "\n", " Arguments:\n", " filename {str} -- Path to video.\n", " \"\"\"\n", " # Create video reader and find length\n", " if self.input_modality == 'video':\n", " print('Input modality is video.')\n", " v_cap = cv2.VideoCapture(filename)\n", " v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))\n", "\n", " # Pick 'n_frames' evenly spaced frames to sample\n", " if self.n_frames is None:\n", " sample = np.arange(0, v_len)\n", " else:\n", " sample = np.linspace(0, v_len - 1, self.n_frames).astype(int)\n", "\n", " # Loop through frames\n", " faces = []\n", " frames = []\n", " for j in range(v_len):\n", " success = v_cap.grab()\n", " if j in sample:\n", " # Load frame\n", " success, frame = v_cap.retrieve()\n", " if not success:\n", " continue\n", " frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n", " # frame = Image.fromarray(frame)\n", "\n", " # Resize frame to desired size\n", " if self.resize is not None:\n", " frame = frame.resize([int(d * self.resize) for d in frame.size])\n", " frames.append(frame)\n", "\n", " # When batch is full, detect faces and reset frame list\n", " if len(frames) % self.batch_size == 0 or j == sample[-1]:\n", "\n", " boxes, probs = self.detector.detect(frames)\n", "\n", " for i in range(len(frames)):\n", "\n", " if boxes[i] is None:\n", " faces.append(face2) #append previous face frame if no face is detected\n", " continue\n", "\n", " box = boxes[i][0].astype(int)\n", " frame = frames[i]\n", " face = frame[box[1]:box[3], box[0]:box[2]]\n", "\n", " if not face.any():\n", " faces.append(face2) #append previous face frame if no face is detected\n", " continue\n", "\n", " face2 = cv2.resize(face, (224, 224))\n", "\n", " faces.append(face2)\n", "\n", " frames = []\n", "\n", " v_cap.release()\n", " return faces\n", "\n", " elif self.input_modality == 'image':\n", " print('Input modality is image.')\n", " #Perform inference for image modality.\n", " image = cv2.imread(filename)\n", " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", " boxes, probs = self.detector.detect(image)\n", "\n", " if boxes is None:\n", " print('No faces found')\n", "\n", " box = boxes[0].astype(int)\n", " face = image[box[1]:box[3], box[0]:box[2]]\n", " face = cv2.resize(face, (224, 224))\n", "\n", " if not face.any():\n", " print(\"No faces found...\")\n", "\n", " return face\n", " \n", " else:\n", " raise ValueError(\"Invalid input modality. Must be either 'video' or image\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "detection_video_pipeline = DetectionPipeline(detector=mtcnn, n_frames=2, batch_size=1, input_modality='video')\n", "def deepfakes_video_predict(input_video):\n", "\n", " faces = detection_video_pipeline(input_video)\n", " total = 0\n", " real = 0\n", " fake = 0\n", "\n", " for face in faces:\n", "\n", " face2 = face/255\n", " pred = model.predict(np.expand_dims(face2, axis=0))[0]\n", " total+=1\n", "\n", " pred2 = pred[1]\n", "\n", " if pred2 > 0.5:\n", " fake+=1\n", " else:\n", " real+=1\n", "\n", " fake_ratio = fake/total\n", "\n", " text =\"\"\n", " text2 = \"Deepfakes Confidence: \" + str(fake_ratio*100) + \"%\"\n", "\n", " if fake_ratio >= 0.5:\n", " text = \"The video is FAKE.\"\n", " else:\n", " text = \"The video is REAL.\"\n", "\n", " return text, text2\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "detection_image_pipeline = DetectionPipeline(detector=mtcnn, batch_size = 1, input_modality = 'image')\n", "def deepfakes_image_predict(input_image):\n", " faces = detection_image_pipeline(input_image)\n", " face2 = faces/255\n", " pred = model.predict(np.expand_dims(face2, axis = 0))[0]\n", " real, fake = pred[0], pred[1]\n", " if real > 0.5:\n", " text = \"The image is REAL.\"\n", " text2 = \"Deepfakes Confidence: \" + str(round(real*100, 3)) + \"%\"\n", " else:\n", " text = \"The image is FAKE.\"\n", " text2 = \"Deepfakes Confidence: \" + str(round(fake*100, 3)) + \"%\"\n", " return text, text2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Input modality is image.\n", "1/1 [==============================] - 0s 75ms/step\n", "('The video is FAKE.', 'Deepfakes Confidence: 99.957%')\n", "Input modality is image.\n", "1/1 [==============================] - 0s 85ms/step\n", "('The video is REAL.', 'Deepfakes Confidence: 99.992%')\n" ] } ], "source": [ "image_res = deepfakes_image_predict('fake_image.jpg')\n", "print(image_res)\n", "\n", "image_res = deepfakes_image_predict('lady.jpg')\n", "print(image_res)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Input modality is video.\n", "1/1 [==============================] - 0s 80ms/step\n", "1/1 [==============================] - 0s 71ms/step\n", "('The video is FAKE.', 'Deepfakes Confidence: 100.0%')\n" ] } ], "source": [ "video_dir = 'Video1-fake-1-ff.mp4'\n", "videos = deepfakes_video_predict(video_dir)\n", "print(videos)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Input modality is video.\n", "1/1 [==============================] - 0s 82ms/step\n", "1/1 [==============================] - 0s 78ms/step\n", "('The video is REAL.', 'Deepfakes Confidence: 0.0%')\n" ] } ], "source": [ "video_dir = 'real-1.mp4'\n", "videos = deepfakes_video_predict(video_dir)\n", "print(videos)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Audio modality pipeline." ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "#Load model.\n", "import torch \n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "from rawnet import SincConv, Residual_block\n", "\n", "\n", "\n", "d_args = {\n", " \"nb_samp\": 64600,\n", " \"first_conv\": 1024,\n", " \"in_channels\": 1,\n", " \"filts\": [20, [20, 20], [20, 128], [128, 128]],\n", " \"blocks\": [2, 4],\n", " \"nb_fc_node\": 1024,\n", " \"gru_node\": 1024,\n", " \"nb_gru_layer\": 3,\n", " \"nb_classes\": 2}\n", "\n", "\n", "class RawNet(nn.Module):\n", " def __init__(self, d_args, device):\n", " super(RawNet, self).__init__()\n", "\n", " \n", " self.device=device\n", "\n", " self.Sinc_conv=SincConv(device=self.device,\n", "\t\t\tout_channels = d_args['filts'][0],\n", "\t\t\tkernel_size = d_args['first_conv'],\n", " in_channels = d_args['in_channels']\n", " )\n", " \n", " self.first_bn = nn.BatchNorm1d(num_features = d_args['filts'][0])\n", " self.selu = nn.SELU(inplace=True)\n", " self.block0 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][1], first = True))\n", " self.block1 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][1]))\n", " self.block2 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))\n", " d_args['filts'][2][0] = d_args['filts'][2][1]\n", " self.block3 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))\n", " self.block4 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))\n", " self.block5 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))\n", " self.avgpool = nn.AdaptiveAvgPool1d(1)\n", "\n", " self.fc_attention0 = self._make_attention_fc(in_features = d_args['filts'][1][-1],\n", " l_out_features = d_args['filts'][1][-1])\n", " self.fc_attention1 = self._make_attention_fc(in_features = d_args['filts'][1][-1],\n", " l_out_features = d_args['filts'][1][-1])\n", " self.fc_attention2 = self._make_attention_fc(in_features = d_args['filts'][2][-1],\n", " l_out_features = d_args['filts'][2][-1])\n", " self.fc_attention3 = self._make_attention_fc(in_features = d_args['filts'][2][-1],\n", " l_out_features = d_args['filts'][2][-1])\n", " self.fc_attention4 = self._make_attention_fc(in_features = d_args['filts'][2][-1],\n", " l_out_features = d_args['filts'][2][-1])\n", " self.fc_attention5 = self._make_attention_fc(in_features = d_args['filts'][2][-1],\n", " l_out_features = d_args['filts'][2][-1])\n", "\n", " self.bn_before_gru = nn.BatchNorm1d(num_features = d_args['filts'][2][-1])\n", " self.gru = nn.GRU(input_size = d_args['filts'][2][-1],\n", "\t\t\thidden_size = d_args['gru_node'],\n", "\t\t\tnum_layers = d_args['nb_gru_layer'],\n", "\t\t\tbatch_first = True)\n", "\n", " \n", " self.fc1_gru = nn.Linear(in_features = d_args['gru_node'],\n", "\t\t\tout_features = d_args['nb_fc_node'])\n", " \n", " self.fc2_gru = nn.Linear(in_features = d_args['nb_fc_node'],\n", "\t\t\tout_features = d_args['nb_classes'],bias=True)\n", "\t\t\t\n", " \n", " self.sig = nn.Sigmoid()\n", " self.logsoftmax = nn.LogSoftmax(dim=1)\n", " \n", " def forward(self, x, y = None):\n", " \n", " \n", " nb_samp = x.shape[0]\n", " len_seq = x.shape[1]\n", " x=x.view(nb_samp,1,len_seq)\n", " \n", " x = self.Sinc_conv(x) \n", " x = F.max_pool1d(torch.abs(x), 3)\n", " x = self.first_bn(x)\n", " x = self.selu(x)\n", " \n", " x0 = self.block0(x)\n", " y0 = self.avgpool(x0).view(x0.size(0), -1) # torch.Size([batch, filter])\n", " y0 = self.fc_attention0(y0)\n", " y0 = self.sig(y0).view(y0.size(0), y0.size(1), -1) # torch.Size([batch, filter, 1])\n", " x = x0 * y0 + y0 # (batch, filter, time) x (batch, filter, 1)\n", " \n", "\n", " x1 = self.block1(x)\n", " y1 = self.avgpool(x1).view(x1.size(0), -1) # torch.Size([batch, filter])\n", " y1 = self.fc_attention1(y1)\n", " y1 = self.sig(y1).view(y1.size(0), y1.size(1), -1) # torch.Size([batch, filter, 1])\n", " x = x1 * y1 + y1 # (batch, filter, time) x (batch, filter, 1)\n", "\n", " x2 = self.block2(x)\n", " y2 = self.avgpool(x2).view(x2.size(0), -1) # torch.Size([batch, filter])\n", " y2 = self.fc_attention2(y2)\n", " y2 = self.sig(y2).view(y2.size(0), y2.size(1), -1) # torch.Size([batch, filter, 1])\n", " x = x2 * y2 + y2 # (batch, filter, time) x (batch, filter, 1)\n", "\n", " x3 = self.block3(x)\n", " y3 = self.avgpool(x3).view(x3.size(0), -1) # torch.Size([batch, filter])\n", " y3 = self.fc_attention3(y3)\n", " y3 = self.sig(y3).view(y3.size(0), y3.size(1), -1) # torch.Size([batch, filter, 1])\n", " x = x3 * y3 + y3 # (batch, filter, time) x (batch, filter, 1)\n", "\n", " x4 = self.block4(x)\n", " y4 = self.avgpool(x4).view(x4.size(0), -1) # torch.Size([batch, filter])\n", " y4 = self.fc_attention4(y4)\n", " y4 = self.sig(y4).view(y4.size(0), y4.size(1), -1) # torch.Size([batch, filter, 1])\n", " x = x4 * y4 + y4 # (batch, filter, time) x (batch, filter, 1)\n", "\n", " x5 = self.block5(x)\n", " y5 = self.avgpool(x5).view(x5.size(0), -1) # torch.Size([batch, filter])\n", " y5 = self.fc_attention5(y5)\n", " y5 = self.sig(y5).view(y5.size(0), y5.size(1), -1) # torch.Size([batch, filter, 1])\n", " x = x5 * y5 + y5 # (batch, filter, time) x (batch, filter, 1)\n", "\n", " x = self.bn_before_gru(x)\n", " x = self.selu(x)\n", " x = x.permute(0, 2, 1) #(batch, filt, time) >> (batch, time, filt)\n", " self.gru.flatten_parameters()\n", " x, _ = self.gru(x)\n", " x = x[:,-1,:]\n", " x = self.fc1_gru(x)\n", " x = self.fc2_gru(x)\n", " output=self.logsoftmax(x)\n", " \n", " return output\n", " \n", " \n", "\n", " def _make_attention_fc(self, in_features, l_out_features):\n", "\n", " l_fc = []\n", " \n", " l_fc.append(nn.Linear(in_features = in_features,\n", "\t\t\t out_features = l_out_features))\n", "\n", " \n", "\n", " return nn.Sequential(*l_fc)\n", "\n", "\n", " def _make_layer(self, nb_blocks, nb_filts, first = False):\n", " layers = []\n", " #def __init__(self, nb_filts, first = False):\n", " for i in range(nb_blocks):\n", " first = first if i == 0 else False\n", " layers.append(Residual_block(nb_filts = nb_filts,\n", "\t\t\t\tfirst = first))\n", " if i == 0: nb_filts[0] = nb_filts[1]\n", " \n", " return nn.Sequential(*layers)\n", "\n", " def summary(self, input_size, batch_size=-1, device=\"cuda\", print_fn = None):\n", " if print_fn == None: printfn = print\n", " model = self\n", " \n", " def register_hook(module):\n", " def hook(module, input, output):\n", " class_name = str(module.__class__).split(\".\")[-1].split(\"'\")[0]\n", " module_idx = len(summary)\n", " \n", " m_key = \"%s-%i\" % (class_name, module_idx + 1)\n", " summary[m_key] = OrderedDict()\n", " summary[m_key][\"input_shape\"] = list(input[0].size())\n", " summary[m_key][\"input_shape\"][0] = batch_size\n", " if isinstance(output, (list, tuple)):\n", " summary[m_key][\"output_shape\"] = [\n", "\t\t\t\t\t\t[-1] + list(o.size())[1:] for o in output\n", "\t\t\t\t\t]\n", " else:\n", " summary[m_key][\"output_shape\"] = list(output.size())\n", " if len(summary[m_key][\"output_shape\"]) != 0:\n", " summary[m_key][\"output_shape\"][0] = batch_size\n", " \n", " params = 0\n", " if hasattr(module, \"weight\") and hasattr(module.weight, \"size\"):\n", " params += torch.prod(torch.LongTensor(list(module.weight.size())))\n", " summary[m_key][\"trainable\"] = module.weight.requires_grad\n", " if hasattr(module, \"bias\") and hasattr(module.bias, \"size\"):\n", " params += torch.prod(torch.LongTensor(list(module.bias.size())))\n", " summary[m_key][\"nb_params\"] = params\n", " \n", " if (\n", "\t\t\t\tnot isinstance(module, nn.Sequential)\n", "\t\t\t\tand not isinstance(module, nn.ModuleList)\n", "\t\t\t\tand not (module == model)\n", "\t\t\t):\n", " hooks.append(module.register_forward_hook(hook))\n", " \n", " device = device.lower()\n", " assert device in [\n", "\t\t\t\"cuda\",\n", "\t\t\t\"cpu\",\n", "\t\t], \"Input device is not valid, please specify 'cuda' or 'cpu'\"\n", " \n", " if device == \"cuda\" and torch.cuda.is_available():\n", " dtype = torch.cuda.FloatTensor\n", " else:\n", " dtype = torch.FloatTensor\n", " if isinstance(input_size, tuple):\n", " input_size = [input_size]\n", " x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]\n", " summary = OrderedDict()\n", " hooks = []\n", " model.apply(register_hook)\n", " model(*x)\n", " for h in hooks:\n", " h.remove()\n", " \n", " print_fn(\"----------------------------------------------------------------\")\n", " line_new = \"{:>20} {:>25} {:>15}\".format(\"Layer (type)\", \"Output Shape\", \"Param #\")\n", " print_fn(line_new)\n", " print_fn(\"================================================================\")\n", " total_params = 0\n", " total_output = 0\n", " trainable_params = 0\n", " for layer in summary:\n", " # input_shape, output_shape, trainable, nb_params\n", " line_new = \"{:>20} {:>25} {:>15}\".format(\n", "\t\t\t\tlayer,\n", "\t\t\t\tstr(summary[layer][\"output_shape\"]),\n", "\t\t\t\t\"{0:,}\".format(summary[layer][\"nb_params\"]),\n", "\t\t\t)\n", " total_params += summary[layer][\"nb_params\"]\n", " total_output += np.prod(summary[layer][\"output_shape\"])\n", " if \"trainable\" in summary[layer]:\n", " if summary[layer][\"trainable\"] == True:\n", " trainable_params += summary[layer][\"nb_params\"]\n", " print_fn(line_new)\n" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = RawNet(d_args = d_args, device = 'cpu')\n", "model_dict = model.state_dict()\n", "ckpt = torch.load('pre_trained_DF_RawNet2.pth', map_location = torch.device('cpu'))\n", "model.load_state_dict(ckpt, model_dict)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "import librosa" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "def load_and_preprocess_audio(audio_path):\n", " '''Loads and returns a torch tensor object'''\n", " x, sr = librosa.load(audio_path)\n", " x_pt = torch.Tensor(x)\n", " x_pt = torch.unsqueeze(x_pt, dim = 0)\n", " return x_pt" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[2.5792e-05, 3.1405e-05, 4.5405e-05, ..., 0.0000e+00, 0.0000e+00,\n", " 0.0000e+00]])" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds = load_and_preprocess_audio(audio_path = 'audios/DF_E_2000027.flac')\n", "ds" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "grads = model(ds)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-6.5565e-06, -1.1934e+01]], grad_fn=)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grads" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "batch = grads[:, 1].data.cpu().numpy().ravel()" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-11.933539], dtype=float32)" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "batch" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [], "source": [ "_, batch_pred = grads.max(dim=1)" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([0])" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "batch_pred" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res = np.argmin(grads.detach().numpy())\n", "res" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-11.933546" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grads.detach().numpy()[0][0] + grads.detach().numpy()[0][1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }