danieladejumo commited on
Commit
1ba06ec
·
1 Parent(s): 062916a

Files Commit

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ images/*
2
+ videos/*
3
+ config/*
Jupyternote Cheatsheet.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Jupyternote Cheatsheet.ipynb","provenance":[],"mount_file_id":"1rMSETYdooFC6fVgT0PaOovnBrB4ZWoys","authorship_tag":"ABX9TyN4O59ZYPVT0rGiUB3bfznT"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Models"],"metadata":{"id":"ODx9TIOB4tCe"}},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BelRHeLw4qyQ","executionInfo":{"status":"ok","timestamp":1654537166220,"user_tz":-60,"elapsed":22,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"60695f20-3957-4958-aabd-c2ecff870977"},"outputs":[{"output_type":"stream","name":"stdout","text":["Writing models.py\n"]}],"source":["%%writefile models.py\n","from __future__ import division\n","\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","from torch.autograd import Variable\n","import numpy as np\n","\n","from PIL import Image\n","\n","from utils.parse_config import *\n","from utils.utils import build_targets\n","from collections import defaultdict\n","\n","##import matplotlib.pyplot as plt\n","##import matplotlib.patches as patches\n","\n","\n","def create_modules(module_defs):\n"," \"\"\"\n"," Constructs module list of layer blocks from module configuration in module_defs\n"," \"\"\"\n"," hyperparams = module_defs.pop(0)\n"," output_filters = [int(hyperparams[\"channels\"])]\n"," module_list = nn.ModuleList()\n"," for i, module_def in enumerate(module_defs):\n"," modules = nn.Sequential()\n","\n"," if module_def[\"type\"] == \"convolutional\":\n"," bn = int(module_def[\"batch_normalize\"])\n"," filters = int(module_def[\"filters\"])\n"," kernel_size = int(module_def[\"size\"])\n"," pad = (kernel_size - 1) // 2 if int(module_def[\"pad\"]) else 0\n"," modules.add_module(\n"," \"conv_%d\" % i,\n"," nn.Conv2d(\n"," in_channels=output_filters[-1],\n"," 
out_channels=filters,\n"," kernel_size=kernel_size,\n"," stride=int(module_def[\"stride\"]),\n"," padding=pad,\n"," bias=not bn,\n"," ),\n"," )\n"," if bn:\n"," modules.add_module(\"batch_norm_%d\" % i, nn.BatchNorm2d(filters))\n"," if module_def[\"activation\"] == \"leaky\":\n"," modules.add_module(\"leaky_%d\" % i, nn.LeakyReLU(0.1))\n","\n"," elif module_def[\"type\"] == \"maxpool\":\n"," kernel_size = int(module_def[\"size\"])\n"," stride = int(module_def[\"stride\"])\n"," if kernel_size == 2 and stride == 1:\n"," padding = nn.ZeroPad2d((0, 1, 0, 1))\n"," modules.add_module(\"_debug_padding_%d\" % i, padding)\n"," maxpool = nn.MaxPool2d(\n"," kernel_size=int(module_def[\"size\"]),\n"," stride=int(module_def[\"stride\"]),\n"," padding=int((kernel_size - 1) // 2),\n"," )\n"," modules.add_module(\"maxpool_%d\" % i, maxpool)\n","\n"," elif module_def[\"type\"] == \"upsample\":\n"," upsample = nn.Upsample(scale_factor=int(module_def[\"stride\"]), mode=\"nearest\")\n"," modules.add_module(\"upsample_%d\" % i, upsample)\n","\n"," elif module_def[\"type\"] == \"route\":\n"," layers = [int(x) for x in module_def[\"layers\"].split(\",\")]\n"," filters = sum([output_filters[layer_i] for layer_i in layers])\n"," modules.add_module(\"route_%d\" % i, EmptyLayer())\n","\n"," elif module_def[\"type\"] == \"shortcut\":\n"," filters = output_filters[int(module_def[\"from\"])]\n"," modules.add_module(\"shortcut_%d\" % i, EmptyLayer())\n","\n"," elif module_def[\"type\"] == \"yolo\":\n"," anchor_idxs = [int(x) for x in module_def[\"mask\"].split(\",\")]\n"," # Extract anchors\n"," anchors = [int(x) for x in module_def[\"anchors\"].split(\",\")]\n"," anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]\n"," anchors = [anchors[i] for i in anchor_idxs]\n"," num_classes = int(module_def[\"classes\"])\n"," img_height = int(hyperparams[\"height\"])\n"," # Define detection layer\n"," yolo_layer = YOLOLayer(anchors, num_classes, img_height)\n"," 
modules.add_module(\"yolo_%d\" % i, yolo_layer)\n"," # Register module list and number of output filters\n"," module_list.append(modules)\n"," output_filters.append(filters)\n","\n"," return hyperparams, module_list\n","\n","\n","class EmptyLayer(nn.Module):\n"," \"\"\"Placeholder for 'route' and 'shortcut' layers\"\"\"\n","\n"," def __init__(self):\n"," super(EmptyLayer, self).__init__()\n","\n","\n","class YOLOLayer(nn.Module):\n"," \"\"\"Detection layer\"\"\"\n","\n"," def __init__(self, anchors, num_classes, img_dim):\n"," super(YOLOLayer, self).__init__()\n"," self.anchors = anchors\n"," self.num_anchors = len(anchors)\n"," self.num_classes = num_classes\n"," self.bbox_attrs = 5 + num_classes\n"," self.image_dim = img_dim\n"," self.ignore_thres = 0.5\n"," self.lambda_coord = 1\n","\n"," self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss\n"," self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss\n"," self.ce_loss = nn.CrossEntropyLoss() # Class loss\n","\n"," def forward(self, x, targets=None):\n"," nA = self.num_anchors\n"," nB = x.size(0)\n"," nG = x.size(2)\n"," stride = self.image_dim / nG\n","\n"," # Tensors for cuda support\n"," FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor\n"," LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor\n"," ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor\n","\n"," prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()\n","\n"," # Get outputs\n"," x = torch.sigmoid(prediction[..., 0]) # Center x\n"," y = torch.sigmoid(prediction[..., 1]) # Center y\n"," w = prediction[..., 2] # Width\n"," h = prediction[..., 3] # Height\n"," pred_conf = torch.sigmoid(prediction[..., 4]) # Conf\n"," pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.\n","\n"," # Calculate offsets for each grid\n"," grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)\n"," grid_y = torch.arange(nG).repeat(nG, 
1).t().view([1, 1, nG, nG]).type(FloatTensor)\n"," scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])\n"," anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))\n"," anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))\n","\n"," # Add offset and scale with anchors\n"," pred_boxes = FloatTensor(prediction[..., :4].shape)\n"," pred_boxes[..., 0] = x.data + grid_x\n"," pred_boxes[..., 1] = y.data + grid_y\n"," pred_boxes[..., 2] = torch.exp(w.data) * anchor_w\n"," pred_boxes[..., 3] = torch.exp(h.data) * anchor_h\n","\n"," # Training\n"," if targets is not None:\n","\n"," if x.is_cuda:\n"," self.mse_loss = self.mse_loss.cuda()\n"," self.bce_loss = self.bce_loss.cuda()\n"," self.ce_loss = self.ce_loss.cuda()\n","\n"," nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(\n"," pred_boxes=pred_boxes.cpu().data,\n"," pred_conf=pred_conf.cpu().data,\n"," pred_cls=pred_cls.cpu().data,\n"," target=targets.cpu().data,\n"," anchors=scaled_anchors.cpu().data,\n"," num_anchors=nA,\n"," num_classes=self.num_classes,\n"," grid_size=nG,\n"," ignore_thres=self.ignore_thres,\n"," img_dim=self.image_dim,\n"," )\n","\n"," nProposals = int((pred_conf > 0.5).sum().item())\n"," recall = float(nCorrect / nGT) if nGT else 1\n"," precision = float(nCorrect / nProposals)\n","\n"," # Handle masks\n"," mask = Variable(mask.type(ByteTensor))\n"," conf_mask = Variable(conf_mask.type(ByteTensor))\n","\n"," # Handle target variables\n"," tx = Variable(tx.type(FloatTensor), requires_grad=False)\n"," ty = Variable(ty.type(FloatTensor), requires_grad=False)\n"," tw = Variable(tw.type(FloatTensor), requires_grad=False)\n"," th = Variable(th.type(FloatTensor), requires_grad=False)\n"," tconf = Variable(tconf.type(FloatTensor), requires_grad=False)\n"," tcls = Variable(tcls.type(LongTensor), requires_grad=False)\n","\n"," # Get conf mask where gt and where there is no gt\n"," conf_mask_true = mask\n"," conf_mask_false = conf_mask - 
mask\n","\n"," # Mask outputs to ignore non-existing objects\n"," loss_x = self.mse_loss(x[mask], tx[mask])\n"," loss_y = self.mse_loss(y[mask], ty[mask])\n"," loss_w = self.mse_loss(w[mask], tw[mask])\n"," loss_h = self.mse_loss(h[mask], th[mask])\n"," loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(\n"," pred_conf[conf_mask_true], tconf[conf_mask_true]\n"," )\n"," loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))\n"," loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls\n","\n"," return (\n"," loss,\n"," loss_x.item(),\n"," loss_y.item(),\n"," loss_w.item(),\n"," loss_h.item(),\n"," loss_conf.item(),\n"," loss_cls.item(),\n"," recall,\n"," precision,\n"," )\n","\n"," else:\n"," # If not in training phase return predictions\n"," output = torch.cat(\n"," (\n"," pred_boxes.view(nB, -1, 4) * stride,\n"," pred_conf.view(nB, -1, 1),\n"," pred_cls.view(nB, -1, self.num_classes),\n"," ),\n"," -1,\n"," )\n"," return output\n","\n","\n","class Darknet(nn.Module):\n"," \"\"\"YOLOv3 object detection model\"\"\"\n","\n"," def __init__(self, config_path, img_size=416):\n"," super(Darknet, self).__init__()\n"," self.module_defs = parse_model_config(config_path)\n"," self.hyperparams, self.module_list = create_modules(self.module_defs)\n"," self.img_size = img_size\n"," self.seen = 0\n"," self.header_info = np.array([0, 0, 0, self.seen, 0])\n"," self.loss_names = [\"x\", \"y\", \"w\", \"h\", \"conf\", \"cls\", \"recall\", \"precision\"]\n","\n"," def forward(self, x, targets=None):\n"," is_training = targets is not None\n"," output = []\n"," self.losses = defaultdict(float)\n"," layer_outputs = []\n"," for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n"," if module_def[\"type\"] in [\"convolutional\", \"upsample\", \"maxpool\"]:\n"," x = module(x)\n"," elif module_def[\"type\"] == \"route\":\n"," layer_i = [int(x) for x in 
module_def[\"layers\"].split(\",\")]\n"," x = torch.cat([layer_outputs[i] for i in layer_i], 1)\n"," elif module_def[\"type\"] == \"shortcut\":\n"," layer_i = int(module_def[\"from\"])\n"," x = layer_outputs[-1] + layer_outputs[layer_i]\n"," elif module_def[\"type\"] == \"yolo\":\n"," # Train phase: get loss\n"," if is_training:\n"," x, *losses = module[0](x, targets)\n"," for name, loss in zip(self.loss_names, losses):\n"," self.losses[name] += loss\n"," # Test phase: Get detections\n"," else:\n"," x = module(x)\n"," output.append(x)\n"," layer_outputs.append(x)\n","\n"," self.losses[\"recall\"] /= 3\n"," self.losses[\"precision\"] /= 3\n"," return sum(output) if is_training else torch.cat(output, 1)\n","\n"," def load_weights(self, weights_path):\n"," \"\"\"Parses and loads the weights stored in 'weights_path'\"\"\"\n","\n"," # Open the weights file\n"," fp = open(weights_path, \"rb\")\n"," header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values\n","\n"," # Needed to write header when saving weights\n"," self.header_info = header\n","\n"," self.seen = header[3]\n"," weights = np.fromfile(fp, dtype=np.float32) # The rest are weights\n"," fp.close()\n","\n"," ptr = 0\n"," for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n"," if module_def[\"type\"] == \"convolutional\":\n"," conv_layer = module[0]\n"," if module_def[\"batch_normalize\"]:\n"," # Load BN bias, weights, running mean and running variance\n"," bn_layer = module[1]\n"," num_b = bn_layer.bias.numel() # Number of biases\n"," # Bias\n"," bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)\n"," bn_layer.bias.data.copy_(bn_b)\n"," ptr += num_b\n"," # Weight\n"," bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)\n"," bn_layer.weight.data.copy_(bn_w)\n"," ptr += num_b\n"," # Running Mean\n"," bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)\n"," 
bn_layer.running_mean.data.copy_(bn_rm)\n"," ptr += num_b\n"," # Running Var\n"," bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)\n"," bn_layer.running_var.data.copy_(bn_rv)\n"," ptr += num_b\n"," else:\n"," # Load conv. bias\n"," num_b = conv_layer.bias.numel()\n"," conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)\n"," conv_layer.bias.data.copy_(conv_b)\n"," ptr += num_b\n"," # Load conv. weights\n"," num_w = conv_layer.weight.numel()\n"," conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)\n"," conv_layer.weight.data.copy_(conv_w)\n"," ptr += num_w\n","\n"," \"\"\"\n"," @:param path - path of the new weights file\n"," @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)\n"," \"\"\"\n","\n"," def save_weights(self, path, cutoff=-1):\n","\n"," fp = open(path, \"wb\")\n"," self.header_info[3] = self.seen\n"," self.header_info.tofile(fp)\n","\n"," # Iterate through layers\n"," for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):\n"," if module_def[\"type\"] == \"convolutional\":\n"," conv_layer = module[0]\n"," # If batch norm, load bn first\n"," if module_def[\"batch_normalize\"]:\n"," bn_layer = module[1]\n"," bn_layer.bias.data.cpu().numpy().tofile(fp)\n"," bn_layer.weight.data.cpu().numpy().tofile(fp)\n"," bn_layer.running_mean.data.cpu().numpy().tofile(fp)\n"," bn_layer.running_var.data.cpu().numpy().tofile(fp)\n"," # Load conv bias\n"," else:\n"," conv_layer.bias.data.cpu().numpy().tofile(fp)\n"," # Load conv weights\n"," conv_layer.weight.data.cpu().numpy().tofile(fp)\n","\n"," fp.close()"]},{"cell_type":"code","source":["!ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ar8FuY3z43Fk","executionInfo":{"status":"ok","timestamp":1654537174809,"user_tz":-60,"elapsed":16,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}},"outputId":"ce227d02-75a3-477d-becf-e1c2702c7001"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["models.py sample_data\n"]}]},{"cell_type":"code","source":["!pwd"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hRxa6vyoGbla","executionInfo":{"status":"ok","timestamp":1654537258168,"user_tz":-60,"elapsed":26,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ccaaf1dc-6769-4093-8769-c8aa3b809bdf"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["/content\n"]}]},{"cell_type":"code","source":["%%writefile Readme.md\n","Are you for real!!"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cVKDwgGtGv7g","executionInfo":{"status":"ok","timestamp":1654537404197,"user_tz":-60,"elapsed":21,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"41cdc392-059d-42be-b267-2a7f66d0a1f6"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["Overwriting Readme.md\n"]}]},{"cell_type":"code","source":["%cd Computer Vision"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"780vJiykHTmT","executionInfo":{"status":"ok","timestamp":1654537643123,"user_tz":-60,"elapsed":16,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"159eb128-2a7a-41b3-b84c-7d517ff92454"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision\n"]}]},{"cell_type":"code","source":["!pwd"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"WeA417NzHe0W","executionInfo":{"status":"ok","timestamp":1654537646111,"user_tz":-60,"elapsed":408,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}},"outputId":"036a3c8e-b106-46a8-b5de-b7adf66938ab"},"execution_count":15,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision\n"]}]},{"cell_type":"code","source":["%%writefile test.and\n","\n","Really I can now write to my drive!"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hrSVQd-fHzai","executionInfo":{"status":"ok","timestamp":1654537570112,"user_tz":-60,"elapsed":24,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"c58a5849-aaba-4fe3-c596-681a5e7df731"},"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["Writing test.and\n"]}]},{"cell_type":"code","source":["!ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jRtg6b1IH8KV","executionInfo":{"status":"ok","timestamp":1654537654214,"user_tz":-60,"elapsed":24,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"dd49447d-6924-4176-f5a9-ca184b671be8"},"execution_count":16,"outputs":[{"output_type":"stream","name":"stdout","text":["cnn-resnet-CIFAR10 darknet-COCO-object_detection feedforward-cnn-MNIST\n"]}]},{"cell_type":"code","source":["%%bash\n","\n","ls -la\n","python --version"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iUpVW1oZIQnl","executionInfo":{"status":"ok","timestamp":1654537857269,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ff54c93a-9f2c-4453-d82f-c6c1683f61b8"},"execution_count":19,"outputs":[{"output_type":"stream","name":"stdout","text":["total 12\n","drwx------ 2 root root 4096 May 17 21:02 cnn-resnet-CIFAR10\n","drwx------ 2 root root 4096 Jun 6 16:38 darknet-COCO-object_detection\n","drwx------ 2 root root 4096 May 17 21:01 feedforward-cnn-MNIST\n","Python 3.7.13\n"]}]},{"cell_type":"code","source":["%cd 
../"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NJ7riTtCI2-V","executionInfo":{"status":"ok","timestamp":1654537984381,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"713f2de8-ae10-46b9-d5e9-bbfa779de2c8"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":["/content\n"]}]},{"cell_type":"code","source":["%%bash\n","\n","cd \"drive/MyDrive/Python/Machine Learning\"\n","ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZAOqxQzPJc1k","executionInfo":{"status":"ok","timestamp":1654538084191,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"7b82c13f-3e14-47b5-bc12-25bdf0dee540"},"execution_count":25,"outputs":[{"output_type":"stream","name":"stdout","text":["Articles\n","Computer Vision\n","Datasets\n","Deep-Learning-with-PyTorch-Jovian\n","Deep RL\n","FastAI Course\n","Generative Models\n","HuggingFace-Deep-RL\n","PyTorch\n","ZeroToGANS_Revision\n"]}]},{"cell_type":"code","source":["%run models.py"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":235},"id":"HvI6SRX8JsS7","executionInfo":{"status":"ok","timestamp":1654538109961,"user_tz":-60,"elapsed":2355,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"08a28f6a-76c2-4eaa-fa36-36d5a8e145ea"},"execution_count":27,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)","\u001b[0;32m/content/models.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mPIL\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse_config\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbuild_targets\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mcollections\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdefaultdict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'utils'"]}]},{"cell_type":"code","source":["%edit"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JLbktoGWJvft","executionInfo":{"status":"ok","timestamp":1654538391516,"user_tz":-60,"elapsed":21,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"cce69d99-b879-4600-a9ba-9afb5a58b76a"},"execution_count":29,"outputs":[{"output_type":"stream","name":"stdout","text":["IPython will make a temporary file named: /tmp/ipython_edit_nffqr1eo/ipython_edit_msvbxat4.py\n"]}]},{"cell_type":"code","source":["%load models.py"],"metadata":{"id":"PI_bYsujKQfx","executionInfo":{"status":"ok","timestamp":1654538646656,"user_tz":-60,"elapsed":443,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":31,"outputs":[]},{"cell_type":"code","source":["%%writefile\n","%run\n","%cd\n","%cat\n","%load [-r, -s]\n","%edit\n","%time, %%time\n","%timeit, %%timeit\n","%%html\n","%env, ...\n","%%file, alias for writefile\n","%%bash\n","%matplotlib [inline, ...]\n","and more\n","%paste, 
%cpaste\n","%pinfo\n","%who\n","%lsmagic\n","%pwd"],"metadata":{"id":"GdCgR_KCL7MK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%quickref\n","%%js\n","%%python[2, 3]\n","%%latex\n","%%shell\n","%%svg"],"metadata":{"id":"B4QAAv64NHRW","executionInfo":{"status":"ok","timestamp":1654539235689,"user_tz":-60,"elapsed":445,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":38,"outputs":[]},{"cell_type":"code","source":["%system, %%system\n","%sx, %%sx"],"metadata":{"id":"psD0AZ7YNJBZ"},"execution_count":null,"outputs":[]}]}
PyTorch_Object_Detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
PyTorch_Object_Tracking.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cells":[{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"1VkPIQMBmJMO","executionInfo":{"status":"ok","timestamp":1654700494173,"user_tz":-60,"elapsed":3080,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"1e3cd91c-ca69-486b-b182-d2f31583b645"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}]},{"cell_type":"code","source":["%cd ./drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"izkEuuuPmTZf","executionInfo":{"status":"ok","timestamp":1654700494174,"user_tz":-60,"elapsed":11,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"daf99ba8-1ed2-4935-e2b9-3481fef9584a"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection\n"]}]},{"cell_type":"code","source":["!pip install filterpy --quiet"],"metadata":{"id":"qXFwvyxqmXDr","executionInfo":{"status":"ok","timestamp":1654700498924,"user_tz":-60,"elapsed":4757,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["!pip install lap --quiet"],"metadata":{"id":"zqK3-Fn2oRsc","executionInfo":{"status":"ok","timestamp":1654700503070,"user_tz":-60,"elapsed":4165,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","execution_count":5,"metadata":{"id":"kHwKuAkPlviV","executionInfo":{"status":"ok","timestamp":1654700504310,"user_tz":-60,"elapsed":1248,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}}},"outputs":[],"source":["from models import *\n","from utils import *\n","\n","import os, sys, time, datetime, random\n","import torch\n","from torch.utils.data import DataLoader\n","from torchvision import datasets, transforms\n","from torch.autograd import Variable\n","\n","import matplotlib.pyplot as plt\n","import matplotlib.patches as patches\n","from PIL import Image"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"N5uZwVlClvie","executionInfo":{"status":"ok","timestamp":1654700508098,"user_tz":-60,"elapsed":3795,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"3a3e75b1-3379-4e79-f418-0b8a48ffb62f"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='mean' instead.\n"," warnings.warn(warning.format(ret))\n"]}],"source":["config_path='config/yolov3.cfg'\n","weights_path='config/yolov3.weights'\n","class_path='config/coco.names'\n","img_size=416\n","conf_thres=0.8\n","nms_thres=0.4\n","\n","# Load model and weights\n","model = Darknet(config_path, img_size=img_size)\n","model.load_weights(weights_path)\n","model.cuda()\n","model.eval()\n","classes = utils.load_classes(class_path)\n","Tensor = torch.cuda.FloatTensor"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"n4NNQSOYlvij","executionInfo":{"status":"ok","timestamp":1654700508099,"user_tz":-60,"elapsed":9,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"outputs":[],"source":["def detect_image(img):\n"," # scale and pad image\n"," ratio = min(img_size/img.size[0], img_size/img.size[1])\n"," imw = round(img.size[0] * ratio)\n"," imh = round(img.size[1] * ratio)\n"," img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),\n"," transforms.Pad((max(int((imh-imw)/2),0), 
max(int((imw-imh)/2),0), max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),\n"," (128,128,128)),\n"," transforms.ToTensor(),\n"," ])\n"," # convert image to Tensor\n"," image_tensor = img_transforms(img).float()\n"," image_tensor = image_tensor.unsqueeze_(0)\n"," input_img = Variable(image_tensor.type(Tensor))\n"," # run inference on the model and get detections\n"," with torch.no_grad():\n"," detections = model(input_img)\n"," detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)\n"," return detections[0]"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VUHsE2-lvik","executionInfo":{"status":"ok","timestamp":1654700521379,"user_tz":-60,"elapsed":13287,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"6144a350-24e9-4a7c-95c5-96bb66b824e0"},"outputs":[{"output_type":"stream","name":"stdout","text":["Populating the interactive namespace from numpy and matplotlib\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['random']\n","`%matplotlib` prevents importing * from pylab and numpy\n"," \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n"]},{"output_type":"stream","name":"stdout","text":["Video size 1280 720\n"]},{"output_type":"stream","name":"stderr","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection/sort.py:38: NumbaWarning: \n","Compilation is falling back to object mode WITH looplifting enabled because Function \"iou\" failed type inference due to: non-precise type pyobject\n","During: typing of argument at /content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection/sort.py (43)\n","\n","File \"sort.py\", line 43:\n","def iou(bb_test,bb_gt):\n"," <source elided>\n"," \"\"\"\n"," xx1 = np.maximum(bb_test[0], 
bb_gt[0])\n"," ^\n","\n"," @jit\n","/usr/local/lib/python3.7/dist-packages/numba/core/object_mode_passes.py:178: NumbaWarning: Function \"iou\" was compiled in object mode without forceobj=True.\n","\n","File \"sort.py\", line 39:\n","@jit\n","def iou(bb_test,bb_gt):\n","^\n","\n"," state.func_ir.loc))\n","/usr/local/lib/python3.7/dist-packages/numba/core/object_mode_passes.py:188: NumbaDeprecationWarning: \n","Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.\n","\n","For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit\n","\n","File \"sort.py\", line 39:\n","@jit\n","def iou(bb_test,bb_gt):\n","^\n","\n"," state.func_ir.loc))\n"]}],"source":["videopath = './videos/HorseRacing.mp4'\n","\n","%pylab inline \n","import cv2\n","from IPython.display import clear_output\n","\n","cmap = plt.get_cmap('tab20b')\n","colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]\n","\n","# initialize Sort object and video capture\n","from sort import *\n","vid = cv2.VideoCapture(videopath)\n","mot_tracker = Sort()\n","\n","fourcc = cv2.VideoWriter_fourcc(*'XVID')\n","ret,frame=vid.read()\n","vw = frame.shape[1]\n","vh = frame.shape[0]\n","print (\"Video size\", vw,vh)\n","outvideo = cv2.VideoWriter(videopath.replace(\".mp4\", \"-det.mp4\"),fourcc,20.0,(vw,vh))\n","\n","# while(True):\n","for ii in range(40):\n"," ret, frame = vid.read()\n"," if not ret:\n"," print(\"Done Procesing Video\")\n"," break\n"," frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n"," pilimg = Image.fromarray(frame)\n"," detections = detect_image(pilimg)\n","\n"," img = np.array(pilimg)\n"," pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))\n"," pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))\n"," unpad_h = img_size - pad_y\n"," unpad_w = img_size - pad_x\n"," if detections is not 
None:\n"," tracked_objects = mot_tracker.update(detections.cpu())\n","\n"," unique_labels = detections[:, -1].cpu().unique()\n"," n_cls_preds = len(unique_labels)\n"," for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:\n"," box_h = int(((y2 - y1) / unpad_h) * img.shape[0])\n"," box_w = int(((x2 - x1) / unpad_w) * img.shape[1])\n"," y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])\n"," x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])\n","\n"," color = colors[int(obj_id) % len(colors)]\n"," color = [i * 255 for i in color]\n"," cls = classes[int(cls_pred)]\n"," cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)\n"," cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+60, y1), color, -1)\n"," cv2.putText(frame, cls + \"-\" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)\n","\n"," outvideo.write(frame)\n","\n","outvideo.release()"]},{"cell_type":"code","source":["from pathlib import Path\n","from IPython import display as ipythondisplay\n","import base64\n","\n","def show_videos(video_path='', prefix=''):\n"," html = []\n"," for mp4 in Path(video_path).glob(f\"{prefix}*.mp4\"):\n"," video_b64 = base64.b64encode(mp4.read_bytes())\n"," html.append('''<video alt=\"{}\" autoplay \n"," loop controls style=\"height: 400px;\">\n"," <source src=\"data:video/mp4;base64,{}\" type=\"video/mp4\" />\n"," </video>'''.format(mp4, video_b64.decode('ascii')))\n"," break\n"," ipythondisplay.display(ipythondisplay.HTML(data=\"<br>\".join(html)))"],"metadata":{"id":"Xx6d_F3VstfA","executionInfo":{"status":"ok","timestamp":1654700521380,"user_tz":-60,"elapsed":19,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":9,"outputs":[]},{"cell_type":"code","source":["video_b64 = base64.b64encode(Path(videopath.replace(\".mp4\", \"-det.mp4\")).read_bytes())\n","html = '''<video alt=\"{}\" autoplay \n"," loop controls style=\"height: 400px;\">\n"," <source src=\"data:video/mp4;base64,{}\" 
type=\"video/mp4\" />\n"," </video>'''.format(Path(videopath), video_b64.decode('ascii'))\n","ipythondisplay.display(ipythondisplay.HTML(data=html)) "],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":421,"output_embedded_package_id":"1KE6a6Jf_qBrnIGEjOY8GYXagvaaGt84D"},"id":"K3VrKNb3yUbH","executionInfo":{"status":"ok","timestamp":1654700524974,"user_tz":-60,"elapsed":3611,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"92ea1435-9e17-4167-c094-dd1e380b200f"},"execution_count":10,"outputs":[{"output_type":"display_data","data":{"text/plain":"Output hidden; open in https://colab.research.google.com to view."},"metadata":{}}]}],"metadata":{"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"colab":{"name":"PyTorch_Object_Tracking.ipynb","provenance":[],"collapsed_sections":[]},"accelerator":"GPU"},"nbformat":4,"nbformat_minor":0}
__pycache__/models.cpython-37.pyc ADDED
Binary file (9.65 kB). View file
 
__pycache__/sort.cpython-37.pyc ADDED
Binary file (10.2 kB). View file
 
darknet-coco-object_detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
models.py ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import division
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ from torch.autograd import Variable
7
+ import numpy as np
8
+
9
+ from PIL import Image
10
+
11
+ from utils.parse_config import *
12
+ from utils.utils import build_targets
13
+ from collections import defaultdict
14
+
15
+ ##import matplotlib.pyplot as plt
16
+ ##import matplotlib.patches as patches
17
+
18
+
19
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first entry of ``module_defs`` (the hyperparameter block) is popped off
    the list in place, so after the call ``module_defs`` holds exactly one
    entry per element of the returned module list.

    :param module_defs: list of dicts; each dict has a "type" key plus the
        settings of that block (values are parsed with ``int()`` / ``split``).
    :return: tuple ``(hyperparams, module_list)`` where ``hyperparams`` is the
        popped first entry and ``module_list`` is an ``nn.ModuleList`` with one
        ``nn.Sequential`` per remaining entry.
    """
    hyperparams = module_defs.pop(0)
    # output_filters[k + 1] is the channel count produced by layer k;
    # output_filters[0] is the number of input image channels.
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        # Layers that do not change the channel count (maxpool, upsample, yolo)
        # inherit it from the previous layer. Setting it here also avoids a
        # NameError when the very first layer is not convolutional.
        filters = output_filters[-1]

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # the conv bias is dropped when batch norm follows
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # pad right/bottom by one pixel before the 2x2, stride-1 pooling
                padding = nn.ZeroPad2d((0, 1, 0, 1))
                modules.add_module("_debug_padding_%d" % i, padding)
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
            )
            modules.add_module("maxpool_%d" % i, maxpool)

        elif module_def["type"] == "upsample":
            upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module("upsample_%d" % i, upsample)

        elif module_def["type"] == "route":
            # channel count is the sum over all referenced layers
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum([output_filters[layer_i] for layer_i in layers])
            modules.add_module("route_%d" % i, EmptyLayer())

        elif module_def["type"] == "shortcut":
            filters = output_filters[int(module_def["from"])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors: flat "w0,h0,w1,h1,..." list -> (w, h) pairs,
            # then keep only the pairs selected by the mask
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[j], anchors[j + 1]) for j in range(0, len(anchors), 2)]
            anchors = [anchors[j] for j in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
92
+
93
+
94
class EmptyLayer(nn.Module):
    """No-op module used as a stand-in for 'route' and 'shortcut' layers.

    It holds no parameters and defines no forward pass of its own.
    """

    def __init__(self):
        nn.Module.__init__(self)
99
+
100
+
101
class YOLOLayer(nn.Module):
    """Detection layer

    Decodes a raw feature map into bounding boxes, objectness and class scores.
    When targets are supplied it returns the training loss and its components
    instead of the decoded detections.
    """

    def __init__(self, anchors, num_classes, img_dim):
        """
        :param anchors: list of (width, height) anchor pairs used by this layer
        :param num_classes: number of object classes predicted per box
        :param img_dim: network input size; divided by the grid size to get the stride
        """
        super(YOLOLayer, self).__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        # 4 box values + 1 objectness score + one score per class
        self.bbox_attrs = 5 + num_classes
        self.image_dim = img_dim
        self.ignore_thres = 0.5
        self.lambda_coord = 1

        # reduction="mean" replaces the deprecated size_average=True
        # (same behaviour, no deprecation warning)
        self.mse_loss = nn.MSELoss(reduction="mean")  # Coordinate loss
        self.bce_loss = nn.BCELoss(reduction="mean")  # Confidence loss
        self.ce_loss = nn.CrossEntropyLoss()  # Class loss

    def forward(self, x, targets=None):
        """
        :param x: feature map viewed as (nB, nA * bbox_attrs, nG, nG)
        :param targets: ground truth handed to build_targets; None for inference
        :return: without targets, a tensor of shape (nB, nA * nG * nG, 5 + num_classes)
            holding (x, y, w, h) scaled by the stride, objectness and class scores.
            With targets, the tuple (loss, loss_x, loss_y, loss_w, loss_h,
            loss_conf, loss_cls, recall, precision); only the first entry is a tensor.
        """
        nA = self.num_anchors
        nB = x.size(0)
        nG = x.size(2)
        stride = self.image_dim / nG

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        # (nB, nA * attrs, nG, nG) -> (nB, nA, nG, nG, attrs)
        prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Calculate offsets for each grid
        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
        # anchors expressed in grid cells rather than input pixels
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

        # Add offset and scale with anchors (.data: no gradient flows through the boxes)
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        # Training
        if targets is not None:

            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()

            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                pred_boxes=pred_boxes.cpu().data,
                pred_conf=pred_conf.cpu().data,
                pred_cls=pred_cls.cpu().data,
                target=targets.cpu().data,
                anchors=scaled_anchors.cpu().data,
                num_anchors=nA,
                num_classes=self.num_classes,
                grid_size=nG,
                ignore_thres=self.ignore_thres,
                img_dim=self.image_dim,
            )

            nProposals = int((pred_conf > 0.5).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1
            # Guard against ZeroDivisionError when no box exceeds 0.5 confidence
            precision = float(nCorrect / nProposals) if nProposals else 0

            # Handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(LongTensor), requires_grad=False)

            # Get conf mask where gt and where there is no gt
            conf_mask_true = mask
            conf_mask_false = conf_mask - mask

            # Mask outputs to ignore non-existing objects
            loss_x = self.mse_loss(x[mask], tx[mask])
            loss_y = self.mse_loss(y[mask], ty[mask])
            loss_w = self.mse_loss(w[mask], tw[mask])
            loss_h = self.mse_loss(h[mask], th[mask])
            loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
                pred_conf[conf_mask_true], tconf[conf_mask_true]
            )
            loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return (
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )

        else:
            # If not in training phase return predictions
            output = torch.cat(
                (
                    # boxes are converted from grid cells back to input pixels
                    pred_boxes.view(nB, -1, 4) * stride,
                    pred_conf.view(nB, -1, 1),
                    pred_cls.view(nB, -1, self.num_classes),
                ),
                -1,
            )
            return output
228
+
229
+
230
class Darknet(nn.Module):
    """YOLOv3 object detection model"""

    def __init__(self, config_path, img_size=416):
        """
        :param config_path: path of the model configuration parsed by parse_model_config
        :param img_size: network input size, stored on the instance
        """
        super(Darknet, self).__init__()
        self.module_defs = parse_model_config(config_path)
        # create_modules pops the hyperparameter block off self.module_defs,
        # leaving one definition per entry of self.module_list
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.img_size = img_size
        self.seen = 0
        # int32 to match the header layout read back by load_weights
        self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
        self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"]

    def forward(self, x, targets=None):
        """
        Runs the network layer by layer.

        :param x: input batch
        :param targets: ground truth passed to the yolo layers; None for inference
        :return: with targets, the sum of the yolo-layer losses (the individual
            loss terms are accumulated in self.losses); otherwise the detections
            of all yolo layers concatenated along dimension 1
        """
        is_training = targets is not None
        output = []
        self.losses = defaultdict(float)
        layer_outputs = []
        num_yolo_layers = 0
        for module_def, module in zip(self.module_defs, self.module_list):
            if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
                x = module(x)
            elif module_def["type"] == "route":
                # concatenate the outputs of the referenced layers along channels
                layer_i = [int(idx) for idx in module_def["layers"].split(",")]
                x = torch.cat([layer_outputs[idx] for idx in layer_i], 1)
            elif module_def["type"] == "shortcut":
                # residual connection: previous output + referenced layer output
                layer_i = int(module_def["from"])
                x = layer_outputs[-1] + layer_outputs[layer_i]
            elif module_def["type"] == "yolo":
                num_yolo_layers += 1
                # Train phase: get loss
                if is_training:
                    x, *losses = module[0](x, targets)
                    for name, loss in zip(self.loss_names, losses):
                        self.losses[name] += loss
                # Test phase: Get detections
                else:
                    x = module(x)
                output.append(x)
            layer_outputs.append(x)

        # recall/precision were summed over the yolo layers; report their mean
        # (3 layers for the standard YOLOv3 configuration)
        self.losses["recall"] /= max(num_yolo_layers, 1)
        self.losses["precision"] /= max(num_yolo_layers, 1)
        return sum(output) if is_training else torch.cat(output, 1)

    def load_weights(self, weights_path):
        """Parses and loads the weights stored in 'weights_path'

        The file starts with five int32 header values followed by float32
        weights, consumed in layer order for every convolutional block.
        """

        # Open the weights file (closed automatically, even on error)
        with open(weights_path, "rb") as fp:
            header = np.fromfile(fp, dtype=np.int32, count=5)  # First five are header values
            weights = np.fromfile(fp, dtype=np.float32)  # The rest are weights

        # Needed to write header when saving weights
        self.header_info = header
        self.seen = header[3]

        ptr = 0  # read position inside the flat weights array
        for module_def, module in zip(self.module_defs, self.module_list):
            if module_def["type"] == "convolutional":
                conv_layer = module[0]
                # int() so that a string "0" from the config counts as "no batch norm",
                # consistent with create_modules
                if int(module_def["batch_normalize"]):
                    # Load BN bias, weights, running mean and running variance
                    bn_layer = module[1]
                    num_b = bn_layer.bias.numel()  # Number of biases
                    # Bias
                    bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
                    bn_layer.bias.data.copy_(bn_b)
                    ptr += num_b
                    # Weight
                    bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
                    bn_layer.weight.data.copy_(bn_w)
                    ptr += num_b
                    # Running Mean
                    bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
                    bn_layer.running_mean.data.copy_(bn_rm)
                    ptr += num_b
                    # Running Var
                    bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
                    bn_layer.running_var.data.copy_(bn_rv)
                    ptr += num_b
                else:
                    # Load conv. bias
                    num_b = conv_layer.bias.numel()
                    conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
                    conv_layer.bias.data.copy_(conv_b)
                    ptr += num_b
                # Load conv. weights
                num_w = conv_layer.weight.numel()
                conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
                conv_layer.weight.data.copy_(conv_w)
                ptr += num_w

    def save_weights(self, path, cutoff=-1):
        """
        Writes the header and the convolutional weights in the layout read by load_weights.

        :param path: path of the new weights file
        :param cutoff: save layers between 0 and cutoff (cutoff = -1 -> all are saved)
        """
        # -1 means "every layer"; a plain [:-1] slice would silently drop the last one
        stop = None if cutoff == -1 else cutoff

        with open(path, "wb") as fp:
            self.header_info[3] = self.seen
            # always written as int32, the dtype load_weights expects
            np.asarray(self.header_info, dtype=np.int32).tofile(fp)

            # Iterate through layers
            for module_def, module in zip(self.module_defs[:stop], self.module_list[:stop]):
                if module_def["type"] == "convolutional":
                    conv_layer = module[0]
                    # If batch norm, save bn first
                    if int(module_def["batch_normalize"]):
                        bn_layer = module[1]
                        bn_layer.bias.data.cpu().numpy().tofile(fp)
                        bn_layer.weight.data.cpu().numpy().tofile(fp)
                        bn_layer.running_mean.data.cpu().numpy().tofile(fp)
                        bn_layer.running_var.data.cpu().numpy().tofile(fp)
                    # Save conv bias
                    else:
                        conv_layer.bias.data.cpu().numpy().tofile(fp)
                    # Save conv weights
                    conv_layer.weight.data.cpu().numpy().tofile(fp)
object_tracker.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import *
2
+ from utils import *
3
+
4
+ import os, sys, time, datetime, random
5
+ import torch
6
+ from torch.utils.data import DataLoader
7
+ from torchvision import datasets, transforms
8
+ from torch.autograd import Variable
9
+
10
+ from PIL import Image
11
+
12
# load weights and set defaults
# Paths are relative to the current working directory.
config_path='config/yolov3.cfg'
weights_path='config/yolov3.weights'
class_path='config/coco.names'
# Target size used by detect_image when scaling and padding frames.
img_size=416
# Thresholds passed to utils.non_max_suppression in detect_image.
conf_thres=0.8
nms_thres=0.4

# load model and put into eval mode
model = Darknet(config_path, img_size=img_size)
model.load_weights(weights_path)
# The model is moved to the GPU unconditionally.
model.cuda()
model.eval()

# Class names, indexed by the predicted class id when labelling boxes.
classes = utils.load_classes(class_path)
# Tensor type detect_image casts its input to before running the model.
Tensor = torch.cuda.FloatTensor
28
+
29
+ def detect_image(img):
30
+ # scale and pad image
31
+ ratio = min(img_size/img.size[0], img_size/img.size[1])
32
+ imw = round(img.size[0] * ratio)
33
+ imh = round(img.size[1] * ratio)
34
+ img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),
35
+ transforms.Pad((max(int((imh-imw)/2),0), max(int((imw-imh)/2),0), max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),
36
+ (128,128,128)),
37
+ transforms.ToTensor(),
38
+ ])
39
+ # convert image to Tensor
40
+ image_tensor = img_transforms(img).float()
41
+ image_tensor = image_tensor.unsqueeze_(0)
42
+ input_img = Variable(image_tensor.type(Tensor))
43
+ # run inference on the model and get detections
44
+ with torch.no_grad():
45
+ detections = model(input_img)
46
+ detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)
47
+ return detections[0]
48
+
49
videopath = './videos/HorseRacing.mp4'

import cv2
from sort import *
# one BGR colour per track id (ids are mapped with modulo)
colors=[(255,0,0),(0,255,0),(0,0,255),(255,0,255),(128,0,0),(0,128,0),(0,0,128),(128,0,128),(128,128,0),(0,128,128)]

vid = cv2.VideoCapture(videopath)
mot_tracker = Sort()

# NOTE: this first frame is only used to read the video size; it is neither
# processed nor written to the output video.
ret,frame=vid.read()
if not ret:
    # fail with a clear message instead of an AttributeError on frame.shape
    vid.release()
    raise IOError("Could not read a frame from %s" % videopath)

cv2.namedWindow('Stream',cv2.WINDOW_NORMAL)
cv2.resizeWindow('Stream', (800,600))

fourcc = cv2.VideoWriter_fourcc(*'XVID')
vw = frame.shape[1]
vh = frame.shape[0]
print ("Video size", vw,vh)
outvideo = cv2.VideoWriter(videopath.replace(".mp4", "-det.mp4"),fourcc,20.0,(vw,vh))

frames = 0
starttime = time.time()
try:
    while(True):
        ret, frame = vid.read()
        if not ret:
            break
        frames += 1
        # the detector works on an RGB PIL image; drawing/writing uses BGR
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pilimg = Image.fromarray(frame)
        detections = detect_image(pilimg)

        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        img = np.array(pilimg)
        # padding that detect_image added, expressed in network-input pixels
        pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
        pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
        unpad_h = img_size - pad_y
        unpad_w = img_size - pad_x
        if detections is not None:
            tracked_objects = mot_tracker.update(detections.cpu())

            for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:
                # map box from network-input coordinates back to the frame
                box_h = int(((y2 - y1) / unpad_h) * img.shape[0])
                box_w = int(((x2 - x1) / unpad_w) * img.shape[1])
                y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])
                x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])
                color = colors[int(obj_id) % len(colors)]
                cls = classes[int(cls_pred)]
                cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)
                cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+80, y1), color, -1)
                cv2.putText(frame, cls + "-" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)

        cv2.imshow('Stream', frame)
        outvideo.write(frame)
        ch = 0xFF & cv2.waitKey(1)
        if ch == 27:  # Esc stops processing
            break

    totaltime = time.time()-starttime
finally:
    # always release the capture/writer and close the window, even on error
    vid.release()
    outvideo.release()
    cv2.destroyAllWindows()

if frames:
    print(frames, "frames", totaltime/frames, "s/frame")
else:
    # avoid ZeroDivisionError when the video holds no frame after the first
    print(frames, "frames")
sort.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SORT: A Simple, Online and Realtime Tracker
3
+ Copyright (C) 2016 Alex Bewley [email protected]
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ """
18
+ from __future__ import print_function
19
+
20
+ from numba import jit
21
+ import os.path
22
+ import numpy as np
23
+ ##import matplotlib.pyplot as plt
24
+ ##import matplotlib.patches as patches
25
+ from skimage import io
26
+ # from sklearn.utils.linear_assignment_ import linear_assignment
27
+ import glob
28
+ import time
29
+ import argparse
30
+ from filterpy.kalman import KalmanFilter
31
+
32
+ from scipy.optimize import linear_sum_assignment
33
def linear_assignment(x):
    """
    Solves the linear sum assignment problem for cost matrix x and returns the
    matched (row, column) index pairs as an array with one pair per row.
    """
    row_idx, col_idx = linear_sum_assignment(x)
    return np.column_stack((row_idx, col_idx))
37
+
38
def iou(bb_test,bb_gt):
    """
    Computes IoU (intersection over union) between two bboxes in the form [x1,y1,x2,y2]

    Returns 0.0 when the union area is not positive (degenerate boxes) instead
    of dividing by zero.
    """
    # The former bare @jit decorator was dropped: numba could not type the
    # arguments and fell back to object mode, a deprecated path that gives no
    # speed-up; the function now runs as plain NumPy code.
    xx1 = np.maximum(bb_test[0], bb_gt[0])
    yy1 = np.maximum(bb_test[1], bb_gt[1])
    xx2 = np.minimum(bb_test[2], bb_gt[2])
    yy2 = np.minimum(bb_test[3], bb_gt[3])
    # width/height of the overlap, clamped at zero for disjoint boxes
    w = np.maximum(0., xx2 - xx1)
    h = np.maximum(0., yy2 - yy1)
    wh = w * h
    union = ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
      + (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
    if union <= 0:
        return 0.0
    o = wh / union
    return(o)
53
+
54
def convert_bbox_to_z(bbox):
    """
    Converts a corner-form box [x1,y1,x2,y2] into the measurement column vector
    [x,y,s,r]: x,y is the box centre, s its area (scale) and r the
    width-to-height aspect ratio. Only the first four entries of bbox are read.
    """
    left, top = bbox[0], bbox[1]
    width = bbox[2] - left
    height = bbox[3] - top
    centre_x = left + width/2.
    centre_y = top + height/2.
    area = width*height
    aspect = width/float(height)
    measurement = np.array([centre_x, centre_y, area, aspect])
    return measurement.reshape((4,1))
67
+
68
def convert_x_to_bbox(x,score=None):
    """
    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
    [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right

    x may be a flat sequence or a column vector; only its first four entries
    are used. When score is given it is appended as a fifth column.

    Returns an array of shape (1,4), or (1,5) when score is not None.
    """
    # Flatten first so a column-vector state yields scalars; otherwise the
    # score branch would mix length-1 arrays with a scalar in one array.
    state = np.asarray(x).ravel()
    w = np.sqrt(state[2]*state[3])  # s*r = (w*h)*(w/h) = w^2
    h = state[2]/w
    corners = [state[0]-w/2.,state[1]-h/2.,state[0]+w/2.,state[1]+h/2.]
    # identity test: "== None" compares element-wise on array-valued scores
    if score is None:
        return np.array(corners).reshape((1,4))
    else:
        return np.array(corners + [score]).reshape((1,5))
79
+
80
+
81
class KalmanBoxTracker(object):
    """
    This class represents the internal state of individual tracked objects observed as bbox.
    """
    count = 0  # class-wide counter used to hand out tracker ids

    def __init__(self,bbox):
        """
        Initialises a tracker using initial bounding box.

        bbox starts with [x1,y1,x2,y2]. When it has more than six entries, the
        entry at index 6 is stored as the object class; otherwise the class is
        set to -1 so that plain [x1,y1,x2,y2,score] detections are accepted too.
        """
        #define constant velocity model
        # State has 7 entries: the measured [x,y,s,r] followed by the rates of
        # change of x, y and s (F adds state[4:7] onto state[0:3] at each step).
        self.kf = KalmanFilter(dim_x=7, dim_z=4)
        self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],  [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
        self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])

        self.kf.R[2:,2:] *= 10.
        self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
        self.kf.P *= 10.
        self.kf.Q[-1,-1] *= 0.01
        self.kf.Q[4:,4:] *= 0.01

        self.kf.x[:4] = convert_bbox_to_z(bbox)
        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0
        # Previously bbox[6] was read unconditionally, which raised IndexError
        # for detections that carry no class column.
        self.objclass = bbox[6] if len(bbox) > 6 else -1

    def update(self,bbox):
        """
        Updates the state vector with observed bbox.
        """
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1
        self.kf.update(convert_bbox_to_z(bbox))

    def predict(self):
        """
        Advances the state vector and returns the predicted bounding box estimate.
        """
        # Zero the area rate when the next step would give a non-positive area
        if((self.kf.x[6]+self.kf.x[2])<=0):
            self.kf.x[6] *= 0.0
        self.kf.predict()
        self.age += 1
        # A prediction without an intervening update breaks the hit streak
        if(self.time_since_update>0):
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(convert_x_to_bbox(self.kf.x))
        return self.history[-1]

    def get_state(self):
        """
        Returns the current bounding box estimate.
        """
        return convert_x_to_bbox(self.kf.x)
140
+
141
def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
    """
    Assigns detections to tracked object (both represented as bounding boxes)

    Returns a tuple (matches, unmatched_detections, unmatched_trackers):
    matches is an (N,2) array of [detection index, tracker index] pairs, the
    other two are arrays of indices.
    """
    if len(trackers) == 0:
        # Nothing to match against: every detection is unmatched
        return (np.empty((0,2),dtype=int),
                np.arange(len(detections)),
                np.empty((0,5),dtype=int))

    num_dets = len(detections)
    num_trks = len(trackers)
    iou_matrix = np.zeros((num_dets,num_trks),dtype=np.float32)
    for row,det in enumerate(detections):
        for col,trk in enumerate(trackers):
            iou_matrix[row,col] = iou(det,trk)

    # The solver minimises cost, so negate the IOU to maximise overlap
    matched_indices = linear_assignment(-iou_matrix)

    unmatched_detections = [d for d in range(num_dets) if d not in matched_indices[:,0]]
    unmatched_trackers = [t for t in range(num_trks) if t not in matched_indices[:,1]]

    #filter out matched with low IOU
    kept_pairs = []
    for pair in matched_indices:
        if iou_matrix[pair[0],pair[1]] < iou_threshold:
            unmatched_detections.append(pair[0])
            unmatched_trackers.append(pair[1])
            continue
        kept_pairs.append(pair.reshape(1,2))

    if kept_pairs:
        matches = np.concatenate(kept_pairs,axis=0)
    else:
        matches = np.empty((0,2),dtype=int)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
179
+
180
+
181
+
182
class Sort(object):
    """
    Multi-object tracker that keeps one KalmanBoxTracker per tracked object.
    """
    def __init__(self,max_age=1,min_hits=3):
        """
        Sets key parameters for SORT

        max_age  - a tracker is dropped once its time_since_update exceeds this value
        min_hits - hit streak a tracker needs before it is reported (not
                   enforced while frame_count <= min_hits)
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.trackers = []
        self.frame_count = 0

    def update(self,dets):
        """
        Params:
          dets - a numpy array of detections, one row per detection, starting with
                 [x1,y1,x2,y2]. Each row is passed unchanged to KalmanBoxTracker, which
                 determines how any further columns (score, class, ...) are used.
        Requires: this method must be called once for each frame even with empty detections.
        Returns an array with one row [x1,y1,x2,y2,track_id,objclass] per reported track
        (track ids start at 1), or an empty (0,6) array when nothing is reported.

        NOTE: The number of objects returned may differ from the number of detections provided.
        """
        self.frame_count += 1
        #get predicted locations from existing trackers.
        trks = np.zeros((len(self.trackers),5))
        to_del = []
        ret = []
        for t,trk in enumerate(trks):
            pos = self.trackers[t].predict()[0]
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
            if(np.any(np.isnan(pos))):
                to_del.append(t)
        # Drop rows with invalid predictions, and drop the matching trackers so
        # that row indices of trks keep lining up with self.trackers
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)
        matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)

        #update matched trackers with assigned detections
        for t,trk in enumerate(self.trackers):
            if(t not in unmatched_trks):
                d = matched[np.where(matched[:,1]==t)[0],0]
                trk.update(dets[d,:][0])

        #create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            trk = KalmanBoxTracker(dets[i,:])
            self.trackers.append(trk)
        # Walk backwards so that popping index i does not shift unvisited entries
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            d = trk.get_state()[0]
            if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
                ret.append(np.concatenate((d,[trk.id+1], [trk.objclass])).reshape(1,-1)) # +1 as MOT benchmark requires positive
            i -= 1
            #remove dead tracklet
            if(trk.time_since_update > self.max_age):
                self.trackers.pop(i)
        if(len(ret)>0):
            return np.concatenate(ret)
        # Same column count as the non-empty result (previously (0,5))
        return np.empty((0,6))
238
+
239
def parse_args():
    """Builds the SORT demo command line parser and returns the parsed arguments."""
    parser = argparse.ArgumentParser(description='SORT demo')
    parser.add_argument(
        '--display',
        dest='display',
        action='store_true',
        help='Display online tracker output (slow) [False]',
    )
    return parser.parse_args()
245
+
246
if __name__ == '__main__':
    # Demo: runs the tracker over the detection files of each listed sequence,
    # writes one result file per sequence to output/ and prints the frame rate.
    # all train
    sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
    args = parse_args()
    display = args.display
    phase = 'train'
    total_time = 0.0
    total_frames = 0
    colours = np.random.rand(32,3) #used only for display
    if(display):
        if not os.path.exists('mot_benchmark'):
            print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
            # Exit with a failure status (exit() reported success here)
            raise SystemExit(1)
        # NOTE(review): plt and patches are only defined by the matplotlib
        # imports that are commented out at the top of this file; re-enable
        # them before using --display, otherwise this raises NameError.
        plt.ion()
        fig = plt.figure()

    if not os.path.exists('output'):
        os.makedirs('output')

    for seq in sequences:
        mot_tracker = Sort() #create instance of the SORT tracker
        seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
        with open('output/%s.txt'%(seq),'w') as out_file:
            print("Processing %s."%(seq))
            for frame in range(int(seq_dets[:,0].max())):
                frame += 1 #detection and frame numbers begin at 1
                # NOTE(review): this slice yields 5 columns per detection —
                # confirm KalmanBoxTracker accepts rows without a class column.
                dets = seq_dets[seq_dets[:,0]==frame,2:7]
                dets[:,2:4] += dets[:,0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2]
                total_frames += 1

                if(display):
                    ax1 = fig.add_subplot(111, aspect='equal')
                    fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
                    im =io.imread(fn)
                    ax1.imshow(im)
                    plt.title(seq+' Tracked Targets')

                # Only the tracker update itself is timed
                start_time = time.time()
                trackers = mot_tracker.update(dets)
                cycle_time = time.time() - start_time
                total_time += cycle_time

                for d in trackers:
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
                    if(display):
                        d = d.astype(np.int32)
                        ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
                        # NOTE(review): 'box-forced' appears to be rejected by
                        # recent matplotlib releases — confirm and switch to 'box'.
                        ax1.set_adjustable('box-forced')

                if(display):
                    fig.canvas.flush_events()
                    plt.draw()
                    ax1.cla()

    # Guard against a zero elapsed time (no frames, or a coarse timer)
    fps = total_frames/total_time if total_time > 0 else 0.0
    print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,fps))
    if(display):
        print("Note: to get real runtime results run without the option: --display")
303
+
304
+
305
+
sort.py.old ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SORT: A Simple, Online and Realtime Tracker
3
+ Copyright (C) 2016 Alex Bewley [email protected]
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ """
18
+ from __future__ import print_function
19
+
20
+ from numba import jit
21
+ import os.path
22
+ import numpy as np
23
+ ##import matplotlib.pyplot as plt
24
+ ##import matplotlib.patches as patches
25
+ from skimage import io
26
+ # from scipy.optimize import linear_sum_assignment as linear_assignment
27
+ import glob
28
+ import time
29
+ import argparse
30
+ from filterpy.kalman import KalmanFilter
31
+
32
+ # -------- Fixes the Warning ----------------------
33
+ # def linear_assignment(cost_matrix):
34
+ # try:
35
+ # import lap
36
+ # _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
37
+ # return np.array([[y[i], i] for i in x if i >= 0])
38
+ # except ImportError:
39
+ # from scipy.optimize import linear_sum_assignment
40
+ # x, y = linear_sum_assignment(cost_matrix)
41
+ # return np.array(list(zip(x, y)))
42
+
43
+ # --------------- Fixes the Error
44
+ from scipy.optimize import linear_sum_assignment
45
def linear_assignment(x):
    """
    Solves the linear sum assignment problem for the cost matrix x.

    Returns an (N, 2) integer array whose rows are the [row, column] index
    pairs chosen by scipy's linear_sum_assignment.
    """
    row_ind, col_ind = linear_sum_assignment(x)
    return np.stack((row_ind, col_ind), axis=1)
49
+
50
@jit
def iou(bb_test,bb_gt):
    """
    Computes IOU (intersection over union) between two bboxes in the form [x1,y1,x2,y2].

    Returns 0 when the union of the two boxes has no positive area (for
    example when both boxes are degenerate) instead of dividing by zero.
    """
    xx1 = np.maximum(bb_test[0], bb_gt[0])
    yy1 = np.maximum(bb_test[1], bb_gt[1])
    xx2 = np.minimum(bb_test[2], bb_gt[2])
    yy2 = np.minimum(bb_test[3], bb_gt[3])
    # Clamp at zero so that non-overlapping boxes give an empty intersection
    w = np.maximum(0., xx2 - xx1)
    h = np.maximum(0., yy2 - yy1)
    wh = w * h
    union = ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
             + (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
    if union <= 0.:
        return 0.
    return wh / union
65
+
66
def convert_bbox_to_z(bbox):
    """
    Converts a bounding box given as [x1,y1,x2,y2,...] into the measurement
    vector z = [x,y,s,r] (returned with shape (4,1)), where x,y is the centre
    of the box, s is the scale (area) and r is the aspect ratio width/height.
    Only the first four entries of bbox are read.
    """
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    width = right - left
    height = bottom - top
    centre_x = left + width/2.
    centre_y = top + height/2.
    area = width*height  # scale is just area
    aspect = width/float(height)
    return np.array([centre_x, centre_y, area, aspect]).reshape((4,1))
79
+
80
def convert_x_to_bbox(x,score=None):
    """
    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
    [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right.

    x may be a flat vector or a column vector (only its first four entries are
    read). The result has shape (1,4), or (1,5) with score appended as the
    last column when score is given.
    """
    w = np.sqrt(x[2]*x[3])
    h = x[2]/w
    # Flatten first: with a column-vector state each entry is a length-1 array,
    # and mixing those with a scalar score would build a ragged array.
    box = np.array([x[0]-w/2., x[1]-h/2., x[0]+w/2., x[1]+h/2.]).reshape(4)
    if score is None:
        return box.reshape((1,4))
    return np.append(box, score).reshape((1,5))
91
+
92
+
93
class KalmanBoxTracker(object):
    """
    This class represents the internal state of individual tracked objects observed as bbox.
    """
    count = 0  # class-wide counter used to hand out tracker ids

    def __init__(self,bbox):
        """
        Initialises a tracker using initial bounding box.

        bbox starts with [x1,y1,x2,y2]. When it has more than six entries, the
        entry at index 6 is stored as the object class; otherwise the class is
        set to -1 so that plain [x1,y1,x2,y2,score] detections are accepted too.
        """
        #define constant velocity model
        # State has 7 entries: the measured [x,y,s,r] followed by the rates of
        # change of x, y and s (F adds state[4:7] onto state[0:3] at each step).
        self.kf = KalmanFilter(dim_x=7, dim_z=4)
        self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],  [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
        self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])

        self.kf.R[2:,2:] *= 10.
        self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
        self.kf.P *= 10.
        self.kf.Q[-1,-1] *= 0.01
        self.kf.Q[4:,4:] *= 0.01

        self.kf.x[:4] = convert_bbox_to_z(bbox)
        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0
        # Previously bbox[6] was read unconditionally, which raised IndexError
        # for detections that carry no class column.
        self.objclass = bbox[6] if len(bbox) > 6 else -1

    def update(self,bbox):
        """
        Updates the state vector with observed bbox.
        """
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1
        self.kf.update(convert_bbox_to_z(bbox))

    def predict(self):
        """
        Advances the state vector and returns the predicted bounding box estimate.
        """
        # Zero the area rate when the next step would give a non-positive area
        if((self.kf.x[6]+self.kf.x[2])<=0):
            self.kf.x[6] *= 0.0
        self.kf.predict()
        self.age += 1
        # A prediction without an intervening update breaks the hit streak
        if(self.time_since_update>0):
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(convert_x_to_bbox(self.kf.x))
        return self.history[-1]

    def get_state(self):
        """
        Returns the current bounding box estimate.
        """
        return convert_x_to_bbox(self.kf.x)
152
+
153
def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
    """
    Assigns detections to tracked object (both represented as bounding boxes)

    Returns a tuple (matches, unmatched_detections, unmatched_trackers):
    matches is an (N,2) array of [detection index, tracker index] pairs, the
    other two are arrays of indices.
    """
    if len(trackers) == 0:
        # Nothing to match against: every detection is unmatched
        return (np.empty((0,2),dtype=int),
                np.arange(len(detections)),
                np.empty((0,5),dtype=int))

    num_dets = len(detections)
    num_trks = len(trackers)
    iou_matrix = np.zeros((num_dets,num_trks),dtype=np.float32)
    for row,det in enumerate(detections):
        for col,trk in enumerate(trackers):
            iou_matrix[row,col] = iou(det,trk)

    # The solver minimises cost, so negate the IOU to maximise overlap
    matched_indices = linear_assignment(-iou_matrix)

    unmatched_detections = [d for d in range(num_dets) if d not in matched_indices[:,0]]
    unmatched_trackers = [t for t in range(num_trks) if t not in matched_indices[:,1]]

    #filter out matched with low IOU
    kept_pairs = []
    for pair in matched_indices:
        if iou_matrix[pair[0],pair[1]] < iou_threshold:
            unmatched_detections.append(pair[0])
            unmatched_trackers.append(pair[1])
            continue
        kept_pairs.append(pair.reshape(1,2))

    if kept_pairs:
        matches = np.concatenate(kept_pairs,axis=0)
    else:
        matches = np.empty((0,2),dtype=int)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
191
+
192
+
193
+
194
class Sort(object):
    """
    Multi-object tracker that keeps one KalmanBoxTracker per tracked object.
    """
    def __init__(self,max_age=1,min_hits=3):
        """
        Sets key parameters for SORT

        max_age  - a tracker is dropped once its time_since_update exceeds this value
        min_hits - hit streak a tracker needs before it is reported (not
                   enforced while frame_count <= min_hits)
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.trackers = []
        self.frame_count = 0

    def update(self,dets):
        """
        Params:
          dets - a numpy array of detections, one row per detection, starting with
                 [x1,y1,x2,y2]. Each row is passed unchanged to KalmanBoxTracker, which
                 determines how any further columns (score, class, ...) are used.
        Requires: this method must be called once for each frame even with empty detections.
        Returns an array with one row [x1,y1,x2,y2,track_id,objclass] per reported track
        (track ids start at 1), or an empty (0,6) array when nothing is reported.

        NOTE: The number of objects returned may differ from the number of detections provided.
        """
        self.frame_count += 1
        #get predicted locations from existing trackers.
        trks = np.zeros((len(self.trackers),5))
        to_del = []
        ret = []
        for t,trk in enumerate(trks):
            pos = self.trackers[t].predict()[0]
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
            if(np.any(np.isnan(pos))):
                to_del.append(t)
        # Drop rows with invalid predictions, and drop the matching trackers so
        # that row indices of trks keep lining up with self.trackers
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)
        matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)

        #update matched trackers with assigned detections
        for t,trk in enumerate(self.trackers):
            if(t not in unmatched_trks):
                d = matched[np.where(matched[:,1]==t)[0],0]
                trk.update(dets[d,:][0])

        #create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            trk = KalmanBoxTracker(dets[i,:])
            self.trackers.append(trk)
        # Walk backwards so that popping index i does not shift unvisited entries
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            d = trk.get_state()[0]
            if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
                ret.append(np.concatenate((d,[trk.id+1], [trk.objclass])).reshape(1,-1)) # +1 as MOT benchmark requires positive
            i -= 1
            #remove dead tracklet
            if(trk.time_since_update > self.max_age):
                self.trackers.pop(i)
        if(len(ret)>0):
            return np.concatenate(ret)
        # Same column count as the non-empty result (previously (0,5))
        return np.empty((0,6))
250
+
251
def parse_args():
    """Builds the SORT demo command line parser and returns the parsed arguments."""
    parser = argparse.ArgumentParser(description='SORT demo')
    parser.add_argument(
        '--display',
        dest='display',
        action='store_true',
        help='Display online tracker output (slow) [False]',
    )
    return parser.parse_args()
257
+
258
if __name__ == '__main__':
    # Demo: runs the tracker over the detection files of each listed sequence,
    # writes one result file per sequence to output/ and prints the frame rate.
    # all train
    sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
    args = parse_args()
    display = args.display
    phase = 'train'
    total_time = 0.0
    total_frames = 0
    colours = np.random.rand(32,3) #used only for display
    if(display):
        if not os.path.exists('mot_benchmark'):
            print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
            # Exit with a failure status (exit() reported success here)
            raise SystemExit(1)
        # NOTE(review): plt and patches are only defined by the matplotlib
        # imports that are commented out at the top of this file; re-enable
        # them before using --display, otherwise this raises NameError.
        plt.ion()
        fig = plt.figure()

    if not os.path.exists('output'):
        os.makedirs('output')

    for seq in sequences:
        mot_tracker = Sort() #create instance of the SORT tracker
        seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
        with open('output/%s.txt'%(seq),'w') as out_file:
            print("Processing %s."%(seq))
            for frame in range(int(seq_dets[:,0].max())):
                frame += 1 #detection and frame numbers begin at 1
                # NOTE(review): this slice yields 5 columns per detection —
                # confirm KalmanBoxTracker accepts rows without a class column.
                dets = seq_dets[seq_dets[:,0]==frame,2:7]
                dets[:,2:4] += dets[:,0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2]
                total_frames += 1

                if(display):
                    ax1 = fig.add_subplot(111, aspect='equal')
                    fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
                    im =io.imread(fn)
                    ax1.imshow(im)
                    plt.title(seq+' Tracked Targets')

                # Only the tracker update itself is timed
                start_time = time.time()
                trackers = mot_tracker.update(dets)
                cycle_time = time.time() - start_time
                total_time += cycle_time

                for d in trackers:
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
                    if(display):
                        d = d.astype(np.int32)
                        ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
                        # NOTE(review): 'box-forced' appears to be rejected by
                        # recent matplotlib releases — confirm and switch to 'box'.
                        ax1.set_adjustable('box-forced')

                if(display):
                    fig.canvas.flush_events()
                    plt.draw()
                    ax1.cla()

    # Guard against a zero elapsed time (no frames, or a coarse timer)
    fps = total_frames/total_time if total_time > 0 else 0.0
    print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,fps))
    if(display):
        print("Note: to get real runtime results run without the option: --display")
315
+
316
+
317
+
utils/__pycache__/__init__.cpython-36.pyc ADDED
Binary file (125 Bytes). View file
 
utils/__pycache__/datasets.cpython-36.pyc ADDED
Binary file (3.65 kB). View file
 
utils/__pycache__/parse_config.cpython-36.pyc ADDED
Binary file (1.38 kB). View file
 
utils/__pycache__/parse_config.cpython-37.pyc ADDED
Binary file (1.43 kB). View file
 
utils/__pycache__/utils.cpython-36.pyc ADDED
Binary file (7.05 kB). View file
 
utils/__pycache__/utils.cpython-37.pyc ADDED
Binary file (7.1 kB). View file
 
utils/datasets.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import random
3
+ import os
4
+ import numpy as np
5
+
6
+ import torch
7
+
8
+ from torch.utils.data import Dataset
9
+ from PIL import Image
10
+ import torchvision.transforms as transforms
11
+
12
+ ##import matplotlib.pyplot as plt
13
+ ##import matplotlib.patches as patches
14
+
15
+ from skimage.transform import resize
16
+
17
+ import sys
18
+
19
class ImageFolder(Dataset):
    """Dataset over every file matching '<folder_path>/*.*', in sorted order.

    Each item is (img_path, input_img) where input_img is a float tensor of
    shape (3, img_size, img_size) with values scaled by 1/255.
    """

    def __init__(self, folder_path, img_size=416):
        self.files = sorted(glob.glob('%s/*.*' % folder_path))
        self.img_shape = (img_size, img_size)

    def __getitem__(self, index):
        # Indices wrap around the file list
        img_path = self.files[index % len(self.files)]
        # Extract image. Force three channels so that grayscale, palette and
        # RGBA files do not break the (h, w, 3) handling below, and close the
        # file handle once the pixels are copied.
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding: pad the shorter side so the image becomes square
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        return img_path, input_img

    def __len__(self):
        return len(self.files)
47
+
48
+
49
class ListDataset(Dataset):
    """Dataset driven by a text file that lists one image path per line.

    The label path of an image is its own path with 'images' replaced by
    'labels' and the '.png'/'.jpg' extension replaced by '.txt'. Each item is
    (img_path, input_img, filled_labels); filled_labels always has
    max_objects rows of 5 values, zero-filled past the available labels.
    """

    def __init__(self, list_path, img_size=416):
        with open(list_path, 'r') as list_file:
            self.img_files = list_file.readlines()
        self.label_files = []
        for img_file in self.img_files:
            label_file = img_file.replace('images', 'labels')
            label_file = label_file.replace('.png', '.txt').replace('.jpg', '.txt')
            self.label_files.append(label_file)
        self.img_shape = (img_size, img_size)
        self.max_objects = 50

    def __getitem__(self, index):
        n_files = len(self.img_files)

        # ---- Image ----
        # Move on to the following entries until an image with a channel axis
        # (three array dimensions) is found.
        while True:
            img_path = self.img_files[index % n_files].rstrip()
            img = np.array(Image.open(img_path))
            if len(img.shape) == 3:
                break
            index += 1

        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Split the padding between the two ends of the shorter side
        pad_before = dim_diff // 2
        pad_after = dim_diff - dim_diff // 2
        if h <= w:
            pad = ((pad_before, pad_after), (0, 0), (0, 0))
        else:
            pad = ((0, 0), (pad_before, pad_after), (0, 0))
        padded = np.pad(img, pad, 'constant', constant_values=128) / 255.
        padded_h, padded_w, _ = padded.shape
        resized = resize(padded, (*self.img_shape, 3), mode='reflect')
        # Channels-first float tensor
        input_img = torch.from_numpy(np.transpose(resized, (2, 0, 1))).float()

        # ---- Label ----
        label_path = self.label_files[index % n_files].rstrip()
        filled_labels = np.zeros((self.max_objects, 5))
        if os.path.exists(label_path):
            labels = np.loadtxt(label_path).reshape(-1, 5)
            # Box corners on the unpadded image, then shifted by the padding
            x1 = w * (labels[:, 1] - labels[:, 3]/2)
            y1 = h * (labels[:, 2] - labels[:, 4]/2)
            x2 = w * (labels[:, 1] + labels[:, 3]/2)
            y2 = h * (labels[:, 2] + labels[:, 4]/2)
            x1 += pad[1][0]
            y1 += pad[0][0]
            x2 += pad[1][0]
            y2 += pad[0][0]
            # Back to ratios, now relative to the padded image
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
            kept = labels[:self.max_objects]
            filled_labels[:len(kept)] = kept

        return img_path, input_img, torch.from_numpy(filled_labels)

    def __len__(self):
        return len(self.img_files)
utils/parse_config.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions.

    Returns a list with one dict per '[section]' of the file. Each dict holds
    the section name under 'type' plus the section's 'key=value' pairs as
    strings; 'convolutional' sections start with 'batch_normalize' set to 0.
    Blank lines and lines whose first non-blank character is '#' are ignored.
    """
    with open(path, 'r') as file:
        lines = file.read().split('\n')
    # Strip before filtering so that indented comments and whitespace-only
    # lines are dropped instead of reaching the key=value split below
    lines = [x.strip() for x in lines]
    lines = [x for x in lines if x and not x.startswith('#')]
    module_defs = []
    for line in lines:
        if line.startswith('['): # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            # Split on the first '=' only, so values may contain '='
            key, value = line.split("=", 1)
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs
22
+
23
def parse_data_config(path):
    """Parses the data configuration file.

    Returns a dict of the file's 'key=value' pairs (keys and values stripped,
    values kept as strings) on top of the defaults 'gpus' = '0,1,2,3' and
    'num_workers' = '10'. Blank lines and lines starting with '#' are skipped.
    """
    options = dict()
    options['gpus'] = '0,1,2,3'
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        # Split on the first '=' only, so values (e.g. paths) may contain '='
        key, value = line.split('=', 1)
        options[key.strip()] = value.strip()
    return options
utils/utils.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import division
2
+ import math
3
+ import time
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ from torch.autograd import Variable
8
+ import numpy as np
9
+
10
+ #import matplotlib.pyplot as plt
11
+ #import matplotlib.patches as patches
12
+
13
+
14
def load_classes(path):
    """
    Loads class labels at 'path'

    Returns the lines of the file as a list of strings, one label per line.
    A single trailing newline at the end of the file is optional.
    """
    with open(path, "r") as fp:
        names = fp.read().split("\n")
    # Drop only the empty entry produced by a trailing newline; the previous
    # unconditional [:-1] lost the last label when that newline was missing.
    if names and names[-1] == "":
        names.pop()
    return names
21
+
22
+
23
def weights_init_normal(m):
    """
    Re-initialises the parameters of module m in place, chosen by class name:
    names containing "Conv" get weights drawn from N(0.0, 0.02); otherwise
    names containing "BatchNorm2d" get weights from N(1.0, 0.02) and a zero
    bias. Any other module is left untouched.
    """
    layer_name = m.__class__.__name__
    if "Conv" in layer_name:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if "BatchNorm2d" in layer_name:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
30
+
31
+
32
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # Pad both curves with sentinel values at the start and at the end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))

    # Precision envelope: each entry becomes the maximum of itself and of
    # everything to its right (running maximum taken from the end)
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Positions where the recall value changes
    steps = np.nonzero(mrec[1:] != mrec[:-1])[0]

    # Sum of (recall step) * (envelope precision at the end of the step)
    recall_deltas = mrec[steps + 1] - mrec[steps]
    return np.sum(recall_deltas * mpre[steps + 1])
58
+
59
+
60
def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two bounding boxes

    box1 and box2 are indexed as box[:, 0..3]. With x1y1x2y2=True the columns
    are corner coordinates (x1, y1, x2, y2); otherwise they are read as
    (centre x, centre y, width, height). Widths and heights are measured as
    (max - min + 1).
    """
    def _to_corners(box):
        # Returns (x1, y1, x2, y2) columns for either input convention
        if x1y1x2y2:
            return box[:, 0], box[:, 1], box[:, 2], box[:, 3]
        left = box[:, 0] - box[:, 2] / 2
        right = box[:, 0] + box[:, 2] / 2
        top = box[:, 1] - box[:, 3] / 2
        bottom = box[:, 1] + box[:, 3] / 2
        return left, top, right, bottom

    b1_x1, b1_y1, b1_x2, b1_y2 = _to_corners(box1)
    b2_x1, b2_y1, b2_x2, b2_y2 = _to_corners(box2)

    # Intersection rectangle; negative extents are clamped to zero
    overlap_w = torch.clamp(torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1) + 1, min=0)
    overlap_h = torch.clamp(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + 1, min=0)
    inter_area = overlap_w * overlap_h

    area_1 = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    area_2 = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    # The small constant keeps the denominator non-zero
    return inter_area / (area_1 + area_2 - inter_area + 1e-16)
91
+
92
+
93
def bbox_iou_numpy(box1, box2):
    """Computes IoU between bounding boxes.
    Parameters
    ----------
    box1 : ndarray
        (N, 4) shaped array with bboxes
    box2 : ndarray
        (M, 4) shaped array with bboxes
    Returns
    -------
    : ndarray
        (N, M) shaped array with IoUs
    """
    area_2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    area_1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])

    # Pairwise overlap extents: box1 along rows, box2 along columns
    overlap_w = np.minimum(box1[:, 2][:, None], box2[:, 2]) - np.maximum(box1[:, 0][:, None], box2[:, 0])
    overlap_h = np.minimum(box1[:, 3][:, None], box2[:, 3]) - np.maximum(box1[:, 1][:, None], box2[:, 1])
    overlap_w = np.maximum(overlap_w, 0)
    overlap_h = np.maximum(overlap_h, 0)

    union = area_1[:, None] + area_2 - overlap_w * overlap_h
    # Keep the denominator strictly positive
    union = np.maximum(union, np.finfo(float).eps)

    return (overlap_w * overlap_h) / union
125
+
126
+
127
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres' and performs
    Non-Maximum Suppression to further filter detections.

    Parameters
    ----------
    prediction : torch.Tensor
        Indexed as [image, box, attribute]. Attributes 0-3 are read as
        (center x, center y, width, height), attribute 4 as the object confidence and
        attributes 5 .. 5 + num_classes as the per-class scores.
        The tensor passed in is left unmodified.
    num_classes : int
        Number of class-score columns to consider.
    conf_thres : float
        Boxes whose object confidence is below this value are dropped.
    nms_thres : float
        Within a class, a box is dropped when its IoU with an already kept,
        higher-confidence box is not below this value.

    Returns
    -------
    list
        One entry per image: None when no box passes 'conf_thres', otherwise a tensor
        whose rows are (x1, y1, x2, y2, object_conf, class_score, class_pred).
    """

    # From (center x, center y, width, height) to (x1, y1, x2, y2).
    # A new tensor is assembled instead of writing into the caller's tensor, so
    # running the function twice on the same input does not convert the boxes twice.
    centers = prediction[:, :, 0:2]
    half_sizes = prediction[:, :, 2:4] / 2
    prediction = torch.cat((centers - half_sizes, centers + half_sizes, prediction[:, :, 4:]), 2)

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold. The mask is already 1-D;
        # squeezing it would turn a single-box mask into a 0-d boolean, and indexing
        # with that adds a dimension instead of selecting rows.
        conf_mask = image_pred[:, 4] >= conf_thres
        image_pred = image_pred[conf_mask]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Get score and class with highest confidence
        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
        # Iterate through all predicted classes
        unique_labels = detections[:, -1].cpu().unique()
        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
        for c in unique_labels:
            # Get the detections with the particular class
            detections_class = detections[detections[:, -1] == c]
            # Sort the detections by maximum objectness confidence
            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Perform non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Get detection with highest confidence and save as max detection
                max_detections.append(detections_class[0].unsqueeze(0))
                # Stop if we're at the last detection
                if len(detections_class) == 1:
                    break
                # Get the IOUs for all boxes with lower confidence
                # (bbox_iou is a helper defined elsewhere in this file)
                ious = bbox_iou(max_detections[-1], detections_class[1:])
                # Remove detections with IoU >= NMS threshold
                detections_class = detections_class[1:][ious < nms_thres]

            max_detections = torch.cat(max_detections).data
            # Add max detections to outputs
            output[image_i] = (
                max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))
            )

    return output
185
+
186
+
187
def build_targets(
    pred_boxes, pred_conf, pred_cls, target, anchors, num_anchors, num_classes, grid_size, ignore_thres, img_dim
):
    """Build per-cell training targets and count correct predictions.

    For every non-empty row of ``target``, the anchor whose shape best overlaps the
    ground-truth shape is selected, and the masks and regression targets of the grid
    cell containing the box centre are filled in for that anchor.

    Parameters
    ----------
    pred_boxes, pred_conf, pred_cls
        Predictions, read at ``[b, anchor, gj, gi]``; used only to count correct ones.
    target
        Read at ``[b, t, k]``: column 0 is the class index, columns 1-4 are x, y, w, h
        and are multiplied by ``grid_size`` (presumably normalised to [0, 1] -- confirm
        against the data loader). Rows whose elements sum to zero are skipped.
    anchors
        Sequence of anchors; ``anchors[n][0]`` / ``anchors[n][1]`` are used as the
        width / height that ``gw`` / ``gh`` are divided by.
    num_anchors, num_classes, grid_size
        Sizes of the allocated target tensors.
    ignore_thres
        Anchors whose shape IoU with the ground truth exceeds this value are removed
        from ``conf_mask`` (unless they are the best match).
    img_dim
        Not used by this function.

    Returns
    -------
    tuple
        ``(nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls)``
    """
    nB = target.size(0)
    nA, nC, nG = num_anchors, num_classes, grid_size
    cell_shape = (nB, nA, nG, nG)

    mask = torch.zeros(*cell_shape)
    conf_mask = torch.ones(*cell_shape)
    tx = torch.zeros(*cell_shape)
    ty = torch.zeros(*cell_shape)
    tw = torch.zeros(*cell_shape)
    th = torch.zeros(*cell_shape)
    tconf = torch.ByteTensor(*cell_shape).fill_(0)
    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(target.shape[1]):
            gt_row = target[b, t]
            # All-zero rows carry no box
            if gt_row.sum() == 0:
                continue
            nGT += 1

            # Scale the box to grid units
            gx = gt_row[1] * nG
            gy = gt_row[2] * nG
            gw = gt_row[3] * nG
            gh = gt_row[4] * nG
            # Indices of the grid cell that contains the box centre
            gi, gj = int(gx), int(gy)

            # Compare shapes only: both the ground truth and the anchors are placed
            # at the origin before their IoU is computed.
            gt_shape = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1))
            anch_ious = bbox_iou(gt_shape, anchor_shapes)

            # Anchors that overlap more than the threshold are ignored ...
            conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
            # ... except the best matching one, which is re-enabled below.
            best_n = np.argmax(anch_ious)

            # Ground-truth box at its real position, and the matching prediction
            gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
            pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)

            # Masks
            mask[b, best_n, gj, gi] = 1
            conf_mask[b, best_n, gj, gi] = 1
            # Centre offset inside the cell
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Log-ratio of box size to anchor size; the epsilon keeps log() finite
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # One-hot encoding of label, and objectness target
            target_label = int(gt_row[0])
            tcls[b, best_n, gj, gi, target_label] = 1
            tconf[b, best_n, gj, gi] = 1

            # A prediction counts as correct when it overlaps the ground truth,
            # predicts the right class and is confident.
            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
            pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
            score = pred_conf[b, best_n, gj, gi]
            if iou > 0.5 and pred_label == target_label and score > 0.5:
                nCorrect += 1

    return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls
254
+
255
+
256
def to_categorical(y, num_classes):
    """Return the one-hot rows selected by ``y`` as a uint8 torch tensor.

    Row ``k`` of a ``num_classes`` x ``num_classes`` identity matrix is the one-hot
    code of class ``k``; indexing that matrix with ``y`` picks one row per label.
    """
    identity = np.eye(num_classes, dtype="uint8")
    one_hot = identity[y]
    return torch.from_numpy(one_hot)