Spaces:

schibsted-presplit
/

Facial_Recognition_with_Sentiment_Detector

Running

+# Facial Recognition with Emotion / Sentiment Detector
+# This is a custom, hard-coded version of darknet with
+# YOLOv3 implementation for openimages database. This
+# was written to test viability of implementing YOLO
+# for face detection followed by emotion / sentiment
+# analysis.
+#
+# Configuration, weights and data are hardcoded.
+# This version takes any images, detects faces,
+# and then runs emotion / sentiment analysis
+#
+# Author    : Saikiran Tharimena
+# Co-Authors: Kjetil Marinius Sjulsen, Juan Carlos Calvet Lopez
+# Project   : Emotion / Sentiment Detection from news images
+# Date      : 12 September 2022
+# Version   : v0.1
+#
+# (C) Schibsted ASA
+# Libraries
+import torch
+from utils import *
+import gradio as gr
+from numpy import array
+from darknet import Darknet
+from torch.autograd import Variable
+from torch.cuda import is_available as check_cuda
+from PIL.ImageOps import grayscale
+from fastai.vision.all import PILImage, load_learner
+################## DARKNET ##################
+# Parameters
+batch_size = 1
+confidence = 0.25
+nms_thresh = 0.30
+run_cuda = False
+# CFG Files
+cfg     = 'cfg/yolov3-openimages.cfg'
+clsnames= 'cfg/openimages.names'
+weights = 'cfg/yolov3-openimages.weights'
+# Load classes
+classes = load_classes(clsnames)
+num_classes = len(classes)
+# Set up the neural network
+print('Load Network')
+model = Darknet(cfg)
+print('Load Weights')
+model.load_weights(weights)
+print('Successfully loaded Network')
+# Check CUDA
+if run_cuda:
+    CUDA = check_cuda()
+else:
+    CUDA = False
+# Input dimension
+inp_dim = int(model.net_info["height"])
+# put the model on GPU
+if CUDA:
+    model.cuda()
+# Set the model in evaluation mode
+model.eval()
+def get_detections(x):
+    c1 = [int(y) for y in x[1:3]]
+    c2 = [int(y) for y in x[3:5]]
+    det_class = int(x[-1])
+    label = "{0}".format(classes[det_class])
+    return (label, tuple(c1 + c2))
+# face detector
+def detector(image):
+    # Just lazy to update this
+    imlist = [image]
+    loaded_ims = [image]
+    im_batches = list(map(prep_image, loaded_ims, [inp_dim for x in range(len(imlist))]))
+    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims]
+    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
+    leftover = 0
+    if (len(im_dim_list) % batch_size):
+        leftover = 1
+    if batch_size != 1:
+        num_batches = len(imlist) // batch_size + leftover
+        im_batches = [torch.cat((im_batches[i*batch_size : min((i +  1)*batch_size,
+                            len(im_batches))]))  for i in range(num_batches)]
+    write = 0
+    if CUDA:
+        im_dim_list = im_dim_list.cuda()
+    for i, batch in enumerate(im_batches):
+        # load the image
+        if CUDA:
+            batch = batch.cuda()
+        with torch.no_grad():
+            prediction = model(Variable(batch), CUDA)
+        prediction = write_results(prediction, confidence, num_classes, nms_conf = nms_thresh)
+        if type(prediction) == int:
+            for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
+                im_id = i*batch_size + im_num
+            continue
+        prediction[:,0] += i*batch_size    # transform the atribute from index in batch to index in imlist
+        if not write: # If we have't initialised output
+            output = prediction
+            write = 1
+        else:
+            output = torch.cat((output, prediction))
+        for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
+            im_id = i * batch_size + im_num
+            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
+        if CUDA:
+            torch.cuda.synchronize()
+    try:
+        output
+    except NameError:
+        return None
+    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
+    scaling_factor = torch.min(608/im_dim_list,1)[0].view(-1,1)
+    output[:, [1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
+    output[:, [2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
+    output[:, 1:5] /= scaling_factor
+    for i in range(output.shape[0]):
+        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
+        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
+    detections = list(map(get_detections, output))
+    if CUDA:
+        torch.cuda.empty_cache()
+    return loaded_ims[0], detections
+#############################################
+# Emotion
+learn_emotion = load_learner('models/emotions_vgg19.pkl')
+learn_emotion_labels = learn_emotion.dls.vocab
+# Sentiment
+learn_sentiment = load_learner('models/sentiment_vgg19.pkl')
+learn_sentiment_labels = learn_sentiment.dls.vocab
+def crop_images(img, bbox):
+    "Here image should be an image object from PILImage.create"
+    # Coordinates of face in cv2 format
+    xmin, ymin, xmax, ymax = bbox[1]
+    # resize and crop face
+    return img.crop((xmin, ymin, xmax, ymax))
+def detect_person_face(img, detections):
+    '''This function is called from within detect face.
+    If only a person is detected, then this will crop
+    image and then try to detect face again.'''
+    faces = []
+    # Loop through people
+    for detection in detections:
+        # Get cropped image of person
+        temp = crop_images(img, detection)
+        # run detector again
+        _, detect = detector(array(temp)[...,:3])
+        # check for human faces
+        human_face = [idx for idx, val in enumerate(detect) if val[0] == 'Human face']
+        if len(human_face) == 0:
+            continue
+        # Force it to take only 1 face per person
+        # crop face and append to list
+        faces.append(crop_images(temp, detect[human_face[0]]))
+    return faces
+def detect_face(img):
+    _, detections = detector(array(img)[...,:3])
+    # check for human faces
+    human_face = [idx for idx, val in enumerate(detections) if val[0] == 'Human face']
+    if len(human_face) == 0:
+        human_face = [idx for idx, val in enumerate(detections) if val[0] == 'Person']
+        if len(human_face) == 0:
+            return None
+        else:
+            # Only get human face detections
+            faces = detect_person_face(img, [detections[idx] for idx in human_face])
+    else:
+        # Only get human face detections
+        faces = []
+        for idx in human_face:
+            faces.append(crop_images(img, detections[idx]))
+    return faces
+# Predict
+def predict(img):
+    img = PILImage.create(img)
+    # Detect faces
+    faces = detect_face(img)
+    output = []
+    if len(faces) == 0:
+        img = img.resize(48, 48)
+        pred_emotion, pred_emotion_idx, probs_emotion = learn_emotion.predict(array(grayscale(img)))
+        pred_sentiment, pred_sentiment_idx, probs_sentiment = learn_sentiment.predict(array(grayscale(img)))
+        emotions = {learn_emotion_labels[i]: float(probs_emotion[i]) for i in range(len(learn_emotion_labels))}
+        sentiments = {learn_sentiment_labels[i]: float(probs_sentiment[i]) for i in range(len(learn_sentiment_labels))}
+        output = [img.resize((48, 48)), emotions, sentiments, None, None, None, None, None, None]
+    else: # Max 3 for now
+        for face in faces[:3]:
+            img = face.resize((48, 48))
+            pred_emotion, pred_emotion_idx, probs_emotion = learn_emotion.predict(array(grayscale(img)))
+            pred_sentiment, pred_sentiment_idx, probs_sentiment = learn_sentiment.predict(array(grayscale(img)))
+            emotions = {learn_emotion_labels[i]: float(probs_emotion[i]) for i in range(len(learn_emotion_labels))}
+            sentiments = {learn_sentiment_labels[i]: float(probs_sentiment[i]) for i in range(len(learn_sentiment_labels))}
+            output.append(img)
+            output.append(emotions)
+            output.append(sentiments)
+        temp = output[-3:]
+        while len(output) < 9:
+            output = output + temp
+    return output
+# Gradio
+title = 'Face Recognition with Emotion and Sentiment Detector'
+description = gr.Markdown(
+                """Ever wondered what a person might be feeling looking at their picture?
+                 Well, now you can! Try this fun app. Just upload a facial image in JPG or
+                 PNG format. Voila! you can now see what they might have felt when the picture
+                 was taken.
+                 This is an updated version of Facial Expression Classifier:
+                 https://huggingface.co/spaces/schibsted/facial_expression_classifier
+                 """).value
+article = gr.Markdown(
+             """**DISCLAIMER:** This model does not reveal the actual emotional state of a person. Use and
+             interpret results at your own risk! It was built as a demo for AI course. Samples images
+             were downloaded from VG & AftenPosten news webpages. Copyrights belong to respective
+             brands. All rights reserved.
+             **PREMISE:** The idea is to determine an overall sentiment of a news site on a daily basis
+             based on the pictures. We are restricting pictures to only include close-up facial
+             images.
+             **DATA:** FER2013 dataset consists of 48x48 pixel grayscale images of faces. There are 28,709
+             images in the training set and 3,589 images in the test set. However, for this demo all
+             pictures were combined into a single dataset and 80:20 split was used for training. Images
+             are assigned one of the 7 emotions: Angry, Disgust, Fear, Happy, Sad, Surprise, and Neutral.
+             In addition to these 7 classes, images were re-classified into 3 sentiment categories based
+             on emotions:
+             Positive (Happy, Surprise)
+             Negative (Angry, Disgust, Fear, Sad)
+             Neutral (Neutral)
+             FER2013 (preliminary version) dataset can be downloaded at:
+             https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data
+             **EMOTION / SENTIMENT MODEL:** VGG19 was used as the base model and trained on FER2013 dataset. Model was trained
+             using PyTorch and FastAI. Two models were trained, one for detecting emotion and the other
+             for detecting sentiment. Although, this could have been done with just one model, here two
+             models were trained for the demo.
+             **FACE DETECTOR:** Darknet with YOLOv3 architecture was used for face detection. Reach out to me for full details.
+             In short, any image is first sent through darknet. If face is detected, then it is passed through emotion/sentiment
+             model for each face in the picture. If a person is detected rather than a face, the image is cropped and run through
+             face detector again. If a face is detected, then it is passed through emotion/sentiment model. In case face is not
+             detected in an image, then the entire image is evaluated to generate some score. This is done because, I couldn't
+             figure out how to pipe None/blank output to Gradio.Interface(). There maybe option through Gradio.Blocks() but was
+             too lazy to go through that at this stage. In addition, the output is restricted to only 3 faces in a picture.
+             """).value
+enable_queue=True
+examples = ['happy1.jpg', 'happy2.jpg', 'angry1.png', 'angry2.jpg', 'neutral1.jpg', 'neutral2.jpg']
+gr.Interface(fn = predict,
+             inputs = gr.Image(),
+             outputs = [gr.Image(shape=(24, 24), label='Person 1'),
+                        gr.Label(label='Emotion - Person 1'),
+                        gr.Label(label='Sentiment - Person 1'),
+                        gr.Image(shape=(24, 24), label='Person 2'),
+                        gr.Label(label='Emotion - Person 2'),
+                        gr.Label(label='Sentiment - Person 2'),
+                        gr.Image(shape=(24, 24), label='Person 3'),
+                        gr.Label(label='Emotion - Person 3'),
+                        gr.Label(label='Sentiment - Person 3'),], #gr.Label(),
+             title = title,
+             examples = examples,
+             description = description,
+             article=article,
+             allow_flagging='never').launch(enable_queue=enable_queue)

cfg/openimages.names ADDED Viewed

	@@ -0,0 +1,601 @@

+Tortoise
+Container
+Magpie
+Sea turtle
+Football
+Ambulance
+Ladder
+Toothbrush
+Syringe
+Sink
+Toy
+Organ
+Cassette deck
+Apple
+Human eye
+Cosmetics
+Paddle
+Snowman
+Beer
+Chopsticks
+Human beard
+Bird
+Parking meter
+Traffic light
+Croissant
+Cucumber
+Radish
+Towel
+Doll
+Skull
+Washing machine
+Glove
+Tick
+Belt
+Sunglasses
+Banjo
+Cart
+Ball
+Backpack
+Bicycle
+Home appliance
+Centipede
+Boat
+Surfboard
+Boot
+Headphones
+Hot dog
+Shorts
+Fast food
+Bus
+Boy
+Screwdriver
+Bicycle wheel
+Barge
+Laptop
+Miniskirt
+Drill
+Dress
+Bear
+Waffle
+Pancake
+Brown bear
+Woodpecker
+Blue jay
+Pretzel
+Bagel
+Tower
+Teapot
+Person
+Bow and arrow
+Swimwear
+Beehive
+Brassiere
+Bee
+Bat
+Starfish
+Popcorn
+Burrito
+Chainsaw
+Balloon
+Wrench
+Tent
+Vehicle registration plate
+Lantern
+Toaster
+Flashlight
+Billboard
+Tiara
+Limousine
+Necklace
+Carnivore
+Scissors
+Stairs
+Computer keyboard
+Printer
+Traffic sign
+Chair
+Shirt
+Poster
+Cheese
+Sock
+Fire hydrant
+Land vehicle
+Earrings
+Tie
+Watercraft
+Cabinetry
+Suitcase
+Muffin
+Bidet
+Snack
+Snowmobile
+Clock
+Medical equipment
+Cattle
+Cello
+Jet ski
+Camel
+Coat
+Suit
+Desk
+Cat
+Bronze sculpture
+Juice
+Gondola
+Beetle
+Cannon
+Computer mouse
+Cookie
+Office building
+Fountain
+Coin
+Calculator
+Cocktail
+Computer monitor
+Box
+Stapler
+Christmas tree
+Cowboy hat
+Hiking equipment
+Studio couch
+Drum
+Dessert
+Wine rack
+Drink
+Zucchini
+Ladle
+Human mouth
+Dairy
+Dice
+Oven
+Dinosaur
+Ratchet
+Couch
+Cricket ball
+Winter melon
+Spatula
+Whiteboard
+Pencil sharpener
+Door
+Hat
+Shower
+Eraser
+Fedora
+Guacamole
+Dagger
+Scarf
+Dolphin
+Sombrero
+Tin can
+Mug
+Tap
+Harbor seal
+Stretcher
+Can opener
+Goggles
+Human body
+Roller skates
+Coffee cup
+Cutting board
+Blender
+Plumbing fixture
+Stop sign
+Office supplies
+Volleyball
+Vase
+Slow cooker
+Wardrobe
+Coffee
+Whisk
+Paper towel
+Personal care
+Food
+Sun hat
+Tree house
+Flying disc
+Skirt
+Gas stove
+Salt and pepper shakers
+Mechanical fan
+Face powder
+Fax
+Fruit
+French fries
+Nightstand
+Barrel
+Kite
+Tart
+Treadmill
+Fox
+Flag
+Horn
+Window blind
+Human foot
+Golf cart
+Jacket
+Egg
+Street light
+Guitar
+Pillow
+Human leg
+Isopod
+Grape
+Human ear
+Power plugs and sockets
+Panda
+Giraffe
+Woman
+Door handle
+Rhinoceros
+Bathtub
+Goldfish
+Houseplant
+Goat
+Baseball bat
+Baseball glove
+Mixing bowl
+Marine invertebrates
+Kitchen utensil
+Light switch
+House
+Horse
+Stationary bicycle
+Hammer
+Ceiling fan
+Sofa bed
+Adhesive tape
+Harp
+Sandal
+Bicycle helmet
+Saucer
+Harpsichord
+Human hair
+Heater
+Harmonica
+Hamster
+Curtain
+Bed
+Kettle
+Fireplace
+Scale
+Drinking straw
+Insect
+Hair dryer
+Kitchenware
+Indoor rower
+Invertebrate
+Food processor
+Bookcase
+Refrigerator
+Wood-burning stove
+Punching bag
+Common fig
+Cocktail shaker
+Jaguar
+Golf ball
+Fashion accessory
+Alarm clock
+Filing cabinet
+Artichoke
+Table
+Tableware
+Kangaroo
+Koala
+Knife
+Bottle
+Bottle opener
+Lynx
+Lavender
+Lighthouse
+Dumbbell
+Human head
+Bowl
+Humidifier
+Porch
+Lizard
+Billiard table
+Mammal
+Mouse
+Motorcycle
+Musical instrument
+Swim cap
+Frying pan
+Snowplow
+Bathroom cabinet
+Missile
+Bust
+Man
+Waffle iron
+Milk
+Ring binder
+Plate
+Mobile phone
+Baked goods
+Mushroom
+Crutch
+Pitcher
+Mirror
+Lifejacket
+Table tennis racket
+Pencil case
+Musical keyboard
+Scoreboard
+Briefcase
+Kitchen knife
+Nail
+Tennis ball
+Plastic bag
+Oboe
+Chest of drawers
+Ostrich
+Piano
+Girl
+Plant
+Potato
+Hair spray
+Sports equipment
+Pasta
+Penguin
+Pumpkin
+Pear
+Infant bed
+Polar bear
+Mixer
+Cupboard
+Jacuzzi
+Pizza
+Digital clock
+Pig
+Reptile
+Rifle
+Lipstick
+Skateboard
+Raven
+High heels
+Red panda
+Rose
+Rabbit
+Sculpture
+Saxophone
+Shotgun
+Seafood
+Submarine sandwich
+Snowboard
+Sword
+Picture frame
+Sushi
+Loveseat
+Ski
+Squirrel
+Tripod
+Stethoscope
+Submarine
+Scorpion
+Segway
+Training bench
+Snake
+Coffee table
+Skyscraper
+Sheep
+Television
+Trombone
+Tea
+Tank
+Taco
+Telephone
+Torch
+Tiger
+Strawberry
+Trumpet
+Tree
+Tomato
+Train
+Tool
+Picnic basket
+Cooking spray
+Trousers
+Bowling equipment
+Football helmet
+Truck
+Measuring cup
+Coffeemaker
+Violin
+Vehicle
+Handbag
+Paper cutter
+Wine
+Weapon
+Wheel
+Worm
+Wok
+Whale
+Zebra
+Auto part
+Jug
+Pizza cutter
+Cream
+Monkey
+Lion
+Bread
+Platter
+Chicken
+Eagle
+Helicopter
+Owl
+Duck
+Turtle
+Hippopotamus
+Crocodile
+Toilet
+Toilet paper
+Squid
+Clothing
+Footwear
+Lemon
+Spider
+Deer
+Frog
+Banana
+Rocket
+Wine glass
+Countertop
+Tablet computer
+Waste container
+Swimming pool
+Dog
+Book
+Elephant
+Shark
+Candle
+Leopard
+Axe
+Hand dryer
+Soap dispenser
+Porcupine
+Flower
+Canary
+Cheetah
+Palm tree
+Hamburger
+Maple
+Building
+Fish
+Lobster
+Asparagus
+Furniture
+Hedgehog
+Airplane
+Spoon
+Otter
+Bull
+Oyster
+Horizontal bar
+Convenience store
+Bomb
+Bench
+Ice cream
+Caterpillar
+Butterfly
+Parachute
+Orange
+Antelope
+Beaker
+Moths and butterflies
+Window
+Closet
+Castle
+Jellyfish
+Goose
+Mule
+Swan
+Peach
+Coconut
+Seat belt
+Raccoon
+Chisel
+Fork
+Lamp
+Camera
+Squash
+Racket
+Human face
+Human arm
+Vegetable
+Diaper
+Unicycle
+Falcon
+Chime
+Snail
+Shellfish
+Cabbage
+Carrot
+Mango
+Jeans
+Flowerpot
+Pineapple
+Drawer
+Stool
+Envelope
+Cake
+Dragonfly
+Sunflower
+Microwave oven
+Honeycomb
+Marine mammal
+Sea lion
+Ladybug
+Shelf
+Watch
+Candy
+Salad
+Parrot
+Handgun
+Sparrow
+Van
+Grinder
+Spice rack
+Light bulb
+Corded phone
+Sports uniform
+Tennis racket
+Wall clock
+Serving tray
+Kitchen & dining room table
+Dog bed
+Cake stand
+Cat furniture
+Bathroom accessory
+Facial tissue holder
+Pressure cooker
+Kitchen appliance
+Tire
+Ruler
+Luggage and bags
+Microphone
+Broccoli
+Umbrella
+Pastry
+Grapefruit
+Band-aid
+Animal
+Bell pepper
+Turkey
+Lily
+Pomegranate
+Doughnut
+Glasses
+Human nose
+Pen
+Ant
+Car
+Aircraft
+Human hand
+Skunk
+Teddy bear
+Watermelon
+Cantaloupe
+Dishwasher
+Flute
+Balance beam
+Sandwich
+Shrimp
+Sewing machine
+Binoculars
+Rays and skates
+Ipod
+Accordion
+Willow
+Crab
+Crown
+Seahorse
+Perfume
+Alpaca
+Taxi
+Canoe
+Remote control
+Wheelchair
+Rugby ball
+Armadillo
+Maracas
+Helmet

cfg/yolov3-openimages.cfg ADDED Viewed

	@@ -0,0 +1,789 @@

+[net]
+# Testing
+ batch=1
+ subdivisions=1
+# Training
+batch=64
+subdivisions=16
+width=608
+height=608
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+learning_rate=0.001
+burn_in=5000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=32
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+######################
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=1818
+activation=linear
+[yolo]
+mask = 6,7,8
+anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
+classes=601
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+[route]
+layers = -4
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[upsample]
+stride=2
+[route]
+layers = -1, 61
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=1818
+activation=linear
+[yolo]
+mask = 3,4,5
+anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
+classes=601
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+[route]
+layers = -4
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[upsample]
+stride=2
+[route]
+layers = -1, 36
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=1818
+activation=linear
+[yolo]
+mask = 0,1,2
+anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
+classes=601
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1

darknet.py ADDED Viewed

	@@ -0,0 +1,322 @@

+# PyTorch implementation of Darknet
+# This is a custom, hard-coded version of darknet with
+# YOLOv3 implementation for openimages database. This
+# was written to test viability of implementing YOLO
+# for face detection followed by emotion / sentiment
+# analysis.
+#
+# Configuration, weights and data are hardcoded.
+# Additional options include, ability to create
+# subset of data with faces exracted for labelling.
+#
+# Author    : Saikiran Tharimena
+# Co-Authors: Kjetil Marinius Sjulsen, Juan Carlos Calvet Lopez
+# Project   : Emotion / Sentiment Detection from news images
+# Date      : 12 September 2022
+# Version   : v0.1
+#
+# (C) Schibsted ASA
+# Libraries
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+import numpy as np
+from utils import *
+def parse_cfg(cfgfile):
+    """
+    Takes a configuration file
+    Returns a list of blocks. Each blocks describes a block in the neural
+    network to be built. Block is represented as a dictionary in the list
+    """
+    file = open(cfgfile, 'r')
+    lines = file.read().split('\n')                        # store the lines in a list
+    lines = [x for x in lines if len(x) > 0]               # get read of the empty lines
+    lines = [x for x in lines if x[0] != '#']              # get rid of comments
+    lines = [x.rstrip().lstrip() for x in lines]           # get rid of fringe whitespaces
+    block = {}
+    blocks = []
+    for line in lines:
+        if line[0] == "[":               # This marks the start of a new block
+            if len(block) != 0:          # If block is not empty, implies it is storing values of previous block.
+                blocks.append(block)     # add it the blocks list
+                block = {}               # re-init the block
+            block["type"] = line[1:-1].rstrip()
+        else:
+            key,value = line.split("=")
+            block[key.rstrip()] = value.lstrip()
+    blocks.append(block)
+    return blocks
+class EmptyLayer(nn.Module):
+    def __init__(self):
+        super(EmptyLayer, self).__init__()
+class DetectionLayer(nn.Module):
+    def __init__(self, anchors):
+        super(DetectionLayer, self).__init__()
+        self.anchors = anchors
+def create_modules(blocks):
+    net_info = blocks[0]     #Captures the information about the input and pre-processing
+    module_list = nn.ModuleList()
+    prev_filters = 3
+    output_filters = []
+    for index, x in enumerate(blocks[1:]):
+        module = nn.Sequential()
+        #check the type of block
+        #create a new module for the block
+        #append to module_list
+        #If it's a convolutional layer
+        if (x["type"] == "convolutional"):
+            #Get the info about the layer
+            activation = x["activation"]
+            try:
+                batch_normalize = int(x["batch_normalize"])
+                bias = False
+            except:
+                batch_normalize = 0
+                bias = True
+            filters= int(x["filters"])
+            padding = int(x["pad"])
+            kernel_size = int(x["size"])
+            stride = int(x["stride"])
+            if padding:
+                pad = (kernel_size - 1) // 2
+            else:
+                pad = 0
+            #Add the convolutional layer
+            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias = bias)
+            module.add_module("conv_{0}".format(index), conv)
+            #Add the Batch Norm Layer
+            if batch_normalize:
+                bn = nn.BatchNorm2d(filters)
+                module.add_module("batch_norm_{0}".format(index), bn)
+            #Check the activation.
+            #It is either Linear or a Leaky ReLU for YOLO
+            if activation == "leaky":
+                activn = nn.LeakyReLU(0.1, inplace = True)
+                module.add_module("leaky_{0}".format(index), activn)
+            #If it's an upsampling layer
+            #We use Bilinear2dUpsampling
+        elif (x["type"] == "upsample"):
+            stride = int(x["stride"])
+            upsample = nn.Upsample(scale_factor = 2, mode = "nearest")
+            module.add_module("upsample_{}".format(index), upsample)
+        #If it is a route layer
+        elif (x["type"] == "route"):
+            x["layers"] = x["layers"].split(',')
+            #Start  of a route
+            start = int(x["layers"][0])
+            #end, if there exists one.
+            try:
+                end = int(x["layers"][1])
+            except:
+                end = 0
+            #Positive anotation
+            if start > 0:
+                start = start - index
+            if end > 0:
+                end = end - index
+            route = EmptyLayer()
+            module.add_module("route_{0}".format(index), route)
+            if end < 0:
+                filters = output_filters[index + start] + output_filters[index + end]
+            else:
+                filters= output_filters[index + start]
+        #shortcut corresponds to skip connection
+        elif x["type"] == "shortcut":
+            shortcut = EmptyLayer()
+            module.add_module("shortcut_{}".format(index), shortcut)
+        #Yolo is the detection layer
+        elif x["type"] == "yolo":
+            mask = x["mask"].split(",")
+            mask = [int(x) for x in mask]
+            anchors = x["anchors"].split(",")
+            anchors = [int(a) for a in anchors]
+            anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors),2)]
+            anchors = [anchors[i] for i in mask]
+            detection = DetectionLayer(anchors)
+            module.add_module("Detection_{}".format(index), detection)
+        module_list.append(module)
+        prev_filters = filters
+        output_filters.append(filters)
+    return (net_info, module_list)
+class Darknet(nn.Module):
+    def __init__(self, cfgfile):
+        super(Darknet, self).__init__()
+        self.blocks = parse_cfg(cfgfile)
+        self.net_info, self.module_list = create_modules(self.blocks)
+    def forward(self, x, CUDA):
+        modules = self.blocks[1:]
+        outputs = {}   #We cache the outputs for the route layer
+        write = 0
+        for i, module in enumerate(modules):
+            module_type = (module["type"])
+            if module_type == "convolutional" or module_type == "upsample":
+                x = self.module_list[i](x)
+            elif module_type == "route":
+                layers = module["layers"]
+                layers = [int(a) for a in layers]
+                if (layers[0]) > 0:
+                    layers[0] = layers[0] - i
+                if len(layers) == 1:
+                    x = outputs[i + (layers[0])]
+                else:
+                    if (layers[1]) > 0:
+                        layers[1] = layers[1] - i
+                    map1 = outputs[i + layers[0]]
+                    map2 = outputs[i + layers[1]]
+                    x = torch.cat((map1, map2), 1)
+            elif  module_type == "shortcut":
+                from_ = int(module["from"])
+                x = outputs[i-1] + outputs[i+from_]
+            elif module_type == 'yolo':
+                anchors = self.module_list[i][0].anchors
+                #Get the input dimensions
+                inp_dim = int (self.net_info["height"])
+                #Get the number of classes
+                num_classes = int (module["classes"])
+                #Transform
+                x = x.data
+                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
+                if not write:              #if no collector has been intialised.
+                    detections = x
+                    write = 1
+                else:
+                    detections = torch.cat((detections, x), 1)
+            outputs[i] = x
+        return detections
+    def load_weights(self, weightfile):
+        #Open the weights file
+        fp = open(weightfile, "rb")
+        #The first 5 values are header information
+        # 1. Major version number
+        # 2. Minor Version Number
+        # 3. Subversion number
+        # 4,5. Images seen by the network (during training)
+        header = np.fromfile(fp, dtype = np.int32, count = 5)
+        self.header = torch.from_numpy(header)
+        self.seen = self.header[3]
+        weights = np.fromfile(fp, dtype = np.float32)
+        ptr = 0
+        for i in range(len(self.module_list)):
+            module_type = self.blocks[i + 1]["type"]
+            #If module_type is convolutional load weights
+            #Otherwise ignore.
+            if module_type == "convolutional":
+                model = self.module_list[i]
+                try:
+                    batch_normalize = int(self.blocks[i+1]["batch_normalize"])
+                except:
+                    batch_normalize = 0
+                conv = model[0]
+                if (batch_normalize):
+                    bn = model[1]
+                    #Get the number of weights of Batch Norm Layer
+                    num_bn_biases = bn.bias.numel()
+                    #Load the weights
+                    bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
+                    ptr += num_bn_biases
+                    bn_weights = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
+                    ptr  += num_bn_biases
+                    bn_running_mean = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
+                    ptr  += num_bn_biases
+                    bn_running_var = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
+                    ptr  += num_bn_biases
+                    #Cast the loaded weights into dims of model weights.
+                    bn_biases = bn_biases.view_as(bn.bias.data)
+                    bn_weights = bn_weights.view_as(bn.weight.data)
+                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
+                    bn_running_var = bn_running_var.view_as(bn.running_var)
+                    #Copy the data to model
+                    bn.bias.data.copy_(bn_biases)
+                    bn.weight.data.copy_(bn_weights)
+                    bn.running_mean.copy_(bn_running_mean)
+                    bn.running_var.copy_(bn_running_var)
+                else:
+                    #Number of biases
+                    num_biases = conv.bias.numel()
+                    #Load the weights
+                    conv_biases = torch.from_numpy(weights[ptr: ptr + num_biases])
+                    ptr = ptr + num_biases
+                    #reshape the loaded weights according to the dims of the model weights
+                    conv_biases = conv_biases.view_as(conv.bias.data)
+                    #Finally copy the data
+                    conv.bias.data.copy_(conv_biases)
+                #Let us load the weights for the Convolutional layers
+                num_weights = conv.weight.numel()
+                #Do the same as above for weights
+                conv_weights = torch.from_numpy(weights[ptr:ptr+num_weights])
+                ptr = ptr + num_weights
+                conv_weights = conv_weights.view_as(conv.weight.data)
+                conv.weight.data.copy_(conv_weights)

detect.py ADDED Viewed

	@@ -0,0 +1,161 @@

+# PyTorch implementation of Darknet
+# This is a custom, hard-coded version of darknet with
+# YOLOv3 implementation for openimages database. This
+# was written to test viability of implementing YOLO
+# for face detection followed by emotion / sentiment
+# analysis.
+#
+# Configuration, weights and data are hardcoded.
+# Additional options include, ability to create
+# subset of data with faces exracted for labelling.
+#
+# Author    : Saikiran Tharimena
+# Co-Authors: Kjetil Marinius Sjulsen, Juan Carlos Calvet Lopez
+# Project   : Emotion / Sentiment Detection from news images
+# Date      : 12 September 2022
+# Version   : v0.1
+#
+# (C) Schibsted ASA
+# Libraries
+import os
+import cv2
+import torch
+import numpy as np
+from utils import *
+from darknet import Darknet
+from torch.autograd import Variable
+from torch.cuda import is_available as check_cuda
+# Parameters
+batch_size = 1
+confidence = 0.25
+nms_thresh = 0.30
+run_cuda = False
+# CFG Files
+cwd = os.path.dirname(__file__)
+cfg     = cwd + '/cfg/yolov3-openimages.cfg'
+data    = cwd + '/cfg/openimages.data'
+clsnames= cwd + '/cfg/openimages.names'
+weights = cwd + '/cfg/yolov3-openimages.weights'
+# Load classes
+num_classes = 601
+classes = load_classes(clsnames)
+# Set up the neural network
+print('Load Network')
+model = Darknet(cfg)
+print('Load Weights')
+model.load_weights(weights)
+print('Successfully loaded Network')
+# Check CUDA
+if run_cuda:
+    CUDA = check_cuda()
+else:
+    CUDA = False
+# Input dimension
+inp_dim = int(model.net_info["height"])
+# put the model on GPU
+if CUDA:
+    model.cuda()
+# Set the model in evaluation mode
+model.eval()
+# face detector
+def detect_face(image):
+    # Just lazy to update this
+    imlist = [image]
+    loaded_ims = [cv2.imread(x) for x in imlist]
+    im_batches = list(map(prep_image, loaded_ims, [inp_dim for x in range(len(imlist))]))
+    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims]
+    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
+    leftover = 0
+    if (len(im_dim_list) % batch_size):
+        leftover = 1
+    if batch_size != 1:
+        num_batches = len(imlist) // batch_size + leftover
+        im_batches = [torch.cat((im_batches[i*batch_size : min((i +  1)*batch_size,
+                            len(im_batches))]))  for i in range(num_batches)]
+    write = 0
+    if CUDA:
+        im_dim_list = im_dim_list.cuda()
+    for i, batch in enumerate(im_batches):
+        # load the image
+        if CUDA:
+            batch = batch.cuda()
+        with torch.no_grad():
+            prediction = model(Variable(batch), CUDA)
+        prediction = write_results(prediction, confidence, num_classes, nms_conf = nms_thresh)
+        if type(prediction) == int:
+            for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
+                im_id = i*batch_size + im_num
+            continue
+        prediction[:,0] += i*batch_size    # transform the atribute from index in batch to index in imlist
+        if not write: # If we have't initialised output
+            output = prediction
+            write = 1
+        else:
+            output = torch.cat((output, prediction))
+        for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
+            im_id = i * batch_size + im_num
+            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
+        if CUDA:
+            torch.cuda.synchronize()
+    try:
+        output
+    except NameError:
+        return None
+    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
+    scaling_factor = torch.min(608/im_dim_list,1)[0].view(-1,1)
+    output[:, [1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
+    output[:, [2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
+    output[:, 1:5] /= scaling_factor
+    for i in range(output.shape[0]):
+        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
+        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
+    def get_detections(x, results):
+        c1 = [int(y) for y in x[1:3]]
+        c2 = [int(y) for y in x[3:5]]
+        det_class = int(x[-1])
+        label = "{0}".format(classes[det_class])
+        return (label, tuple(c1 + c2))
+    detections = list(map(lambda x: get_detections(x, loaded_ims), output))
+    if CUDA:
+        torch.cuda.empty_cache()
+    return loaded_ims[0], detections

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+fastai
+numpy
+opencv-python

utils.py ADDED Viewed

	@@ -0,0 +1,237 @@

+# PyTorch implementation of Darknet
+# This is a custom, hard-coded version of darknet with
+# YOLOv3 implementation for openimages database. This
+# was written to test viability of implementing YOLO
+# for face detection followed by emotion / sentiment
+# analysis.
+#
+# Configuration, weights and data are hardcoded.
+# Additional options include, ability to create
+# subset of data with faces exracted for labelling.
+#
+# Author    : Saikiran Tharimena
+# Co-Authors: Kjetil Marinius Sjulsen, Juan Carlos Calvet Lopez
+# Project   : Emotion / Sentiment Detection from news images
+# Date      : 12 September 2022
+# Version   : v0.1
+#
+# (C) Schibsted ASA
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+import numpy as np
+import cv2
+def unique(tensor):
+    tensor_np = tensor.cpu().numpy()
+    unique_np = np.unique(tensor_np)
+    unique_tensor = torch.from_numpy(unique_np)
+    tensor_res = tensor.new(unique_tensor.shape)
+    tensor_res.copy_(unique_tensor)
+    return tensor_res
+def bbox_iou(box1, box2):
+    """
+    Returns the IoU of two bounding boxes
+    """
+    #Get the coordinates of bounding boxes
+    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
+    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]
+    #get the corrdinates of the intersection rectangle
+    inter_rect_x1 =  torch.max(b1_x1, b2_x1)
+    inter_rect_y1 =  torch.max(b1_y1, b2_y1)
+    inter_rect_x2 =  torch.min(b1_x2, b2_x2)
+    inter_rect_y2 =  torch.min(b1_y2, b2_y2)
+    #Intersection area
+    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)
+    #Union Area
+    b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1)
+    b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1)
+    iou = inter_area / (b1_area + b2_area - inter_area)
+    return iou
+def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
+    batch_size = prediction.size(0)
+    stride =  inp_dim // prediction.size(2)
+    grid_size = inp_dim // stride
+    bbox_attrs = 5 + num_classes
+    num_anchors = len(anchors)
+    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
+    prediction = prediction.transpose(1,2).contiguous()
+    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
+    anchors = [(a[0]/stride, a[1]/stride) for a in anchors]
+    #Sigmoid the  centre_X, centre_Y. and object confidencce
+    prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
+    prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
+    prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
+    #Add the center offsets
+    grid = np.arange(grid_size)
+    a,b = np.meshgrid(grid, grid)
+    x_offset = torch.FloatTensor(a).view(-1,1)
+    y_offset = torch.FloatTensor(b).view(-1,1)
+    if CUDA:
+        x_offset = x_offset.cuda()
+        y_offset = y_offset.cuda()
+    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
+    prediction[:,:,:2] += x_y_offset
+    #log space transform height and the width
+    anchors = torch.FloatTensor(anchors)
+    if CUDA:
+        anchors = anchors.cuda()
+    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
+    prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors
+    prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes]))
+    prediction[:,:,:4] *= stride
+    return prediction
+def write_results(prediction, confidence, num_classes, nms_conf = 0.4):
+    conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)
+    prediction = prediction*conf_mask
+    box_corner = prediction.new(prediction.shape)
+    box_corner[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2)
+    box_corner[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
+    box_corner[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2)
+    box_corner[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
+    prediction[:,:,:4] = box_corner[:,:,:4]
+    batch_size = prediction.size(0)
+    write = False
+    for ind in range(batch_size):
+        image_pred = prediction[ind]          #image Tensor
+       #confidence threshholding
+       #NMS
+        max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1)
+        max_conf = max_conf.float().unsqueeze(1)
+        max_conf_score = max_conf_score.float().unsqueeze(1)
+        seq = (image_pred[:,:5], max_conf, max_conf_score)
+        image_pred = torch.cat(seq, 1)
+        non_zero_ind =  (torch.nonzero(image_pred[:,4]))
+        try:
+            image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7)
+        except:
+            continue
+        if image_pred_.shape[0] == 0:
+            continue
+#
+        #Get the various classes detected in the image
+        img_classes = unique(image_pred_[:,-1])  # -1 index holds the class index
+        for cls in img_classes:
+            #perform NMS
+            #get the detections with one particular class
+            cls_mask = image_pred_*(image_pred_[:,-1] == cls).float().unsqueeze(1)
+            class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze()
+            image_pred_class = image_pred_[class_mask_ind].view(-1,7)
+            #sort the detections such that the entry with the maximum objectness
+            #confidence is at the top
+            conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1]
+            image_pred_class = image_pred_class[conf_sort_index]
+            idx = image_pred_class.size(0)   #Number of detections
+            for i in range(idx):
+                #Get the IOUs of all boxes that come after the one we are looking at
+                #in the loop
+                try:
+                    ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
+                except ValueError:
+                    break
+                except IndexError:
+                    break
+                #Zero out all the detections that have IoU > treshhold
+                iou_mask = (ious < nms_conf).float().unsqueeze(1)
+                image_pred_class[i+1:] *= iou_mask
+                #Remove the non-zero entries
+                non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
+                image_pred_class = image_pred_class[non_zero_ind].view(-1,7)
+            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)      #Repeat the batch_id for as many detections of the class cls in the image
+            seq = batch_ind, image_pred_class
+            if not write:
+                output = torch.cat(seq,1)
+                write = True
+            else:
+                out = torch.cat(seq,1)
+                output = torch.cat((output,out))
+    try:
+        return output
+    except:
+        return 0
+def letterbox_image(img, inp_dim):
+    '''resize image with unchanged aspect ratio using padding'''
+    img_w, img_h = img.shape[1], img.shape[0]
+    w, h = inp_dim
+    new_w = int(img_w * min(w/img_w, h/img_h))
+    new_h = int(img_h * min(w/img_w, h/img_h))
+    resized_image = cv2.resize(img, (new_w,new_h), interpolation = cv2.INTER_CUBIC)
+    canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
+    canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w,  :] = resized_image
+    return canvas
+def prep_image(img, inp_dim):
+    """
+    Prepare image for inputting to the neural network.
+    Returns a Variable
+    """
+    img = (letterbox_image(img, (inp_dim, inp_dim)))
+    img = img[:,:,::-1].transpose((2,0,1)).copy()
+    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
+    return img
+def load_classes(namesfile):
+    fp = open(namesfile, "r")
+    names = fp.read().split("\n")[:-1]
+    return names