Sunil Surendra Singh committed on
Commit
0362b22
·
0 Parent(s):

First commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. .github/workflows/main.yml +20 -0
  3. .gitignore +141 -0
  4. README.md +221 -0
  5. app.py +88 -0
  6. assets/Eiffel-tower_night.jpg +3 -0
  7. assets/Howrah-Bridge.jpg +3 -0
  8. assets/LogSoftmax.png +3 -0
  9. assets/NLLLoss.png +3 -0
  10. assets/accuracy.png +3 -0
  11. assets/app-screenshot.png +3 -0
  12. assets/bhakra-dam.jpg +3 -0
  13. assets/button-icon.png +3 -0
  14. assets/eiffel_tower_prediction.png +3 -0
  15. assets/examples/gateway-of-india.jpg +3 -0
  16. assets/examples/grand-canyon.jpg +3 -0
  17. assets/examples/opera-house.jpg +3 -0
  18. assets/examples/stone-henge.jpg +3 -0
  19. assets/examples/temple-of-zeus.jpg +3 -0
  20. assets/grand-canyon.jpg +3 -0
  21. assets/landmark_samples.png +3 -0
  22. assets/loss1.png +3 -0
  23. assets/loss2.png +3 -0
  24. assets/loss3.png +3 -0
  25. assets/sample_output.png +3 -0
  26. assets/scratch_network.png +3 -0
  27. assets/sqrt_n_inputs.png +3 -0
  28. assets/taj-mahal-at-full-moon-night.jpg +3 -0
  29. assets/title_image_sydney_opera_house.jpg +3 -0
  30. assets/train_hist1.png +3 -0
  31. assets/train_hist2.png +3 -0
  32. assets/transfer_network.png +3 -0
  33. config.py +73 -0
  34. model.py +89 -0
  35. notebook/assets/Eiffel-tower_night.jpg +3 -0
  36. notebook/assets/Howrah-Bridge.jpg +3 -0
  37. notebook/assets/LogSoftmax.png +3 -0
  38. notebook/assets/NLLLoss.png +3 -0
  39. notebook/assets/accuracy.png +3 -0
  40. notebook/assets/bhakra-dam.jpg +3 -0
  41. notebook/assets/button-icon.png +3 -0
  42. notebook/assets/eiffel_tower_prediction.png +3 -0
  43. notebook/assets/examples/gateway-of-india.jpg +3 -0
  44. notebook/assets/examples/grand-canyon.jpg +3 -0
  45. notebook/assets/examples/opera-house.jpg +3 -0
  46. notebook/assets/examples/stone-henge.jpg +3 -0
  47. notebook/assets/examples/temple-of-zeus.jpg +3 -0
  48. notebook/assets/landmark_samples.png +3 -0
  49. notebook/assets/loss1.png +3 -0
  50. notebook/assets/loss2.png +3 -0
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.png filter=lfs diff=lfs merge=lfs -text
2
+ *.jpg filter=lfs diff=lfs merge=lfs -text
.github/workflows/main.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [master]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ with:
15
+ fetch-depth: 0
16
+ lfs: true
17
+ - name: Push to hub
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push --force https://sssingh:$HF_TOKEN@huggingface.co/spaces/sssingh/famous-landmarks-classifier-cnn master
.gitignore ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MY CHANGES
2
+ *.p
3
+ *.pt
4
+ secrets.toml
5
+ scratch*.ipynb
6
+ venv*/
7
+ .vscode*
8
+ .vscode*/
9
+ .examples/
10
+ .swp
11
+
12
+ # Byte-compiled / optimized / DLL files
13
+ __pycache__/
14
+ *.py[cod]
15
+ *$py.class
16
+
17
+ # C extensions
18
+ *.so
19
+
20
+ # Distribution / packaging
21
+ .Python
22
+ build/
23
+ develop-eggs/
24
+ dist/
25
+ downloads/
26
+ eggs/
27
+ .eggs/
28
+ lib/
29
+ lib64/
30
+ parts/
31
+ sdist/
32
+ var/
33
+ wheels/
34
+ pip-wheel-metadata/
35
+ share/python-wheels/
36
+ *.egg-info/
37
+ .installed.cfg
38
+ *.egg
39
+ MANIFEST
40
+
41
+ # PyInstaller
42
+ # Usually these files are written by a python script from a template
43
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
44
+ *.manifest
45
+ *.spec
46
+
47
+ # Installer logs
48
+ pip-log.txt
49
+ pip-delete-this-directory.txt
50
+
51
+ # Unit test / coverage reports
52
+ htmlcov/
53
+ .tox/
54
+ .nox/
55
+ .coverage
56
+ .coverage.*
57
+ .cache
58
+ nosetests.xml
59
+ coverage.xml
60
+ *.cover
61
+ *.py,cover
62
+ .hypothesis/
63
+ .pytest_cache/
64
+
65
+ # Translations
66
+ *.mo
67
+ *.pot
68
+
69
+ # Django stuff:
70
+ *.log
71
+ local_settings.py
72
+ db.sqlite3
73
+ db.sqlite3-journal
74
+
75
+ # Flask stuff:
76
+ instance/
77
+ .webassets-cache
78
+
79
+ # Scrapy stuff:
80
+ .scrapy
81
+
82
+ # Sphinx documentation
83
+ docs/_build/
84
+
85
+ # PyBuilder
86
+ target/
87
+
88
+ # Jupyter Notebook
89
+ .ipynb_checkpoints
90
+
91
+ # IPython
92
+ profile_default/
93
+ ipython_config.py
94
+
95
+ # pyenv
96
+ .python-version
97
+
98
+ # pipenv
99
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
101
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
102
+ # install all needed dependencies.
103
+ #Pipfile.lock
104
+
105
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
106
+ __pypackages__/
107
+
108
+ # Celery stuff
109
+ celerybeat-schedule
110
+ celerybeat.pid
111
+
112
+ # SageMath parsed files
113
+ *.sage.py
114
+
115
+
116
+ # Environments
117
+ .env
118
+ .venv
119
+ env/
120
+ venv*/
121
+ ENV/
122
+ env.bak/
123
+ venv.bak/
124
+
125
+ # Spyder project settings
126
+ .spyderproject
127
+ .spyproject
128
+
129
+ # Rope project settings
130
+ .ropeproject
131
+
132
+ # mkdocs documentation
133
+ /site
134
+
135
+ # mypy
136
+ .mypy_cache/
137
+ .dmypy.json
138
+ dmypy.json
139
+
140
+ # Pyre type checker
141
+ .pyre/
README.md ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Famous Landmarks Classifier Cnn
3
+ emoji: 🌉
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.45.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ <a href="https://huggingface.co/spaces/sssingh/famous-landmarks-classifier-cnn" target="_blank"><img src="https://img.shields.io/badge/click_here_to_open_demo_app-orange?style=for-the-badge&logo=dependabot"/></a>
14
+
15
+
16
+ # Landmarks Classification and Tagging using CNN
17
+ In this project we solve a `multi-class classification` problem by classifying/tagging a given image of a famous landmark using a CNN (Convolutional Neural Network).
18
+
19
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/title_image_sydney_opera_house.jpg?raw=true" width="800" height="300" />
20
+
21
+ ## Features
22
+ ⚡Multi Class Image Classification
23
+ ⚡Custom CNN
24
+ ⚡Transfer Learning CNN
25
+ ⚡PyTorch
26
+
27
+ ## Table of Contents
28
+
29
+ - [Introduction](#introduction)
30
+ - [Objective](#objective)
31
+ - [Dataset](#dataset)
32
+ - [Evaluation Criteria](#evaluation-criteria)
33
+ - [Solution Approach](#solution-approach)
34
+ - [How To Use](#how-to-use)
35
+ - [License](#license)
36
+ - [Get in touch](#get-in-touch)
37
+ - [Credits](#credits)
38
+
39
+ ## Introduction
40
+
41
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/app-screenshot.png?raw=true">
42
+
43
+ Photo sharing and photo storage services like to have location data for each uploaded photo. In addition, these services can build advanced features with the location data, such as the automatic suggestion of relevant tags or automatic photo organization, which help provide a compelling user experience. However, although a photo's location can often be obtained by looking at the photo's metadata, many images uploaded to these services will not have location metadata available. This can happen when, for example, the camera capturing the picture does not have GPS or if a photo's metadata is scrubbed due to privacy concerns.
44
+
45
+ If no location metadata for an image is available, one way to infer the location is to detect and classify a discernible landmark in the picture. However, given the large number of landmarks worldwide and the immense volume of images uploaded to photo-sharing services, using human judgment to classify these landmarks would not be feasible. In this project, we'll try to address this problem by building `Neural Network` (NN) based models to automatically predict the location of the image based on any landmarks depicted in the picture.
46
+
47
+ ## Objective
48
+ To build an NN-based model that accepts any user-supplied image as input and suggests the `top k` most relevant landmarks from `50 possible` landmarks from across the world.
49
+
50
+ 1. Download the dataset
51
+ 2. Build a CNN based neural network from scratch to classify the landmark image
52
+ - Here, we aim to attain a test accuracy of at least 30%. At first glance, an accuracy of 30% may appear to be very low, but it's way better than random guessing, which would provide an accuracy of just 2% since we have 50 different landmarks classes in the dataset.
53
+ 3. Build a CNN based neural network, using transfer-learning, to classify the landmark image
54
+ - Here, we aim to attain a test accuracy of at least 60%, which is pretty good given the complex nature of this task.
55
+ 4. Implement an inference function that will accept a file path to an image and an integer k and then predict the top k most likely landmarks this image belongs to. The print below displays the expected sample output from the predict function, indicating the top 3 (k = 3) possibilities for the image in question.
56
+
57
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/sample_output.png?raw=true">
58
+
59
+ ## Dataset
60
+ - Dataset to be downloaded from [here](https://udacity-dlnfd.s3-us-west-1.amazonaws.com/datasets/landmark_images.zip). Note that this is a mini dataset containing around 6,000 images; it is a small subset of the [Original Landmark Dataset](https://github.com/cvdfoundation/google-landmark) that has over 700,000 images.
61
+ - Unzipped dataset would have the parent folder `landmark_images` containing training data in the `train` sub-folder and testing data in the `test` sub-folder
62
+ - There are 1250 images in the `test` sub-folder to be kept hidden and only used for model evaluation
63
+ - There are 4996 images in the `train` sub-folder to be used for training and validation
64
+ - Images in `test` and `train` sets are further categorized and kept in one of the 50 sub-folders representing 50 different landmarks classes (from 0 to 49)
65
+ - Images in the dataset are of different sizes and resolution
66
+ - Here are a few samples from the training dataset with their respective labels descriptions...
67
+
68
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/landmark_samples.png?raw=true">
69
+
70
+ ## Evaluation Criteria
71
+
72
+ ### Loss Function
73
+ We will use `LogSoftmax` in the output layer of the network...
74
+
75
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/LogSoftmax.png?raw=true">
76
+
77
+ We need a suitable loss function that consumes these `log-probabilities` outputs and produces a total loss. The function that we are looking for is `NLLLoss` (Negative Log-Likelihood Loss). In practice, `NLLLoss` is nothing but a generalization of `BCELoss` (Binary Cross-Entropy Loss, or Log Loss) extended from the binary-class to the multi-class problem.
78
+
79
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/NLLLoss.png?raw=true">
80
+
81
+ <br>Note the `negative` sign in front of the `NLLLoss` formula, hence "negative" in the name. The negative sign is put in front to make the average loss positive. If we did not do this, then, since the `log` of a number less than 1 is negative, we would have a negative overall average loss, and to improve the model we would need to `maximize` the loss function instead of `minimizing` it; minimizing is a much easier task mathematically than `maximizing`.
82
+
83
+
84
+ ### Performance Metric
85
+
86
+ `accuracy` is used as the model's performance metric on the test-set
87
+
88
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/accuracy.png?raw=true">
89
+
90
+
91
+ ## Solution Approach
92
+ - Once the dataset is downloaded and unzipped, we split the training set into training and validation sets in 80%:20% (3996:1000) ratio and keep images in respective `train` and `val` sub-folders.
93
+ - `train` data is then used to build Pytorch `Dataset` object; after applying data augmentations, images are resized to 128x128.
94
+ The `mean` and `standard deviation` are computed for the train dataset, and then the dataset is `normalized` using the calculated statistics.
95
+ - The RGB channel histogram of the train set is shown below...
96
+
97
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/train_hist1.png?raw=true">
98
+
99
+ - The RGB channel histogram of the train set after normalization is shown below...
100
+
101
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/train_hist2.png?raw=true">
102
+
103
+ - Now, `test` and `val` Dataset objects are prepared in the same fashion where images are resized to 128x128 and then normalized.
104
+ - The training, validation, and testing datasets are then wrapped in Pytorch `DataLoader` object so that we can iterate through them with ease. A typical `batch_size` 32 is used.
105
+
106
+ ### CNN from scratch
107
+ - The neural network is implemented as a subclass of the `nn.Module` PyTorch class. The final network presented here is built incrementally with many experiments...
108
+ - Started with a very small CNN of just two convolutions and a linear layer with LogSoftmax output.
109
+ - Tried to overfit the network on a single batch of 32 training images, but the network found it hard to overfit, which means it's not powerful enough.
110
+ - Gradually increased the Conv and Linear layers to overfit the batch easily.
111
+ - Then trained on complete training data, adjusted layers, and output sizes to ensure that training loss goes down.
112
+ - Then, trained again with validation data to select the best network with the lowest validation loss.
113
+ - `ReLU` is used as an activation function, and `BatchNorm` is used after every layer except the last.
114
+ - Final model architecture (from scratch) is shown below...
115
+
116
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/scratch_network.png?raw=true">
117
+
118
+ - Network initial weights are initialized by numbers drawn from a `normal-distribution` in the range...
119
+
120
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/sqrt_n_inputs.png?raw=true">
121
+
122
+ - Network is then trained and validated for 15 epochs using the `NLLLoss` function and `Adam` optimizer with a learning rate of 0.001. We save the trained model here as `ignore.pt` (ignore because we are not using it for evaluation)
123
+ - We keep track of training and validation losses. When plotted, we observe that the model starts to `overfit` very quickly.
124
+
125
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/loss1.png?raw=true">
126
+
127
+ - Now, we reset the Network initial weights to Pytorch default weight to check if there are any improvements
128
+ - Network is then again trained and validated for 15 epochs using the `NLLLoss` function and `Adam` optimizer with a learning rate of 0.001. We save the trained model here as `model_scratch.pt` (we will use this saved model for evaluation)
129
+ - We keep track of training and validation losses. When plotted, we observe that result is almost the same as that of custom weight initialization
130
+
131
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/loss2.png?raw=true">
132
+
133
+ - The trained network (`model_scratch.pt`) is then loaded and evaluated on unseen 1,250 testing images.
134
+ The network achieves around `38%` accuracy, which is more than we aimed for (i.e., 30%). In other words, the network correctly classifies `475` images out of the total `1250` test images.
135
+
136
+ ### CNN using transfer-learning
137
+ - Here, we use transfer-learning to implement the CNN network to classify images of landmarks.
138
+ - We have selected the `VGG19` pre-trained model on `ImageNet` as our base model.
139
+ Models pre-trained and tested on ImageNet can extract general features even from datasets that may not be very similar to ImageNet. This is due to the sheer size of the ImageNet dataset (1.2 million images) and the number of classes (1000). Instead of `VGG19`, we could have chosen `ResNet` or `DenseNet` as our base network; they would have worked just fine. `VGG19` was selected here because of the simplicity of its architecture while still producing impressive results.
140
+ - VGG19 models weights are frozen so that they do not change during the training.
141
+ - A `custom-classifier` with `ReLU` activation, `Dropouts` in hidden layers and `LogSoftmax` in last layer is created.
142
+ The original classifier layer in VGG19 is replaced by a `custom-classifier` with learnable weights.
143
+ - Final model architecture (transfer learning) is shown below...
144
+
145
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/transfer_network.png?raw=true">
146
+
147
+ - Network is then trained and validated for ten epochs using the `NLLLoss` function and `Adam` optimizer with a learning rate of 0.001. Note that the optimizer has been supplied with the learnable parameters of `custom-classifier` only and not the whole model. This is because we want to optimize our custom-classifier weights only and use ImageNet learned weights for the rest of the layers.
148
+ - We keep track of training and validation losses and plot them.
149
+
150
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/loss3.png?raw=true">
151
+
152
+ - The trained network is saved as `model_transfer.pt`
153
+
154
+ - The trained network `model_transfer.pt` is then loaded and evaluated on unseen 1,250 testing images.
155
+ - This time the network achieves around `63%` accuracy, which is more than what we aimed for (i.e., 60%). In other words, the network correctly classifies `788` images out of the total `1250` test images.
156
+ As we can see, the model built using transfer learning has outperformed the model built from scratch; hence, the second model will be used to predict unseen images.
157
+
158
+ ### Interface for inference
159
+ - For our model to be used easily, we'll implement a function `predict_landmarks` which will...
160
+ - Accepts a `file-path` to an image and an integer `k`
161
+ The function expects the trained model `model_transfer.pt` to be present in the same folder/directory from where the function is invoked. The trained model can be downloaded from [here](https://drive.google.com/file/d/1c3aj2l3f3mkuH2a9orFDRNPdg0Vqa-wg/view?usp=sharing)
162
+ - It predicts and returns the **top k most likely landmarks**.
163
+ - `predict_landmarks` function can be invoked from the `python` script or shell; an example is shown below...
164
+
165
+
166
+ ```python
167
+ >>> predicted_landmarks = predict_landmarks('images/test/09.Golden_Gate_Bridge/190f3bae17c32c37.jpg', 5)
168
+ >>> print(predicted_landmarks)
169
+ ['Golden Gate Bridge',
170
+ 'Forth Bridge',
171
+ 'Sydney Harbour Bridge',
172
+ 'Brooklyn Bridge',
173
+ 'Niagara Falls']
174
+ ```
175
+
176
+ - We create another higher-level function, `suggest_locations` that accepts the same parameters as that of `predict_landmarks` and internally uses the `predict_landmarks` function
177
+ - A sample of function usage and its output is shown below
178
+
179
+ ``` python
180
+ >>> suggest_locations('assets/Eiffel-tower_night.jpg')
181
+ ```
182
+
183
+ <img src="https://github.com/sssingh/landmark-classification-tagging/blob/master/assets/eiffel_tower_prediction.png?raw=true">
184
+
185
+
186
+ ## How To Use
187
+
188
+ ### Open the LIVE app
189
+
190
+ App has been deployed on `Hugging Face Spaces`. <br>
191
+ <a href="https://huggingface.co/spaces/sssingh/famous-landmarks-classifier-cnn" target="_blank"><img src="https://img.shields.io/badge/click_here_to_open_demo_app-orange?style=for-the-badge&logo=dependabot"/></a>
192
+
193
+ ### Training and Testing using jupyter notebook
194
+ 1. Ensure the below-listed packages are installed
195
+ - `NumPy`
196
+ - `matplotlib`
197
+ - `torch`
198
+ - `torchvision`
199
+ - `cv2`
200
+ - `PIL`
201
+ 2. Download `landmark-classification-cnn-pytorch.ipynb` jupyter notebook from this repo
202
+ 3. To train the models, it's recommended to execute the notebook one cell at a time. If a GPU is available (recommended), it'll use it automatically; otherwise, it'll fall back to the CPU.
203
+ 4. On a machine with `NVIDIA Quadro P5000` GPU with 16GB memory, it approximately takes 15-18 minutes to train and validate the `from scratch` model for 15 epochs
204
+ 5. On a machine with `NVIDIA Quadro P5000` GPU with 16GB memory, it approximately takes 15-18 minutes to train and validate the `transfer-learning` model for ten epochs
205
+ 6. A fully trained model `model_transfer.pt` can be downloaded from [here](https://drive.google.com/file/d/1c3aj2l3f3mkuH2a9orFDRNPdg0Vqa-wg/view?usp=sharing). This model then can be used directly for tagging new landmark images as described in [Interface for inference](#interface-for-inference) section.
206
+
207
+ ## License
208
+ [![MIT License](https://img.shields.io/badge/License-MIT-green.svg)](https://choosealicense.com/licenses/mit/)
209
+
210
+ ## Get in touch
211
+ [![email](https://img.shields.io/badge/Gmail-D14836?style=for-the-badge&logo=gmail&logoColor=white)](mailto:[email protected])
212
+ [![twitter](https://img.shields.io/badge/twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://twitter.com/@thesssingh)
213
+ [![linkedin](https://img.shields.io/badge/linkedin-0A66C2?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/in/sssingh/)
214
+ [![website](https://img.shields.io/badge/web_site-8B5BE8?style=for-the-badge&logo=ko-fi&logoColor=white)](https://sunilssingh.me)
215
+
216
+ ## Credits
217
+ - Dataset used in this project is provided by [Udacity](https://www.udacity.com/)
218
+ - Above dataset is a subset taken from the original landmarks dataset by Google [Original Landmark Dataset](https://github.com/cvdfoundation/google-landmark)
219
+ - Title photo by [Patty Jansen On Pixabay](https://pixabay.com/users/pattyjansen-154933/)
220
+
221
+ [Back To The Top](#landmarks-classification-and-tagging-using-cnn)
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import model
3
+ from config import app_config
4
+
5
+
6
+ def init():
7
+ if model != None:
8
+ print("Initializing App...")
9
+ app_config.model = model.load_model()
10
+
11
+
12
+ def clear():
13
+ return None, 2, None, None, None
14
+
15
+
16
+ def create_interface():
17
+ md = """
18
+ # Famous Landmark Classifier using CNN
19
+ **Choose an image containing any of the `50 possible classes` of world famous landmarks,**
20
+ **choose the number of prediction required (k) and hit `Predict`, model will try to identify**
21
+ **the landmark in the image.**
22
+ **Please note that the model is trained on a small set of only 4,000 images hence it may not**
23
+ **be right all the time, but its fun to try out.**
24
+ Visit the [project's repo](https://github.com/sssingh/landmark-classification-tagging)
25
+ """
26
+ with gr.Blocks(
27
+ title=app_config.title, theme=app_config.theme, css=app_config.css
28
+ ) as app:
29
+ with gr.Row():
30
+ gr.Markdown(md)
31
+ with gr.Accordion(
32
+ "Expand to see 50 classes:", open=False, elem_classes="accordion"
33
+ ):
34
+ gr.JSON(app_config.classes, elem_classes="json-box")
35
+ with gr.Row():
36
+ with gr.Column():
37
+ img = gr.Image(type="pil", elem_classes="image-picker")
38
+ k = gr.Slider(
39
+ label="Number of predictions (k):",
40
+ minimum=2,
41
+ maximum=5,
42
+ value=2,
43
+ step=1,
44
+ elem_classes="slider",
45
+ )
46
+ with gr.Row():
47
+ submit_btn = gr.Button(
48
+ "Predict",
49
+ icon="assets/button-icon.png",
50
+ elem_classes="submit-button",
51
+ )
52
+ clear_btn = gr.ClearButton(elem_classes="clear-button")
53
+ with gr.Column():
54
+ landmarks = gr.JSON(
55
+ label="Predicted Landmarks:", elem_classes="json-box"
56
+ )
57
+ proba = gr.JSON(
58
+ label="Predicted Probabilities:", elem_classes="json-box"
59
+ )
60
+ plot = gr.Plot(container=True, elem_classes="plot")
61
+ with gr.Row():
62
+ with gr.Accordion(
63
+ "Expand for examples:", open=False, elem_classes="accordion"
64
+ ):
65
+ gr.Examples(
66
+ examples=[
67
+ ["assets/examples/gateway-of-india.jpg", 3],
68
+ ["assets/examples/grand-canyon.jpg", 2],
69
+ ["assets/examples/opera-house.jpg", 3],
70
+ ["assets/examples/stone-henge.jpg", 4],
71
+ ["assets/examples/temple-of-zeus.jpg", 5],
72
+ ],
73
+ inputs=[img, k],
74
+ outputs=[landmarks, proba],
75
+ elem_id="examples",
76
+ )
77
+ submit_btn.click(
78
+ fn=model.predict, inputs=[img, k], outputs=[landmarks, proba, plot]
79
+ )
80
+ clear_btn.click(fn=clear, inputs=[], outputs=[img, k, landmarks, proba, plot])
81
+ img.clear(fn=clear, inputs=[], outputs=[img, k, landmarks, proba, plot])
82
+ return app
83
+
84
+
85
+ if __name__ == "__main__":
86
+ init()
87
+ app = create_interface()
88
+ app.launch()
assets/Eiffel-tower_night.jpg ADDED

Git LFS Details

  • SHA256: 345cb271e4da7f0ff3a0a96e0c567a69c89a5f056ede53b802a9541981d33de6
  • Pointer size: 130 Bytes
  • Size of remote file: 73 kB
assets/Howrah-Bridge.jpg ADDED

Git LFS Details

  • SHA256: 57308e8edc42379a070762fe59fb793f4d6e71e4daa6214a4e55da486f8410d3
  • Pointer size: 131 Bytes
  • Size of remote file: 345 kB
assets/LogSoftmax.png ADDED

Git LFS Details

  • SHA256: cf8b3b7aba3f88cfcc6c95fb9bb74819c0fab489f9ab89f82227c1b5093d3e0d
  • Pointer size: 130 Bytes
  • Size of remote file: 17.1 kB
assets/NLLLoss.png ADDED

Git LFS Details

  • SHA256: 6bf3f1a221e6def77cd5023d72026359ebe0c1aa15626a18ed25de8e8e58b84a
  • Pointer size: 130 Bytes
  • Size of remote file: 28.1 kB
assets/accuracy.png ADDED

Git LFS Details

  • SHA256: ee4d14b7764bfecbc6e63d3080ff4bed907bed53fe79b9e1c074ff4b2b8cbe4d
  • Pointer size: 130 Bytes
  • Size of remote file: 27.4 kB
assets/app-screenshot.png ADDED

Git LFS Details

  • SHA256: b1522195f05d07d90262262fbf95e0c395b4d85761b9c391a3206fad15d3d00e
  • Pointer size: 131 Bytes
  • Size of remote file: 207 kB
assets/bhakra-dam.jpg ADDED

Git LFS Details

  • SHA256: f9e4f7b8c135e191e8d3394cc15f6683e66c1fcb50a8aa2ccc1359ee00765491
  • Pointer size: 130 Bytes
  • Size of remote file: 48 kB
assets/button-icon.png ADDED

Git LFS Details

  • SHA256: a06ce5dd7558ad27621758729c7ef2019b1e7dc62c72b4691931bc6573fda078
  • Pointer size: 130 Bytes
  • Size of remote file: 48.3 kB
assets/eiffel_tower_prediction.png ADDED

Git LFS Details

  • SHA256: 57af944264147bd202faa7616616231a176b84d5125fe556236bc6f9da00771d
  • Pointer size: 131 Bytes
  • Size of remote file: 404 kB
assets/examples/gateway-of-india.jpg ADDED

Git LFS Details

  • SHA256: e3b9e7563a6c13cd1496b2758107abb7ebb0b9a87eb8565f7154ff813db68f4c
  • Pointer size: 130 Bytes
  • Size of remote file: 80.5 kB
assets/examples/grand-canyon.jpg ADDED

Git LFS Details

  • SHA256: 1ed5103af445bba7a83b49006b4a7d6911da90d00b7ed8d10da798cf5b6d38fa
  • Pointer size: 132 Bytes
  • Size of remote file: 3.35 MB
assets/examples/opera-house.jpg ADDED

Git LFS Details

  • SHA256: d81c27283373e60c8f61f4186b8e1b2f2043c08621165e50bc06e5321d3a374c
  • Pointer size: 130 Bytes
  • Size of remote file: 91.2 kB
assets/examples/stone-henge.jpg ADDED

Git LFS Details

  • SHA256: a47f96989025fb1928a73e9df7cb82271c1e47a211d3308dad24c117e79df259
  • Pointer size: 130 Bytes
  • Size of remote file: 87.8 kB
assets/examples/temple-of-zeus.jpg ADDED

Git LFS Details

  • SHA256: 1defc807e92bb554f7659ee55158526093412b46789a937a71edb51c01853cc7
  • Pointer size: 131 Bytes
  • Size of remote file: 150 kB
assets/grand-canyon.jpg ADDED

Git LFS Details

  • SHA256: 1ed5103af445bba7a83b49006b4a7d6911da90d00b7ed8d10da798cf5b6d38fa
  • Pointer size: 132 Bytes
  • Size of remote file: 3.35 MB
assets/landmark_samples.png ADDED

Git LFS Details

  • SHA256: b9c74f2b14ca93b7a53e3c2bdef91fffdcca7a2cd7d40e3149eb9037f33e2404
  • Pointer size: 132 Bytes
  • Size of remote file: 1.84 MB
assets/loss1.png ADDED

Git LFS Details

  • SHA256: 7e50d467fe18987ded46d5df992ee5fe224e4f27eb490b353724b3ca8b0b20db
  • Pointer size: 131 Bytes
  • Size of remote file: 144 kB
assets/loss2.png ADDED

Git LFS Details

  • SHA256: 1faeeacb184542a20cded490bcbc5e087a7cc0eb596d421a4837fd6469186d82
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
assets/loss3.png ADDED

Git LFS Details

  • SHA256: aff367d0aff088539a338a16e5f26f2927e5f0c33f09ff3eee4d801136e3eb1e
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB
assets/sample_output.png ADDED

Git LFS Details

  • SHA256: 56e5aa695099cdafc817b7cc239f3d6f646d6129a7c8cbc83ad8ec28a1f0e1b6
  • Pointer size: 131 Bytes
  • Size of remote file: 173 kB
assets/scratch_network.png ADDED

Git LFS Details

  • SHA256: a2c5e79fa2721ac64d294ab263920c3c7dae6c80bb98ca050d789387eb69f6cf
  • Pointer size: 130 Bytes
  • Size of remote file: 66.5 kB
assets/sqrt_n_inputs.png ADDED

Git LFS Details

  • SHA256: 9689ec8b2c3ecd21b5112a224e0680f66711d1b614cecce4cbd2cb356eabd875
  • Pointer size: 129 Bytes
  • Size of remote file: 2.78 kB
assets/taj-mahal-at-full-moon-night.jpg ADDED

Git LFS Details

  • SHA256: 6808cab8f3cac0574f28e9f527ef73550bb14e037b7942964cfff64c1a33c343
  • Pointer size: 130 Bytes
  • Size of remote file: 25.1 kB
assets/title_image_sydney_opera_house.jpg ADDED

Git LFS Details

  • SHA256: 592f97dee99b65acacf87af6f437e5e871f113b72443d359c2f1a618bc4e5ddd
  • Pointer size: 131 Bytes
  • Size of remote file: 236 kB
assets/train_hist1.png ADDED

Git LFS Details

  • SHA256: e892644d0b8676d9b8415c427707a7ba5787686a2d1b63ea8ab9324324b25623
  • Pointer size: 130 Bytes
  • Size of remote file: 52 kB
assets/train_hist2.png ADDED

Git LFS Details

  • SHA256: 6421325fa771a5bc64a7e3aada1066fafc6d86e95bc7931f77d2558e415b509c
  • Pointer size: 130 Bytes
  • Size of remote file: 48.4 kB
assets/transfer_network.png ADDED

Git LFS Details

  • SHA256: a8eca6740a769ab4692d682cb9f8bc7f43548771c32555f4e19c978c3449c2c5
  • Pointer size: 130 Bytes
  • Size of remote file: 78.1 kB
config.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dataclasses import dataclass
3
+
4
+
5
### Single home for every app-wide setting.
### The class name carries a leading "__" to signal that it is not meant to be
### used directly; the rest of the app goes through the `app_config` instance
### created at the bottom of this module.
@dataclass
class __AppConfig:
    """Container for the landmark-classifier app's settings.

    None of the attributes below is annotated, so ``@dataclass`` registers no
    fields for them: they are ordinary class attributes that every instance
    reads through the class until an instance-level value is assigned.
    """

    # --- UI ---
    title = "Computer Vision - Landmark Classifier"
    theme = "freddyaboulton/dracula_revamped"
    css = "style.css"

    # --- Hugging Face Hub location of the fine-tuned weights ---
    hf_repo_id = "sssingh/landmark-classifier-pt"
    hf_weights_file = "model_transfer.pt"

    # Placeholder for the loaded model; starts out empty.
    model = None

    # Class names. The position in this list is the index used to look a
    # label up, so the order must not change.
    # NOTE(review): presumably this mirrors the label order used at training
    # time -- confirm against the training notebook.
    classes = [
        # 0 - 4
        "Haleakala_National_Park", "Mount_Rainier_National_Park",
        "Ljubljana_Castle", "Dead_Sea", "Wroclaws_Dwarves",
        # 5 - 9
        "London_Olympic_Stadium", "Niagara_Falls", "Stonehenge",
        "Grand_Canyon", "Golden_Gate_Bridge",
        # 10 - 14
        "Edinburgh_Castle", "Mount_Rushmore_National_Memorial",
        "Kantanagar_Temple", "Yellowstone_National_Park", "Terminal_Tower",
        # 15 - 19
        "Central_Park", "Eiffel_Tower", "Changdeokgung",
        "Delicate_Arch", "Vienna_City_Hall",
        # 20 - 24
        "Matterhorn", "Taj_Mahal", "Moscow_Raceway",
        "Externsteine", "Soreq_Cave",
        # 25 - 29
        "Banff_National_Park", "Pont_du_Gard", "Seattle_Japanese_Garden",
        "Sydney_Harbour_Bridge", "Petronas_Towers",
        # 30 - 34
        "Brooklyn_Bridge", "Washington_Monument", "Hanging_Temple",
        "Sydney_Opera_House", "Great_Barrier_Reef",
        # 35 - 39
        "Monumento_a_la_Revolucion", "Badlands_National_Park", "Atomium",
        "Forth_Bridge", "Gateway_of_India",
        # 40 - 44
        "Stockholm_City_Hall", "Machu_Picchu", "Death_Valley_National_Park",
        "Gullfoss_Falls", "Trevi_Fountain",
        # 45 - 49
        "Temple_of_Heaven", "Great_Wall_of_China",
        "Prague_Astronomical_Clock", "Whitby_Abbey",
        "Temple_of_Olympian_Zeus",
    ]

    # Per-channel statistics (three values each) for input normalisation.
    mean = [0.4624, 0.4711, 0.4668]
    std = [0.2592, 0.2600, 0.2925]


# The one shared configuration object imported by the rest of the app.
app_config = __AppConfig()
model.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import torch
4
+ import torch.nn as nn
5
+ import torchvision.models as models
6
+ import torchvision.transforms as transforms
7
+ import plotly.express as px
8
+ from huggingface_hub import hf_hub_download
9
+ from config import app_config
10
+
11
### Reproducibility: seed NumPy and PyTorch (CPU and every CUDA device) with
### the same value, and ask cuDNN for deterministic kernels rather than
### letting it benchmark for the fastest one.
_SEED = 42
np.random.seed(_SEED)
torch.manual_seed(_SEED)
torch.cuda.manual_seed_all(_SEED)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
17
+
18
+
19
def load_model():
    """Build the landmark classifier and load its fine-tuned weights.

    The network is torchvision's VGG19 with the classifier head replaced by
    a custom three-layer MLP that ends in ``LogSoftmax`` over 50 classes.
    The fine-tuned weights are downloaded from the Hugging Face Hub repo and
    file named in ``app_config`` (``hf_repo_id`` / ``hf_weights_file``).

    Returns:
        torch.nn.Module: the model with the downloaded weights loaded, placed
        on the GPU when CUDA is available and on the CPU otherwise. The
        module's train/eval mode is left untouched.

    Raises:
        RuntimeError: from ``load_state_dict`` if the downloaded state dict
            does not match the architecture built here (loading is strict).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    ### Architecture only. The strict load_state_dict() call below replaces
    ### every parameter, so fetching the ImageNet-pretrained VGG19 weights
    ### first would be a large download that is immediately overwritten.
    model_transfer = models.vgg19(weights=None)

    ### Custom classifier head for our 50 classes
    model_transfer.classifier = nn.Sequential(
        nn.Linear(in_features=25088, out_features=4096),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=4096, out_features=1024),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=1024, out_features=50),
        nn.LogSoftmax(dim=1),
    )

    ### Download the saved fine-tuned model weights
    weights_path = hf_hub_download(
        repo_id=app_config.hf_repo_id, filename=app_config.hf_weights_file
    )

    ### Deserialize on the CPU regardless of where the checkpoint was saved,
    ### then move the finished model to the target device in a single step.
    ### NOTE(review): torch.load() unpickles the file; on PyTorch versions
    ### that support it, consider weights_only=True since the file comes from
    ### a remote repository.
    state_dict = torch.load(weights_path, map_location=torch.device("cpu"))
    model_transfer.load_state_dict(state_dict)

    return model_transfer.to(device)
53
+
54
+
55
def predict(img, k):
    """Classify an image and return the ``k`` most probable landmarks.

    Reads the model, class names and normalisation statistics from
    ``app_config``; ``app_config.model`` must already hold a loaded model.

    Args:
        img: the image to classify. It is passed through torchvision's
            ``Resize`` and ``ToTensor``; objects exposing ``convert`` (e.g.
            PIL images) are first converted to RGB so that they have the
            three channels the normalisation statistics expect.
        k: how many top predictions to return. Cast to ``int`` and capped at
            the number of known classes.

    Returns:
        tuple: ``(landmarks, proba, fig)`` where ``landmarks`` is a list of
        class names with underscores replaced by spaces, ``proba`` is the
        list of matching probabilities (both ordered as returned by
        ``torch.topk``), and ``fig`` is a Plotly bar chart of the two.
        Both lists always have ``k`` entries, including when ``k == 1``.
    """
    model = app_config.model
    classes = app_config.classes
    mean = torch.tensor(app_config.mean)
    std = torch.tensor(app_config.std)

    # torch.topk needs an integer k that does not exceed the class count.
    k = min(int(k), len(classes))

    # RGBA / grayscale / palette images would not match the 3-channel
    # mean/std used by Normalize below.
    if hasattr(img, "convert"):
        img = img.convert("RGB")

    # Prepare image for model prediction
    transform = transforms.Compose(
        [
            transforms.Resize((128, 128)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
    )
    batch = transform(img).unsqueeze(dim=0)

    # Predict
    model.eval()
    # Follow the model's actual device instead of assuming it lives on the
    # GPU whenever CUDA happens to be available.
    batch = batch.to(next(model.parameters()).device)
    with torch.no_grad():  # inference only: do not build an autograd graph
        log_proba = model(batch)  # LogSoftmax output
        top_proba, top_idx = torch.topk(torch.exp(log_proba), k)

    # Index the batch dimension explicitly. An argument-less squeeze() would
    # also drop the k dimension when k == 1 and leave a scalar behind.
    labels = top_idx[0].tolist()
    proba = top_proba[0].tolist()

    # Cleanup the labels
    landmarks = [classes[i].replace("_", " ") for i in labels]

    fig = px.bar(
        pd.DataFrame(data={"Landmarks": landmarks, "Probabilities": proba}),
        y="Probabilities",
        x="Landmarks",
    )
    return landmarks, proba, fig
notebook/assets/Eiffel-tower_night.jpg ADDED

Git LFS Details

  • SHA256: 345cb271e4da7f0ff3a0a96e0c567a69c89a5f056ede53b802a9541981d33de6
  • Pointer size: 130 Bytes
  • Size of remote file: 73 kB
notebook/assets/Howrah-Bridge.jpg ADDED

Git LFS Details

  • SHA256: 57308e8edc42379a070762fe59fb793f4d6e71e4daa6214a4e55da486f8410d3
  • Pointer size: 131 Bytes
  • Size of remote file: 345 kB
notebook/assets/LogSoftmax.png ADDED

Git LFS Details

  • SHA256: cf8b3b7aba3f88cfcc6c95fb9bb74819c0fab489f9ab89f82227c1b5093d3e0d
  • Pointer size: 130 Bytes
  • Size of remote file: 17.1 kB
notebook/assets/NLLLoss.png ADDED

Git LFS Details

  • SHA256: 6bf3f1a221e6def77cd5023d72026359ebe0c1aa15626a18ed25de8e8e58b84a
  • Pointer size: 130 Bytes
  • Size of remote file: 28.1 kB
notebook/assets/accuracy.png ADDED

Git LFS Details

  • SHA256: ee4d14b7764bfecbc6e63d3080ff4bed907bed53fe79b9e1c074ff4b2b8cbe4d
  • Pointer size: 130 Bytes
  • Size of remote file: 27.4 kB
notebook/assets/bhakra-dam.jpg ADDED

Git LFS Details

  • SHA256: f9e4f7b8c135e191e8d3394cc15f6683e66c1fcb50a8aa2ccc1359ee00765491
  • Pointer size: 130 Bytes
  • Size of remote file: 48 kB
notebook/assets/button-icon.png ADDED

Git LFS Details

  • SHA256: a06ce5dd7558ad27621758729c7ef2019b1e7dc62c72b4691931bc6573fda078
  • Pointer size: 130 Bytes
  • Size of remote file: 48.3 kB
notebook/assets/eiffel_tower_prediction.png ADDED

Git LFS Details

  • SHA256: 57af944264147bd202faa7616616231a176b84d5125fe556236bc6f9da00771d
  • Pointer size: 131 Bytes
  • Size of remote file: 404 kB
notebook/assets/examples/gateway-of-india.jpg ADDED

Git LFS Details

  • SHA256: e3b9e7563a6c13cd1496b2758107abb7ebb0b9a87eb8565f7154ff813db68f4c
  • Pointer size: 130 Bytes
  • Size of remote file: 80.5 kB
notebook/assets/examples/grand-canyon.jpg ADDED

Git LFS Details

  • SHA256: 1ed5103af445bba7a83b49006b4a7d6911da90d00b7ed8d10da798cf5b6d38fa
  • Pointer size: 132 Bytes
  • Size of remote file: 3.35 MB
notebook/assets/examples/opera-house.jpg ADDED

Git LFS Details

  • SHA256: d81c27283373e60c8f61f4186b8e1b2f2043c08621165e50bc06e5321d3a374c
  • Pointer size: 130 Bytes
  • Size of remote file: 91.2 kB
notebook/assets/examples/stone-henge.jpg ADDED

Git LFS Details

  • SHA256: a47f96989025fb1928a73e9df7cb82271c1e47a211d3308dad24c117e79df259
  • Pointer size: 130 Bytes
  • Size of remote file: 87.8 kB
notebook/assets/examples/temple-of-zeus.jpg ADDED

Git LFS Details

  • SHA256: 1defc807e92bb554f7659ee55158526093412b46789a937a71edb51c01853cc7
  • Pointer size: 131 Bytes
  • Size of remote file: 150 kB
notebook/assets/landmark_samples.png ADDED

Git LFS Details

  • SHA256: b9c74f2b14ca93b7a53e3c2bdef91fffdcca7a2cd7d40e3149eb9037f33e2404
  • Pointer size: 132 Bytes
  • Size of remote file: 1.84 MB
notebook/assets/loss1.png ADDED

Git LFS Details

  • SHA256: 7e50d467fe18987ded46d5df992ee5fe224e4f27eb490b353724b3ca8b0b20db
  • Pointer size: 131 Bytes
  • Size of remote file: 144 kB
notebook/assets/loss2.png ADDED

Git LFS Details

  • SHA256: 1faeeacb184542a20cded490bcbc5e087a7cc0eb596d421a4837fd6469186d82
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB