SKT27182 committed on
Commit cbbfbb2 · 1 Parent(s): 75505ab

Created Neural Style Transfer from scratch

.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,19 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user . $HOME/app
+
+ CMD ["python", "app.py"]
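Note that app.py launches the Gradio server itself on 0.0.0.0:7860, so the container entrypoint runs it directly with Python. For a local smoke test, something like docker build -t style-transfer . followed by docker run -p 7860:7860 style-transfer should work (the style-transfer tag is only an example, not part of the commit).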
app.py ADDED
@@ -0,0 +1,307 @@
+ import gradio as gr
+ from model import NeuralStyleTransfer
+ import tensorflow as tf
+ from keras import backend as K
+ import numpy as np
+
+
+ def change_dtype_inputs(
+     n_style_layers,
+     n_content_layers,
+     epochs,
+     learning_rate,
+     steps_per_epoch,
+     style_weight,
+     content_weight,
+     var_weight,
+ ):
+     """Casts the raw Gradio inputs to the numeric types the model expects."""
+     return (
+         int(n_style_layers),
+         int(n_content_layers),
+         int(epochs),
+         float(learning_rate),
+         int(steps_per_epoch),
+         float(style_weight),
+         float(content_weight),
+         float(var_weight),
+     )
+
+
+ def fit_style_transfer(
+     style_image,
+     content_image,
+     extractor="inception_v3",
+     n_style_layers=2,
+     n_content_layers=3,
+     epochs=4,
+     learning_rate=60.0,
+     steps_per_epoch=100,
+     style_weight=0.3,
+     content_weight=0.5,
+     var_weight=1e-12,
+ ):
+     """
+     Fit the style transfer model to the content and style images.
+
+     Parameters
+     ----------
+     style_image: str
+         The path to the style image.
+     content_image: str
+         The path to the content image.
+     extractor: str
+         The name of the feature extractor to use. Options are
+         "inception_v3", "vgg19", "resnet50", and "mobilenet_v2".
+     n_style_layers: int
+         The number of layers to use for the style loss.
+     n_content_layers: int
+         The number of layers to use for the content loss.
+     epochs: int
+         The number of epochs to train for.
+     learning_rate: float
+         The initial learning rate for the Adam optimizer; it decays
+         exponentially during training.
+     steps_per_epoch: int
+         The number of optimization steps per epoch.
+     style_weight: float
+         The weight of the style loss.
+     content_weight: float
+         The weight of the content loss.
+     var_weight: float
+         The weight of the total variation loss.
+
+     Yields
+     ------
+     (display_image, style_loss, content_loss, var_loss, epoch, step):
+         The current stylized image as an np.ndarray, the three loss
+         values, and the epoch/step counters, once per optimization step.
+     """
+
+     (
+         n_style_layers,
+         n_content_layers,
+         epochs,
+         learning_rate,
+         steps_per_epoch,
+         style_weight,
+         content_weight,
+         var_weight,
+     ) = change_dtype_inputs(
+         n_style_layers,
+         n_content_layers,
+         epochs,
+         learning_rate,
+         steps_per_epoch,
+         style_weight,
+         content_weight,
+         var_weight,
+     )
+
+     model = NeuralStyleTransfer(
+         style_image=style_image,
+         content_image=content_image,
+         extractor=extractor,
+         n_style_layers=n_style_layers,
+         n_content_layers=n_content_layers,
+     )
+
+     style_image = model.style_image
+     content_image = model.content_image
+
+     content_and_style_layers = model.get_output_layers()
+
+     # build the model with the layers we need to extract the features from
+     K.clear_session()
+     model.build(content_and_style_layers)
+
+     style_features = model.get_features(style_image, type="style")
+     content_features = model.get_features(content_image, type="content")
+
+     optimizer = tf.optimizers.Adam(
+         tf.keras.optimizers.schedules.ExponentialDecay(
+             initial_learning_rate=learning_rate, decay_steps=100, decay_rate=0.80
+         )
+     )
+
+     # start from the content image and optimize its pixels directly
+     generated_image = tf.cast(content_image, tf.float32)
+     generated_image = tf.Variable(generated_image)
+
+     step = 0
+
+     for epoch in range(epochs):
+         for step in range(steps_per_epoch):
+             losses = model._update_image_with_style(
+                 generated_image,
+                 style_features,
+                 content_features,
+                 style_weight,
+                 content_weight,
+                 optimizer,
+                 var_weight,
+             )
+
+             display_image = model.tensor_to_image(generated_image)
+
+             # report a 1-based step counter
+             step += 1
+
+             style_loss, content_loss, var_loss = losses
+
+             yield np.array(display_image), style_loss, content_loss, var_loss, epoch, step
+
+
+ def main():
+     content_image = gr.Image(type="filepath", label="Content Image", shape=(512, 512))
+     style_image = gr.Image(type="filepath", label="Style Image", shape=(512, 512))
+
+     extractor = gr.Dropdown(
+         ["inception_v3", "vgg19", "resnet50", "mobilenet_v2"],
+         label="Feature Extractor",
+         value="inception_v3",
+     )
+
+     n_content_layers = gr.Slider(1, 5, value=3, step=1, label="Content Layers")
+     n_style_layers = gr.Slider(1, 5, value=2, step=1, label="Style Layers")
+     epochs = gr.Slider(2, 20, value=4, step=1, label="Epochs")
+     learning_rate = gr.Slider(1, 100, value=60, step=1, label="Learning Rate")
+     steps_per_epoch = gr.Slider(1, 100, value=80, step=1, label="Steps Per Epoch")
+     style_weight = gr.Slider(1e-4, 0.5, value=0.3, step=1e-4, label="Style Weight")
+     content_weight = gr.Slider(1e-3, 0.5, value=0.5, step=1e-4, label="Content Weight")
+     var_weight = gr.Slider(0, 1e-5, value=1e-7, step=1e-12, label="Total Variation Weight")
+
+     inputs = [
+         style_image,
+         content_image,
+         extractor,
+         n_style_layers,
+         n_content_layers,
+         epochs,
+         learning_rate,
+         steps_per_epoch,
+         style_weight,
+         content_weight,
+         var_weight,
+     ]
+
+     examples = [
+         ["examples/style_1.jpg", "examples/content_1.jpg", "inception_v3", 3, 2, 4, 60, 100, 0.3, 0.5, 1e-8],
+         ["examples/style_2.jpg", "examples/content_2.jpg", "inception_v3", 3, 2, 4, 60, 100, 0.3, 0.5, 1e-5],
+         ["examples/style_3.jpg", "examples/content_3.jpg", "inception_v3", 3, 2, 4, 60, 100, 0.5, 0.3, 1e-10],
+     ]
+
+     output_image = gr.Image(type="numpy", label="Output Image", shape=(512, 512))
+     style_loss = gr.Number(label="Current Style Loss")
+     content_loss = gr.Number(label="Current Content Loss")
+     var_loss = gr.Number(label="Current Total Variation Loss")
+     curr_epoch = gr.Number(label="Current Epoch")
+     curr_step = gr.Number(label="Current Step")
+
+     outputs = [output_image, style_loss, content_loss, var_loss, curr_epoch, curr_step]
+
+     interface = gr.Interface(
+         fn=fit_style_transfer,
+         inputs=inputs,
+         outputs=outputs,
+         examples=examples,
+     )
+
+     interface.queue().launch(server_name="0.0.0.0", server_port=7860)
+
+
+ if __name__ == "__main__":
+     main()
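Because fit_style_transfer is a generator, the same loop can be driven outside the Gradio UI. A minimal sketch, reusing the example images added in this commit (the epoch/step counts and the output filename are arbitrary choices for illustration):

from PIL import Image

from app import fit_style_transfer

for frame, style_loss, content_loss, var_loss, epoch, step in fit_style_transfer(
    style_image="examples/style_1.jpg",
    content_image="examples/content_1.jpg",
    extractor="inception_v3",
    epochs=2,
    steps_per_epoch=10,
):
    # each iteration yields the current stylized frame and its losses
    print(f"epoch {epoch} step {step}: "
          f"style={float(style_loss):.4f} content={float(content_loss):.4f}")

# the last yielded frame is the final stylized image
Image.fromarray(frame).save("stylized.jpg")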
examples/content_1.jpg ADDED

Git LFS Details

  • SHA256: 86b8d00d3303cb3525db04e57f7d23853f5607ebed85561e43ed3a3f61d95176
  • Pointer size: 132 Bytes
  • Size of remote file: 2.36 MB
examples/content_2.jpg ADDED

Git LFS Details

  • SHA256: a04d1bb8ae37a6d6b5d7058a067d4b471d9c9461ce20d5b48fa8390b5afbfa17
  • Pointer size: 132 Bytes
  • Size of remote file: 1.96 MB
examples/content_3.jpg ADDED

Git LFS Details

  • SHA256: 16dfe55991e70e355ac95927d1ad9fa461c58e555d013ad84deb717a89e26da5
  • Pointer size: 132 Bytes
  • Size of remote file: 3.1 MB
examples/style_1.jpg ADDED

Git LFS Details

  • SHA256: 0982c578317c170d444599632c1556f2492b51fa75a2caad9012755893159c52
  • Pointer size: 132 Bytes
  • Size of remote file: 2.59 MB
examples/style_2.jpg ADDED

Git LFS Details

  • SHA256: d25bb1f00ca850cab0710ac98414a0de63dd9e49f9abc96ee9415cbdf7e4540a
  • Pointer size: 133 Bytes
  • Size of remote file: 14.3 MB
examples/style_3.jpg ADDED

Git LFS Details

  • SHA256: 749549277a70212a842011a60228ae91d17026ecac8aecc3aab90799b6eed6a2
  • Pointer size: 132 Bytes
  • Size of remote file: 4.41 MB
model.py ADDED
@@ -0,0 +1,279 @@
+ import tensorflow as tf
+ import matplotlib.pyplot as plt
+
+
+ class NeuralStyleTransfer:
+     def __init__(
+         self, style_image, content_image, extractor, n_style_layers=5, n_content_layers=5
+     ):
+         # load the pretrained feature extractor
+         if extractor == "inception_v3":
+             self.feature_extractor = tf.keras.applications.InceptionV3(
+                 include_top=False, weights="imagenet"
+             )
+         elif extractor == "vgg19":
+             self.feature_extractor = tf.keras.applications.VGG19(
+                 include_top=False, weights="imagenet"
+             )
+         elif extractor == "resnet50":
+             self.feature_extractor = tf.keras.applications.ResNet50(
+                 include_top=False, weights="imagenet"
+             )
+         elif extractor == "mobilenet_v2":
+             self.feature_extractor = tf.keras.applications.MobileNetV2(
+                 include_top=False, weights="imagenet"
+             )
+         elif isinstance(extractor, tf.keras.Model):
+             self.feature_extractor = extractor
+         else:
+             raise ValueError("Feature extractor not found")
+
+         # freeze the extractor weights
+         self.feature_extractor.trainable = False
+
+         # define the style and content depth
+         self.n_style_layers = n_style_layers
+         self.n_content_layers = n_content_layers
+
+         self.style_image = self._load_img(style_image)
+         self.content_image = self._load_img(content_image)
+
+     def tensor_to_image(self, tensor):
+         """Converts a tensor to an image, dropping the batch dimension if present."""
+         if tensor.shape.rank > 3:
+             assert tensor.shape[0] == 1
+             tensor = tensor[0]
+         return tf.keras.preprocessing.image.array_to_img(tensor)
+
+     def _load_img(self, image):
+         """Loads an image from disk and resizes its longer side to 512 pixels."""
+         max_dim = 512
+
+         image = tf.io.read_file(image)
+         image = tf.image.decode_image(image)
+         image = tf.image.convert_image_dtype(image, tf.float32)
+
+         shape = tf.cast(tf.shape(image)[:-1], tf.float32)
+         long_dim = tf.reduce_max(shape)
+         scale = max_dim / long_dim
+
+         new_shape = tf.cast(shape * scale, tf.int32)
+
+         image = tf.image.resize(image, new_shape)
+         image = image[tf.newaxis, :]
+         image = tf.image.convert_image_dtype(image, tf.uint8)
+
+         return image
+
+     def imshow(self, image, title=None):
+         """Displays an image with a corresponding title."""
+         if len(image.shape) > 3:
+             image = tf.squeeze(image, axis=0)
+
+         plt.imshow(image)
+         if title:
+             plt.title(title)
+
+     def show_images_with_objects(self, images, titles=[]):
+         """Displays a row of images with corresponding titles."""
+         if len(images) != len(titles):
+             return
+
+         plt.figure(figsize=(20, 12))
+         for idx, (image, title) in enumerate(zip(images, titles)):
+             plt.subplot(1, len(images), idx + 1)
+             plt.xticks([])
+             plt.yticks([])
+             self.imshow(image, title)
+
+     def _preprocess_image(self, image):
+         """Scales pixel values from [0, 255] to [-1, 1]."""
+         image = tf.cast(image, dtype=tf.float32)
+         image = (image / 127.5) - 1.0
+
+         return image
+
+     def get_output_layers(self):
+         # get all the layers which contain "conv" in their name
+         all_layers = [
+             layer.name
+             for layer in self.feature_extractor.layers
+             if "conv" in layer.name
+         ]
+
+         # style layers come from the shallow end of the network
+         style_layers = all_layers[: self.n_style_layers]
+
+         # content layers are taken backwards from the second-to-last conv layer
+         content_layers = all_layers[-2 : -self.n_content_layers - 2 : -1]
+
+         content_and_style_layers = content_layers + style_layers
+
+         return content_and_style_layers
+
+     def build(self, layers_name):
+         """Rebuilds the extractor so it outputs the requested intermediate layers."""
+         output_layers = [
+             self.feature_extractor.get_layer(name).output for name in layers_name
+         ]
+
+         self.feature_extractor = tf.keras.Model(
+             self.feature_extractor.input, output_layers
+         )
+
+     def _loss(self, target_img, features_img, type):
+         """
+         Calculates the mean squared error between target and generated features.
+
+         target_img:
+             the target image (style or content) features
+
+         features_img:
+             the generated image features (style or content)
+         """
+         loss = tf.reduce_mean(tf.square(features_img - target_img))
+
+         if type == "content":
+             return 0.5 * loss
+
+         return loss
+
+     def _gram_matrix(self, input_tensor):
+         """
+         Calculates the Gram matrix and divides by the number of spatial locations.
+
+         input_tensor:
+             the output of a conv layer of the style image,
+             shape = (batch_size, height, width, channels)
+         """
+         result = tf.linalg.einsum("bijc,bijd->bcd", input_tensor, input_tensor)
+         input_shape = tf.shape(input_tensor)
+         num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
+         return result / num_locations
+
+     def get_features(self, image, type):
+         preprocessed_image = self._preprocess_image(image)
+
+         outputs = self.feature_extractor(preprocessed_image)
+
+         if type == "style":
+             outputs = outputs[self.n_content_layers :]
+             features = [self._gram_matrix(style_output) for style_output in outputs]
+         elif type == "content":
+             features = outputs[: self.n_content_layers]
+         else:
+             raise ValueError("type must be 'style' or 'content'")
+
+         return features
+
+     def _style_content_loss(
+         self,
+         style_targets,
+         style_outputs,
+         content_targets,
+         content_outputs,
+         style_weight,
+         content_weight,
+     ):
+         """
+         Calculates the combined loss of the style transfer.
+
+         style_targets:
+             the style features of the style image
+
+         style_outputs:
+             the style features of the generated image
+
+         content_targets:
+             the content features of the content image
+
+         content_outputs:
+             the content features of the generated image
+
+         style_weight:
+             the weight of the style loss
+
+         content_weight:
+             the weight of the content loss
+         """
+
+         # add up the loss of each layer
+         style_loss = style_weight * tf.add_n(
+             [
+                 self._loss(style_target, style_output, type="style")
+                 for style_target, style_output in zip(style_targets, style_outputs)
+             ]
+         )
+         content_loss = content_weight * tf.add_n(
+             [
+                 self._loss(content_target, content_output, type="content")
+                 for content_target, content_output in zip(
+                     content_targets, content_outputs
+                 )
+             ]
+         )
+         total_loss = style_loss + content_loss
+         return total_loss, style_loss, content_loss
+
+     def _grad_loss(
+         self,
+         generated_image,
+         style_target,
+         content_target,
+         style_weight,
+         content_weight,
+         var_weight,
+     ):
+         """
+         Calculates the gradients of the total loss with respect to the
+         generated image.
+         """
+
+         with tf.GradientTape() as tape:
+             style_features = self.get_features(generated_image, type="style")
+             content_features = self.get_features(generated_image, type="content")
+             loss, style_loss, content_loss = self._style_content_loss(
+                 style_target,
+                 style_features,
+                 content_target,
+                 content_features,
+                 style_weight,
+                 content_weight,
+             )
+
+             # the total variation term must be added inside the tape so that
+             # its gradient is tracked as well
+             variational_loss = var_weight * tf.image.total_variation(generated_image)
+             loss += variational_loss
+
+         grads = tape.gradient(loss, generated_image)
+         return grads, loss, [style_loss, content_loss, variational_loss]
+
+     def _update_image_with_style(
+         self,
+         generated_image,
+         style_target,
+         content_target,
+         style_weight,
+         content_weight,
+         optimizer,
+         var_weight,
+     ):
+         grads, loss, loss_list = self._grad_loss(
+             generated_image,
+             style_target,
+             content_target,
+             style_weight,
+             content_weight,
+             var_weight,
+         )
+
+         optimizer.apply_gradients([(grads, generated_image)])
+
+         # keep pixel values in the displayable range
+         generated_image.assign(
+             tf.clip_by_value(generated_image, clip_value_min=0.0, clip_value_max=255.0)
+         )
+         return loss_list
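As a sanity check on _gram_matrix: the einsum above is the channel-by-channel inner product of the flattened (H·W, C) feature map, normalized by the number of spatial locations. A small self-contained sketch with arbitrary toy shapes:

import tensorflow as tf

# toy feature map: batch=1, H=4, W=4, C=3
features = tf.random.uniform((1, 4, 4, 3))

# Gram matrix as computed in NeuralStyleTransfer._gram_matrix
gram = tf.linalg.einsum("bijc,bijd->bcd", features, features) / (4.0 * 4.0)

# equivalent formulation: flatten the spatial dims, then F^T @ F / (H * W)
flat = tf.reshape(features, (-1, 3))
gram_ref = tf.matmul(flat, flat, transpose_a=True) / (4.0 * 4.0)

# difference is numerically zero
print(tf.reduce_max(tf.abs(gram[0] - gram_ref)).numpy())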
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ tensorflow-cpu
+ gradio
+ keras
+ matplotlib
+ numpy