taneemishere
committed on
Commit
•
0d7f4b7
1
Parent(s):
31c555d
adding model code
Browse files- classes/model/pix2code2.py +68 -0
classes/model/pix2code2.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__author__ = 'Ferdiand John Briones, attempt at pix2code2 through pretrained autoencoders'
|
2 |
+
|
3 |
+
from keras.layers import Input, Dense, Dropout, RepeatVector, LSTM, concatenate, Flatten
|
4 |
+
from keras.models import Sequential, Model
|
5 |
+
# from keras.optimizers import RMSprop
|
6 |
+
from tensorflow.keras.optimizers import RMSprop
|
7 |
+
from keras import *
|
8 |
+
from .Config import *
|
9 |
+
from .AModel import *
|
10 |
+
from .autoencoder_image import *
|
11 |
+
|
12 |
+
class pix2code2(AModel):
    """Image-to-token model built on a frozen, pre-trained image autoencoder.

    The network has two inputs: an image, which is passed through the
    encoder half of a pre-trained autoencoder (kept frozen), and a one-hot
    token context of shape ``(CONTEXT_LENGTH, output_size)``, which is
    passed through a two-layer LSTM language model.  Both streams are
    concatenated and decoded by two further LSTM layers into a softmax
    over ``output_size`` classes.
    """

    def __init__(self, input_shape, output_size, output_path):
        """Build and compile the model.

        Args:
            input_shape: Shape of the image input; also used as both shape
                arguments when constructing the autoencoder.
            output_size: Size of the softmax output and of the last
                dimension of the textual input.
            output_path: Forwarded to ``AModel.__init__`` and to the
                ``autoencoder_image`` constructor.
        """
        AModel.__init__(self, input_shape, output_size, output_path)
        self.name = "pix2code2"

        visual_input = Input(shape=input_shape)

        # Load the pre-trained autoencoder model.
        autoencoder_model = autoencoder_image(input_shape, input_shape, output_path)
        autoencoder_model.load('autoencoder')
        # NOTE(review): this path is relative to the current working
        # directory, not to output_path -- confirm this is intended.
        autoencoder_model.model.load_weights('../bin/autoencoder.h5')

        # Keep only the part of the autoencoder up to the encoded layer.
        hidden_layer_model_freeze = Model(
            inputs=autoencoder_model.model.input,
            outputs=autoencoder_model.model.get_layer('encoded_layer').output,
        )

        # Make sure the loaded encoder is no longer updated during training.
        # This must happen before compile() for the flag to take effect.
        for layer in hidden_layer_model_freeze.layers:
            layer.trainable = False

        hidden_layer_input = hidden_layer_model_freeze(visual_input)

        # Additional trainable layers before concatenation.
        hidden_layer_model = Flatten()(hidden_layer_input)
        hidden_layer_model = Dense(1024, activation='relu')(hidden_layer_model)
        hidden_layer_model = Dropout(0.3)(hidden_layer_model)
        hidden_layer_model = Dense(1024, activation='relu')(hidden_layer_model)
        hidden_layer_model = Dropout(0.3)(hidden_layer_model)
        # Repeat the image feature vector once per context step so it can be
        # concatenated with the per-step language features below.
        hidden_layer_result = RepeatVector(CONTEXT_LENGTH)(hidden_layer_model)

        # The same language model as in pix2code by Tony Beltramelli.
        language_model = Sequential()
        language_model.add(
            LSTM(128, return_sequences=True, input_shape=(CONTEXT_LENGTH, output_size))
        )
        language_model.add(LSTM(128, return_sequences=True))

        textual_input = Input(shape=(CONTEXT_LENGTH, output_size))
        encoded_text = language_model(textual_input)

        decoder = concatenate([hidden_layer_result, encoded_text])

        decoder = LSTM(512, return_sequences=True)(decoder)
        decoder = LSTM(512, return_sequences=False)(decoder)
        decoder = Dense(output_size, activation='softmax')(decoder)

        self.model = Model(inputs=[visual_input, textual_input], outputs=decoder)

        # `learning_rate` replaces the deprecated `lr` alias, which newer
        # Keras releases no longer accept.
        optimizer = RMSprop(learning_rate=0.0001, clipvalue=1.0)
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    def fit_generator(self, generator, steps_per_epoch):
        """Train the model from a generator for ``EPOCHS`` epochs, then save.

        Args:
            generator: Batch generator passed straight to Keras.
            steps_per_epoch: Number of generator batches per epoch.
        """
        self.model.summary()
        # Model.fit accepts generators directly; Model.fit_generator is
        # deprecated and absent from newer Keras releases.
        self.model.fit(generator, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)
        self.save()

    def predict(self, image, partial_caption):
        """Return the first row of the model's prediction for the inputs."""
        return self.model.predict([image, partial_caption], verbose=0)[0]

    def predict_batch(self, images, partial_captions):
        """Return the model's predictions for a batch of inputs."""
        return self.model.predict([images, partial_captions], verbose=1)
|