Now it's time to create the model. As we mentioned, we are going to use a deep learning architecture called a Convolutional Neural Network (CNN) as the learning algorithm for this fish recognition task. Again, you are not required to understand any of the previous or upcoming code in this chapter, as we are only demonstrating how complex data science tasks can be solved with just a few lines of code, using Keras with TensorFlow as the deep learning platform. Also note that CNNs and other deep learning architectures will be explained in greater detail in later chapters:
Figure 1.10: CNN architecture
So let's go ahead and create the function that builds the CNN architecture we will use for our fish recognition task:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD

def create_cnn_model_arch():
    pool_size = 2 # we will use 2x2 pooling throughout
    conv_depth_1 = 32 # we will initially have 32 kernels per conv. layer...
    conv_depth_2 = 64 # ...switching to 64 after the first pooling layer
    kernel_size = 3 # we will use 3x3 kernels throughout
    drop_prob = 0.5 # dropout in the FC layers with probability 0.5
    hidden_size = 32 # each FC layer will have 32 neurons
    num_classes = 8 # there are 8 fish types
    # Conv [32] -> Conv [32] -> Pool
    cnn_model = Sequential()
    cnn_model.add(ZeroPadding2D((1, 1), input_shape=(3, 32, 32), dim_ordering='th'))
    cnn_model.add(Convolution2D(conv_depth_1, kernel_size, kernel_size,
                                activation='relu', dim_ordering='th'))
    cnn_model.add(ZeroPadding2D((1, 1), dim_ordering='th'))
    cnn_model.add(Convolution2D(conv_depth_1, kernel_size, kernel_size,
                                activation='relu', dim_ordering='th'))
    cnn_model.add(MaxPooling2D(pool_size=(pool_size, pool_size), strides=(2, 2),
                               dim_ordering='th'))
    # Conv [64] -> Conv [64] -> Pool
    cnn_model.add(ZeroPadding2D((1, 1), dim_ordering='th'))
    cnn_model.add(Convolution2D(conv_depth_2, kernel_size, kernel_size,
                                activation='relu', dim_ordering='th'))
    cnn_model.add(ZeroPadding2D((1, 1), dim_ordering='th'))
    cnn_model.add(Convolution2D(conv_depth_2, kernel_size, kernel_size,
                                activation='relu', dim_ordering='th'))
    cnn_model.add(MaxPooling2D(pool_size=(pool_size, pool_size), strides=(2, 2),
                               dim_ordering='th'))
    # Now flatten to 1D, apply two FC layers with ReLU (and dropout),
    # and finally a softmax output layer
    cnn_model.add(Flatten())
    cnn_model.add(Dense(hidden_size, activation='relu'))
    cnn_model.add(Dropout(drop_prob))
    cnn_model.add(Dense(hidden_size, activation='relu'))
    cnn_model.add(Dropout(drop_prob))
    cnn_model.add(Dense(num_classes, activation='softmax'))
    # initiating the stochastic gradient descent optimiser
    stochastic_gradient_descent = SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
    cnn_model.compile(optimizer=stochastic_gradient_descent, # using the SGD optimiser
                      loss='categorical_crossentropy') # using the cross-entropy loss function
    return cnn_model
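Once defined, the model can be instantiated and inspected before any training takes place. As a quick sanity check (a minimal sketch, assuming the Keras 1.x imports shown above), we can build the model and print its layer summary:

# build the model and verify the architecture layer by layer
model = create_cnn_model_arch()
model.summary()  # prints each layer with its output shape and parameter count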
Before we start training the model, we need a model assessment and validation method to help us measure the model's generalization ability. For this, we are going to use a method called k-fold cross-validation. Again, you are not required to understand this method or how it works, as we will explain it in much more detail later.
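To build a quick intuition for what k-fold cross-validation does, the following toy sketch uses the same older scikit-learn KFold interface (sklearn.cross_validation, from versions prior to 0.18) that the training function below relies on; it splits six samples into three folds, with each fold in turn serving as the validation set:

from sklearn.cross_validation import KFold
kf = KFold(6, n_folds=3)  # 6 samples split into 3 folds
for train_index, test_index in kf:
    # each fold holds out a different third of the samples for validation
    print('train:', train_index, 'validation:', test_index)
# train: [2 3 4 5] validation: [0 1]
# train: [0 1 4 5] validation: [2 3]
# train: [0 1 2 3] validation: [4 5]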
So let's start and create a function that will help us assess and validate the model:
from sklearn.cross_validation import KFold
from sklearn.metrics import log_loss
from keras.callbacks import EarlyStopping

def create_model_with_kfold_cross_validation(nfolds=10):
    batch_size = 16 # in each iteration, we consider 16 training examples at once
    num_epochs = 30 # we iterate 30 times over the entire training set
    random_state = 51 # control the randomness for reproducibility of the results on the same platform
    # Loading and normalizing the training samples prior to feeding them to the created CNN model
    training_samples, training_samples_target, training_samples_id = load_normalize_training_samples()
    yfull_train = dict()
    # Providing training/validation indices to split the training samples,
    # which splits the data into 10 consecutive folds with shuffling
    kf = KFold(len(training_samples_id), n_folds=nfolds, shuffle=True, random_state=random_state)
    fold_number = 0 # initial value for the fold number
    sum_score = 0 # overall score (will be incremented at each iteration)
    trained_models = [] # storing the model trained at each iteration over the folds
    # Getting the training/validation samples based on the indices generated by KFold
    for train_index, test_index in kf:
        cnn_model = create_cnn_model_arch()
        training_samples_X = training_samples[train_index] # Getting the training input variables
        training_samples_Y = training_samples_target[train_index] # Getting the training output/label variable
        validation_samples_X = training_samples[test_index] # Getting the validation input variables
        validation_samples_Y = training_samples_target[test_index] # Getting the validation output/label variable
        fold_number += 1
        print('Fold number {} from {}'.format(fold_number, nfolds))
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=3, verbose=0),
        ]
        # Fitting the CNN model with the defined settings
        cnn_model.fit(training_samples_X, training_samples_Y, batch_size=batch_size,
                      nb_epoch=num_epochs, shuffle=True, verbose=2,
                      validation_data=(validation_samples_X, validation_samples_Y),
                      callbacks=callbacks)
        # measuring the generalization ability of the trained model based on the validation set
        predictions_of_validation_samples = cnn_model.predict(
            validation_samples_X.astype('float32'), batch_size=batch_size, verbose=2)
        current_model_score = log_loss(validation_samples_Y, predictions_of_validation_samples)
        print('Current model score log_loss: ', current_model_score)
        # incrementing the sum_score value by the current model's calculated score,
        # weighted by the size of the validation fold
        sum_score += current_model_score * len(test_index)
        # Store valid predictions
        for i in range(len(test_index)):
            yfull_train[test_index[i]] = predictions_of_validation_samples[i]
        # Store the trained model
        trained_models.append(cnn_model)
    # averaging the weighted fold scores over all training samples
    overall_score = sum_score / len(training_samples)
    print("Log_loss train independent avg: ", overall_score)
    # Reporting the model loss at this stage
    overall_settings_output_string = 'loss_' + str(overall_score) + '_folds_' + str(nfolds) + '_ep_' + str(num_epochs)
    return overall_settings_output_string, trained_models
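Putting the two functions together, training could be kicked off with a single call (a sketch; nfolds=10 matches the default defined above):

# train one CNN per fold and collect the models plus a summary string
info_string, cnn_models = create_model_with_kfold_cross_validation(nfolds=10)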
Now, after building the model and assessing and validating it with the k-fold cross-validation method, we need to report the results of the trained models over the test set. To do this, we will reuse the models trained on each of the folds: each model makes its own predictions over the test set, and the per-fold predictions are then averaged to see how well our trained models generalize.
So let's define the function that will take the trained CNN models as an input and then test them using the test set that we have:
def test_generality_crossValidation_over_test_set(overall_settings_output_string, cnn_models):
    batch_size = 16 # in each iteration, we consider 16 testing examples at once
    fold_number = 0 # fold iterator
    number_of_folds = len(cnn_models) # creating the number of folds based on the value used in the training step
    yfull_test = [] # variable to hold the overall predictions for the test set
    # executing the actual cross-validation test process over the test set
    for j in range(number_of_folds):
        model = cnn_models[j]
        fold_number += 1
        print('Fold number {} out of {}'.format(fold_number, number_of_folds))
        # Loading and normalizing the testing samples
        testing_samples, testing_samples_id = load_normalize_testing_samples()
        # Calling the current model over the test set
        test_prediction = model.predict(testing_samples, batch_size=batch_size, verbose=2)
        yfull_test.append(test_prediction)
    # averaging the per-fold predictions and formatting the output
    test_result = merge_several_folds_mean(yfull_test, number_of_folds)
    overall_settings_output_string = 'loss_' + overall_settings_output_string + '_folds_' + str(number_of_folds)
    format_results_for_types(test_result, testing_samples_id, overall_settings_output_string)
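The function above depends on merge_several_folds_mean, a small helper that averages the per-fold prediction arrays element-wise. The sketch below is one plausible implementation of it (an assumption on our part, since the helper's actual body is not shown here), followed by how the whole test step would be invoked:

import numpy as np

def merge_several_folds_mean(yfull_test, number_of_folds):
    # element-wise average of the per-fold predictions: each entry of
    # yfull_test is an (n_test_samples, num_classes) array of class probabilities
    summed = np.array(yfull_test[0])
    for fold_predictions in yfull_test[1:]:
        summed += np.array(fold_predictions)
    return (summed / number_of_folds).tolist()

# running the test step with the models trained during cross-validation
test_generality_crossValidation_over_test_set(info_string, cnn_models)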