Evaluating the model
With the model trained, let's evaluate it on the unseen test dataset. The testing logic is almost identical to the validation logic we discussed earlier during model training, so we will not repeat that discussion here.
import numpy as np

bleu_metric = BLEUMetric(tokenizer=tokenizer)

# Build the test dataset; training=False disables training-time behavior
# such as shuffling
test_dataset, _ = generate_tf_dataset(
    test_captions_df, tokenizer=tokenizer, n_vocab=n_vocab,
    batch_size=batch_size, training=False
)

test_loss, test_accuracy, test_bleu = [], [], []
for ti, t_batch in enumerate(test_dataset):
    print(f"{ti+1} batches processed", end='\r')
    # Loss and accuracy come from the model's compiled metrics
    loss, accuracy = full_model.test_on_batch(t_batch[0], t_batch[1])
    # BLEU needs the actual predictions, so run a separate forward pass
    batch_predicted = full_model.predict_on_batch(t_batch[0])
    bleu_score = bleu_metric.calculate_bleu_from_predictions(
        t_batch[1], batch_predicted
    )
    test_loss.append(loss)
    test_accuracy.append(accuracy)
    test_bleu.append(bleu_score)
print(
    f"\ntest_loss: {np.mean(test_loss)} - "
    f"test_accuracy: {np.mean(test_accuracy)} - "
    f"test_bleu: {np.mean(test_bleu)}"
)