Deep Learning for Natural Language Processing


Chapter 8: State-of-the-Art Natural Language Processing

Activity 11: Build a Text Summarization Model

Solution:

  1. Import the necessary Python packages and classes.

    import os
    import re
    import pdb
    import string

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt

    # Keras pieces used throughout this solution
    from keras.utils import to_categorical
    from keras.layers import Input, Dense, Activation, Dot, Concatenate, RepeatVector, Bidirectional, LSTM
    from keras.models import Model
    from keras.optimizers import Adam
    from keras import backend as K

    %matplotlib inline

  2. Load the dataset and read the file.

    path_data = "news_summary_small.csv"
    df_text_file = pd.read_csv(path_data)
    df_text_file.headlines = df_text_file.headlines.str.lower()
    df_text_file.text = df_text_file.text.str.lower()
    lengths_text = df_text_file.text.apply(len)
    dataset = list(zip(df_text_file.text.values, df_text_file.headlines.values))
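    Before building the vocabulary, it is worth checking how long the articles and headlines actually are, since these lengths motivate the Tx and Ty values chosen in step 5. A minimal, optional check using the DataFrame loaded above:

    # Optional: inspect the length distributions (these motivate Tx = 460 and Ty = 75 later)
    print(lengths_text.describe())
    print(df_text_file.headlines.apply(len).describe())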

  3. Make the vocab dictionaries.

    input_texts = []
    target_texts = []
    input_chars = set()
    target_chars = set()

    for line in dataset:
        input_text, target_text = list(line[0]), list(line[1])
        target_text = ['BEGIN_'] + target_text + ['_END']
        input_texts.append(input_text)
        target_texts.append(target_text)
        for character in input_text:
            if character not in input_chars:
                input_chars.add(character)
        for character in target_text:
            if character not in target_chars:
                target_chars.add(character)

    input_chars.add("<unk>")
    input_chars.add("<pad>")
    target_chars.add("<pad>")
    input_chars = sorted(input_chars)
    target_chars = sorted(target_chars)

    human_vocab = dict(zip(input_chars, range(len(input_chars))))
    machine_vocab = dict(zip(target_chars, range(len(target_chars))))
    inv_machine_vocab = dict(enumerate(sorted(machine_vocab)))

    def string_to_int(string_in, length, vocab):
        """
        Converts the input string into a list of integers representing the positions of its
        characters in "vocab".

        Arguments:
        string_in -- input string
        length -- the number of time steps you'd like; determines if the output will be padded or cut
        vocab -- vocabulary, a dictionary used to index every character of "string_in"

        Returns:
        rep -- list of integers (or '<unk>') of size "length", representing the positions of the string's characters in the vocabulary
        """

  4. Inside string_to_int, convert the string to lowercase to standardize it, truncate it if it is too long, map each character to its index, and pad with '<pad>' if it is too short.

        string_in = string_in.lower()
        string_in = string_in.replace(',', '')
        if len(string_in) > length:
            string_in = string_in[:length]
        rep = list(map(lambda x: vocab.get(x, '<unk>'), string_in))
        if len(string_in) < length:
            rep += [vocab['<pad>']] * (length - len(string_in))
        return rep

    def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):
        X, Y = zip(*dataset)
        X = np.array([string_to_int(i, Tx, human_vocab) for i in X])
        Y = [string_to_int(t, Ty, machine_vocab) for t in Y]
        print("X shape from preprocess: {}".format(X.shape))
        # One-hot encode the integer-indexed inputs and targets
        Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
        Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))
        return X, np.array(Y), Xoh, Yoh

    def softmax(x, axis=1):
        """Softmax activation function.

        # Arguments
            x: Tensor.
            axis: Integer, axis along which the softmax normalization is applied.
        # Returns
            Tensor, output of softmax transformation.
        # Raises
            ValueError: In case `dim(x) == 1`.
        """
        ndim = K.ndim(x)
        if ndim == 2:
            return K.softmax(x)
        elif ndim > 2:
            e = K.exp(x - K.max(x, axis=axis, keepdims=True))
            s = K.sum(e, axis=axis, keepdims=True)
            return e / s
        else:
            raise ValueError('Cannot apply softmax to a tensor that is 1D')
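    As a quick illustration of what string_to_int returns, the following sketch (with a made-up input string) shows the lowercasing, padding, and unknown-character handling; the exact integer values depend on the vocabulary built above.

    # Hypothetical example: encode a short string to a fixed length of 15
    sample_rep = string_to_int("Hello, WORLD!", 15, human_vocab)
    print(len(sample_rep))  # 15 -- shorter inputs are padded with the index of '<pad>'
    print(sample_rep)       # characters missing from human_vocab come back as the string '<unk>'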

  5. Run the previous code snippets to load the data, build the vocab dictionaries, and define the utility functions used later. Then define the lengths of the input and output character sequences.

    Tx = 460
    Ty = 75
    X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)
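    A quick shape check (a sketch; m stands for the number of article/headline pairs in the dataset) makes the one-hot layout explicit:

    # X:   (m, Tx)                       integer-encoded articles
    # Xoh: (m, Tx, len(human_vocab))     one-hot articles
    # Y:   (m, Ty)                       integer-encoded headlines
    # Yoh: (m, Ty, len(machine_vocab))   one-hot headlines
    print(X.shape, Xoh.shape, Y.shape, Yoh.shape)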

    Define the shared model layers (repeator, concatenator, densors, activator, dotor):

    # Define shared layers as global variables
    repeator = RepeatVector(Tx)
    concatenator = Concatenate(axis=-1)
    densor1 = Dense(10, activation="tanh")
    densor2 = Dense(1, activation="relu")
    activator = Activation(softmax, name='attention_weights')
    dotor = Dot(axes=1)

    Define the one-step attention function:

    def one_step_attention(h, s_prev):
        """
        Performs one step of attention: outputs a context vector computed as a dot product of the
        attention weights "alphas" and the hidden states "h" of the Bi-LSTM.

        Arguments:
        h -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_h)
        s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)

        Returns:
        context -- context vector, input of the next (post-attention) LSTM cell
        """

  6. Use repeator to repeat s_prev so that it has shape (m, Tx, n_s) and can be concatenated with all the hidden states "h".

        s_prev = repeator(s_prev)

  7. Use concatenator to concatenate h and s_prev on the last axis.

        concat = concatenator([h, s_prev])

  8. Use densor1 to propagate concat through a small fully connected neural network to compute the "intermediate energies" variable e.

        e = densor1(concat)

  9. Use densor2 to propagate e through a small fully connected neural network to compute the "energies" variable.

        energies = densor2(e)

  10. Use "activator" on "energies" to compute the attention weights "alphas".

        alphas = activator(energies)

  11. Use dotor together with "alphas" and "h" to compute the context vector to be given to the next (post-attention) LSTM cell, and return it.

        context = dotor([alphas, h])
        return context
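    To make the tensor shapes in this function explicit, the following comment sketch traces one call of one_step_attention for a batch of m examples:

    # s_prev:   (m, n_s)         -> repeator ->   (m, Tx, n_s)
    # concat:   (m, Tx, n_s + 2*n_h)              # h from the Bi-LSTM is (m, Tx, 2*n_h)
    # e:        (m, Tx, 10)                       # densor1
    # energies: (m, Tx, 1)                        # densor2
    # alphas:   (m, Tx, 1)                        # softmax normalizes over the Tx axis (axis=1)
    # context:  (m, 1, 2*n_h)                     # Dot(axes=1) contracts the Tx axis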

    Define the hidden state sizes for the encoder (Bi-LSTM) and the decoder (post-attention LSTM).

    n_h = 32
    n_s = 64
    post_activation_LSTM_cell = LSTM(n_s, return_state=True)
    output_layer = Dense(len(machine_vocab), activation=softmax)

    Define the model architecture and call it to obtain a model instance.

    def model(Tx, Ty, n_h, n_s, human_vocab_size, machine_vocab_size):
        """
        Arguments:
        Tx -- length of the input sequence
        Ty -- length of the output sequence
        n_h -- hidden state size of the Bi-LSTM
        n_s -- hidden state size of the post-attention LSTM
        human_vocab_size -- size of the python dictionary "human_vocab"
        machine_vocab_size -- size of the python dictionary "machine_vocab"

        Returns:
        model -- Keras model instance
        """

  12. Define the inputs of your model with shape (Tx, human_vocab_size).
  13. Define s0 and c0, the initial hidden and cell states for the post-attention (decoder) LSTM, each of shape (n_s,).

        X = Input(shape=(Tx, human_vocab_size), name="input_first")
        s0 = Input(shape=(n_s,), name='s0')
        c0 = Input(shape=(n_s,), name='c0')
        s = s0
        c = c0

  14. Initialize an empty list of outputs.

        outputs = []

  15. Define your pre-attention Bi-LSTM. Remember to use return_sequences=True.

        h = Bidirectional(LSTM(n_h, return_sequences=True))(X)

        # Iterate for Ty steps
        for t in range(Ty):
            # Perform one step of the attention mechanism to get back the context vector at step t
            context = one_step_attention(h, s)

  16. Apply the post-attention LSTM cell to the "context" vector.

            # Pass: initial_state = [hidden state, cell state]
            s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])

  17. Apply the Dense layer to the hidden state output of the post-attention LSTM.

            out = output_layer(s)

  18. Append "out" to the "outputs" list.

            outputs.append(out)

  19. Create the model instance, taking the three inputs and returning the list of outputs.

        model = Model(inputs=[X, s0, c0], outputs=outputs)
        return model

    model = model(Tx, Ty, n_h, n_s, len(human_vocab), len(machine_vocab))
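    At this point it can help to confirm that the graph is wired as intended; Keras' built-in summary lists the layers and their output shapes:

    model.summary()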

    # Define the model loss function and other hyperparameters. Also initialize the decoder state vectors.

    opt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    # Initial decoder hidden/cell states; the first dimension must equal the number of training examples
    s0 = np.zeros((10000, n_s))
    c0 = np.zeros((10000, n_s))
    outputs = list(Yoh.swapaxes(0, 1))
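    The model defined above returns a list of Ty separate outputs (one per decoded character), so the targets have to be supplied in the same form. Yoh has shape (m, Ty, len(machine_vocab)); swapping its first two axes and converting to a list yields Ty arrays, matching the outputs one to one:

    # Yoh:                 (m, Ty, len(machine_vocab))
    # Yoh.swapaxes(0, 1):  (Ty, m, len(machine_vocab))
    # list(...):           Ty arrays, each (m, len(machine_vocab)) -- one target per output step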

    Fit the model to our data:

    model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

    # Run the inference step on new text.

    EXAMPLES = ["Last night a meteorite was seen flying near the earth's moon."]
    for example in EXAMPLES:
        source = string_to_int(example, Tx, human_vocab)
        source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source)))
        source = source[np.newaxis, :]
        # Use batch-size-1 initial decoder states so they match the single input example
        s0_single = np.zeros((1, n_s))
        c0_single = np.zeros((1, n_s))
        prediction = model.predict([source, s0_single, c0_single])
        prediction = np.argmax(prediction, axis=-1)
        output = [inv_machine_vocab[int(i)] for i in prediction]
        print("source:", example)
        print("output:", ''.join(output))

    The output is as follows:

Figure 8.18: Text summarization model output