Reputation: 165
The following code throws an error:
Traceback (most recent call last):
  File "training.py", line 19, in <module>
    preds = model.predict(x_test, test_df)
  File "D:\brand\models\lstm_detection_model\lstm_brand_detection.py", line 46, in predict
    output = [' '.join(np.array(token_df[i])[np.array(ind[i])]) for i in range(len(ind))]
  File "D:\brand\models\lstm_detection_model\lstm_brand_detection.py", line 46, in <listcomp>
    output = [' '.join(np.array(token_df[i])[np.array(ind[i])]) for i in range(len(ind))]
IndexError: boolean index did not match indexed array along dimension 0; dimension is 16 but corresponding boolean dimension is 17
The predict function:
def predict(self, test_x, test_df=None):
    token_df = test_df.apply(word_tokenize)
    ind = self.model.predict(test_x, verbose=0).argmax(axis=-1)
    ind = [[z for z in obs if z!=2] for obs in ind]
    ind = [[False if elem == 0 else True for elem in obs] for obs in ind]
    output = [' '.join(np.array(token_df[i])[np.array(ind[i])]) for i in range(len(ind))]
    preds = pd.concat([test_df, pd.DataFrame(output, columns=['predictions'])], axis=1)
    return preds
This seems to be due to an update in NumPy. Is anyone aware of a way to rectify this? Thanks!
Edit: I've posted the entire LSTM model file below. It handles model training and then writes predictions to a separate file, predictions.csv. This is where the error comes in, post-training.
import numpy as np
import pandas as pd
from nltk import word_tokenize
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.core import Dropout
from keras.layers.wrappers import Bidirectional
from keras.layers.wrappers import TimeDistributed
from keras.models import Sequential
from crf_layer import ChainCRF
import warnings
warnings.filterwarnings("ignore")


class LstmBrandDetector:
    def __init__(self):
        self.model = None

    def create_model(self, dropout=0.5, units=150):
        self.model = Sequential()
        self.model.add(Bidirectional(LSTM(units, return_sequences=True),
                                     input_shape=(36, 50)))
        self.model.add(Dropout(dropout))
        self.model.add(Bidirectional(LSTM(units, return_sequences=True)))
        self.model.add(Dropout(dropout))
        self.model.add(TimeDistributed(Dense(3)))
        self.model.add(Dropout(dropout))
        crf = ChainCRF()
        self.model.add(crf)
        self.model.compile(loss=crf.loss, optimizer='Adam',
                           metrics=['categorical_accuracy'])

    def fit(self, train_x, train_y, epochs=5, batch=28):
        self.model.fit(train_x, train_y, epochs=epochs, batch_size=batch)

    def save(self, filepath):
        self.model.save(filepath)

    def print_summary(self):
        print(self.model.summary())

    def predict(self, test_x, test_df=None):
        token_df = test_df.apply(word_tokenize)
        ind = self.model.predict(test_x, verbose=0).argmax(axis=-1)
        ind = [[z for z in obs if z!=2] for obs in ind]
        ind = [[False if elem == 0 else True for elem in obs] for obs in ind]
        ind = ind[1:]
        output = [' '.join(np.array(token_df[i])[np.array(ind[i])]) for i in range(len(ind))]
        preds = pd.concat([test_df, pd.DataFrame(output, columns=['predictions'])], axis=1)
        return preds

    def evaluate(self, test_x, test_y):
        y_pred = self.model.predict(test_x, verbose=0).argmax(axis=-1)
        y_test = test_y.argmax(axis=-1)
        acc = [np.array_equal(y_pred[i], y_test[i]) for i in
               range(len(y_pred))].count(True) / len(y_pred)
        return acc
crf_layer, which is imported into the BiLSTM model:
from __future__ import absolute_import
from keras import backend as K
from keras import initializers
from keras import regularizers
from keras import constraints
from keras.engine import Layer, InputSpec


def path_energy(y, x, U, b_start=None, b_end=None, mask=None):
    '''
    Calculates the energy of a tag path y for a given input x (with mask),
    transition energies U and boundary energies b_start, b_end.
    '''
    x = add_boundary_energy(x, b_start, b_end, mask)
    return path_energy0(y, x, U, mask)


def path_energy0(y, x, U, mask=None):
    '''
    Path energy without boundary potential handling.
    '''
    n_classes = K.shape(x)[2]
    y_one_hot = K.one_hot(y, n_classes)
    energy = K.sum(x * y_one_hot, 2)
    energy = K.sum(energy, 1)
    y_t = y[:, :-1]
    y_tp1 = y[:, 1:]
    U_flat = K.reshape(U, [-1])
    flat_indices = y_t * n_classes + y_tp1
    U_y_t_tp1 = K.gather(U_flat, flat_indices)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        y_t_mask = mask[:, :-1]
        y_tp1_mask = mask[:, 1:]
        U_y_t_tp1 *= y_t_mask * y_tp1_mask
    energy += K.sum(U_y_t_tp1, axis=1)
    return energy


def sparse_chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
    '''
    Given the true sparsely encoded tag sequence y, input x (with mask),
    transition energies U, boundary energies b_start and b_end, it computes
    the loss function of a Linear Chain Conditional Random Field:
    loss(y, x) = NLL(P(y|x)), where P(y|x) = exp(E(y, x)) / Z.
    So, loss(y, x) = -E(y, x) + log(Z).
    Here, E(y, x) is the tag path energy, and Z is the normalization constant.
    The value log(Z) is also called free energy.
    '''
    x = add_boundary_energy(x, b_start, b_end, mask)
    energy = path_energy0(y, x, U, mask)
    energy -= free_energy0(x, U, mask)
    return K.expand_dims(-energy, -1)


def chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
    '''
    Variant of sparse_chain_crf_loss but with one-hot encoded tags y.
    '''
    y_sparse = K.argmax(y, -1)
    y_sparse = K.cast(y_sparse, 'int32')
    return sparse_chain_crf_loss(y_sparse, x, U, b_start, b_end, mask)


def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
    '''
    Given the observations x, it adds the start boundary energy b_start
    (resp. the end boundary energy b_end) to the start (resp. end) elements
    and multiplies by the mask.
    '''
    if mask is None:
        if b_start is not None:
            x = K.concatenate([x[:, :1, :] + b_start, x[:, 1:, :]], axis=1)
        if b_end is not None:
            x = K.concatenate([x[:, :-1, :], x[:, -1:, :] + b_end], axis=1)
    else:
        mask = K.cast(mask, K.floatx())
        mask = K.expand_dims(mask, 2)
        x *= mask
        if b_start is not None:
            mask_r = K.concatenate([K.zeros_like(mask[:, :1]), mask[:, :-1]],
                                   axis=1)
            start_mask = K.cast(K.greater(mask, mask_r), K.floatx())
            x = x + start_mask * b_start
        if b_end is not None:
            mask_l = K.concatenate([mask[:, 1:], K.zeros_like(mask[:, -1:])],
                                   axis=1)
            end_mask = K.cast(K.greater(mask, mask_l), K.floatx())
            x = x + end_mask * b_end
    return x


def viterbi_decode(x, U, b_start=None, b_end=None, mask=None):
    '''
    Computes the best tag sequence y for a given input x, i.e. the one that
    maximizes the value of path_energy.
    '''
    x = add_boundary_energy(x, b_start, b_end, mask)

    alpha_0 = x[:, 0, :]
    gamma_0 = K.zeros_like(alpha_0)
    initial_states = [gamma_0, alpha_0]
    _, gamma = _forward(x,
                        lambda B: [K.cast(K.argmax(B, axis=1), K.floatx()),
                                   K.max(B, axis=1)],
                        initial_states,
                        U,
                        mask)
    y = _backward(gamma, mask)
    return y


def free_energy(x, U, b_start=None, b_end=None, mask=None):
    '''
    Efficiently computes the sum of all path energies for input x, running
    over all possible tag sequences.
    '''
    x = add_boundary_energy(x, b_start, b_end, mask)
    return free_energy0(x, U, mask)


def free_energy0(x, U, mask=None):
    '''
    Free energy without boundary potential handling.
    '''
    initial_states = [x[:, 0, :]]
    last_alpha, _ = _forward(x,
                             lambda B: [K.logsumexp(B, axis=1)],
                             initial_states,
                             U,
                             mask)
    return last_alpha[:, 0]


def _forward(x, reduce_step, initial_states, U, mask=None):
    '''
    Forward recurrence of the linear chain crf.
    '''

    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])],
                           axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values


def batch_gather(reference, indices):
    ref_shape = K.shape(reference)
    batch_size = ref_shape[0]
    n_classes = ref_shape[1]
    flat_indices = K.arange(0, batch_size) * n_classes + K.flatten(indices)
    return K.gather(K.flatten(reference), flat_indices)


def _backward(gamma, mask):
    '''
    Backward recurrence of the linear chain crf.
    '''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step,
                        gamma,
                        initial_states,
                        go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        y *= mask
        y += -(1 - mask)
    return y


class ChainCRF(Layer):
    '''
    A Linear Chain Conditional Random Field output layer.

    It carries the loss function and its weights for computing
    the global tag sequence scores. While training it acts as
    the identity function that passes the inputs to the subsequently
    used loss function. While testing it applies Viterbi decoding
    and returns the best scoring tag sequence as one-hot encoded vectors.

    # Arguments
        init: weight initialization function for chain energies U.
            Can be the name of an existing function (str),
            or a Theano function (see: [initializers](../initializers.md)).
        U_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the transition
            weight matrix.
        b_start_regularizer: instance of [WeightRegularizer]
            (../regularizers.md), applied to the start bias b.
        b_end_regularizer: instance of [WeightRegularizer](../regularizers.md)
            module, applied to the end bias b.
        b_start_constraint: instance of the [constraints](../constraints.md)
            module, applied to the start bias b.
        b_end_constraint: instance of the [constraints](../constraints.md)
            module, applied to the end bias b.
        weights: list of Numpy arrays for initializing [U, b_start, b_end].
            Thus it should be a list of 3 elements of shape
            [(n_classes, n_classes), (n_classes, ), (n_classes, )]
    '''

    def __init__(self, init='glorot_uniform',
                 U_regularizer=None,
                 b_start_regularizer=None,
                 b_end_regularizer=None,
                 U_constraint=None,
                 b_start_constraint=None,
                 b_end_constraint=None,
                 weights=None,
                 **kwargs):
        super(ChainCRF, self).__init__(**kwargs)
        self.init = initializers.get(init)
        self.U_regularizer = regularizers.get(U_regularizer)
        self.b_start_regularizer = regularizers.get(b_start_regularizer)
        self.b_end_regularizer = regularizers.get(b_end_regularizer)
        self.U_constraint = constraints.get(U_constraint)
        self.b_start_constraint = constraints.get(b_start_constraint)
        self.b_end_constraint = constraints.get(b_end_constraint)
        self.initial_weights = weights
        self.supports_masking = True
        self.uses_learning_phase = True
        self.input_spec = [InputSpec(ndim=3)]

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 3
        return (input_shape[0], input_shape[1], input_shape[2])

    def compute_mask(self, input, mask=None):
        if mask is not None:
            return K.any(mask, axis=1)
        return mask

    def _fetch_mask(self):
        mask = None
        if self.inbound_nodes:
            mask = self.inbound_nodes[0].input_masks[0]
        return mask

    def build(self, input_shape):
        assert len(input_shape) == 3
        n_classes = input_shape[2]
        n_steps = input_shape[1]
        assert n_steps is None or n_steps >= 2
        self.input_spec = [InputSpec(dtype=K.floatx(),
                                     shape=(None, n_steps, n_classes))]

        self.U = self.add_weight(shape=(n_classes, n_classes),
                                 initializer=self.init,
                                 name='U',
                                 regularizer=self.U_regularizer,
                                 constraint=self.U_constraint)

        self.b_start = self.add_weight(shape=(n_classes,),
                                       initializer='zero',
                                       name='b_start',
                                       regularizer=self.b_start_regularizer,
                                       constraint=self.b_start_constraint)

        self.b_end = self.add_weight(shape=(n_classes,),
                                     initializer='zero',
                                     name='b_end',
                                     regularizer=self.b_end_regularizer,
                                     constraint=self.b_end_constraint)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

        self.built = True

    def call(self, x, mask=None):
        y_pred = viterbi_decode(x, self.U, self.b_start, self.b_end, mask)
        nb_classes = self.input_spec[0].shape[2]
        y_pred_one_hot = K.one_hot(y_pred, nb_classes)
        return K.in_train_phase(x, y_pred_one_hot)

    def loss(self, y_true, y_pred):
        '''
        Linear Chain Conditional Random Field loss function.
        '''
        mask = self._fetch_mask()
        return chain_crf_loss(y_true, y_pred, self.U, self.b_start, self.b_end,
                              mask)

    def sparse_loss(self, y_true, y_pred):
        '''
        Linear Chain Conditional Random Field loss function with sparse
        tag sequences.
        '''
        y_true = K.cast(y_true, 'int32')
        y_true = K.squeeze(y_true, 2)
        mask = self._fetch_mask()
        return sparse_chain_crf_loss(y_true, y_pred, self.U, self.b_start,
                                     self.b_end, mask)

    def get_config(self):
        config = {
            'init': initializers.serialize(self.init),
            'U_regularizer': regularizers.serialize(self.U_regularizer),
            'b_start_regularizer': regularizers.serialize(
                self.b_start_regularizer),
            'b_end_regularizer': regularizers.serialize(
                self.b_end_regularizer),
            'U_constraint': constraints.serialize(self.U_constraint),
            'b_start_constraint': constraints.serialize(
                self.b_start_constraint),
            'b_end_constraint': constraints.serialize(self.b_end_constraint)
        }
        base_config = super(ChainCRF, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


def create_custom_objects():
    '''
    Returns the custom objects, needed for loading a persisted model.
    '''
    instanceHolder = {'instance': None}

    class ClassWrapper(ChainCRF):
        def __init__(self, *args, **kwargs):
            instanceHolder['instance'] = self
            super(ClassWrapper, self).__init__(*args, **kwargs)

    def loss(*args):
        method = getattr(instanceHolder['instance'], 'loss')
        return method(*args)

    def sparse_loss(*args):
        method = getattr(instanceHolder['instance'], 'sparse_loss')
        return method(*args)

    return {'ChainCRF': ClassWrapper, 'loss': loss, 'sparse_loss': sparse_loss}
Upvotes: 1
Views: 2985
Reputation: 165
I discovered a way to get around this. In this line:
output = [' '.join(np.array(token_df[i])[np.array(ind[i])]) for i in range(len(ind))]
Replacing the second np.array call with np.where makes the dimension mismatch go away.
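The line then becomes:

output = [' '.join(np.array(token_df[i])[np.where(ind[i])]) for i in range(len(ind))]

A minimal sketch of why this sidesteps the check (my own illustration, not from the model code): np.where turns the boolean list into the integer positions of its True entries, and integer (fancy) indexing only requires each position to be in range, not that the index match the array's length.

import numpy as np

tokens = np.array(['the', 'brand', 'x'])   # length 3
mask = [False, True, True, False]          # length 4: one entry too long

# tokens[np.array(mask)] raises IndexError in recent NumPy:
# boolean dimension is 4, array dimension is 3.
idx = np.where(mask)                       # (array([1, 2]),)
print(tokens[idx])                         # ['brand' 'x']

Note this only works while every surplus entry is False; a True position past the end of the array would still raise an IndexError, and any misalignment between mask and tokens silently selects the wrong words.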
Upvotes: 0
Reputation: 231385
Yes, in the past boolean index arrays could be longer than the object they are indexing; now they must match. That's logical, right? The former behavior let buggy code run.
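A minimal reproduction, independent of your model (my own sketch; the shapes match the numbers in your traceback):

import numpy as np

a = np.arange(16)                # dimension 16
mask = np.ones(17, dtype=bool)   # boolean dimension 17

# a[mask] raises:
# IndexError: boolean index did not match indexed array along dimension 0;
# dimension is 16 but corresponding boolean dimension is 17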
This line creates a list of lists; even if ind was a 2d array, the new sublists can differ in length:

ind = [[z for z in obs if z!=2] for obs in ind]
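For example (my own illustration):

ind = [[0, 2, 1], [1, 1, 0]]                 # rows of equal length 3
[[z for z in obs if z != 2] for obs in ind]  # -> [[0, 1], [1, 1, 0]], ragged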
This just turns the elements of those sublists into booleans:
ind = [[False if elem == 0 else True for elem in obs] for obs in ind]
This applies that boolean index to arrays from token_df. And for at least one i, there's a mismatch between the length of np.array(token_df[i]) and ind[i]. Given the way ind was constructed, I'm not surprised.

output = [' '.join(np.array(token_df[i])[np.array(ind[i])]) for i in range(len(ind))]
It's hard to imagine a case where constructing boolean indexes like this would produce correct results, even if the lengths were right. Older NumPy was just letting you get by with buggy code when it should have been raising an error.
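To make the misalignment concrete (my own illustration with made-up labels): dropping the 2s shortens the mask and shifts every later boolean onto the wrong token.

import numpy as np

tokens = np.array(['buy', 'nike', 'shoes', 'now'])
labels = [1, 2, 0, 1]                     # hypothetical per-token predictions

kept = [z for z in labels if z != 2]      # [1, 0, 1] -- length 3, tokens are 4
mask = [elem != 0 for elem in kept]       # [True, False, True]

# tokens[np.array(mask)] raises the IndexError (4 vs 3). Worse, every boolean
# after the dropped 2 is shifted left, so even a length-corrected mask would
# select 'shoes' where the prediction actually referred to 'now'.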
Upvotes: 1