Reputation: 164
I'm trying to build a simple deep LSTM neural network. I'm not very experienced with machine learning in general, but I do know some basics. This is my sample model:
const model = tf.sequential();
const {layers} = tf;

model.add(layers.lstm({
  units: 256,
  inputShape: [1, 6],
  activation: "relu",
  returnSequences: true,
}));
model.add(layers.dropout({rate: 0.2}));
model.add(layers.batchNormalization());

model.add(layers.lstm({
  units: 128,
  activation: "relu",
  returnSequences: true,
}));
model.add(layers.dropout({rate: 0.2}));
model.add(layers.batchNormalization());

model.add(layers.lstm({
  units: 128,
  activation: "relu",
}));
model.add(layers.dropout({rate: 0.2}));
model.add(layers.batchNormalization());

model.add(layers.dense({
  units: 32,
  activation: "relu",
}));
model.add(layers.dropout({rate: 0.2}));

model.add(layers.dense({
  units: 2,
  activation: "softmax",
}));

model.compile({
  // loss: "sparseCategoricalCrossentropy",
  loss: "categoricalCrossentropy",
  optimizer: tf.train.adam(0.001),
  metrics: ['acc'],
});
And when I try to train the model, I get this error.
const result = await model.fit(tf.ones([1, 1, 6]), tf.ones([1, 2]));
Error: Argument tensors passed to stack must be a `Tensor[]` or `TensorLike[]`
I found a thread about a bug on GitHub, but I don't think this is a tfjs bug, because that thread is more than a year old; if it were a bug, I'm pretty sure Google would have fixed it already. I think I did something wrong here. I also tried the exact same model in Python and it works fine... But I don't want to use Python. I quit Python many years ago when Node.js came out. I love JavaScript more than Python, and it's much easier for me to maintain. Can you help me understand what is wrong here?
Upvotes: 2
Views: 1090
Reputation: 901
I think this is a bug in tf.js: namely, RNN layers don't accept sequences with just one element (an error is thrown when fitting). From my understanding, when using a stateful RNN it might make sense to pass just one element at a time (correct me if I'm wrong).
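You can see that the time dimension is the culprit by lengthening it: the same kind of model fits fine once each sample has at least two timesteps (a minimal sketch, assuming tf is the TensorFlow.js module; layer sizes are arbitrary):

const m = tf.sequential();
// inputShape [2, 6]: sequences of length 2 instead of 1.
m.add(tf.layers.lstm({units: 8, inputShape: [2, 6]}));
m.add(tf.layers.dense({units: 2, activation: 'softmax'}));
m.compile({loss: 'categoricalCrossentropy', optimizer: 'adam'});

// Trains without the `stack` error; with x of shape [4, 1, 6] it throws.
await m.fit(tf.ones([4, 2, 6]), tf.ones([4, 2]));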
As a temporary workaround for testing, I simply repeat the input tensor before wiring it to the RNN layer. In theory, the RNN should learn to ignore the repeated frame:
outputs = tf.layers.flatten().apply(outputs);
outputs = tf.layers.repeatVector({n: 2}).apply(outputs);  // [batch, 2, features]
outputs = tf.layers.gru({units: 32}).apply(outputs);
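Applied to your model, the trick could look like this (a sketch using the functional API; I kept only one LSTM layer for brevity):

const inputs = tf.input({shape: [1, 6]});
// Drop the length-1 time axis, then repeat the frame so the
// RNN receives a sequence of length 2.
let x = tf.layers.flatten().apply(inputs);    // [batch, 6]
x = tf.layers.repeatVector({n: 2}).apply(x);  // [batch, 2, 6]
x = tf.layers.lstm({units: 256}).apply(x);
const outputs = tf.layers.dense({units: 2, activation: 'softmax'}).apply(x);

const model = tf.model({inputs, outputs});
model.compile({
  loss: 'categoricalCrossentropy',
  optimizer: tf.train.adam(0.001),
  metrics: ['acc'],
});
await model.fit(tf.ones([1, 1, 6]), tf.ones([1, 2]));  // no longer throws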
Also, I made a custom stateful GRU layer that can accept just one element at a time (only for testing). However, I did not find much difference compared with repeating:
class MyGruLayer extends tf.layers.Layer {
  constructor(args) {
    super(args);
    // tf.layers.gruCell is a factory function, not a class, so no `new`.
    this.cell = tf.layers.gruCell(args);
    this.states_ = null;
    this.keptStates = [];
  }

  build(inputShape) {
    // A stateful layer needs a fixed batch size, so the model must be
    // built with a known batch dimension (e.g. tf.input({batchShape: ...})).
    this.batchSize = inputShape[0];
    this.cell.build(inputShape);
    this.resetStates();
    this.stateSpec = {shape: [null, this.cell.stateSize]};
    this.built = true;
  }

  computeOutputShape(inputShape) {
    return [inputShape[0], this.cell.stateSize];
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      const training = kwargs == null ? null : kwargs['training'];
      const input = Array.isArray(inputs) ? inputs[0] : inputs;
      const initialState = this.states_;
      // The cell takes [input, ...states] and returns [output, ...newStates].
      const [outputs, ...states] =
          this.cell.call([input].concat(initialState), {training});
      this.resetStates(states, training);
      return outputs;
    });
  }

  /* Adapted from https://github.com/tensorflow/tfjs/blob/tfjs-v3.12.0/tfjs-layers/src/layers/recurrent.ts#L562 */
  resetStates(states, training = false) {
    tf.tidy(() => {
      const batchSize = this.batchSize;
      if (this.states_ == null) {
        if (Array.isArray(this.cell.stateSize)) {
          this.states_ = this.cell.stateSize.map(dim => tf.zeros([batchSize, dim]));
        } else {
          this.states_ = [tf.zeros([batchSize, this.cell.stateSize])];
        }
      } else if (states == null) {
        // Dispose old state tensors.
        tf.dispose(this.states_);
        // For stateful RNNs, fully dispose kept old states.
        if (this.keptStates != null) {
          tf.dispose(this.keptStates);
          this.keptStates = [];
        }
        if (Array.isArray(this.cell.stateSize)) {
          this.states_ = this.cell.stateSize.map(dim => tf.zeros([batchSize, dim]));
        } else {
          this.states_[0] = tf.zeros([batchSize, this.cell.stateSize]);
        }
      } else {
        if (training === true) {
          this.keptStates.push(this.states_.slice());
        } else {
          tf.dispose(this.states_);
        }
        for (let index = 0; index < this.states_.length; ++index) {
          const value = states[index];
          const dim = Array.isArray(this.cell.stateSize) ?
              this.cell.stateSize[index] :
              this.cell.stateSize;
          const expectedShape = [batchSize, dim];
          if (value.shape[0] !== batchSize || value.shape[1] !== dim) {
            throw new Error(
                `State ${index} is incompatible with layer ${this.name}: ` +
                `expected shape=${expectedShape}, received shape=${value.shape}`);
          }
          this.states_[index] = value;
        }
      }
      this.states_ = this.states_.map(state => tf.keep(state.clone()));
    });
  }

  static get className() {
    return 'MyGruLayer';
  }
}
tf.serialization.registerClass(MyGruLayer);
// Example: outputs = new MyGruLayer({units: 32}).apply(outputs)
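For completeness, here is how I wire it into a model (a sketch; the fixed batchShape is an assumption, since the layer keeps per-batch state and therefore needs a known batch size):

const inputs = tf.input({batchShape: [4, 6]});  // stateful => fixed batch size
let x = tf.layers.dense({units: 16, activation: 'relu'}).apply(inputs);
x = new MyGruLayer({units: 32}).apply(x);       // consumes one frame per call
const outputs = tf.layers.dense({units: 2, activation: 'softmax'}).apply(x);
const model = tf.model({inputs, outputs});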
Upvotes: 1