Reputation: 706
I am trying to make a simple project to find the coefficients of an equation using a TensorFlow.js model. However, when run, the loss approaches infinity and becomes NaN within 4 or so iterations. I don't know why this is happening. Here is my code:
// Ground-truth coefficients of the cubic that the model is supposed to recover.
let aReal = Math.random();
let bReal = Math.random();
let cReal = Math.random();
let dReal = Math.random();

// Sample points: the integers -100, -99, ..., 99.
let xs = Array.from({ length: 200 }, (_, index) => index - 100);

// Noisy targets: the cubic evaluated at each x plus a random offset in [-1, 9).
let ys = xs.map((x) => {
  const cubicValue = aReal * Math.pow(x, 3) + bReal * Math.pow(x, 2) + cReal * x + dReal;
  return cubicValue + Math.random() * 10 - 1;
});
// Trainable coefficients of the cubic, each started at a random value in [0, 1).
// They are created in the order a, b, c, d.
const [a, b, c, d] = Array.from({ length: 4 }, () =>
  tf.variable(tf.scalar(Math.random())),
);
/**
 * Evaluates a*x^3 + b*x^2 + c*x + d for every element of `x`, using the
 * current values of the variables a, b, c and d.
 *
 * @param {tf.Tensor} x - Input values.
 * @returns {tf.Tensor} The polynomial evaluated at each input.
 */
function predict(x) {
  return tf.tidy(() => {
    // Each term is built on its own and summed from the highest degree down.
    const cubicTerm = a.mul(x.pow(tf.scalar(3, 'int32')));
    const quadraticTerm = b.mul(x.square());
    const linearTerm = c.mul(x);
    return cubicTerm.add(quadraticTerm).add(linearTerm).add(d);
  });
}
/**
 * Mean squared error between predictions and labels.
 *
 * The current loss value is also logged so that training progress (or
 * divergence) is visible in the console.
 *
 * @param {tf.Tensor} predictions - Model outputs.
 * @param {tf.Tensor} labels - Target values to compare the outputs against.
 * @returns {tf.Tensor} The mean of the squared differences.
 */
function loss(predictions, labels) {
  const meanSquareError = predictions.sub(labels).square().mean();
  // Log with console.log, as the rest of the script does. A bare `print` is
  // not defined in this file; in a plain browser page it resolves to
  // window.print (the print dialog), and elsewhere it is a ReferenceError.
  console.log(meanSquareError.dataSync());
  return meanSquareError;
}
/**
 * Fits the coefficients by running `numIterations` SGD steps that minimize
 * `loss(predict(xS), yS)`.
 *
 * The step size used to be fixed at 0.1. It is now an optional parameter,
 * because the gradient of the cubic term grows with the sixth power of the
 * input magnitude: for large |x| a step of 0.1 overshoots and the loss
 * blows up to NaN. Pass a smaller rate, or better, scale the inputs.
 *
 * @param {tf.Tensor} xS - Training inputs.
 * @param {tf.Tensor} yS - Training targets.
 * @param {number} numIterations - Number of optimizer steps to run.
 * @param {number} [learningRate=0.1] - SGD step size.
 */
function train(xS, yS, numIterations, learningRate = 0.1) {
  const optimizer = tf.train.sgd(learningRate);
  // Dump the training data once, for debugging.
  console.log(xS.dataSync(), yS.dataSync());
  for (let iter = 0; iter < numIterations; iter++) {
    optimizer.minimize(() => loss(predict(xS), yS));
  }
}
// Fit the coefficients with 100 optimizer steps on the generated data.
train(tf.tensor(xs), tf.tensor(ys), 100);

// Evaluate the fitted polynomial at the training inputs.
let yPred = predict(tf.tensor(xs)).dataSync();
console.log(yPred);

// Print the true coefficients, then the learned ones, for comparison.
console.log(aReal, bReal, cReal, dReal);
console.log(...[a, b, c, d].map((coefficient) => coefficient.dataSync()));

// Scatter of the noisy samples and a line for the model's predictions.
let trace1 = { x: xs, y: ys, mode: 'markers', type: 'scatter' };
let trace2 = { x: xs, y: yPred, mode: 'lines' };
let graphData = [trace1, trace2];
Plotly.newPlot('graph', graphData);
Plotly is just a js library I'm using to plot the data.
Upvotes: 5
Views: 1498
Reputation: 187
The loss depends on the values you start with, so if they are too big the loss may jump to infinity and the prediction will return NaN. Try normalizing them so that they are scaled between -1 and 1. For instance, when you train on MNIST, you divide all the values by 255, meaning that a white pixel [255, 255, 255] becomes [1., 1., 1.].
Upvotes: 0
Reputation: 521
You should try to normalize your input data for the prediction to work correctly. Otherwise the optimization becomes numerically unstable.
// Sketch of the idea: standardize the targets before training, then map the
// predictions back to the original scale.
// NOTE(review): this reads as pseudo-code; the arithmetic is meant
// element-wise over all values, not as literal JavaScript on arrays.
ys = [...];
// compute mean and stdev for ys!
// Standardize: subtract the mean and divide by the standard deviation.
normalized = (ys-ysmean)/(ysstd);
train(xs, normalized);
// Predictions come out on the standardized scale...
normed_pred = predict(xs);
// ...so invert the standardization to get values in the original units.
pred = ysstd*normed_pred+ysmean;
In the tests I ran, your code works perfectly on linear models such as y = ax + b; hence my conclusion.
Upvotes: 1
Reputation: 1635
Try lowering your learning rate. Once it's stable you can tweak it back up to speed training. If it's too high you'll get instability and NaNs
// Smaller step size suggested as a starting point to keep training stable;
// raise it again once the loss stops diverging.
const learningRate = 0.0001;
Upvotes: 2