Reputation: 324
I have a dataset (which contains both the data and the label) of shape [299, 13], and the model keeps outputting / predicting the same value. This is a binary classification task. How can I make my model stop predicting the same value for every input?
Here is the code (with some dummy data):
// Dummy dataset: 26 rows x 13 columns. The code that follows uses columns 0-11
// as the input features and column 12 (0 or 1) as the class label.
// Note that the feature columns are on very different scales: some are
// fractions in 0-1, others are integers in the hundreds or thousands.
var Dataset = tf.tensor([[1,0.491821360184978,9,314,0.504585169147173,542,1231,3213,1,0.267304071302649,3,0.615917680092409,0],
[0,0.72959029133292,3,758,0.402582737085955,400,1788,4599,0,0.532702887951197,4,0.18630897965037,1],
[1,0.198764110760428,5,787,0.65507860022684,887,192,4831,1,0.739456077544426,3,0.100068056951143,1],
[0,0.583574833590476,5,596,0.933996451580092,631,331,811,0,0.258445986493932,7,0.811276729811182,0],
[1,0.701499878184206,8,854,0.0326334179806069,845,470,4930,1,0.825469683527519,1,0.448086959665654,1],
[0,0.954482878414911,2,468,0.736300149681564,557,3110,739,0,0.325783042694677,5,0.43488580142501,1],
[1,0.384845877769,2,662,0.265402742189238,649,384,1158,1,0.484884260891815,2,0.915444292219105,0],
[1,0.379266474923531,9,551,0.275982850450116,1022,3329,1413,1,0.237295089390298,4,0.817104709627837,1],
[1,0.691365367558705,8,549,0.479627221800976,796,3381,495,1,0.37129382411555,9,0.332832739155564,1],
[0,0.433042848178662,5,529,0.545178403950882,842,4768,506,0,0.386370525896832,9,0.189942077251933,0],
[1,0.611272282663452,4,823,0.737901576655264,839,2724,1787,1,0.365032317656007,6,0.884073622694046,0],
[0,0.0084315409129881,5,352,0.76858549557176,476,685,4796,0,0.302944943656102,1,0.849655932794213,1],
[0,0.977380232874908,6,701,0.588833228576897,999,2897,3325,0,0.418024491281536,2,0.631872118440871,1],
[1,0.419601058571829,10,384,0.0157052616592944,1009,4438,113,1,0.909015627566542,1,0.0297684897733232,0],
[0,0.739471449044276,4,836,0.0430176780439737,1030,1456,3932,0,0.331426481315121,6,0.734008754824423,0],
[1,0.00209807072438295,4,352,0.499622407429238,418,1912,4452,1,0.727130871883893,8,0.157427964683612,0],
[1,0.956533819923862,10,681,0.196708599930969,829,4562,1718,1,0.233193195569506,7,0.60582783922237,0],
[1,0.504637155233183,8,809,0.608861975627751,717,130,4194,1,0.134197560919101,6,0.375188428842507,0],
[0,0.747363884375055,1,522,0.868234577182028,849,3529,1192,0,0.0322641640468155,5,0.185973206518818,0],
[0,0.244142898027225,10,402,0.0280582030746698,315,3576,3882,0,0.724916254371562,8,0.062229775169706,1],
[0,0.858414851618448,8,459,0.367325906336267,616,930,3892,0,0.177388425930446,10,0.859824526007041,1],
[1,0.921555604905976,2,863,0.821166873626313,528,1624,1289,1,0.366243396916411,5,0.453840754701258,1],
[1,0.171321120311715,1,524,0.177251413832862,468,1608,3123,1,0.192861821442111,8,0.122983286410146,0],
[0,0.539946042901786,6,692,0.817780349862711,392,1053,4891,0,0.409578972921785,3,0.0453862502541893,1],
[1,0.996848843212564,5,549,0.877740438211017,762,3046,843,1,0.888578696082088,8,0.877971306478434,1],
[0,0.218116987741582,3,655,0.240496962520226,407,1001,1474,0,0.976212355833712,2,0.936396547703282,1]])
// Split the dataset into features and labels: columns 0-11 are the 12 input
// features, column 12 is the binary class label.
const rawX = Dataset.slice([0, 0], [-1, 12]);

// Standardize every feature column to zero mean / unit variance. The raw
// columns range from fractions in 0-1 up to values in the thousands; fed
// unscaled into tanh/sigmoid layers they tend to saturate the activations,
// which is a common reason for a network to emit the same prediction for every
// row. Any data passed to predict() later must be scaled with these same
// statistics.
const { mean: featureMean, variance: featureVariance } = tf.moments(rawX, 0);
// The small epsilon guards against division by zero for a constant column.
const x = rawX.sub(featureMean).div(featureVariance.sqrt().add(1e-7));

// One-hot encode the label column: [N, 1] -> [N] int32 -> [N, 2], matching a
// two-unit softmax output layer.
const y = Dataset.slice([0, 12], [-1, 1]).cast('int32').reshape([-1]).oneHot(2);
y.print();
// Feed-forward classifier: 12 input features -> five hidden dense layers ->
// a 2-unit softmax layer that outputs one probability per class.
const model = tf.sequential({
  layers: [
    tf.layers.dense({ inputShape: [12], units: 12, activation: "relu6" }),
    tf.layers.dense({ units: 56, activation: "tanh" }),
    tf.layers.dense({ units: 28, activation: "tanh" }),
    tf.layers.dense({ units: 14, activation: "sigmoid" }),
    tf.layers.dense({ units: 58, activation: "tanh" }),
    tf.layers.dense({ units: 2, activation: "softmax" }),
  ],
});
// Print the layer-by-layer summary of the model.
model.summary();
// Training configuration: Adam optimizer with its default settings,
// categorical cross-entropy loss (the labels are one-hot encoded), and
// accuracy reported as a metric.
const compileOptions = {
  optimizer: tf.train.adam(),
  loss: 'categoricalCrossentropy',
  metrics: ['accuracy'],
};
model.compile(compileOptions);
// Train for 10 epochs in shuffled mini-batches of 3 rows, then print the
// model's predicted class probabilities for the training inputs.
model
  .fit(x, y, { batchSize: 3, epochs: 10, shuffle: true })
  .then(() => {
    console.log("Training Complete");
    const predictions = model.predict(x);
    predictions.print();
  })
  // Report failures instead of leaving the promise rejection unhandled.
  .catch((err) => {
    console.error("Training or prediction failed:", err);
  });
Upvotes: 0
Views: 575
Reputation: 23
I had the same problem: the model trained, but it always predicted the same value. I don't know exactly why this works, but I first trained the model on fake data and then trained it again on the real data, and that solved the problem. My guess is that training on the fake data first left the weights in a better initial state.
I will add example code.
// Warm-up pass: fit the model on all-zero dummy inputs and targets before the
// real training run.
// NOTE(review): the shapes ([10, 7, 7, 256] inputs, [10] targets) must match
// the model's input and output shapes — confirm against the model definition.
const fake_xs = tf.zeros([10, 7, 7, 256]);
const fake_ys = tf.zeros([10]);
// Await the warm-up: fit() is asynchronous, and TensorFlow.js rejects a second
// fit() call on a model whose previous fit() is still running.
await newModel.current.fit(fake_xs, fake_ys, {
  epochs: 5,
  callbacks: {
    // Publish the loss (5 decimal places) after every epoch.
    onEpochEnd: async (epoch, logs) => {
      const loss = logs.loss.toFixed(5);
      setLoss(loss);
      console.log(`LOSS: ${loss}`);
    },
  },
});
// Real training run on the actual dataset; `history` receives the value that
// fit() resolves to.
// NOTE(review): the warm-up call earlier in this snippet uses
// `newModel.current.fit(...)` while this one uses `newModel.fit(...)` — confirm
// which object actually holds the model and make the two calls agree.
const history = await newModel.fit(
  datasetForTraining.xs,
  datasetForTraining.ys,
  {
    epochs: epochNum,
    batchSize: 16,
    callbacks: {
      // Publish the loss (5 decimal places) after every epoch.
      onEpochEnd: async (epoch, logs) => {
        const loss = logs.loss.toFixed(5);
        setLoss(loss);
        console.log(`LOSS: ${loss}`);
      },
    },
  }
);
Upvotes: 0
Reputation: 5079
299 samples with 13 features. That might not be enough for the model to generalize. In your hidden layers you use `tanh` and `sigmoid`. I suggest using `relu`. Also, you are one-hot encoding your labels in order to use `softmax`; that's understandable, but you might want to use `sigmoid` instead.
If you use `sigmoid` without one-hot encoding, then you will be able to set a decision threshold that suits your business problem.
tf.layers.dense({ units: 1, activation: "sigmoid" })
Let's say you set a threshold of 0.5 for predictions: this means that if a prediction is bigger than 0.5, the sample is assigned to the second class. But you can adjust the threshold to, say, 0.4 to see what happens. You can choose a suitable threshold by interpreting the ROC curve (AUC-ROC).
Another thing is about features, they are not scaled properly:
[1,0.00209807072438295,4,352,0.499622407429238,418,1912,4452,1,0.727130871883893,8,0.157427964683612,0]
If they are not scaled properly in a range, then model can give more importance to certain features than the others, or some unexpected behaviors can happen.
Upvotes: 1