Abhay Srivastav

Reputation: 774

LSTM performing well on training data but poorly on test data

I am trying to implement an LSTM model for sequence prediction. After training, the model performs well on the training data but poorly on the test data.

Below is the model's prediction on the training data:

[Plot: model prediction on training data]

Below is the model's prediction on the test data:

[Plot: model prediction on test data]

The total data count is 429; I'm using 350 samples for training and the rest for testing. I have one LSTM layer with 30 units and one dense layer.

Can someone guide me on how to improve the results? Is this a case of overfitting?

Below is the code

    /**
     * Get the stock data reduced to just the variables we are interested
     * and cleaned of missing data.
     */
    async function getData() {
      const stockDataReq = await fetch('http://localhost:8080/stockdata.json?v=0.2');  
      let stockData = await stockDataReq.json();  
      const cleaned = stockData.map((stock,i) => ({
        day: i,
        value: stock["Close Price"],
      }))
      .filter(n=>n.value);
    
      return generateSequence(cleaned,10);
      
    }
    
    function generateSequence(data, window_size)
    {
      /*returns [
        {
        set:[[834],[831],[823],[674],[787],[626],[727],[727],[736],[789]],
        value:832
        },
        ...
      ]
      */
      const closePrices = data.map((stock)=>stock["value"]);
      let r_avgs = [];
      for (let i = 0; i < data.length - window_size; i++){
        let set = closePrices.slice(i, i + window_size);
        set = set.map(n=>[n]);
        r_avgs.push({ set: set, value:closePrices[i+window_size] });
      }
      return r_avgs;
    }
    
    async function run() {
      // Load and plot the original input data that we are going to train on.
      const data = await getData();
      const trainingData = data.slice(0, 350);
      const validationData = data.slice(350);
      const model = createModel();

      // Convert the data to a form we can use for training.
      const trainingTensors = convertToTensor(trainingData);
      const {inputs, labels} = trainingTensors;

      // Train the model
      await trainModel(model, inputs, labels);
      console.log('Done Training');

      const validationTensor = convertToTensor(validationData);
      testModel(model, validationTensor);
    }
    
    /**
     * Convert the input data to tensors that we can use for machine
     * learning. We also apply the important best practice of
     * _normalizing_ the data.
     */
    function convertToTensor(data) {
      // Wrapping these calculations in a tidy will dispose any 
      // intermediate tensors.
      
      return tf.tidy(() => {
        // Step 1. Shuffle the data    
        //tf.util.shuffle(data);
    
        // Step 2. Convert data to Tensor
        const inputs = data.map(d => d.set)
        const labels = data.map(d => d.value);
    
        const inputTensor = tf.tensor3d(inputs, [inputs.length, inputs[0].length,1]);
        const labelTensor = tf.tensor1d(labels);
    
        // Step 3. Normalize the data to the range 0 - 1 using min-max scaling
        const inputMax = inputTensor.max();
        const inputMin = inputTensor.min();
        const labelMax = labelTensor.max();
        const labelMin = labelTensor.min();

        const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
        const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));
    
        return {
          inputs: normalizedInputs,
          labels: normalizedLabels,
          // Return the min/max bounds so we can use them later.
          inputMax,
          inputMin,
          labelMax,
          labelMin,
        }
      });  
    }
    
    function createModel() {
      // Create a sequential model
      const model = tf.sequential(); 
      
      // Add a single input layer
      model.add(tf.layers.lstm({units: 30, inputShape: [10, 1]}));
      model.add(tf.layers.dropout({rate: 0.2}));
      model.add(tf.layers.dense({units: 1}));
      model.summary();
    
      return model;
    }
    
    async function trainModel(model, inputs, labels) {
      // Prepare the model for training.  
      model.compile({
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
        metrics: ['mse'],
      });
      
      const epochs = 10;
      const batchSize = 32;
      
      return await model.fit(inputs, labels, {
        epochs,
        batchSize,
        callbacks: tfvis.show.fitCallbacks(
          { name: 'Training Performance' },
          ['loss', 'mse'], 
          { height: 200, callbacks: ['onEpochEnd'] })
        });
    }

    function testModel(model, normalizationData) {
      const {inputMax, inputMin, labelMin, labelMax, inputs, labels} = normalizationData;

      // Generate predictions for the held-out inputs, then un-normalize both
      // the predictions and the labels by inverting the min-max scaling
      // applied in convertToTensor.
      const preds = model.predict(inputs);

      const unNormLabels = labels
        .mul(labelMax.sub(labelMin))
        .add(labelMin);

      const unNormPreds = preds
        .mul(labelMax.sub(labelMin))
        .add(labelMin);

      const predictedPoints = Array.from(unNormPreds.dataSync())
        .map((val, i) => ({x: i, y: val}));

      const originalPoints = Array.from(unNormLabels.dataSync())
        .map((val, i) => ({x: i, y: val}));

      tfvis.render.linechart(
        {name: 'Model Predictions vs Original Data'},
        {values: [predictedPoints, originalPoints], series: ['predicted', 'original']},
        {
          xLabel: 'Day',
          yLabel: 'Close Price',
          height: 300
        }
      );
    }
    
    document.addEventListener('DOMContentLoaded', run);

Upvotes: 2

Views: 937

Answers (2)

Deepak

Reputation: 126

Try to increase your sample size. With an LSTM you need to tune the number of units a bit to find the right balance and avoid too much overfitting; a little overfitting is fine. Lower the LSTM units, see whether things change for better or worse, and increase or decrease accordingly, as in the sketch below.
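For example, here is a hedged sketch of a lower-capacity variant of the question's createModel; the helper name createSmallerModel and the unit count of 10 are illustrative choices to experiment with, not recommended values:

    function createSmallerModel(units = 10) {
      const model = tf.sequential();
      // Fewer LSTM units means fewer parameters, which limits how much of
      // the small training set the model can simply memorize.
      model.add(tf.layers.lstm({units: units, inputShape: [10, 1]}));
      model.add(tf.layers.dropout({rate: 0.2}));
      model.add(tf.layers.dense({units: 1}));
      model.summary();
      return model;
    }

Swapping this in for createModel() in run() lets you compare different capacities on the same train/test split.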

Upvotes: 0

Abhishek Verma

Reputation: 1729

Yes, this surely is a case of overfitting, because the number of samples in your data is too low (< 1000). Here is what you can do to avoid overfitting:

  1. Play around with dropout (a sketch follows this list).
  2. Reduce the number of units in the LSTM.
  3. Increasing the number of samples will surely help.
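A rough sketch of the first two points applied to the question's code. The helper names createModelWithMoreDropout and trainWithValidation, the unit count of 16, and the dropout rate of 0.4 are illustrative assumptions, and valInputs/valLabels are assumed to come from convertToTensor(validationData):

    // Hypothetical helper: fewer units and a higher dropout rate than the
    // original model; both values are knobs to tune, not fixed answers.
    function createModelWithMoreDropout(rate = 0.4) {
      const model = tf.sequential();
      model.add(tf.layers.lstm({units: 16, inputShape: [10, 1]}));
      model.add(tf.layers.dropout({rate: rate}));
      model.add(tf.layers.dense({units: 1}));
      return model;
    }

    // Hypothetical helper: pass the held-out set to model.fit so it reports
    // val_loss each epoch alongside the training loss.
    async function trainWithValidation(model, inputs, labels, valInputs, valLabels) {
      model.compile({
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
      });
      return model.fit(inputs, labels, {
        epochs: 10,
        batchSize: 32,
        validationData: [valInputs, valLabels],
      });
    }

A training loss that keeps falling while val_loss plateaus or rises is the clearest signal that the model is overfitting rather than learning the sequence.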

Upvotes: 1
