TensorFlow.js and complex datasets?

Question

I am taking my first steps into developing an app that utilizes TensorFlow (the TensorFlow.js library in particular).

I have walked through the examples and have it working if I only have two axis (progression, perceivedSkinAppearance) of data.

// Visualize Data ========================================================== //

function CreateModel() {
    // Create a sequential model
    const model = tf.sequential();

    // Add a single hidden layer
    model.add(tf.layers.dense({ inputShape: [1], units: 1, useBias: true }));

    // Add an output layer
    model.add(tf.layers.dense({ units: 1, useBias: true }));

    return model;
}





function ConvertToTensor(data) {
    return tf.tidy(() => {
        // Shuffle the data    
        tf.util.shuffle(data);

        // Convert data to Tensor
        const inputs = data.map(d => parseInt(d.progression));
        const labels = data.map(d => parseInt(d.perceivedSkinAppearance));

        const inputTensor = tf.tensor2d(inputs, [inputs.length, 1]);
        const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

        console.log(inputTensor);
        console.log(labelTensor);

        // Normalize the data to the range 0 - 1 using min-max scaling
        const inputMax = inputTensor.max();
        const inputMin = inputTensor.min();
        const labelMax = labelTensor.max();
        const labelMin = labelTensor.min();

        const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
        const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

        return {
            inputs: normalizedInputs,
            labels: normalizedLabels,
            inputMax,
            inputMin,
            labelMax,
            labelMin,
        }
    });
}




async function TrainModel(model, inputs, labels) {
    model.compile({
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
        metrics: ['mse'],
    });

    const batchSize = 32;
    const epochs = 50;

    return await model.fit(inputs, labels, {
        batchSize,
        epochs,
        shuffle: true,
        callbacks: tfvis.show.fitCallbacks(
            { name: 'Training Performance' },
            ['loss', 'mse'],
            { height: 200, callbacks: ['onEpochEnd'] }
        )
    });
}





function TestModel(model, inputData, normalizationData) {
    const { inputMax, inputMin, labelMin, labelMax } = normalizationData;

    // Generate predictions for a uniform range of numbers between 0 and 1;
    // We un-normalize the data by doing the inverse of the min-max scaling 
    // that we did earlier.
    const [xs, preds] = tf.tidy(() => {

        const xs = tf.linspace(0, 1, 100);
        const preds = model.predict(xs.reshape([100, 1]));

        const unNormXs = xs
            .mul(inputMax.sub(inputMin))
            .add(inputMin);

        const unNormPreds = preds
            .mul(labelMax.sub(labelMin))
            .add(labelMin);

        // Un-normalize the data
        return [unNormXs.dataSync(), unNormPreds.dataSync()];
    });


    const predictedPoints = Array.from(xs).map((val, i) => {
        return { x: val, y: preds[i] }
    });

    const originalPoints = inputData.map(d => ({
        x: parseInt(d.progression), 
        y: parseInt(d.perceivedSkinAppearance)
    }));

    tfvis.render.scatterplot(
        { name: 'Model Predictions vs Original Data' },
        { values: [originalPoints, predictedPoints], series: ['original', 'predicted'] },
        {
            xLabel: 'Progression',
            yLabel: 'Perceived Skin Appearance',
            height: 300
        }
    );
}





async function VisualizeData() {
    // Load and plot the original input data that we are going to train on.
    const data = await appData.read("conditions", "created");
    const values = data.map(d => ({
        x: (d.progression / 86400000),
        y: d.perceivedSkinAppearance
    }));

    tfvis.render.scatterplot(
        { name: 'Skin Appearance vs Progression' },
        { values },
        {
            xLabel: 'Progression',
            yLabel: 'Appearance',
            height: 300
        }
    );

    const model = CreateModel();
    tfvis.show.modelSummary({ name: 'Model Summary' }, model);

    const tensorData = ConvertToTensor(data);
    const { inputs, labels } = tensorData;

    await TrainModel(model, inputs, labels);
    console.log('Done Training');

    TestModel(model, data, tensorData);
}

I am having trouble wrapping my head around -- and the demos do not cover -- how to tackle this if I have a much more complex dataset, such as:

  {
  "a25bfa27-4447-3a54-d2c5-29685b0dbed3" : {
    "affectedAreas" : [ "361106d9-5bc1-42ab-a52d-8b23eb2ed923", "79916df1-99d8-4ec6-8bc0-531c9c9725c8", "23a220e8-cfff-4dd0-87c3-066f11d99506", "3df1c2a4-a7d5-4a8f-8753-eef9d3c44e76" ],
    "created" : "2019-07-29 18:58:37",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 18,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MTgzMDM=",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTI0OTk=",
    "notes" : "",
    "observed" : "2019-07-29 18:58:00",
    "observer" : "b2JzZXJ2ZXJ8fHw0WDlqT1Nlem10U0ltVkdRRWk4MEZKZHRoMEsz",
    "perceivedSkinAppearance" : "3",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 186544718618,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 109.4,
    "temperatureMinimum" : 102.99,
    "temperatureObserved" : 106.21,
    "triggersEncountered" : [ "1cfb8826-58ad-4168-905c-6f6150d3618e", "928915de-aadc-45e4-b386-4df7fcbf9787" ],
    "uvIndexObserved" : 11.31
  },
  "d6604849-a6ed-0fef-4541-ba6b65e8ffa2" : {
    "affectedAreas" : [ "361106d9-5bc1-42ab-a52d-8b23eb2ed923", "b0b72048-393f-4980-b649-c764aed50c1d", "3df1c2a4-a7d5-4a8f-8753-eef9d3c44e76" ],
    "created" : "2019-07-17 15:43:46",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 26,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MDYyMTg2Mjg5NDQ3",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTE4MDEyMTY3NzIx",
    "notes" : "",
    "observed" : "2019-07-17 15:43:00",
    "observer" : "b2JzZXJ2ZXJ8fHxGZkducU1tUVlGVE9QQUZ3Wjc3THpwMEFCNHMx",
    "perceivedSkinAppearance" : "3",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 185496227507,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 106,
    "temperatureMinimum" : 100,
    "temperatureObserved" : 103.15,
    "triggersEncountered" : [ "f756a7af-6a3d-4e48-998d-d706eac68e09" ],
    "uvIndexObserved" : 11.57
  },
  "fe5e995d-8b89-c6a7-23b5-3fb27112a92b" : {
    "created" : "2019-06-30 16:13:26",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 12,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MDY0Njc1MDIzMjAz",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTEyNTkxNDk3NTA0",
    "notes" : "",
    "observed" : "2019-06-30 16:13:00",
    "observer" : "b2JzZXJ2ZXJ8fHxGZkducU1tUVlGVE9QQUZ3Wjc3THpwMEFCNHMx",
    "perceivedSkinAppearance" : "1",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 184029207516,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 105.01,
    "temperatureMinimum" : 95,
    "temperatureObserved" : 99.95,
    "triggersEncountered" : [ "f756a7af-6a3d-4e48-998d-d706eac68e09" ],
    "uvIndexObserved" : 11.28
  }
}

Note: the obvious hashed values would be unhashed before actually using them, so don't panic about them being weird data types.

UPDATE I updated my code to reflect the suggested changes for mapping, and now getting errors on the CreateModel, TestModel, TrainModel methods as those models are apparently now not expecting my new data inputShape?

This is my updated code:

var mappingIndex = 0;
var mappingDictionary = []; 

function MapToDictionary(stringToFind, uniquePrepend) {
    var output = 0;
    if (stringToFind) 
    {
        if (uniquePrepend)
        {
            stringToFind = uniquePrepend + stringToFind;
        }

        var queryResult = mappingDictionary.filter(obj => Object.values(obj).some(val => val?val.toString().toLowerCase().includes(stringToFind):false))[0];
        if (queryResult) {
            output = queryResult["Key"];
        }
        else {
            mappingIndex = mappingIndex + 1;
            var mappingDictionaryEntry = {};
            mappingDictionaryEntry.Key = mappingIndex;
            mappingDictionaryEntry.Value = stringToFind;
            mappingDictionary.push(mappingDictionaryEntry);
            output = mappingIndex;
        }
        console.log(stringToFind + ": " + output);
        return output;
    }   
}


// Visualize Data ========================================================== //

function CreateModel() {
    // Create a sequential model
    const model = tf.sequential();

    // Add a single hidden layer
    model.add(tf.layers.dense({ inputShape: [3,16], units: 1, useBias: true }));

    // Add an output layer
    model.add(tf.layers.dense({ units: 1, useBias: true }));

    return model;
}





function ConvertToTensor(data) {
    return tf.tidy(() => {
        // Shuffle the data    
        tf.util.shuffle(data);

        console.log(data);
        // Convert data to Tensor
        const inputs = data.map(d => [
            MapToDictionary(d.affectedAreas, "affectedAreas"),
            MapToDictionary(d.gender, "gender"),
            parseInt(d.humidityObserved),
            parseInt(d.locationLatitude),
            parseInt(d.locationLongitude),
            parseInt(d.observed),
            parseInt(d.perceivedSkinAppearance),
            parseInt(d.perceivedSkinSensation),
            parseInt(d.perceivedSkinTexture),
            parseInt(d.progression),
            MapToDictionary(d.subject, "subject"),
            parseInt(d.temperatureMaximum),
            parseInt(d.temperatureMinimum),
            parseInt(d.temperatureObserved),
            MapToDictionary(d.triggersEncountered, "triggersEncountered"),
            parseInt(d.uvIndexObserved)
        ]);
        const labels = data.map(d => parseInt(d.progression));

        const inputTensor = tf.tensor2d(inputs);
        const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

        // Normalize the data to the range 0 - 1 using min-max scaling
        const inputMax = inputTensor.max();
        const inputMin = inputTensor.min();
        const labelMax = labelTensor.max();
        const labelMin = labelTensor.min();

        const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
        const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

        return {
            inputs: normalizedInputs,
            labels: normalizedLabels,
            inputMax,
            inputMin,
            labelMax,
            labelMin,
        }
    });
}




async function TrainModel(model, inputs, labels) {
    model.compile({
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
        metrics: ['mse'],
    });

    const batchSize = 32;
    const epochs = 50;

    return await model.fit(inputs, labels, {
        batchSize,
        epochs,
        shuffle: true,
        callbacks: tfvis.show.fitCallbacks(
            { name: 'Training Performance' },
            ['loss', 'mse'],
            { height: 200, callbacks: ['onEpochEnd'] }
        )
    });
}





function TestModel(model, inputData, normalizationData) {
    const { inputMax, inputMin, labelMin, labelMax } = normalizationData;

    // Generate predictions for a uniform range of numbers between 0 and 1;
    // We un-normalize the data by doing the inverse of the min-max scaling 
    // that we did earlier.
    const [xs, preds] = tf.tidy(() => {

        const xs = tf.linspace(0, 1, 100);
        const preds = model.predict(xs.reshape([100, 1]));

        const unNormXs = xs
            .mul(inputMax.sub(inputMin))
            .add(inputMin);

        const unNormPreds = preds
            .mul(labelMax.sub(labelMin))
            .add(labelMin);

        // Un-normalize the data
        return [unNormXs.dataSync(), unNormPreds.dataSync()];
    });


    const predictedPoints = Array.from(xs).map((val, i) => {
        return { x: val, y: preds[i] }
    });

    const originalPoints = inputData.map(d => ({
        x: parseInt(d.progression), 
        y: parseInt(d.perceivedSkinAppearance)
    }));

    tfvis.render.scatterplot(
        { name: 'Original vs. Predictions' },
        { values: [originalPoints, predictedPoints], series: ['original', 'predicted'] },
        {
            xLabel: 'Original',
            yLabel: 'Predicted',
            height: 300
        }
    );
}





async function VisualizeData() {
    // Load and plot the original input data that we are going to train on.
    const data = await appData.read("conditions", "created");
    const values = data.map(d => ({
        x: (d.progression / 86400000),
        y: d.perceivedSkinAppearance
    }));

    tfvis.render.scatterplot(
        { name: 'Skin Condition vs. Progression' },
        { values },
        {
            xLabel: 'Condition',
            yLabel: 'Progression',
            height: 300
        }
    );

    const model = CreateModel();
    tfvis.show.modelSummary({ name: 'Model Summary' }, model);

    const tensorData = ConvertToTensor(data);
    const { inputs, labels } = tensorData;

    await TrainModel(model, inputs, labels);
    console.log('Done Training');

    TestModel(model, data, tensorData);
}

The error I get is:

Uncaught (in promise) Error: Error when checking input: expected dense_Dense1_input to have 3 dimension(s). but got array with shape 3,16

TensorFlow.js and complex datasets?

Answers (1)

Related Questions