eat-sleep-code
eat-sleep-code

Reputation: 4855

TensorFlow.js and complex datasets?

I am taking my first steps into developing an app that utilizes TensorFlow (the TensorFlow.js library in particular).

I have walked through the examples and have it working if I only have two axes (progression, perceivedSkinAppearance) of data.

// Visualize Data ========================================================== //

// Builds a minimal linear-regression model: one scalar feature in,
// one predicted value out.
function CreateModel() {
    const model = tf.sequential({
        layers: [
            // Hidden layer: each training example is a single scalar.
            tf.layers.dense({ inputShape: [1], units: 1, useBias: true }),
            // Output layer: one regression value per example.
            tf.layers.dense({ units: 1, useBias: true }),
        ],
    });

    return model;
}





/**
 * Converts the raw records into normalized input/label tensors.
 *
 * @param {Array<Object>} data - records with numeric-ish `progression`
 *   and `perceivedSkinAppearance` fields; shuffled IN PLACE.
 * @returns {{inputs, labels, inputMax, inputMin, labelMax, labelMin}}
 *   normalized tensors plus the min/max bounds needed to un-normalize
 *   predictions later in TestModel.
 */
function ConvertToTensor(data) {
    // tf.tidy disposes every intermediate tensor created inside the
    // callback, keeping only what we return.
    return tf.tidy(() => {
        // Shuffle so training batches are not ordered by date.
        tf.util.shuffle(data);

        // Number() converts both numbers and numeric strings without the
        // silent decimal truncation (and missing radix) of parseInt.
        const inputs = data.map(d => Number(d.progression));
        const labels = data.map(d => Number(d.perceivedSkinAppearance));

        const inputTensor = tf.tensor2d(inputs, [inputs.length, 1]);
        const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

        // Min-max scale both axes to [0, 1]; keep the bounds so the
        // scaling can be inverted on predictions.
        const inputMax = inputTensor.max();
        const inputMin = inputTensor.min();
        const labelMax = labelTensor.max();
        const labelMin = labelTensor.min();

        const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
        const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

        return {
            inputs: normalizedInputs,
            labels: normalizedLabels,
            inputMax,
            inputMin,
            labelMax,
            labelMin,
        };
    });
}




/**
 * Compiles the model and fits it to the prepared tensors, streaming
 * loss/mse to a tfjs-vis panel after every epoch.
 *
 * @returns {Promise} the tf.History produced by model.fit.
 */
async function TrainModel(model, inputs, labels) {
    model.compile({
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
        metrics: ['mse'],
    });

    const fitOptions = {
        batchSize: 32,
        epochs: 50,
        shuffle: true,
        callbacks: tfvis.show.fitCallbacks(
            { name: 'Training Performance' },
            ['loss', 'mse'],
            { height: 200, callbacks: ['onEpochEnd'] }
        ),
    };

    return model.fit(inputs, labels, fitOptions);
}





// Sweeps the trained model over 100 evenly spaced normalized inputs,
// inverts the min-max scaling on both axes, and plots the predictions
// alongside the original data points.
function TestModel(model, inputData, normalizationData) {
    const { inputMax, inputMin, labelMin, labelMax } = normalizationData;

    const [xs, preds] = tf.tidy(() => {
        // 100 sample points spanning the normalized range [0, 1].
        const sampleCount = 100;
        const normXs = tf.linspace(0, 1, sampleCount);
        const normPreds = model.predict(normXs.reshape([sampleCount, 1]));

        // Invert the scaling applied in ConvertToTensor so both axes
        // are back in their original units.
        const unNormXs = normXs.mul(inputMax.sub(inputMin)).add(inputMin);
        const unNormPreds = normPreds.mul(labelMax.sub(labelMin)).add(labelMin);

        return [unNormXs.dataSync(), unNormPreds.dataSync()];
    });

    // Pair each swept x value with its prediction for plotting.
    const predictedPoints = Array.from(xs).map((val, i) => ({ x: val, y: preds[i] }));

    const originalPoints = inputData.map((d) => ({
        x: parseInt(d.progression, 10),
        y: parseInt(d.perceivedSkinAppearance, 10),
    }));

    tfvis.render.scatterplot(
        { name: 'Model Predictions vs Original Data' },
        { values: [originalPoints, predictedPoints], series: ['original', 'predicted'] },
        {
            xLabel: 'Progression',
            yLabel: 'Perceived Skin Appearance',
            height: 300,
        }
    );
}





// End-to-end driver: load the observations, chart the raw data, then
// build, train, and evaluate the model.
async function VisualizeData() {
    const data = await appData.read("conditions", "created");

    // Plot the raw observations (progression shown in days, converted
    // from milliseconds).
    const values = data.map((d) => ({
        x: d.progression / 86400000,
        y: d.perceivedSkinAppearance,
    }));
    tfvis.render.scatterplot(
        { name: 'Skin Appearance vs Progression' },
        { values },
        {
            xLabel: 'Progression',
            yLabel: 'Appearance',
            height: 300,
        }
    );

    const model = CreateModel();
    tfvis.show.modelSummary({ name: 'Model Summary' }, model);

    const tensorData = ConvertToTensor(data);
    await TrainModel(model, tensorData.inputs, tensorData.labels);
    console.log('Done Training');

    TestModel(model, data, tensorData);
}

I am having trouble wrapping my head around -- and the demos do not cover -- how to tackle this if I have a much more complex dataset, such as:

  {
  "a25bfa27-4447-3a54-d2c5-29685b0dbed3" : {
    "affectedAreas" : [ "361106d9-5bc1-42ab-a52d-8b23eb2ed923", "79916df1-99d8-4ec6-8bc0-531c9c9725c8", "23a220e8-cfff-4dd0-87c3-066f11d99506", "3df1c2a4-a7d5-4a8f-8753-eef9d3c44e76" ],
    "created" : "2019-07-29 18:58:37",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 18,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MTgzMDM=",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTI0OTk=",
    "notes" : "",
    "observed" : "2019-07-29 18:58:00",
    "observer" : "b2JzZXJ2ZXJ8fHw0WDlqT1Nlem10U0ltVkdRRWk4MEZKZHRoMEsz",
    "perceivedSkinAppearance" : "3",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 186544718618,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 109.4,
    "temperatureMinimum" : 102.99,
    "temperatureObserved" : 106.21,
    "triggersEncountered" : [ "1cfb8826-58ad-4168-905c-6f6150d3618e", "928915de-aadc-45e4-b386-4df7fcbf9787" ],
    "uvIndexObserved" : 11.31
  },
  "d6604849-a6ed-0fef-4541-ba6b65e8ffa2" : {
    "affectedAreas" : [ "361106d9-5bc1-42ab-a52d-8b23eb2ed923", "b0b72048-393f-4980-b649-c764aed50c1d", "3df1c2a4-a7d5-4a8f-8753-eef9d3c44e76" ],
    "created" : "2019-07-17 15:43:46",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 26,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MDYyMTg2Mjg5NDQ3",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTE4MDEyMTY3NzIx",
    "notes" : "",
    "observed" : "2019-07-17 15:43:00",
    "observer" : "b2JzZXJ2ZXJ8fHxGZkducU1tUVlGVE9QQUZ3Wjc3THpwMEFCNHMx",
    "perceivedSkinAppearance" : "3",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 185496227507,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 106,
    "temperatureMinimum" : 100,
    "temperatureObserved" : 103.15,
    "triggersEncountered" : [ "f756a7af-6a3d-4e48-998d-d706eac68e09" ],
    "uvIndexObserved" : 11.57
  },
  "fe5e995d-8b89-c6a7-23b5-3fb27112a92b" : {
    "created" : "2019-06-30 16:13:26",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 12,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MDY0Njc1MDIzMjAz",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTEyNTkxNDk3NTA0",
    "notes" : "",
    "observed" : "2019-06-30 16:13:00",
    "observer" : "b2JzZXJ2ZXJ8fHxGZkducU1tUVlGVE9QQUZ3Wjc3THpwMEFCNHMx",
    "perceivedSkinAppearance" : "1",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 184029207516,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 105.01,
    "temperatureMinimum" : 95,
    "temperatureObserved" : 99.95,
    "triggersEncountered" : [ "f756a7af-6a3d-4e48-998d-d706eac68e09" ],
    "uvIndexObserved" : 11.28
  }
}

Note: the obvious hashed values would be unhashed before actually using them, so don't panic about them being weird data types.


UPDATE: I updated my code to reflect the suggested changes for mapping, and I am now getting errors from the CreateModel, TrainModel, and TestModel methods — apparently the model is not expecting the inputShape of my new data.

This is my updated code:

let mappingIndex = 0;
const mappingDictionary = [];

/**
 * Maps a string (optionally namespaced by a prefix) to a stable numeric
 * id, assigning the next id on first sight.
 *
 * Fixes over the original:
 * - Falsy input now returns the reserved id 0 instead of `undefined`
 *   (the original declared `output = 0` but only returned inside the
 *   `if`, so empty fields produced `undefined` feature values).
 * - Lookup is an exact match on the stored Value. The original did a
 *   case-insensitive *substring* scan over every entry property
 *   (including the numeric Key) while the needle kept its original
 *   case, which could return the wrong id ("abc" matching "xabcy")
 *   and created duplicate entries for mixed-case strings.
 *
 * @param {string|Array} stringToFind - value to map; arrays (e.g.
 *   triggersEncountered) are coerced to their joined string form.
 * @param {string} [uniquePrepend] - namespace so identical raw values
 *   from different fields get distinct ids.
 * @returns {number} the id for this value (0 for empty/missing input).
 */
function MapToDictionary(stringToFind, uniquePrepend) {
    // Empty/undefined values all map to the reserved id 0.
    if (!stringToFind) {
        return 0;
    }

    const key = uniquePrepend
        ? uniquePrepend + stringToFind
        : String(stringToFind);

    const existing = mappingDictionary.find((entry) => entry.Value === key);
    if (existing) {
        return existing.Key;
    }

    mappingIndex += 1;
    mappingDictionary.push({ Key: mappingIndex, Value: key });
    return mappingIndex;
}


// Visualize Data ========================================================== //

// Builds the regression model for the 16-feature dataset.
function CreateModel() {
    // Create a sequential model
    const model = tf.sequential();

    // inputShape describes ONE example, not the whole dataset: each
    // example is a flat vector of 16 features, so the shape is [16].
    // Using [3, 16] made the layer expect rank-3 input (batch, 3, 16),
    // which produced the error "expected dense_Dense1_input to have
    // 3 dimension(s). but got array with shape 3,16".
    model.add(tf.layers.dense({ inputShape: [16], units: 1, useBias: true }));

    // Single-value regression output.
    model.add(tf.layers.dense({ units: 1, useBias: true }));

    return model;
}





/**
 * Converts the raw observation records into a normalized [n, 16] input
 * tensor and [n, 1] label tensor, plus the min/max bounds needed to
 * un-normalize predictions later.
 *
 * @param {Array<Object>} data - observation records; shuffled IN PLACE.
 * @returns {{inputs, labels, inputMax, inputMin, labelMax, labelMin}}
 */
function ConvertToTensor(data) {
    return tf.tidy(() => {
        // Shuffle so training batches are not ordered by date.
        tf.util.shuffle(data);

        // One 16-element feature vector per record. Continuous fields
        // use Number()/parseFloat so fractional parts (temperatures,
        // UV index, coordinates) are no longer truncated the way
        // parseInt truncated them, and the observed timestamp becomes
        // epoch milliseconds instead of parseInt("2019-07-29 ...")
        // silently yielding 2019.
        const inputs = data.map(d => [
            MapToDictionary(d.affectedAreas, "affectedAreas"),
            MapToDictionary(d.gender, "gender"),
            Number(d.humidityObserved),
            parseFloat(d.locationLatitude),
            parseFloat(d.locationLongitude),
            new Date(d.observed).getTime(),
            Number(d.perceivedSkinAppearance),
            Number(d.perceivedSkinSensation),
            Number(d.perceivedSkinTexture),
            Number(d.progression),
            MapToDictionary(d.subject, "subject"),
            Number(d.temperatureMaximum),
            Number(d.temperatureMinimum),
            Number(d.temperatureObserved),
            MapToDictionary(d.triggersEncountered, "triggersEncountered"),
            Number(d.uvIndexObserved)
        ]);
        const labels = data.map(d => Number(d.progression));

        const inputTensor = tf.tensor2d(inputs); // shape [n, 16]
        const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

        // Normalize to [0, 1] with min-max scaling.
        // NOTE(review): max()/min() here are global scalars, so all 16
        // feature columns share a single scale; per-column bounds
        // (inputTensor.max(0) / .min(0)) would likely train better but
        // changes the shape of the returned bounds — confirm callers
        // before switching.
        const inputMax = inputTensor.max();
        const inputMin = inputTensor.min();
        const labelMax = labelTensor.max();
        const labelMin = labelTensor.min();

        const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
        const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

        return {
            inputs: normalizedInputs,
            labels: normalizedLabels,
            inputMax,
            inputMin,
            labelMax,
            labelMin,
        };
    });
}




/**
 * Compiles and trains the model on the normalized tensors, reporting
 * loss/mse to a tfjs-vis panel at the end of each epoch.
 *
 * @returns {Promise} the tf.History produced by model.fit.
 */
async function TrainModel(model, inputs, labels) {
    const visCallbacks = tfvis.show.fitCallbacks(
        { name: 'Training Performance' },
        ['loss', 'mse'],
        { height: 200, callbacks: ['onEpochEnd'] }
    );

    model.compile({
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
        metrics: ['mse'],
    });

    return model.fit(inputs, labels, {
        batchSize: 32,
        epochs: 50,
        shuffle: true,
        callbacks: visCallbacks,
    });
}





// Plots model predictions over a sweep of a single normalized input
// against the original (progression, perceivedSkinAppearance) points.
//
// NOTE(review): this still probes the model with a rank-2 [100, 1]
// input (one feature per example). The updated CreateModel /
// ConvertToTensor pipeline feeds 16 features per example, so
// model.predict here will reject the input (or be meaningless) until
// xs is built as a [100, 16] tensor — confirm and update.
function TestModel(model, inputData, normalizationData) {
    // Min/max bounds captured during normalization in ConvertToTensor.
    const { inputMax, inputMin, labelMin, labelMax } = normalizationData;

    // Generate predictions for a uniform range of numbers between 0 and 1;
    // We un-normalize the data by doing the inverse of the min-max scaling 
    // that we did earlier.
    const [xs, preds] = tf.tidy(() => {

        // 100 evenly spaced values spanning the normalized range [0, 1].
        const xs = tf.linspace(0, 1, 100);
        const preds = model.predict(xs.reshape([100, 1]));

        // Invert the min-max scaling so both axes are in original units.
        const unNormXs = xs
            .mul(inputMax.sub(inputMin))
            .add(inputMin);

        const unNormPreds = preds
            .mul(labelMax.sub(labelMin))
            .add(labelMin);

        // Un-normalize the data
        return [unNormXs.dataSync(), unNormPreds.dataSync()];
    });


    // Pair each swept x with its predicted y for plotting.
    const predictedPoints = Array.from(xs).map((val, i) => {
        return { x: val, y: preds[i] }
    });

    const originalPoints = inputData.map(d => ({
        x: parseInt(d.progression), 
        y: parseInt(d.perceivedSkinAppearance)
    }));

    tfvis.render.scatterplot(
        { name: 'Original vs. Predictions' },
        { values: [originalPoints, predictedPoints], series: ['original', 'predicted'] },
        {
            xLabel: 'Original',
            yLabel: 'Predicted',
            height: 300
        }
    );
}





// End-to-end driver for the updated pipeline: load the observations,
// chart the raw data, then build, train, and evaluate the model.
async function VisualizeData() {
    const data = await appData.read("conditions", "created");

    // Plot the raw observations; progression is converted from
    // milliseconds to days for the x axis.
    const msPerDay = 86400000;
    tfvis.render.scatterplot(
        { name: 'Skin Condition vs. Progression' },
        {
            values: data.map((d) => ({
                x: d.progression / msPerDay,
                y: d.perceivedSkinAppearance,
            })),
        },
        {
            xLabel: 'Condition',
            yLabel: 'Progression',
            height: 300,
        }
    );

    const model = CreateModel();
    tfvis.show.modelSummary({ name: 'Model Summary' }, model);

    const tensorData = ConvertToTensor(data);
    const { inputs, labels } = tensorData;

    await TrainModel(model, inputs, labels);
    console.log('Done Training');

    TestModel(model, data, tensorData);
}

The error I get is:

Uncaught (in promise) Error: Error when checking input: expected dense_Dense1_input to have 3 dimension(s). but got array with shape 3,16

Upvotes: 3

Views: 593

Answers (1)

Thomas Dondorf
Thomas Dondorf

Reputation: 25240

Tensorflow.js works with vectors. Even in your simple example, you are creating a vector (a tensor) from an array of objects.

Code Sample

In your example, you are creating a rank-2 tensor (two dimensions), by using this code (simplified):

const inputs = [1,2,3]; // example input
const inputTensor = tf.tensor2d(inputs, [inputs.length, 1]); // Tensor: [[1], [2], [3]]

An alternative writing, making it more obvious what is happening would be the following code. In this case, we are already adding the second dimension to our JavaScript array, making it optional to pass it as second parameter (as it was necessary above).

const inputs = [[1], [2], [3]];
const inputTensor = tf.tensor2d(inputs); // Tensor: [[1], [2], [3]]

Adding more values

To add more values to your input vector, you can add them to your inputs variable:

const inputs = [[1, 4], [2, 5], [3, 6]];
const inputTensor = tf.tensor2d(inputs); // Tensor: [[1, 4], [2, 5], [3, 6]]

In your code, you would do that in the following line:

const inputs = data.map(d => [
  parseInt(d.progression),
  parseInt(d.anotherValue),
  parseInt(d.thirdAttribute)
]);

Instead of returning a single value, this would return an array for each line with three values. To adapt your code for three values, you now would have to change inputShape accordingly. Regarding the data types, you still need numbers to work with. That means all values of your input need to be converted to numbers.

Upvotes: 3

Related Questions