Reputation: 352
I am new to ML and only scratching its surface so I apologize if my question makes no sense.
I have a sequence of continuous measurements for some object (capturing its weight, size, temperature,...) and a discrete column determining the property of the object (a finite range of integers, say 0,1,2). This is the column that I would like to predict.
The data in question is indeed a sequence, since the value of the property column may vary depending on the context surrounding it, and there may also be some cyclical properties to the sequence itself. In short: the order of the data matters to me.
A small example is represented by the following table
Note that there are two rows containing equal data yet having a different value in the Property field. The idea is that the value of the property field may depend on the previous rows and hence the order of the rows is important.
My question is, what kind of approach/tools/techniques should I use to tackle this problem?
I am aware of classification algorithms but somehow I don't think they apply here given that the data in question is sequential and I wouldn't want to ignore this property.
I tried using a Keras LSTM, pretending that the Property column is continuous as well. However, the predictions I obtain this way are usually just a constant decimal value that makes no sense in this context.
What would be the best way to tackle this type of problem?
Upvotes: 3
Views: 3362
Reputation: 2982
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
# Regression variant: a single-time-step LSTM (TensorFlow 1.x graph API) is
# trained to predict the NEXT row's `Property` value, treated as a continuous
# number, from the current row's measurements.
df = pd.DataFrame({'Temperature': [183, 10.7, 24.3, 10.7],
                   'Weight': [8, 11.2, 14, 11.2],
                   'Size': [3.97, 7.88, 11, 7.88],
                   'Property': [0, 1, 2, 0]})
# print first 5 rows
df.head()

# Select columns by name instead of by position: the column order of a
# DataFrame built from a dict depends on the pandas/Python version
# (alphabetical in old releases, insertion order in current ones), so
# positional slicing can silently pick the wrong target column.
feature_cols = ['Temperature', 'Weight', 'Size']
target_col = 'Property'

# adjust target(t) to depend on input (t-1)
df[target_col] = df[target_col].shift(-1)

# parameters
time_steps = 1
inputs = len(feature_cols)
outputs = 1

# remove nans as a result of the shifted values
df = df.iloc[:-1, :]

# convert to numpy, features first and target last
df = df[feature_cols + [target_col]].values

# scale every column (features and target) to [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
df = scaler.fit_transform(df)

# X_y_split
train_X = df[:, :inputs]
train_y = df[:, inputs]

# reshape input to 3D array: (samples, time_steps, inputs)
train_X = train_X[:, None, :]

# reshape output to 2D array: (samples, outputs)
train_y = np.reshape(train_y, (-1, outputs))

learning_rate = 0.001
epochs = 500
length = train_X.shape[0]
# at least one sample per batch, otherwise range() below gets a zero step
batch_size = max(1, length // 2)
display = 100
neurons = 100

# clear graph (if any) before running
tf.reset_default_graph()

X = tf.placeholder(tf.float32, [None, time_steps, inputs])
y = tf.placeholder(tf.float32, [None, outputs])

# LSTM Cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units=neurons, activation=tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# pass into Dense layer
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)

# squared error loss or cost function for linear regression
loss = tf.losses.mean_squared_error(labels=y, predictions=out)

# optimizer to minimize cost
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

with tf.Session() as sess:
    # initialize all variables
    tf.global_variables_initializer().run()

    # Train the model
    for steps in range(epochs):
        # train data in mini-batches; slicing clamps the final batch, so
        # trailing samples are kept when length is not a multiple of
        # batch_size
        for start in range(0, length, batch_size):
            end = start + batch_size
            sess.run(training_op, feed_dict={X: train_X[start:end, :, :],
                                             y: train_y[start:end, :]})

        # print training performance
        if (steps + 1) % display == 0:
            # evaluate loss function on training set
            loss_fn = loss.eval(feed_dict={X: train_X, y: train_y})
            print('Step: {} \tTraining loss (mse): {}'.format((steps+1), loss_fn))

    # Test model (predictions are in the scaled [0, 1] target space)
    y_pred = sess.run(out, feed_dict={X: train_X})

plt.title("LSTM RNN Model", fontsize=12)
plt.plot(train_y, "b--", markersize=10, label="targets")
plt.plot(y_pred, "k--", markersize=10, label=" prediction")
plt.legend()
plt.xlabel("Period")
# needed for the figure to appear when run as a plain script
plt.show()
Output:
Step: 100 Training loss (mse): 0.15871836245059967
Step: 200 Training loss (mse): 0.03062588907778263
Step: 300 Training loss (mse): 0.0003023963945452124
Step: 400 Training loss (mse): 1.7712079625198385e-07
Step: 500 Training loss (mse): 8.750407516633363e-12
Assumptions: `Property` is the output for the sequence of inputs after 1 time step.

The code below models the use-case as a classification problem, where the RNN attempts to predict the class membership of a particular input sequence.
Again, I make the assumption that the target (`t`) depends on the input sequence at `t-1`.
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
# Classification variant: a single-time-step LSTM (TensorFlow 1.x graph API)
# is trained to predict the class of the NEXT row's `Property` from the
# current row's measurements, using one-hot targets and softmax cross-entropy.
df = pd.DataFrame({'Temperature': [183, 10.7, 24.3, 10.7],
                   'Weight': [8, 11.2, 14, 11.2],
                   'Size': [3.97, 7.88, 11, 7.88],
                   'Property': [0, 1, 2, 0]})
# print first 5 rows
df.head()

# Select columns by name instead of by position: the column order of a
# DataFrame built from a dict depends on the pandas/Python version
# (alphabetical in old releases, insertion order in current ones), so
# positional slicing can silently pick the wrong target column.
feature_cols = ['Temperature', 'Weight', 'Size']
target_col = 'Property'

# adjust target(t) to depend on input (t-1)
df[target_col] = df[target_col].shift(-1)

# parameters
time_steps = 1
inputs = len(feature_cols)
outputs = 3  # number of distinct Property classes (0, 1, 2)

# remove nans as a result of the shifted values
df = df.iloc[:-1, :]

# X_y_split (converted to numpy)
train_X = df[feature_cols].values
train_y = df[target_col].values

# scale the features to [0, 1]; the class labels are left untouched
scaler = MinMaxScaler(feature_range=(0, 1))
train_X = scaler.fit_transform(train_X)

# reshape input to 3D array: (samples, time_steps, inputs)
train_X = train_X[:, None, :]

# one-hot encode the outputs
onehot_encoder = OneHotEncoder()
encode_categorical = train_y.reshape(len(train_y), 1)
train_y = onehot_encoder.fit_transform(encode_categorical).toarray()

learning_rate = 0.001
epochs = 500
length = train_X.shape[0]
# at least one sample per batch, otherwise range() below gets a zero step
batch_size = max(1, length // 2)
display = 100
neurons = 100

# clear graph (if any) before running
tf.reset_default_graph()

X = tf.placeholder(tf.float32, [None, time_steps, inputs])
y = tf.placeholder(tf.float32, [None, outputs])

# LSTM Cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units=neurons, activation=tf.nn.relu)
cell_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# pass into Dense layer; `out` holds unnormalised class scores (logits)
stacked_outputs = tf.reshape(cell_outputs, [-1, neurons])
out = tf.layers.dense(inputs=stacked_outputs, units=outputs)

# softmax cross-entropy loss for multi-class classification
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=y, logits=out))

# optimizer to minimize cost
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

# Each tf.metrics.* call returns a (value, update_op) pair; index [1] (the
# update op) is what yields a fresh figure for the batch being fed.
# NOTE(review): per the TF 1.x docs, tf.metrics.precision / recall cast
# labels and predictions to bool, so with argmax class ids they score
# "class 0 vs. non-zero class" rather than each class separately - confirm
# this is the metric you want for a 3-class problem.
accuracy = tf.metrics.accuracy(labels=tf.argmax(y, 1),
                               predictions=tf.argmax(out, 1),
                               name="accuracy")
precision = tf.metrics.precision(labels=tf.argmax(y, 1),
                                 predictions=tf.argmax(out, 1),
                                 name="precision")
recall = tf.metrics.recall(labels=tf.argmax(y, 1),
                           predictions=tf.argmax(out, 1),
                           name="recall")
# F1 is the harmonic mean of precision and recall (not accuracy and recall)
f1 = 2 * precision[1] * recall[1] / (precision[1] + recall[1])

with tf.Session() as sess:
    # initialize all variables (tf.metrics keeps its counters in local ones)
    tf.global_variables_initializer().run()
    tf.local_variables_initializer().run()

    # Train the model
    for steps in range(epochs):
        # train data in mini-batches; slicing clamps the final batch, so
        # trailing samples are kept when length is not a multiple of
        # batch_size
        for start in range(0, length, batch_size):
            end = start + batch_size
            sess.run(training_op, feed_dict={X: train_X[start:end, :, :],
                                             y: train_y[start:end, :]})

        # print training performance
        if (steps + 1) % display == 0:
            # evaluate loss function on training set
            loss_fn = loss.eval(feed_dict={X: train_X, y: train_y})
            print('Step: {} \tTraining loss: {}'.format((steps+1), loss_fn))

    # evaluate model accuracy; results get their own names so the metric
    # tensors defined above are not overwritten
    acc, prec, rec, f1_score = sess.run([accuracy, precision, recall, f1],
                                        feed_dict={X: train_X, y: train_y})

    print('\nEvaluation on training set')
    print('Accuracy:', acc[1])
    print('Precision:', prec[1])
    print('Recall:', rec[1])
    print('F1 score:', f1_score)
Step: 100 Training loss: 0.5373622179031372
Step: 200 Training loss: 0.33380019664764404
Step: 300 Training loss: 0.176949605345726
Step: 400 Training loss: 0.0781424418091774
Step: 500 Training loss: 0.0373661033809185
Evaluation on training set
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 score: 1.0
Upvotes: 5