Reputation: 21
I created a multiple input one output LSTM that estimated the total price with a dataset of daily room rates for a hotel by month, but the model I created doesn't work well. Below I shared the model's code and the link to the data set.
data = pd.read_csv("/content/drive/My Drive/hotels.csv")
data
new_data = data.loc[:,['date','days','price','total']]
new_data.info()
date = new_data.date.values
dates = []
for i in date:
dates.append(i.split('/')[0])
new_data['date'] = dates
new_data
new_data = new_data.astype('float32')
new_data.info()
import pickle
filehandler = open(b"Hotels.obj","wb")
pickle.dump(new_data,filehandler)
file = open("/content/Hotels.obj",'rb')
object_file = pickle.load(file)
object_file
from math import sqrt
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Concatenate
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop,Adam
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import datetime
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from packaging import version
print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >= 2, \
"This notebook requires TensorFlow 2.0 or above."
file = open('/content/Hotels.obj', 'rb')
scaler = MinMaxScaler(feature_range=(0, 1))
train_size = int(len(object_file) * 0.76)
test_size = len(object_file) - train_size
days = object_file["days"].values.reshape(-1,1)
price = object_file["price"].values.reshape(-1,1)
total = object_file["total"].values.reshape(-1,1)
date = object_file["date"].values.reshape(-1,1)
days_ = scaler.fit_transform(days)
total_ = scaler.fit_transform(total)
price_ = scaler.fit_transform(price)
date_ = scaler.fit_transform(date)
days_train = days_[0:train_size].reshape(train_size,1,1)
days_test = days_[train_size:len(days_)].reshape(test_size,1,1)
date_train = date_[0:train_size].reshape(train_size,1,1)
date_test = date_[train_size:len(days_)].reshape(test_size,1,1)
price_train = price_[0:train_size].reshape(train_size,1,1)
price_test = price_[train_size:len(price_)].reshape(test_size,1,1)
total_train = total_[0:train_size].reshape(train_size,1)
total_test = total_[train_size:len(total_)].reshape(test_size,1)
def buildModel(dataLength,labelLength):
date = tf.keras.Input(shape=(1,1),name='date')
days = tf.keras.Input(shape=(1,1),name='days')
price = tf.keras.Input(shape=(1,1),name='price')
dateLayers = LSTM(100,return_sequences=False)(date)
daysLayers = LSTM(100,return_sequences=False)(days)
priceLayers = LSTM(100,return_sequences=False)(price)
output = tf.keras.layers.concatenate(inputs=[dateLayers,daysLayers, priceLayers],axis=1)
output = Dense(labelLength,activation='relu',name='weightedAverage_output_3')(output)
model = Model(inputs=[date,days,price],outputs=[output])
optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
model.compile(optimizer=optimizer,loss='mse',metrics=['accuracy'])
return model
object_file = pickle.load(file)
logdir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
rnn = buildModel(train_size,1)
rnn.fit([date_train,days_train,price_train],
[total_train],
validation_data = ([date_test,days_test,price_test],[total_test]),
epochs = 1,
batch_size = 10,
callbacks=[tensorboard_callback]
)
result = rnn.predict([date_test,days_test,price_test])
scaler.inverse_transform(result)
When I increase the number of epoch, the model is being overfit.I can't get the result I want.How can I do this?
Data set link : https://www.kaggle.com/leomauro/argodatathon2019#hotels.csv
Upvotes: 2
Views: 5587
Reputation: 36584
Your results are poor because your metrics is accuracy
. If I understand correctly, you're predicting a continuous variable — you're not classifying. So, it makes no sense to look at accuracy.
Metrics should be mae
for mean absolute error. I think you'll be satisfied with your model performance then.
Re-scaling your target makes no sense here. It's the inner workings of the neural network that prefer an input between 0 and 1.
Upvotes: 2