Reputation: 1071
I'm learning Tensorflow. The following is my code. The code is building a linear regression model using some features and trying to predict the MPG (fuel expense).
The first part (Dataset preparation) of the code prepares the dataset for training. The second part (Begin Tensorflow) tries to build and train a linear regressor.
The problem I encountered is when I call the train function on the linear regressor, an error was thrown...
I don't know how to fix this error. And I don't know why "unhashable list" affects the training.
Please provide some insight on this. Thanks.
from __future__ import absolute_import, division, print_function
import pathlib
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
# tf.enable_eager_execution() # turn eager model on; this should only be called ONCE!
print(tf.__version__)
#-----------------------------------------------------
## Dataset preparation
# read dataset and preview
dataset_path = keras.utils.get_file("auto-mpg.data", "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(dataset_path, names=column_names,
na_values = "?", comment='\t',
sep=" ", skipinitialspace=True)
dataset = raw_dataset.copy()
# erase NaN rows
dataset = dataset.dropna()
# Origin column is not magnitude meaningful, don't use this as feature!
origin = dataset.pop('Origin')
# Separate train & test dataset
dataset_train = dataset.sample(frac=0.8, random_state = 0)
dataset_test = dataset.drop(dataset_train.index)
#-----------------------------------------------------
## Begin Tensorflow
# build input fn
def train_input_fn(df, label_name):
"""
Argus:
df: pandas dataframe
label_name: name of label column
return:
A function: <function tensorflow.python.estimator.inputs.pandas_io.input_fn>
"""
return tf.estimator.inputs.pandas_input_fn(
x = df,
y = df[label_name],
batch_size = 32,
num_epochs = 5,
shuffle = True,
queue_capacity = 1000,
num_threads = 1
)
# define model
feature_names = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year']
feature_cols_tensor = [tf.feature_column.numeric_column(feature_names)] # turn the string list into tensor object
linear_regressor = tf.estimator.LinearRegressor(feature_columns = feature_cols_tensor)
linear_regressor.train(
train_input_fn(dataset_train, 'MPG'),
steps = 100
)
The following is the error messages
INFO:tensorflow:Calling model_fn.
TypeErrorTraceback (most recent call last)
<ipython-input-14-c1814cca00b6> in <module>()
----> 1 linear_regressor.train(train_input_fn(dataset_train_norm, 'MPG'), steps = 100)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
361
362 saving_listeners = _check_listeners_type(saving_listeners)
--> 363 loss = self._train_model(input_fn, hooks, saving_listeners)
364 logging.info('Loss for final step: %s.', loss)
365 return self
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _train_model(self, input_fn, hooks, saving_listeners)
841 return self._train_model_distributed(input_fn, hooks, saving_listeners)
842 else:
--> 843 return self._train_model_default(input_fn, hooks, saving_listeners)
844
845 def _train_model_default(self, input_fn, hooks, saving_listeners):
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _train_model_default(self, input_fn, hooks, saving_listeners)
854 worker_hooks.extend(input_hooks)
855 estimator_spec = self._call_model_fn(
--> 856 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
857 return self._train_with_estimator_spec(estimator_spec, worker_hooks,
858 hooks, global_step_tensor,
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _call_model_fn(self, features, labels, mode, config)
829
830 logging.info('Calling model_fn.')
--> 831 model_fn_results = self._model_fn(features=features, **kwargs)
832 logging.info('Done calling model_fn.')
833
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/linear.pyc in _model_fn(features, labels, mode, config)
430 optimizer=optimizer,
431 partitioner=partitioner,
--> 432 config=config)
433
434 super(LinearRegressor, self).__init__(
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/linear.pyc in _linear_model_fn(features, labels, mode, head, feature_columns, optimizer, partitioner, config)
155 logit_fn = _linear_logit_fn_builder(
156 units=head.logits_dimension, feature_columns=feature_columns)
--> 157 logits = logit_fn(features=features)
158
159 return head.create_estimator_spec(
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/linear.pyc in linear_logit_fn(features)
96 feature_columns=feature_columns,
97 units=units,
---> 98 cols_to_vars=cols_to_vars)
99 bias = cols_to_vars.pop('bias')
100 if units > 1:
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.pyc in linear_model(features, feature_columns, units, sparse_combiner, weight_collections, trainable, cols_to_vars)
409 nor `_CategoricalColumn`.
410 """
--> 411 feature_columns = _clean_feature_columns(feature_columns)
412 for column in feature_columns:
413 if not isinstance(column, (_DenseColumn, _CategoricalColumn)):
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.pyc in _clean_feature_columns(feature_columns)
2231 name_to_column = dict()
2232 for column in feature_columns:
-> 2233 if column.name in name_to_column:
2234 raise ValueError('Duplicate feature column name found for columns: {} '
2235 'and {}. This usually means that these columns refer to '
TypeError: unhashable type: 'list'
Upvotes: 1
Views: 4292
Reputation: 1190
The problem is that you provide the tensor flow feature column function with a list, but it takes a single string denoting the name of the column.
Replace the line:
feature_cols_tensor = [tf.feature_column.numeric_column(feature_names)]
With:
feature_cols_tensor = list(map(tf.feature_column.numeric_column, feature_names))
Your column Model Year
seems to be a typo: I only find ModelYear
without the space in your raw data.
Upvotes: 1