Fei
Fei

Reputation: 1071

Tensorflow throws "TypeError: unhashable type: 'list'" error

I'm learning TensorFlow. The following code builds a linear regression model that uses several features to predict MPG (miles per gallon, i.e. fuel efficiency).

The first part (Dataset preparation) of the code prepares the dataset for training. The second part (Begin Tensorflow) tries to build and train a linear regressor.

The problem is that when I call the train function on the linear regressor, an error is thrown (see the traceback below).

I don't know how to fix this error. And I don't know why "unhashable list" affects the training.

Please provide some insight on this. Thanks.

from __future__ import absolute_import, division, print_function
import pathlib
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
# tf.enable_eager_execution() # turn eager mode on; this should only be called ONCE!
print(tf.__version__)
#-----------------------------------------------------
## Dataset preparation
# Fetch the Auto MPG data file; dataset_path is then handed to pd.read_csv below
dataset_path = keras.utils.get_file("auto-mpg.data", "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")

# Column names are supplied by hand (passed as names= to read_csv)
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Model Year', 'Origin'] 
# "?" is read as a missing value, fields are separated by spaces, and
# everything after a tab character on a line is ignored (comment='\t')
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                      na_values = "?", comment='\t',
                      sep=" ", skipinitialspace=True)
# Work on a copy so raw_dataset keeps the data exactly as it was read
dataset = raw_dataset.copy()

# Drop every row that contains a NaN (i.e. a "?" in the raw file)
dataset = dataset.dropna()

# Origin column is not magnitude meaningful, don't use this as feature!
# pop() removes the column from dataset and returns it
origin = dataset.pop('Origin')

# Separate train & test dataset: a random 80% of the rows (fixed seed) for
# training, and the rows not selected for training as the test set
dataset_train = dataset.sample(frac=0.8, random_state = 0)
dataset_test  = dataset.drop(dataset_train.index)

#-----------------------------------------------------
## Begin Tensorflow 
# build input fn
def train_input_fn(df, label_name):
  """Create a training input function from a pandas dataframe.

  Args:
    df: pandas dataframe; it is passed whole as the feature source, so the
      label column is also present among the features.
    label_name: name of the column in `df` to use as the label.

  Returns:
    The input function built by `tf.estimator.inputs.pandas_input_fn`,
    configured for shuffled batches of 32 rows over 5 epochs.
  """
  labels = df[label_name]
  make_input_fn = tf.estimator.inputs.pandas_input_fn
  return make_input_fn(
    x=df,
    y=labels,
    batch_size=32,
    num_epochs=5,
    shuffle=True,
    queue_capacity=1000,
    num_threads=1,
  )

# define model
feature_names = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year'] 
# NOTE(review): the whole feature_names list is passed to a single
# numeric_column call here, so one feature column is created whose key is the
# list itself (not one column per name, and not a tensor).
feature_cols_tensor = [tf.feature_column.numeric_column(feature_names)]  # wraps the single resulting column in a list
linear_regressor = tf.estimator.LinearRegressor(feature_columns = feature_cols_tensor)

# Train for 100 steps on the training split, with 'MPG' as the label column
linear_regressor.train(
  train_input_fn(dataset_train, 'MPG'), 
  steps = 100
)

The following is the error message:

INFO:tensorflow:Calling model_fn.

TypeErrorTraceback (most recent call last)
<ipython-input-14-c1814cca00b6> in <module>()
----> 1 linear_regressor.train(train_input_fn(dataset_train_norm, 'MPG'), steps = 100)

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
    361 
    362     saving_listeners = _check_listeners_type(saving_listeners)
--> 363     loss = self._train_model(input_fn, hooks, saving_listeners)
    364     logging.info('Loss for final step: %s.', loss)
    365     return self

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _train_model(self, input_fn, hooks, saving_listeners)
    841       return self._train_model_distributed(input_fn, hooks, saving_listeners)
    842     else:
--> 843       return self._train_model_default(input_fn, hooks, saving_listeners)
    844 
    845   def _train_model_default(self, input_fn, hooks, saving_listeners):

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _train_model_default(self, input_fn, hooks, saving_listeners)
    854       worker_hooks.extend(input_hooks)
    855       estimator_spec = self._call_model_fn(
--> 856           features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
    857       return self._train_with_estimator_spec(estimator_spec, worker_hooks,
    858                                              hooks, global_step_tensor,

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _call_model_fn(self, features, labels, mode, config)
    829 
    830     logging.info('Calling model_fn.')
--> 831     model_fn_results = self._model_fn(features=features, **kwargs)
    832     logging.info('Done calling model_fn.')
    833 

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/linear.pyc in _model_fn(features, labels, mode, config)
    430           optimizer=optimizer,
    431           partitioner=partitioner,
--> 432           config=config)
    433 
    434     super(LinearRegressor, self).__init__(

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/linear.pyc in _linear_model_fn(features, labels, mode, head, feature_columns, optimizer, partitioner, config)
    155     logit_fn = _linear_logit_fn_builder(
    156         units=head.logits_dimension, feature_columns=feature_columns)
--> 157     logits = logit_fn(features=features)
    158 
    159     return head.create_estimator_spec(

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/canned/linear.pyc in linear_logit_fn(features)
     96         feature_columns=feature_columns,
     97         units=units,
---> 98         cols_to_vars=cols_to_vars)
     99     bias = cols_to_vars.pop('bias')
    100     if units > 1:

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.pyc in linear_model(features, feature_columns, units, sparse_combiner, weight_collections, trainable, cols_to_vars)
    409       nor `_CategoricalColumn`.
    410   """
--> 411   feature_columns = _clean_feature_columns(feature_columns)
    412   for column in feature_columns:
    413     if not isinstance(column, (_DenseColumn, _CategoricalColumn)):

/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.pyc in _clean_feature_columns(feature_columns)
   2231   name_to_column = dict()
   2232   for column in feature_columns:
-> 2233     if column.name in name_to_column:
   2234       raise ValueError('Duplicate feature column name found for columns: {} '
   2235                        'and {}. This usually means that these columns refer to '

TypeError: unhashable type: 'list'

Upvotes: 1

Views: 4292

Answers (1)

Falco Winkler
Falco Winkler

Reputation: 1190

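The problem is that you pass a list to the TensorFlow feature-column function `tf.feature_column.numeric_column`, but it takes a single string denoting the name of one column. The list ends up as the column's name, and a list cannot be hashed when TensorFlow looks that name up in a dictionary (`column.name in name_to_column` in the traceback), hence "unhashable type: 'list'".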

Replace the line:

feature_cols_tensor = [tf.feature_column.numeric_column(feature_names)]

With:

feature_cols_tensor = list(map(tf.feature_column.numeric_column, feature_names))

Your column Model Year seems to be a typo: I only find ModelYear without the space in your raw data.

Upvotes: 1

Related Questions