Philphed

Reputation: 51

DNNClassifier: 'DataFrame' object has no attribute 'dtype'

I am trying to run a TensorFlow DNNClassifier model with some data that I read from a CSV. Even though I converted the datatype of each column to float32, I keep getting the 'DataFrame' object has no attribute 'dtype' error. I would really appreciate it if you could help me.

Data format: 27 columns (23 input features, 4 class columns)

Thank you

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline 

factors = pd.read_csv('xxx.csv')

#Formatting data to float32
factors['1'] = factors['1'].astype('float32')
factors['2'] = factors['2'].astype('float32')
...
factors['27'] = factors['27'].astype('float32')

#Definition of in- and output
feat_data = factors[['1', '2', ... '23']]
labels = factors[['24', '25','26', '27']]

#Train-Test Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(feat_data,labels, test_size=0.3, random_state=101)

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaled_x_train = scaler.fit_transform(X_train)
scaled_x_test = scaler.transform(X_test)

#Model
from tensorflow import estimator 

feat_cols = [tf.feature_column.numeric_column('x', shape=[23], dtype=tf.float32)]

deep_model = estimator.DNNClassifier(hidden_units=[23, 23, 23],
                                     feature_columns=feat_cols,
                                     n_classes=4,
                                     optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.01))

input_fn = estimator.inputs.numpy_input_fn(x={'x': scaled_x_train}, y=y_train, shuffle=True, batch_size=10, num_epochs=5)

deep_model.train(input_fn=input_fn,steps=50)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-169-9b2e050e4e40> in <module>()
----> 1 deep_model.train(input_fn=input_fn,steps=50)

~\Anaconda\envs\tfdeeplearning\lib\site-packages\tensorflow\python\estimator\estimator.py in train(self, input_fn, hooks, steps, max_steps)
    239       hooks.append(training.StopAtStepHook(steps, max_steps))
    240 
 --> 241     loss = self._train_model(input_fn=input_fn, hooks=hooks)
    242     logging.info('Loss for final step: %s.', loss)
    243     return self

~\Anaconda\envs\tfdeeplearning\lib\site-packages\tensorflow\python\estimator\estimator.py in _train_model(self, input_fn, hooks)
    626       global_step_tensor = self._create_and_assert_global_step(g)
    627       features, labels = self._get_features_and_labels_from_input_fn(
--> 628           input_fn, model_fn_lib.ModeKeys.TRAIN)
    629       estimator_spec = self._call_model_fn(features, labels,
    630                                            model_fn_lib.ModeKeys.TRAIN)

~\Anaconda\envs\tfdeeplearning\lib\site-packages\tensorflow\python\estimator\estimator.py in _get_features_and_labels_from_input_fn(self, input_fn, mode)
    497 
    498   def _get_features_and_labels_from_input_fn(self, input_fn, mode):
--> 499     result = self._call_input_fn(input_fn, mode)
    500     if isinstance(result, (list, tuple)):
    501       if len(result) != 2:

~\Anaconda\envs\tfdeeplearning\lib\site-packages\tensorflow\python\estimator\estimator.py in _call_input_fn(***failed resolving arguments***)
    583       kwargs['config'] = self.config
    584     with ops.device('/cpu:0'):
--> 585       return input_fn(**kwargs)
    586 
    587   def _call_model_fn(self, features, labels, mode):

~\Anaconda\envs\tfdeeplearning\lib\site-packages\tensorflow\python\estimator\inputs\numpy_io.py in input_fn()
    122         num_threads=num_threads,
    123         enqueue_size=batch_size,
--> 124         num_epochs=num_epochs)
    125 
    126     features = (queue.dequeue_many(batch_size) if num_epochs is None

~\Anaconda\envs\tfdeeplearning\lib\site-packages\tensorflow\python\estimator\inputs\queues\feeding_functions.py in _enqueue_data(data, capacity, shuffle, min_after_dequeue, num_threads, seed, name, enqueue_size, num_epochs)
    315     elif isinstance(data, collections.OrderedDict):
    316       types = [dtypes.int64] + [
--> 317           dtypes.as_dtype(col.dtype) for col in data.values()
    318       ]
    319       queue_shapes = [()] + [col.shape[1:] for col in data.values()]

~\Anaconda\envs\tfdeeplearning\lib\site-packages\tensorflow\python\estimator\inputs\queues\feeding_functions.py in <listcomp>(.0)
    315     elif isinstance(data, collections.OrderedDict):
    316       types = [dtypes.int64] + [
--> 317           dtypes.as_dtype(col.dtype) for col in data.values()
    318       ]
    319       queue_shapes = [()] + [col.shape[1:] for col in data.values()]

 ~\Anaconda\envs\tfdeeplearning\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
   3079             if name in self._info_axis:
   3080                 return self[name]
-> 3081             return object.__getattribute__(self, name)
   3082 
   3083     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'dtype'

Upvotes: 5

Views: 20267

Answers (1)

Mikhail Berlinkov

Reputation: 1624

TensorFlow expects NumPy arrays here, not pandas DataFrames: a NumPy array has a `dtype` attribute, while a DataFrame does not (it only has `dtypes`, one per column). So you should pass `df.values` instead of `df` to the TensorFlow functions.
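For example, using the variable names from the question (a minimal sketch, not tested against your data): `scaled_x_train` is already a NumPy array because `MinMaxScaler` returns ndarrays, but `y_train` is still a DataFrame, which is what trips up `numpy_input_fn`. Converting it with `.values` avoids the error:

    # Sketch of the fix, assuming the variables defined in the question.
    # scaled_x_train is already an ndarray (MinMaxScaler returns arrays);
    # y_train is a DataFrame, so .values converts it to a NumPy array
    # before it reaches numpy_input_fn.
    input_fn = estimator.inputs.numpy_input_fn(
        x={'x': scaled_x_train},
        y=y_train.values,
        shuffle=True,
        batch_size=10,
        num_epochs=5)

    deep_model.train(input_fn=input_fn, steps=50)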

Upvotes: 9

Related Questions