Reputation: 2793
While solving a problem, I am trying to use dtype='category' in pd.read_csv, but I get an error. I have read the previous answers and applied them, but none of them solves the problem because they are all very old. My code is:
train = pd.read_csv("C:/shubhamprojectwork/Kagglecomp/livecomp/My music recomment/train.csv/train.csv", dtype={'msno' : 'category','song_id' : 'category','source_system_tab' : 'category','source_screen_name' : 'category','source_type' : 'category','target' : np.uint8})
Error is
TypeError Traceback (most recent call last)
<ipython-input-47-7d016844c8d7> in <module>()
1
2 #train = pd.read_csv(data_path + 'train.csv', dtype={'msno' : category,'song_id' : 'category','source_system_tab' : 'category','source_screen_name' : 'category','source_type' : 'category','target' : np.uint8})
----> 3 train = pd.read_csv(data_path + 'train.csv', dtype={'msno' : 'category'})
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
560 skip_blank_lines=skip_blank_lines)
561
--> 562 return _read(filepath_or_buffer, kwds)
563
564 parser_f.__name__ = name
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
313
314 # Create the parser.
--> 315 parser = TextFileReader(filepath_or_buffer, **kwds)
316
317 if (nrows is not None) and (chunksize is not None):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in __init__(self, f, engine, **kwds)
643 self.options['has_index_names'] = kwds['has_index_names']
644
--> 645 self._make_engine(self.engine)
646
647 def close(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in _make_engine(self, engine)
797 def _make_engine(self, engine='c'):
798 if engine == 'c':
--> 799 self._engine = CParserWrapper(self.f, **self.options)
800 else:
801 if engine == 'python':
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in __init__(self, src, **kwds)
1211 kwds['allow_leading_cols'] = self.index_col is not False
1212
-> 1213 self._reader = _parser.TextReader(src, **kwds)
1214
1215 # XXX
pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas\parser.c:4625)()
TypeError: data type "category" not understood
Upvotes: 1
Views: 2956
Reputation: 1080
train = pd.read_csv("C:/shubhamprojectwork/Kagglecomp/livecomp/My music recomment/train.csv/train.csv", dtype={'msno' : 'category','song_id' : 'category','source_system_tab' : 'category','source_screen_name' : 'category','source_type' : 'category','target' : np.uint8})
Use 'category' in quotes (as a string), as shown above.
Upvotes: 1