Jsevillamol
Jsevillamol

Reputation: 2563

TypeError: to_list_if_array() got an unexpected keyword argument 'convert_dtype'

I am trying to load some data from a Google Sheet into Colab, transform it into a pandas DataFrame, and then plot it using Altair. But I am getting an unexpected error: TypeError: to_list_if_array() got an unexpected keyword argument 'convert_dtype'.

What is going on? Is this because I am converting the columns to numeric data? I tried removing the conversion and the error persisted. When I draw a scatter plot using one of Altair's built-in example datasets it works, so it must be an issue with my dataset. But all the dtypes in the dataset seem correct, as do their entries.

# Import data from google sheet
# Runs inside Google Colab: authenticate the current user first, then build
# gspread credentials from the application-default credentials.
from google.colab import auth
auth.authenticate_user()

import gspread
from oauth2client.client import GoogleCredentials

gc = gspread.authorize(GoogleCredentials.get_application_default())
# Open the spreadsheet by title and take its first worksheet.
worksheet = gc.open('My cool spreadsheet').sheet1

# get_all_values gives a list of rows.
# (The header row is included as the first element; it is split off below.)
rows = worksheet.get_all_values()

# Convert to a DataFrame
import pandas as pd
df = pd.DataFrame.from_records(rows)

# Use first row as column names
# NOTE(review): the labels are taken verbatim from the sheet's first row, so
# any repeated (or repeated blank) header cell yields duplicate column labels.
# The traceback below goes from `df[col_name].apply(...)` into
# pandas/core/frame.py `apply`, i.e. `df[col_name]` was a DataFrame rather
# than a Series -- which is what pandas returns for a duplicated label.
# Presumably that is the cause of the TypeError; verify with
# `df.columns[df.columns.duplicated()]`.
df = df.rename(columns=df.iloc[0])
# Drop the row (index label 0) that held the header values.
df = df.drop(0)

# Encode some columns as numeric types
# errors='coerce' turns entries that cannot be parsed as numbers into NaN
# instead of raising.
df['Victims'] = pd.to_numeric(df['Victims'], errors='coerce')
df['Starting year'] = pd.to_numeric(df['Starting year'], errors='coerce')

# Plot
import altair as alt

# Scatter plot of 'Victims' against 'Starting year'; the whole DataFrame is
# handed to Altair, so every column is sanitized, not only the two plotted.
alt.Chart(df).mark_point().encode(
  x=alt.X('Starting year'),
  y=alt.Y('Victims'),
)

Error trace:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/altair/vegalite/v4/api.py in to_dict(self, *args, **kwargs)
    361         copy = self.copy(deep=False)
    362         original_data = getattr(copy, "data", Undefined)
--> 363         copy.data = _prepare_data(original_data, context)
    364 
    365         if original_data is not Undefined:

12 frames
/usr/local/lib/python3.7/dist-packages/altair/vegalite/v4/api.py in _prepare_data(data, context)
     82     # convert dataframes  or objects with __geo_interface__ to dict
     83     if isinstance(data, pd.DataFrame) or hasattr(data, "__geo_interface__"):
---> 84         data = _pipe(data, data_transformers.get())
     85 
     86     # convert string input to a URLData

/usr/local/lib/python3.7/dist-packages/toolz/functoolz.py in pipe(data, *funcs)
    625     """
    626     for func in funcs:
--> 627         data = func(data)
    628     return data
    629 

/usr/local/lib/python3.7/dist-packages/toolz/functoolz.py in __call__(self, *args, **kwargs)
    301     def __call__(self, *args, **kwargs):
    302         try:
--> 303             return self._partial(*args, **kwargs)
    304         except TypeError as exc:
    305             if self._should_curry(args, kwargs, exc):

/usr/local/lib/python3.7/dist-packages/altair/vegalite/data.py in default_data_transformer(data, max_rows)
     17 @curried.curry
     18 def default_data_transformer(data, max_rows=5000):
---> 19     return curried.pipe(data, limit_rows(max_rows=max_rows), to_values)
     20 
     21 

/usr/local/lib/python3.7/dist-packages/toolz/functoolz.py in pipe(data, *funcs)
    625     """
    626     for func in funcs:
--> 627         data = func(data)
    628     return data
    629 

/usr/local/lib/python3.7/dist-packages/toolz/functoolz.py in __call__(self, *args, **kwargs)
    301     def __call__(self, *args, **kwargs):
    302         try:
--> 303             return self._partial(*args, **kwargs)
    304         except TypeError as exc:
    305             if self._should_curry(args, kwargs, exc):

/usr/local/lib/python3.7/dist-packages/altair/utils/data.py in to_values(data)
    147         return {"values": data}
    148     elif isinstance(data, pd.DataFrame):
--> 149         data = sanitize_dataframe(data)
    150         return {"values": data.to_dict(orient="records")}
    151     elif isinstance(data, dict):

/usr/local/lib/python3.7/dist-packages/altair/utils/core.py in sanitize_dataframe(df)
    346             # Convert numpy arrays saved as objects to lists
    347             # Arrays are not JSON serializable
--> 348             col = df[col_name].apply(to_list_if_array, convert_dtype=False)
    349             df[col_name] = col.where(col.notnull(), None)
    350     return df

/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
   7550             kwds=kwds,
   7551         )
-> 7552         return op.get_result()
   7553 
   7554     def applymap(self, func) -> "DataFrame":

/usr/local/lib/python3.7/dist-packages/pandas/core/apply.py in get_result(self)
    183             return self.apply_raw()
    184 
--> 185         return self.apply_standard()
    186 
    187     def apply_empty_result(self):

/usr/local/lib/python3.7/dist-packages/pandas/core/apply.py in apply_standard(self)
    274 
    275     def apply_standard(self):
--> 276         results, res_index = self.apply_series_generator()
    277 
    278         # wrap results

/usr/local/lib/python3.7/dist-packages/pandas/core/apply.py in apply_series_generator(self)
    303                 for i, v in enumerate(series_gen):
    304                     # ignore SettingWithCopy here in case the user mutates
--> 305                     results[i] = self.f(v)
    306                     if isinstance(results[i], ABCSeries):
    307                         # If we have a view on v, we need to make a copy because

/usr/local/lib/python3.7/dist-packages/pandas/core/apply.py in f(x)
    112 
    113             def f(x):
--> 114                 return func(x, *args, **kwds)
    115 
    116         else:

TypeError: to_list_if_array() got an unexpected keyword argument 'convert_dtype'

The data sheet being loaded is this one

Upvotes: 3

Views: 2374

Answers (1)

joelostblom
joelostblom

Reputation: 49054

The following works fine for me:

import altair as alt
import pandas as pd


# Load the sheet through its CSV export URL instead of gspread; read_csv
# uses the first line of the CSV as the column header by default.
# NOTE(review): read_csv also renames duplicated header names by default
# (e.g. 'x', 'x.1'), which presumably is why this path does not hit the
# TypeError from the question -- confirm against the sheet's header row.
df = pd.read_csv('https://docs.google.com/spreadsheets/d/1ZSZF7iX8jDcgP_uMpgy6vaQagLXzXmg4dqZCG4h-sOc/export?format=csv#gid=0')

# Encode some columns as numeric types
# errors='coerce' turns entries that cannot be parsed as numbers into NaN
# instead of raising.
df['Victims'] = pd.to_numeric(df['Victims'], errors='coerce')
df['Starting year'] = pd.to_numeric(df['Starting year'], errors='coerce')

# Plot

# Same scatter-plot spec as in the question: 'Starting year' on x,
# 'Victims' on y.
alt.Chart(df).mark_point().encode(
  x=alt.X('Starting year'),
  y=alt.Y('Victims'),
)

enter image description here

Upvotes: 1

Related Questions