Reputation: 2563
I am trying to load some data from a Google Sheet into Colab, transform it into a pandas DataFrame, and then plot it using Altair. But I am getting an unexpected error: `TypeError: to_list_if_array() got an unexpected keyword argument 'convert_dtype'`.
What is going on? Is this because I am converting the columns to numeric data? I tried removing the conversion and it didn't work. When I draw a scatter plot using one of the altair premade datasets it works, so it must be an issue with the dataset. But all the dtypes in the dataset seem correct, as well as their entries.
# Import data from google sheet
# Authenticate the Colab session, then authorize gspread with the
# application-default Google credentials.
from google.colab import auth
auth.authenticate_user()
import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())
# Open the spreadsheet by title and take its first worksheet.
worksheet = gc.open('My cool spreadsheet').sheet1
# get_all_values gives a list of rows.
rows = worksheet.get_all_values()
# Convert to a DataFrame
import pandas as pd
df = pd.DataFrame.from_records(rows)
# Use first row as column names
# NOTE(review): if the header row repeats a label (for example several blank
# header cells), this produces duplicate column names. df[name] then returns a
# DataFrame rather than a Series, and DataFrame.apply forwards convert_dtype to
# the applied function -- which would match the TypeError in the traceback.
# Confirm with df.columns.duplicated().any().
df = df.rename(columns=df.iloc[0])
# Drop the row that held the header labels (index label 0).
df = df.drop(0)
# Encode some columns as numeric types
# errors='coerce' turns cells that cannot be parsed into NaN.
df['Victims'] = pd.to_numeric(df['Victims'], errors='coerce')
df['Starting year'] = pd.to_numeric(df['Starting year'], errors='coerce')
# Plot
# Scatter plot of 'Victims' against 'Starting year'.
import altair as alt
alt.Chart(df).mark_point().encode(
x=alt.X('Starting year'),
y=alt.Y('Victims'),
)
Error trace:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/altair/vegalite/v4/api.py in to_dict(self, *args, **kwargs)
361 copy = self.copy(deep=False)
362 original_data = getattr(copy, "data", Undefined)
--> 363 copy.data = _prepare_data(original_data, context)
364
365 if original_data is not Undefined:
12 frames
/usr/local/lib/python3.7/dist-packages/altair/vegalite/v4/api.py in _prepare_data(data, context)
82 # convert dataframes or objects with __geo_interface__ to dict
83 if isinstance(data, pd.DataFrame) or hasattr(data, "__geo_interface__"):
---> 84 data = _pipe(data, data_transformers.get())
85
86 # convert string input to a URLData
/usr/local/lib/python3.7/dist-packages/toolz/functoolz.py in pipe(data, *funcs)
625 """
626 for func in funcs:
--> 627 data = func(data)
628 return data
629
/usr/local/lib/python3.7/dist-packages/toolz/functoolz.py in __call__(self, *args, **kwargs)
301 def __call__(self, *args, **kwargs):
302 try:
--> 303 return self._partial(*args, **kwargs)
304 except TypeError as exc:
305 if self._should_curry(args, kwargs, exc):
/usr/local/lib/python3.7/dist-packages/altair/vegalite/data.py in default_data_transformer(data, max_rows)
17 @curried.curry
18 def default_data_transformer(data, max_rows=5000):
---> 19 return curried.pipe(data, limit_rows(max_rows=max_rows), to_values)
20
21
/usr/local/lib/python3.7/dist-packages/toolz/functoolz.py in pipe(data, *funcs)
625 """
626 for func in funcs:
--> 627 data = func(data)
628 return data
629
/usr/local/lib/python3.7/dist-packages/toolz/functoolz.py in __call__(self, *args, **kwargs)
301 def __call__(self, *args, **kwargs):
302 try:
--> 303 return self._partial(*args, **kwargs)
304 except TypeError as exc:
305 if self._should_curry(args, kwargs, exc):
/usr/local/lib/python3.7/dist-packages/altair/utils/data.py in to_values(data)
147 return {"values": data}
148 elif isinstance(data, pd.DataFrame):
--> 149 data = sanitize_dataframe(data)
150 return {"values": data.to_dict(orient="records")}
151 elif isinstance(data, dict):
/usr/local/lib/python3.7/dist-packages/altair/utils/core.py in sanitize_dataframe(df)
346 # Convert numpy arrays saved as objects to lists
347 # Arrays are not JSON serializable
--> 348 col = df[col_name].apply(to_list_if_array, convert_dtype=False)
349 df[col_name] = col.where(col.notnull(), None)
350 return df
/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7550 kwds=kwds,
7551 )
-> 7552 return op.get_result()
7553
7554 def applymap(self, func) -> "DataFrame":
/usr/local/lib/python3.7/dist-packages/pandas/core/apply.py in get_result(self)
183 return self.apply_raw()
184
--> 185 return self.apply_standard()
186
187 def apply_empty_result(self):
/usr/local/lib/python3.7/dist-packages/pandas/core/apply.py in apply_standard(self)
274
275 def apply_standard(self):
--> 276 results, res_index = self.apply_series_generator()
277
278 # wrap results
/usr/local/lib/python3.7/dist-packages/pandas/core/apply.py in apply_series_generator(self)
303 for i, v in enumerate(series_gen):
304 # ignore SettingWithCopy here in case the user mutates
--> 305 results[i] = self.f(v)
306 if isinstance(results[i], ABCSeries):
307 # If we have a view on v, we need to make a copy because
/usr/local/lib/python3.7/dist-packages/pandas/core/apply.py in f(x)
112
113 def f(x):
--> 114 return func(x, *args, **kwds)
115
116 else:
TypeError: to_list_if_array() got an unexpected keyword argument 'convert_dtype'
The data sheet being loaded is this one: https://docs.google.com/spreadsheets/d/1ZSZF7iX8jDcgP_uMpgy6vaQagLXzXmg4dqZCG4h-sOc
Upvotes: 3
Views: 2374
Reputation: 49054
The following works fine for me:
import altair as alt
import pandas as pd
# Read the sheet directly through its CSV export URL.
sheet_csv_url = 'https://docs.google.com/spreadsheets/d/1ZSZF7iX8jDcgP_uMpgy6vaQagLXzXmg4dqZCG4h-sOc/export?format=csv#gid=0'
df = pd.read_csv(sheet_csv_url)
# Coerce the two plotted columns to numbers; unparseable cells become NaN.
for numeric_column in ('Victims', 'Starting year'):
    df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce')
# Scatter plot of victims against starting year.
alt.Chart(df).mark_point().encode(
    x=alt.X('Starting year'),
    y=alt.Y('Victims'),
)
Upvotes: 1