Reputation: 33998
It looks like `.append` is deprecated now:
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
I am trying to get more than 100 tweets with tweepy, so I use Paginator; however, I am not sure how to properly append/concat rows to the pandas DataFrame.
# Page through recent-tweet search results, asking for 100 tweets per request.
paginator = tweepy.Paginator(
    client.search_recent_tweets,     # API method invoked for each page
    "#publictransport -is:retweet",  # argument forwarded to that method
    max_results=100,                 # tweets asked for per request
)
import pandas as pd
# Start from an empty frame that the loop below is meant to fill.
df = pd.DataFrame()
for tweet in paginator.flatten(limit=1000): # Total number of tweets to retrieve
# NOTE(review): `df` is never reassigned, so every pass appends to the same
# empty frame and `df2` ends up holding only the row built from the last tweet.
# NOTE(review): the raw `tweet` object is stored as the cell value; presumably
# a tweepy Tweet. Confirm whether plain text (e.g. tweet.text) was intended.
df2 = df.append({'Tweet':tweet}, ignore_index = True)
I get this error:
df2.head(5)
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/IPython/core/formatters.py:707, in PlainTextFormatter.__call__(self, obj)
700 stream = StringIO()
701 printer = pretty.RepresentationPrinter(stream, self.verbose,
702 self.max_width, self.newline,
703 max_seq_length=self.max_seq_length,
704 singleton_pprinters=self.singleton_printers,
705 type_pprinters=self.type_printers,
706 deferred_pprinters=self.deferred_printers)
--> 707 printer.pretty(obj)
708 printer.flush()
709 return stream.getvalue()
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/IPython/lib/pretty.py:410, in RepresentationPrinter.pretty(self, obj)
407 return meth(obj, self, cycle)
408 if cls is not object \
409 and callable(cls.__dict__.get('__repr__')):
--> 410 return _repr_pprint(obj, self, cycle)
412 return _default_pprint(obj, self, cycle)
413 finally:
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/IPython/lib/pretty.py:778, in _repr_pprint(obj, p, cycle)
776 """A pprint that just redirects to the normal repr function."""
777 # Find newlines and replace them with p.break_()
--> 778 output = repr(obj)
779 lines = output.splitlines()
780 with p.group():
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/core/frame.py:1011, in DataFrame.__repr__(self)
1008 return buf.getvalue()
1010 repr_params = fmt.get_dataframe_repr_params()
-> 1011 return self.to_string(**repr_params)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/core/frame.py:1192, in DataFrame.to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, max_cols, show_dimensions, decimal, line_width, min_rows, max_colwidth, encoding)
1173 with option_context("display.max_colwidth", max_colwidth):
1174 formatter = fmt.DataFrameFormatter(
1175 self,
1176 columns=columns,
(...)
1190 decimal=decimal,
1191 )
-> 1192 return fmt.DataFrameRenderer(formatter).to_string(
1193 buf=buf,
1194 encoding=encoding,
1195 line_width=line_width,
1196 )
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1128, in DataFrameRenderer.to_string(self, buf, encoding, line_width)
1125 from pandas.io.formats.string import StringFormatter
1127 string_formatter = StringFormatter(self.fmt, line_width=line_width)
-> 1128 string = string_formatter.to_string()
1129 return save_to_buffer(string, buf=buf, encoding=encoding)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/string.py:25, in StringFormatter.to_string(self)
24 def to_string(self) -> str:
---> 25 text = self._get_string_representation()
26 if self.fmt.should_show_dimensions:
27 text = "".join([text, self.fmt.dimensions_info])
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/string.py:40, in StringFormatter._get_string_representation(self)
37 if self.fmt.frame.empty:
38 return self._empty_info_line
---> 40 strcols = self._get_strcols()
42 if self.line_width is None:
43 # no need to wrap around just print the whole frame
44 return self.adj.adjoin(1, *strcols)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/string.py:31, in StringFormatter._get_strcols(self)
30 def _get_strcols(self) -> list[list[str]]:
---> 31 strcols = self.fmt.get_strcols()
32 if self.fmt.is_truncated:
33 strcols = self._insert_dot_separators(strcols)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:611, in DataFrameFormatter.get_strcols(self)
607 def get_strcols(self) -> list[list[str]]:
608 """
609 Render a DataFrame to a list of columns (as lists of strings).
610 """
--> 611 strcols = self._get_strcols_without_index()
613 if self.index:
614 str_index = self._get_formatted_index(self.tr_frame)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:875, in DataFrameFormatter._get_strcols_without_index(self)
871 cheader = str_columns[i]
872 header_colwidth = max(
873 int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
874 )
--> 875 fmt_values = self.format_col(i)
876 fmt_values = _make_fixed_width(
877 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
878 )
880 max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:889, in DataFrameFormatter.format_col(self, i)
887 frame = self.tr_frame
888 formatter = self._get_formatter(i)
--> 889 return format_array(
890 frame.iloc[:, i]._values,
891 formatter,
892 float_format=self.float_format,
893 na_rep=self.na_rep,
894 space=self.col_space.get(frame.columns[i]),
895 decimal=self.decimal,
896 leading_space=self.index,
897 )
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1316, in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting)
1301 digits = get_option("display.precision")
1303 fmt_obj = fmt_klass(
1304 values,
1305 digits=digits,
(...)
1313 quoting=quoting,
1314 )
-> 1316 return fmt_obj.get_result()
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1347, in GenericArrayFormatter.get_result(self)
1346 def get_result(self) -> list[str]:
-> 1347 fmt_values = self._format_strings()
1348 return _make_fixed_width(fmt_values, self.justify)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1410, in GenericArrayFormatter._format_strings(self)
1408 for i, v in enumerate(vals):
1409 if not is_float_type[i] and leading_space:
-> 1410 fmt_values.append(f" {_format(v)}")
1411 elif is_float_type[i]:
1412 fmt_values.append(float_format(v))
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1390, in GenericArrayFormatter._format_strings.<locals>._format(x)
1387 return str(x)
1388 else:
1389 # object dtype
-> 1390 return str(formatter(x))
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/printing.py:222, in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items)
218 result = _pprint_dict(
219 thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
220 )
221 elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
--> 222 result = _pprint_seq(
223 thing,
224 _nest_lvl,
225 escape_chars=escape_chars,
226 quote_strings=quote_strings,
227 max_seq_items=max_seq_items,
228 )
229 elif isinstance(thing, str) and quote_strings:
230 result = f"'{as_escaped_string(thing)}'"
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/printing.py:119, in _pprint_seq(seq, _nest_lvl, max_seq_items, **kwds)
117 s = iter(seq)
118 # handle sets, no slicing
--> 119 r = [
120 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
121 for i in range(min(nitems, len(seq)))
122 ]
123 body = ", ".join(r)
125 if nitems < len(seq):
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/printing.py:120, in <listcomp>(.0)
117 s = iter(seq)
118 # handle sets, no slicing
119 r = [
--> 120 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
121 for i in range(min(nitems, len(seq)))
122 ]
123 body = ", ".join(r)
125 if nitems < len(seq):
StopIteration:
Upvotes: 0
Views: 483
Reputation: 13478
I can't reproduce your error, so I am flying blind here, but here is one way to do what you asked:
# Wrap every tweet in its own single-row frame, then stack them all at once;
# ignore_index=True renumbers the rows 0..n-1 in the combined frame.
df = pd.concat(
    [pd.DataFrame({"Tweet": [tweet]}) for tweet in paginator.flatten(limit=1000)],
    ignore_index=True,
)
Although you do not need `pd.concat` or `append` to achieve the same result:
# Build the frame in one step from all retrieved tweets. The column is named
# "Tweet" so the result matches the pd.concat version above.
df = pd.DataFrame({"Tweet": list(paginator.flatten(limit=1000))})
Upvotes: 1