opening a zipped excel file with pandas

Question

I am trying to open a zipped excel file with pandas

When I try

import pandas as pd
import zipfile
from urllib.request import urlopen
import io

url = 'https://www.cftc.gov/files/dea/history/fut_disagg_xls_2020.zip'
file =zipfile.ZipFile((io.BytesIO(urlopen(url).read())))
file_name = file.namelist()[0]
pd.read_excel(file.open(file_name))

I get an UnsupportedOperation: seek error. Any ideas how to read this file?

EDIT

Here is the trace:

UnsupportedOperation                      Traceback (most recent call last)
 in 
      7 file =zipfile.ZipFile((io.BytesIO(urlopen(url).read())))
      8 file_name = file.namelist()[0]
----> 9 pd.read_excel(file.open(file_name))

~/anaconda3/envs/p/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    294                 )
    295                 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 296             return func(*args, **kwargs)
    297 
    298         return wrapper

~/anaconda3/envs/p/lib/python3.6/site-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols)
    302 
    303     if not isinstance(io, ExcelFile):
--> 304         io = ExcelFile(io, engine=engine)
    305     elif engine and engine != io.engine:
    306         raise ValueError(

~/anaconda3/envs/p/lib/python3.6/site-packages/pandas/io/excel/_base.py in __init__(self, path_or_buffer, engine)
    849             engine = "xlrd"
    850             if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
--> 851                 if _is_ods_stream(path_or_buffer):
    852                     engine = "odf"
    853             else:

~/anaconda3/envs/p/lib/python3.6/site-packages/pandas/io/excel/_base.py in _is_ods_stream(stream)
    798         Boolean indication that this is indeed an ODS file or not
    799     """
--> 800     stream.seek(0)
    801     is_ods = False
    802     if stream.read(4) == b"PK\003\004":

UnsupportedOperation: seek

Andrej Kesely · Accepted Answer

The issue is in Python's version. In Python3.8 the script works as is. In Python3.6, add .read() in pd.read_excel():

import pandas as pd
import zipfile
from urllib.request import urlopen
import io

url = 'https://www.cftc.gov/files/dea/history/fut_disagg_xls_2020.zip'
file =zipfile.ZipFile((io.BytesIO(urlopen(url).read())))
file_name = file.namelist()[0]
df = pd.read_excel(file.open(file_name).read())  # <-- add .read()
print(df)

Prints:

                              Market_and_Exchange_Names  As_of_Date_In_Form_YYMMDD Report_Date_as_MM_DD_YYYY  ...                Contract_Units CFTC_SubGroup_Code  FutOnly_or_Combined
0                    WHEAT-SRW - CHICAGO BOARD OF TRADE                     201013                2020-10-13  ...  (CONTRACTS OF 5,000 BUSHELS)                A10              FutOnly
1                    WHEAT-SRW - CHICAGO BOARD OF TRADE                     201006                2020-10-06  ...  (CONTRACTS OF 5,000 BUSHELS)                A10              FutOnly
2                    WHEAT-SRW - CHICAGO BOARD OF TRADE                     200929                2020-09-29  ...  (CONTRACTS OF 5,000 BUSHELS)                A10              FutOnly
3                    WHEAT-SRW - CHICAGO BOARD OF TRADE                     200922                2020-09-22  ...  (CONTRACTS OF 5,000 BUSHELS)                A10              FutOnly
4                    WHEAT-SRW - CHICAGO BOARD OF TRADE                     200915                2020-09-15  ...  (CONTRACTS OF 5,000 BUSHELS)                A10              FutOnly
...                                                 ...                        ...                       ...  ...                           ...                ...                  ...
8475  MINI JAPAN C&F NAPHTHA - NEW YORK MERCANTILE E...                     200901                2020-09-01  ...             (100 METRIC TONS)                N10              FutOnly
8476  MINI JAPAN C&F NAPHTHA - NEW YORK MERCANTILE E...                     200825                2020-08-25  ...             (100 METRIC TONS)                N10              FutOnly
8477  MINI JAPAN C&F NAPHTHA - NEW YORK MERCANTILE E...                     200818                2020-08-18  ...             (100 METRIC TONS)                N10              FutOnly
8478  MINI JAPAN C&F NAPHTHA - NEW YORK MERCANTILE E...                     200811                2020-08-11  ...             (100 METRIC TONS)                N10              FutOnly
8479  MINI JAPAN C&F NAPHTHA - NEW YORK MERCANTILE E...                     200728                2020-07-28  ...             (100 METRIC TONS)                N10              FutOnly

[8480 rows x 188 columns]

opening a zipped excel file with pandas

Answers (1)

Related Questions