"Bad Request" when Pandas wants to read from google sheet?

Question

I used to have a working code. It reads a google sheet into pandas and then does stuff with the data.

It worked very well, until recently.

The google sheet has around 450 lines, but it seems the google api rejects the request now. Can anybody help with what is wrong here?

import pandas as pd

google_id = r"my_google_id"
front_boilerplate = r"https://docs.google.com/spreadsheets/d/"
back_boilerplate = r"/export?format=xlsx"

sheet_name  = front_boilerplate + google_id + back_boilerplate
df = pd.read_excel(sheet_name)

I get "Bad Request" as result. Which is strange, because last time I executed the code (a month ago) it worked perfectly well. Was there any change in pandas or the google API which leads to the rejection of this code?

For anybody wanting to see the stack trace:

Traceback (most recent call last):

  File "card_printer.py", line 1040, in 
    cards = read_cards(google_sheet)

  File "card_printer.py", line 90, in read_cards
    in_pd = read_excel(file, engine='openpyxl')

  File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 296, in wrapper
    return func(*args, **kwargs)

  File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 304, in read_excel
    io = ExcelFile(io, engine=engine)

  File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 867, in __init__
    self._reader = self._engines[engine](self._io)

  File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\excel\_openpyxl.py", line 480, in __init__
    super().__init__(filepath_or_buffer)

  File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 342, in __init__
    filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())

  File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\common.py", line 137, in urlopen
    return urllib.request.urlopen(*args, **kwargs)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 222, in urlopen
    return opener.open(url, data, timeout)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 531, in open
    response = meth(req, response)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 563, in error
    result = self._call_chain(*args)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 531, in open
    response = meth(req, response)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 569, in error
    return self._call_chain(*args)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)

  File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)

HTTPError: Bad Request

Rob Raymond · Accepted Answer

You can always use the API

jupyter notebook

from google import drive
mydrive = drive.drive()
mydrive.sheetdf(mydrive.findItem("Blood pressure",mime="application/vnd.google-apps.spreadsheet"),  range="Sheet1!A1:D" )

drive.py

from apiclient import discovery
from oauth2client import client, tools
from oauth2client.file import Storage
from pathlib import Path
import pandas as pd

# install pre-requisites
# pip3 install httplib2
# pip3 install google-api-python-client
# pip3 install oauth2client

# setup / download client secret file here
# https://console.developers.google.com/apis/credentials

SCOPES = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']

class drive(object):
    driveservice = None
    sheetservice = None

    def __init__(self, appname="googleclient") -> None:
        self.__connectToService(appname=appname)

    def __connectToService(self, appname=""):
        credentials = self.__get_credentials(appname=appname)
        self.driveservice = discovery.build('drive', 'v3', credentials=credentials)
        self.sheetservice = discovery.build('sheets', 'v4', credentials=credentials)

    def __get_credentials(self, appname=""):
        credential_dir = Path.home().joinpath('.credentials')
        if not credential_dir.exists(): credential_dir.mkdir()

        store = Storage(credential_dir.joinpath(f'{appname}.json'))
        credentials = store.get()
        if not credentials or credentials.invalid:
            cs = [p for p in Path.cwd().glob("**/client*.json")][0]
            flow = client.flow_from_clientsecrets(cs, SCOPES)
            flow.user_agent = appname
            # NB this fails with Safari, set Chrome as default for this to work
            credentials = tools.run_flow(flow, store, None)
        return credentials

    def findItem(self, name="", mime="application/vnd.google-apps.folder") -> str:
        name = name.replace("'", "\'") if "'" in name else name
        id = []
        page_token = None
        while True:
            response = self.driveservice.files().list(
                q=f"name='{name}' and trashed=false and mimeType='{mime}'",
                spaces='drive',
                fields='nextPageToken, files(id, name, mimeType)',
                pageToken=page_token).execute()
            for file in response.get('files', []):
                id.append(file["id"])
            page_token = response.get('nextPageToken', None)
            if page_token is None:
                break

        if id == []:  # not found
            id.append("")

        return id[0]

    def sheetdf(self, id, range="Sheet1!A1:Z"):
        df = pd.DataFrame(self.sheetservice.spreadsheets().values().get(
            spreadsheetId=id, range=range
        ).execute()["values"])
        df.columns = df.iloc[0]
        df.drop(0, inplace=True)
        return df


if __name__ == '__main__':
    mydrive = drive()

    df = mydrive.sheetdf(mydrive.findItem(
        "Blood pressure",mime="application/vnd.google-apps.spreadsheet"),  range="Sheet1!A1:D" )
    print(df.tail(5))

"Bad Request" when Pandas wants to read from google sheet?

Answers (2)

jupyter notebook

drive.py

Reference:

Related Questions

&quot;Bad Request&quot; when Pandas wants to read from google sheet?

Answers (2)

jupyter notebook

drive.py

Reference:

Related Questions

"Bad Request" when Pandas wants to read from google sheet?