Reputation: 700
I used to have a working code. It reads a google sheet into pandas and then does stuff with the data.
It worked very well, until recently.
The google sheet has around 450 lines, but it seems the google api rejects the request now. Can anybody help with what is wrong here?
import pandas as pd
google_id = r"my_google_id"
front_boilerplate = r"https://docs.google.com/spreadsheets/d/"
back_boilerplate = r"/export?format=xlsx"
sheet_name = front_boilerplate + google_id + back_boilerplate
df = pd.read_excel(sheet_name)
I get "Bad Request" as result. Which is strange, because last time I executed the code (a month ago) it worked perfectly well. Was there any change in pandas or the google API which leads to the rejection of this code?
For anybody wanting to see the stack trace:
Traceback (most recent call last):
File "card_printer.py", line 1040, in <module>
cards = read_cards(google_sheet)
File "card_printer.py", line 90, in read_cards
in_pd = read_excel(file, engine='openpyxl')
File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 296, in wrapper
return func(*args, **kwargs)
File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 304, in read_excel
io = ExcelFile(io, engine=engine)
File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 867, in __init__
self._reader = self._engines[engine](self._io)
File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\excel\_openpyxl.py", line 480, in __init__
super().__init__(filepath_or_buffer)
File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 342, in __init__
filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())
File "C:\Users\wagen\Anaconda3\lib\site-packages\pandas\io\common.py", line 137, in urlopen
return urllib.request.urlopen(*args, **kwargs)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 563, in error
result = self._call_chain(*args)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 755, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\wagen\Anaconda3\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: Bad Request
Upvotes: 0
Views: 980
Reputation: 31226
You can always use the API
from google import drive
mydrive = drive.drive()
mydrive.sheetdf(mydrive.findItem("Blood pressure",mime="application/vnd.google-apps.spreadsheet"), range="Sheet1!A1:D" )
from apiclient import discovery
from oauth2client import client, tools
from oauth2client.file import Storage
from pathlib import Path
import pandas as pd
# install pre-requisites
# pip3 install httplib2
# pip3 install google-api-python-client
# pip3 install oauth2client
# setup / download client secret file here
# https://console.developers.google.com/apis/credentials
SCOPES = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
class drive(object):
driveservice = None
sheetservice = None
def __init__(self, appname="googleclient") -> None:
self.__connectToService(appname=appname)
def __connectToService(self, appname=""):
credentials = self.__get_credentials(appname=appname)
self.driveservice = discovery.build('drive', 'v3', credentials=credentials)
self.sheetservice = discovery.build('sheets', 'v4', credentials=credentials)
def __get_credentials(self, appname=""):
credential_dir = Path.home().joinpath('.credentials')
if not credential_dir.exists(): credential_dir.mkdir()
store = Storage(credential_dir.joinpath(f'{appname}.json'))
credentials = store.get()
if not credentials or credentials.invalid:
cs = [p for p in Path.cwd().glob("**/client*.json")][0]
flow = client.flow_from_clientsecrets(cs, SCOPES)
flow.user_agent = appname
# NB this fails with Safari, set Chrome as default for this to work
credentials = tools.run_flow(flow, store, None)
return credentials
def findItem(self, name="", mime="application/vnd.google-apps.folder") -> str:
name = name.replace("'", "\\'") if "'" in name else name
id = []
page_token = None
while True:
response = self.driveservice.files().list(
q=f"name='{name}' and trashed=false and mimeType='{mime}'",
spaces='drive',
fields='nextPageToken, files(id, name, mimeType)',
pageToken=page_token).execute()
for file in response.get('files', []):
id.append(file["id"])
page_token = response.get('nextPageToken', None)
if page_token is None:
break
if id == []: # not found
id.append("")
return id[0]
def sheetdf(self, id, range="Sheet1!A1:Z"):
df = pd.DataFrame(self.sheetservice.spreadsheets().values().get(
spreadsheetId=id, range=range
).execute()["values"])
df.columns = df.iloc[0]
df.drop(0, inplace=True)
return df
if __name__ == '__main__':
mydrive = drive()
df = mydrive.sheetdf(mydrive.findItem(
"Blood pressure",mime="application/vnd.google-apps.spreadsheet"), range="Sheet1!A1:D" )
print(df.tail(5))
Upvotes: 2
Reputation: 494
import gspread
import pandas as pd
gc = gspread.service_account(filename='credentials.json')
book = gc.open_by_url(url)
You now have an authenticated connection to the google sheet you are trying to get a df from.
Each specific sheet in the spreadsheet can be accessed through the worksheet function.
sheet = book.worksheet('name of the sheet')
You can create a list of dictionaries from the data with a sheet by using this function
records = sheet.get_all_records()
And now a dataframe
df = pd.DataFrame(records)
There's a few steps to getting yourself a credentials file capable of authenticating. This video goes through them pretty clearly.
Upvotes: 1