Reputation: 644
I'm trying to read an o365 excel file into a pandas dataframe for analysis. I'm able to connect and authenticate, however am getting the error: "Unsupported format, or corrupt file: Expected BOF record; found b'\r\n<!DOCT' "
Some googling of the error showed that this can be an encoding issue, or that xlrd is misinterpreting the file as being encrypted. However, none of the solutions I found apply to my exact scenario, trying to read from o365 into pandas.
I know this is possible, does anybody see anything inherently wrong with my method of reading the spreadsheet?
Code:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
import io
from xlrd import *
import pandas as pd
url = 'https://somedomain.somesite.com/:x:/r/sites/IT/_layouts/15/guestaccess.aspx?e=4%3Ay8lZaY&at=9&CID=0CEFB96F-C585-4B93-95D8-7B9161922C05&wdLOR=c2549E09D-B403-4600-9D64-4E3AFD70A2D3&share=EbCMUuWuEsRJpPItV4SAhHQBum7Fe0ISfki4Na-k0VIlsA'
username = '[email protected]'
password = 'fakepw'
def pullO365():
ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
ctx = ClientContext(url, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("O365 authentication successful")
else:
print("O365 authentication failed.")
response = File.open_binary(ctx, url)
#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start
#read excel file and each sheet into pandas dataframe
normResults = pd.read_excel(bytes_file_obj, sheet_name=None, usecols="H,G,I,J,F")
df = pd.DataFrame(normResults)
return df
Upvotes: 1
Views: 1276
Reputation: 230
This worked for me. Don't fill spaces with special characters for your folder names.
from shareplum import Site
from shareplum import Office365
from shareplum.site import Version
authcookie = Office365('https://<organization>.sharepoint.com/', username='<your username>', password='<your pw>').GetCookies()
site = Site('https://<organization>.sharepoint.com/teams/<team name>', version=Version.v365, authcookie=authcookie)
#version=Version.v2016,
folder = site.Folder('Shared Documents/<Folder>/<Subfolder>/<Subfolder>')
file = folder.get_file('xxxxx.csv')
with open("xxxxx.csv", "wb") as fh:
fh.write(file)
print('---')
folder.upload_file('xlsx', 'xxxxx.xlsx')
Upvotes: 1