Reputation: 139
I am attempting to create a tool for interfacing with Sharepoint to start transitioning a number of old processes away from using Sharepoint as a relational database and move that data into an actual database and automate several manual tasks among other things.
The environment I am working with is a Sharepoint 2013 server using NTLM authentication. I need to use python3 to be able to use a set of libraries that will later be used to consume the data. Python is running via Anaconda 64 bit. I have no administrative power over the sharepoint server at all, just the ability to read and change information of certain pages.
The problem I am running into is authenticating using NTLM and the Sharepoint module together. The following snippet works just fine for pulling the HTML from the site as if it were a browser:
from <<password management library>> import KEY
from requests_ntlm import HttpNtlmAuth
import requests,getpass
#Configuration
domain='domain'
srv="sharepoint.company.com"
creds=KEY(srv,getpass.getuser()) #Secure credential retrieval
user='{0}\\{1}'.format(domain,creds.USERNAME).upper()
srvaddr='https://{0}'.format(srv)
site='/sites/path/to/site/'
site_url='{0}{1}'.format(srvaddr,site)
#Auth via Requests_NTLM
opener=HttpNtlmAuth(user,creds.getpass())
#Interface via Requests
s = sharepoint.SharePointSite(site_url, opener)
r = requests.get(srvaddr, auth=opener)
Using the documentation, source code provided in the sharepoint library's documentation and a few Stackoverflow posts (that refer to python 2 libraries), I attempted to replicate this using the sharepoint module. While the above is able to successfully authenticate, the following snippet gets a 401 Unauthorized Error:
import sharepoint,requests,getpass,urllib
from ntlm3.HTTPNtlmAuthHandler import HTTPNtlmAuthHandler
from <<password management library>> import KEY
from requests_ntlm import HttpNtlmAuth
from urllib.request import BaseHandler, HTTPPasswordMgrWithDefaultRealm, build_opener
#Configuration
domain='domain'
srv="sharepoint.company.com"
creds=KEY(srv,getpass.getuser()) #Secure credential retrieval
user='{0}\\{1}'.format(domain,creds.USERNAME).upper()
srvaddr='https://{0}'.format(srv)
site='/sites/path/to/site/'
site_url='{0}{1}'.format(srvaddr,site)
#Auth via urllib Opener
def basic_auth_opener(url, username, password):
password_manager = HTTPPasswordMgrWithDefaultRealm()
password_manager.add_password(None, url, username, password)
auth_handler = HTTPNtlmAuthHandler(password_manager) #PreemptiveBasicAuthHandler(password_manager)
opener = build_opener(auth_handler)
return opener
print('Auth as {0}@{1}'.format(user,srvaddr))
opener = basic_auth_opener(srvaddr, user, creds.getpass())
#urllib.request.install_opener(opener)
print('Open {0}'.format(site_url))
#Interface via Sharepoint module
s = sharepoint.SharePointSite(site_url, opener)
for sp_list in s.lists:
print (sp_list.id, sp_list.meta['Title'])
I know that the credentials work and are authorized to access the site since the first snippet works just fine. The second simply gets a 401 error and I have been diving into the inner workings of urllib, requests, python-ntlm, and requests-ntlm all day trying to understand why one works but the other does not.
Error stack for reference:
HTTPErrorTraceback (most recent call last)
<ipython-input-41-af721d5620e7> in <module>()
31 #Interface via Sharepoint module
32 s = sharepoint.SharePointSite(site_url, opener)
---> 33 for sp_list in s.lists:
34 print (sp_list.id, sp_list.meta['Title'])
C:\Anaconda3\lib\site-packages\sharepoint\lists\__init__.py in __iter__(self)
78
79 def __iter__(self):
---> 80 return iter(self.all_lists)
81
82 def __getitem__(self, key):
C:\Anaconda3\lib\site-packages\sharepoint\lists\__init__.py in all_lists(self)
34 if not hasattr(self, '_all_lists'):
35 xml = SP.GetListCollection()
---> 36 result = self.opener.post_soap(LIST_WEBSERVICE, xml)
37
38 self._all_lists = []
C:\Anaconda3\lib\site-packages\sharepoint\site.py in post_soap(self, url, xml, soapaction)
30 if soapaction:
31 request.add_header('Soapaction', soapaction)
---> 32 response = self.opener.open(request, timeout=self.timeout)
33 return etree.parse(response).xpath('/soap:Envelope/soap:Body/*', namespaces=namespaces)[0]
34
C:\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
470 for processor in self.process_response.get(protocol, []):
471 meth = getattr(processor, meth_name)
--> 472 response = meth(req, response)
473
474 return response
C:\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
580 if not (200 <= code < 300):
581 response = self.parent.error(
--> 582 'http', request, response, code, msg, hdrs)
583
584 return response
C:\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
508 if http_err:
509 args = (dict, 'default', 'http_error_default') + orig_args
--> 510 return self._call_chain(*args)
511
512 # XXX probably also want an abstract factory that knows when it makes
C:\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
442 for handler in handlers:
443 func = getattr(handler, meth_name)
--> 444 result = func(*args)
445 if result is not None:
446 return result
C:\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
588 class HTTPDefaultErrorHandler(BaseHandler):
589 def http_error_default(self, req, fp, code, msg, hdrs):
--> 590 raise HTTPError(req.full_url, code, msg, hdrs, fp)
591
592 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 401: Unauthorized
Upvotes: 1
Views: 6500
Reputation: 139
Shareplum ended up working great, with some exceptions due to the way it imported some data. Here's some rough code I ended up using to test it and later expand, censored, but enough to get someone started if they stumble across this:
from <<DATABASE LIBRARY>> import KEY,ORACLESQL
from requests_ntlm import HttpNtlmAuth
import requests,getpass,datetime,re,json
import shareplum
def date_handler(obj):
return obj.isoformat() if hasattr(obj, 'isoformat') else obj
def ppJSON(inDict):
return (json.dumps(inDict,sort_keys=True, indent=4, default=date_handler))
####### BEGIN: Monkey Patch to Shareplum ###################
def _python_type(self, key, value):
"""Returns proper type from the schema"""
try:
field_type = self._sp_cols[key]['type']
if field_type in ['Number', 'Currency']:
return float(value)
elif field_type == 'DateTime':
# Need to round datetime object
return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')#.date()
elif field_type == 'Boolean':
if value == '1':
return 'Y'
elif value == '0':
return 'N'
else:
return ''
elif field_type == 'User':
# Sometimes the User no longer exists or
# has a diffrent ID number so we just remove the "123;#"
# from the beginning of their name
if value in self.users['sp']:
return self.users['sp'][value]
else:
return value.split('#')[1]
else:
return value.encode('ascii','replace').decode('ascii')
except AttributeError as e:
print(e)
return value
#Monkey Patching the datetime import to avoid truncation of data
shareplum.shareplum._List._python_type=_python_type
####### END: Monkey Patch to Shareplum ###################
class SPSite(object):
def __init__(self,username,password,server,site,listname=None,viewname=None,domain='sensenet'):
self.domain=domain
self.server=server
self.site=site
self.listname=listname
self.viewname=viewname
self.user='{0}\\{1}'.format(self.domain,username).upper()
self.opener=HttpNtlmAuth(self.user,password)
self.s=None
self.l=None
self.sql=None
self.updateMetaData()
def updateMetaData(self):
self.srvaddr='https://{0}'.format(self.server)
self.site_url='{0}{1}'.format(self.srvaddr,self.site)
self.s=shareplum.Site(self.site_url,self.opener)
if self.listname:
self.l=self.s.List(self.listname)
def showSampleData(self,view=None,*args,**kwargs):
s=self.s
#theview=view
#if not theview and not fields: theview=self.viewname
#Display Data
print('Lists in {0}'.format(self.site_url))
for i in s.GetListCollection():
print(i['Title'])
if self.listname:
l=s.List(self.listname)
print('\nViews in List {0}'.format(self.listname))
for i in l.views:
print(i)
if self.viewname:
print('\nView Fields for {0}'.format(viewname))
for i in l.GetView(viewname)['fields']:
print (i)
print('\nData in View {0}'.format(self.viewname))
for i in l.GetListItems(rowlimit=10,*args,**kwargs):
print(ppJSON(i))
Example:
#Configuration
domain='DomainName'
srv="sharepoint.site.com"
creds=KEY(srv,getpass.getuser()) #Secure credential retrieval
site='/sites/path/to/site'
listname='Some List'
viewname='Some View I Want to ETL'
tablename='SP_{0}_STG'.format(to_oracle(listname))
#Show Sample Data
s=SPSite(creds.USERNAME,creds.getpass(),srv,site,listname,viewname)
s.showSampleData(viewname)
Upvotes: 1