Reputation: 31329
I'm trying to dynamically load modules and packages from arbitrary folder locations in python 2.7. It works great with bare, single file modules. But trying to load in a package is a bit harder.
The best I could figure out was to load the __init__.py file inside the package (folder). But say for example I have this:
root:
mod.py
package:
__init__.py
sub.py
If mod.py contains:
from package import sub
Using my current loading code (below), it will fail stating that there is no package named "sub", unless I add the following to package/__init__.py
import sub
I have to imagine that this is because when you import a package it would normally also scan for all the other sub files in it. Do I also just need to do that manually, or is there a method similar to imp.load_source that will also handle package folders?
Loading code:
import md5
import sys
import os.path
import imp
import traceback
import glob
def load_package(path, base):
    """Load the package folder ``path/base`` by executing its ``__init__.py``.

    Args:
        path: Directory that contains the package folder.
        base: Name of the package folder; also used as the module name.

    Returns:
        A ``(base, module)`` tuple, or ``None`` when ``path/base`` contains
        no ``__init__.py``.

    Raises:
        Exception: whatever opening or executing ``__init__.py`` raises. The
            traceback is written to ``sys.stderr`` before it is re-raised.
    """
    init = path + "/" + base + "/__init__.py"
    if not os.path.exists(init):
        return None
    # Extend sys.path only once the folder is known to be a package, so
    # plain folders no longer pollute the import path.
    sys.path.append(path + "/" + base)
    try:
        # ``with`` closes the file on every exit path; the old ``finally``
        # referenced ``fin`` even when it had never been opened.
        with open(init, 'rb') as fin:
            return (base, imp.load_source(base, init, fin))
    except Exception:
        traceback.print_exc(file=sys.stderr)
        raise
def load_module(path):
    """Load the single-file module stored at ``path``.

    Args:
        path: Path of a Python source file.

    Returns:
        A ``(name, module)`` tuple, where ``name`` is the file name without
        its extension.

    Raises:
        Exception: whatever opening or executing the file raises. The
            traceback is written to ``sys.stderr`` before it is re-raised.
    """
    # splitext removes only the trailing extension; str.replace(".py", "")
    # also removed any ".py" that occurred earlier in the file name.
    base = os.path.splitext(os.path.basename(path))[0]
    try:
        with open(path, 'rb') as fin:
            return (base, imp.load_source(base, path, fin))
    except Exception:
        traceback.print_exc(file=sys.stderr)
        raise
def load_folder(dir):
    """Load every package folder and every ``.py`` file directly under ``dir``.

    ``dir`` is appended to ``sys.path``. Packages are loaded before the
    single-file modules.

    Args:
        dir: Folder to scan.

    Returns:
        A dict mapping each loaded name to its module object.
    """
    sys.path.append(dir)
    mods = {}
    for p in glob.glob(dir + "/*/"):
        # glob yields "<dir>/<name>/"; normpath drops the trailing separator
        # so basename is the folder name. The previous approach deleted all
        # separators and string-replaced the directory, which corrupted
        # names that happened to contain the directory text.
        base = os.path.basename(os.path.normpath(p))
        package = load_package(dir, base)
        if package:
            name, pack = package
            mods[name] = pack
    for m in glob.glob(dir + "/*.py"):
        name, mod = load_module(m)
        mods[name] = mod
    return mods
Upvotes: 1
Views: 4568
Reputation: 34026
The code below is functionally equivalent to your code modulo the traceback.print_exc
(which you should let the client handle - if not handled the exception will end up printed anyway):
def _load_package(path, base):
sys.path.append(path + "/" + base)
init = path + "/" + base + "/__init__.py"
if not os.path.exists(init):
return None, None
with open(init, 'rb') as fin:
return base, imp.load_source(base, init, fin)
def _load_module(path):
code_file = os.path.basename(path)
base = code_file.replace(".py", "")
with open(path, 'rb') as fin:
return base, imp.load_source(base, path, fin)
def load_folder(dir):
    """Load every package folder and every ``.py`` file directly under ``dir``.

    ``dir`` is appended to ``sys.path``. Packages are loaded before the
    single-file modules.

    Args:
        dir: Folder to scan.

    Returns:
        A dict mapping each loaded name to its module object.
    """
    sys.path.append(dir)
    mods = {}
    for p in glob.glob(dir + "/*/"):
        # normpath drops the trailing separator glob leaves on folders, so
        # basename is the folder name. Stripping separators and replacing
        # the directory text corrupted names containing that text.
        base = os.path.basename(os.path.normpath(p))
        name, pack = _load_package(dir, base)
        if name:
            mods[name] = pack
    for m in glob.glob(dir + "/*.py"):  ##: /*/*.py
        name, mod = _load_module(m)
        mods[name] = mod
    return mods
## My added code
# Report which interpreter and platform the experiment runs on.
print('Python %s on %s' % (sys.version, sys.platform))
# Folder that holds the sample mod.py and package/ used in the runs below.
root_ = r'C:\Dropbox\eclipse_workspaces\python\sandbox\root'
def depyc(root, _indent=''):
    """Print the file tree under ``root`` and delete every ``.pyc`` in it.

    The ``.pyc`` files are deleted because they would otherwise end up
    being imported. Deleted files are not listed.

    Args:
        root: Folder to walk.
        _indent: Prefix printed before each entry; the recursion extends it
            for nested folders and outside callers leave it empty.
    """
    is_top_level = not _indent
    if is_top_level:
        print('\nListing %s' % (root,))
    for entry in os.listdir(root):
        label = _indent + entry
        full_path = os.path.join(root, entry)
        if os.path.isdir(full_path):
            print(label + ':')
            depyc(full_path, _indent=_indent + ' ')
        elif label.endswith('.pyc'):
            os.remove(full_path)
        else:
            print(label)
    if is_top_level:
        # Blank line closing the listing.
        print('')
# Remove the .pyc files first, then load everything under the sample folder.
depyc(root_)
load_folder(root_)
Prints:
Python 2.7.10 (default, May 23 2015, 09:40:32) [MSC v.1500 32 bit (Intel)] on win32
Listing C:\Dropbox\eclipse_workspaces\python\sandbox\root
mod.py
package:
sub.py
__init__.py
C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported!
C:\Dropbox\eclipse_workspaces\python\sandbox\root\mod.py imported!
mod.py, sub.py and __init__.py each contain just:
print(__file__ + u' imported!')
Now modifying mod.py
to:
from package import sub
print(__file__ + u' imported!')
we get indeed:
Listing....
C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported! <### this may move around ###>
Traceback (most recent call last):
File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 57, in <module>
load_folder(root_)
File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 31, in load_folder
hash, mod = _load_module(m)
File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 20, in _load_module
return base, imp.load_source(base, path, fin)
File "C:\Dropbox\eclipse_workspaces\python\sandbox\root\mod.py", line 1, in <module>
from package import sub
ImportError: cannot import name sub
Note the error is "cannot import name sub" and not "there is no package named "sub"". So why can't it ?
Modifying __init__.py
:
# package/__init__.py
print(__file__ + u' imported!')
print '__name__', '->', __name__
print '__package__', '->', __package__
print '__path__', '->', __path__
prints:
Listing...
C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported! <### not really ###>
__name__ -> package
__package__ -> None
__path__ ->
Traceback (most recent call last):
File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 59, in <module>
load_folder(root_)
File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 30, in load_folder
hash, pack = _load_package(dir, base)
File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 14, in _load_package
init = imp.load_source(base, init, fin)
File "C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py", line 5, in <module>
print '__path__', '->', __path__
NameError: name '__path__' is not defined
While directly importing it would print:
>>> sys.path.extend([r'C:\Dropbox\eclipse_workspaces\python\sandbox\root'])
>>> import package
C:\Dropbox\eclipse_workspaces\python\sandbox\root\package\__init__.py imported!
__name__ -> package
__package__ -> None
__path__ -> ['C:\\Dropbox\\eclipse_workspaces\\python\\sandbox\\root\\package']
So modify _load_package to:
def _load_package(path, base):
pkgDir = os.path.abspath(os.path.join(path, base))
init = os.path.join(pkgDir, "__init__.py")
if not os.path.exists(init):
return None, None
file, pathname, description = imp.find_module(base, [path])
print file, pathname, description # None, pkgDir, ('', '', 5)
pack = sys.modules.get(base, None) # load_module will reload - yak!
if pack is None:
sys.modules[base] = pack = imp.load_module(base, file, pathname, description)
return base, pack
Solves it as would:
...
if pack is None:
sys.modules[base] = pack = imp.load_module(base, None, '', description)
pack.__path__ = [pkgDir]
or in your original code:
with open(init, 'rb') as fin:
source = imp.load_source(base, init, fin)
source.__path__ = path + "/" + base
return base, source
So what's going on is that a package relies on its __path__ attribute to function correctly.
Kept hacking on that and came up with:
import sys
import os.path
import imp
def _load_(root, name):
file_object, pathname, description = imp.find_module(name, [root])
pack = sys.modules.get(name, None)
try:
if pack is None:
pack = imp.load_module(name, file_object, pathname, description)
else:
print 'In cache', pack
finally:
if file_object is not None: file_object.close()
return name, pack
def load_folder(root):
    """Import the packages and ``.py`` modules found directly under ``root``.

    Args:
        root: Folder to scan.

    Returns:
        A dict mapping each imported name to its module object.
    """
    # sys.path.append(root)
    entries = os.listdir(root)
    package_names = [
        entry for entry in entries
        if os.path.exists(os.path.join(root, entry, "__init__.py"))
    ]
    module_files = [entry for entry in entries if entry.endswith('.py')]
    mods = {}
    # first import packages as in original - modules may import from them
    for entry in package_names:
        print('Importing %s' % (os.path.join(root, entry),))
        key, mod = _load_(root, name=entry)  # will use pyc if available!
        mods[key] = mod
    # then modules
    for entry in module_files:
        print('Importing %s' % (os.path.join(root, entry),))
        key, mod = _load_(root, name=entry[:-3])
        mods[key] = mod
    return mods
I merged the package and module loading code, dropping imp.load_source (one less tricky function) and relying on imp.load_module instead. I do not mess with sys.path directly, and since imp.load_module will reload [!] I check the sys.modules cache. The mods dict returned is completely untested - you have to somehow implement a hash (the _abspath should suffice).
Run as:
def depyc(root, rmpyc, _indent=''):
    """Print the file tree under ``root``, optionally deleting ``.pyc`` files.

    Args:
        root: Folder to walk.
        rmpyc: When true, every ``.pyc`` file is deleted instead of listed;
            when false, ``.pyc`` files are listed like any other file.
        _indent: Prefix printed before each entry; the recursion extends it
            for nested folders and outside callers leave it empty.
    """
    is_top_level = not _indent
    if is_top_level:
        print('\nListing %s' % (root,))
    for entry in os.listdir(root):
        label = _indent + entry
        full_path = os.path.join(root, entry)
        if os.path.isdir(full_path):
            print(label + ':')
            depyc(full_path, rmpyc, _indent=_indent + ' ')
        elif rmpyc and label.endswith('.pyc'):
            os.remove(full_path)
        else:
            print(label)
    if is_top_level:
        # Blank line closing the listing.
        print('')
## Run ##
# Report which interpreter and platform the experiment runs on.
print('Python %s on %s' % (sys.version, sys.platform))
# The sample tree is looked up in a "root" folder under the current working
# directory.
root_ = os.path.join(os.getcwdu(), u'root')
depyc(root_, False) # False will end up importing the pyc files !
load_folder(root_)
to test various scenarios -
The code with an example root/
dir is here
Upvotes: 1