Reputation: 597173
Given a module containing :
import stuff
from foo import Foo
from bar import *
CST = True
def func(): pass
How can I define a function get_defined_objects so that I can do:
print(get_defined_objects('path.to.module'))
{'CST': True, 'func', <function path.to.module.func>}
Right now the only solution I can imagine is to read the original module file, extract defined names with re.search(r'^(?:def|class )?(\w+)(?:\s*=)?'
then import the module, and find the intersection with __dict__
.
Is there something cleaner ?
Upvotes: 7
Views: 496
Reputation: 180481
mod = "foo"
import ast, inspect
import importlib
mod = importlib.import_module(mod)
p = ast.parse(inspect.getsource(mod))
from collections import defaultdict
data = defaultdict(defaultdict)
for node in p.body:
if isinstance(node, (ast.ImportFrom, ast.Import)):
continue
if isinstance(node, (ast.ClassDef, ast.FunctionDef)):
data["classes"][node.name] = mod.__dict__[node.name]
elif isinstance(node, ast.Assign):
for trg in node.targets:
if isinstance(node.value, ast.Num):
data["assignments"][trg.id] = node.value.n
elif isinstance(node.value, ast.Str):
data["assignments"][trg.id] = node.value.s
else:
data["assignments"][trg.id] = mod.__dict__[trg.id]
Output:
There is a nice explanation here that lists what the different types do and their attributes which this is based on:
class Nodes(ast.NodeVisitor):
def __init__(self):
self.data = defaultdict()
super(Nodes, self).__init__()
def visit_FunctionDef(self, node):
self.data[node.name] = mod.__dict__[node.name]
print("In FunctionDef with funcion {}".format(node.name))
def visit_ClassDef(self, node):
self.data[node.name] = mod.__dict__[node.name]
def visit_Assign(self, node):
for trg in node.targets:
if isinstance(node.value, (ast.Str, ast.Num, ast.Dict, ast.List, ast.ListComp, ast.NameConstant)):
self.data[trg.id] = mod.__dict__[trg.id]
self.generic_visit(node)
def visit_Name(self, node):
"""
class Name(idctx)
A variable name. id holds the name as a string
and ctx is either class Load class Store class Del.
"""
print("In Name with {}\n".format(node.id))
#
def visit_Dict(self, node):
"""
class Dict(keys, values)
A dictionary. keys and values
hold lists of nodes with matching order
"""
print("In Dict keys = {}, values = {}\n".format(node.keys,node.values))
def visit_Set(self,node):
"""
class Set(elts)
A set. elts holds a list of
nodes representing the elements.
"""
print("In Set elts = {}\n".format(node.elts))
def visit_List(self, node):
"""
class List(eltsctx)
lts holds a list of nodes representing the elements.
ctx is Store if the container
is an assignment target
(i.e. (x,y)=pt), and Load otherwise.
"""
print("In List elts = {}\nctx = {}\n".format(node.elts,node.ctx))
def visit_Tuple(self, node):
"""
class Tuple(eltsctx)
lts holds a list of nodes representing the elements.
ctx is Store if the container
is an assignment target
(i.e. (x,y)=pt), and Load otherwise.
"""
print("In Tuple elts = {}\nctx = {}\n".format(node.elts,node.ctx))
def visit_NameConstant(self, node):
"""
class NameConstant(value)
True, False or None. "value" holds one of those constants.
"""
print("In NameConstant getting value {}\n".format(node.value))
def visit_Load(self, node):
print("In Load with node {}\n".format(node.func))
def visit_Call(self, node):
"""
class Call(func, args, keywords, starargs, kwargs)
A function call. func is the function,
which will often be a Name or Attribute object. Of the arguments:
args holds a list of the arguments passed by position.
keywords holds a list of keyword objects representing arguments
passed by keyword.starargs and kwargs each hold a single node,
for arguments passed as *args and **kwargs.
"""
print("In Call with node {}\n".format(node.func))
def visit_Num(self, node):
print("In Num getting value {}\n".format(node.n))
def visit_Str(self, node):
print("In Str getting value {}\n".format(node.s))
f = Nodes()
f.visit(p)
print(f.data)
Upvotes: 3
Reputation: 597173
While I accepted an answer, it can't hurt to post the solution I ended up using. It's a mix between the other proposals :
import ast
import inspect
import importlib
from types import ModuleType
def extract_definitions(module):
""" Returns the name and value of objects defined at the top level of the given module.
:param module: A module object or the name of the module to import.
:return: A dict {'classes': {}, 'functions': {}, 'assignments': {}} containing defined objects in the module.
"""
if not isinstance(module, ModuleType):
module = importlib.import_module(module)
tree = ast.parse(inspect.getsource(module))
definitions = {'classes': {}, 'functions': {}, 'assignments': {}}
for node in tree.body:
if isinstance(node, ast.ClassDef):
definitions["classes"][node.name] = getattr(module, node.name)
elif isinstance(node, ast.FunctionDef):
definitions["functions"][node.name] = getattr(module, node.name)
elif isinstance(node, ast.Assign):
# for unpacking, you need to loop on all names
for target in node.targets:
definitions["assignments"][target.id] = getattr(module, target.id)
return definitions
I added the ability to import from a string or a module object, then removed the parsing of values and replaced it by a simple getattr from the original module.
Upvotes: 1
Reputation: 251096
A bytecode hack for Python 3.4+. Possible due to dis.get_instructions
.
import dis
import importlib
from itertools import islice
import marshal
import os
def consume_iterator(it, n=1):
next(islice(it, n, n), None)
def get_defined_names(module_path):
path, module_name = os.path.split(module_path)
module_name = module_name[:-3]
module_object = importlib.import_module(module_name)
pyc_name = '{}.cpython-34.pyc'.format(module_name)
pyc_path = os.path.join(path, '__pycache__/', pyc_name)
with open(pyc_path, 'rb') as f:
f.read(12) # drop the first 12 bytes
code = marshal.load(f)
# dis.disassemble(code) # see the byte code
instructions = dis.get_instructions(code)
objects = {}
for instruction in instructions:
if instruction.opname == 'STORE_NAME':
objects[instruction.argval] = getattr(module_object,
instruction.argval)
elif instruction.opname == 'IMPORT_NAME':
consume_iterator(instructions, 2)
elif instruction.opname == 'IMPORT_FROM':
consume_iterator(instructions, 1)
return objects
print(get_defined_names('/Users/ashwini/py/so.py'))
For a file like:
#/Users/ashwini/py/so.py
import os
from sys import argv, modules
from math import *
from itertools import product
CST = True
from itertools import permutations, combinations
from itertools import chain
E = 100
from itertools import starmap
def func(): pass
for x in range(10):
pass
class C:
a = 100
d = 1
The output will be:
{'d': 1, 'E': 100, 'CST': True, 'x': 9, 'func': <function func at 0x10efd0510>, 'C': <class 'so.C'>}
A much more better way as someone already mentioned in comments will be to parse the source code using ast module and find out the variable names from there.
Upvotes: 2
Reputation: 2844
Here is something for you to start with using ast
. Note that this code does not cover all possible cases, although it should handle e.g. multiple assignment properly. Consider investigating ast
's data structures and API more closely if you would like to get access to compiled code, for example.
import ast
with open('module.py') as f:
data = f.read()
tree = ast.parse(data)
elements = [el for el in tree.body if type(el) in (ast.Assign, ast.FunctionDef, ast.ClassDef)]
result = {}
for el in elements:
if type(el) == ast.Assign:
for t in el.targets:
if type(el.value) == ast.Call:
result[t.id] = el.value.func.id + '()'
else:
for attr in ['id', 'i', 's']:
try:
result[t.id] = getattr(el.value, attr)
break
except Exception as e:
pass
elif type(el) == ast.FunctionDef:
result[el.name] = '<function %s>' % el.name
else:
result[el.name] = '<class %s>' % el.name
print result
#
Upvotes: 3
Reputation: 22340
Untested
def unexported_names (module):
try:
return [name for name in module.__dict__ if name not in module.__all__]
except AttributeError:
return [name for name in module.__dict__ if name.startswith('_')]
Upvotes: -3