Bite code
Bite code

Reputation: 597173

How to get a list of all non imported names in a Python module?

Given a module containing :

# Imported names -- these must NOT show up in the result.
import stuff
from foo import Foo
from bar import *

# Names defined by the module itself -- these are the ones wanted.
CST = True

def func(): pass

How can I define a function get_defined_objects so that I can do:

print(get_defined_objects('path.to.module'))
{'CST': True, 'func': <function path.to.module.func>}

Right now the only solution I can imagine is to read the original module file, extract the defined names with something like re.search(r'^(?:def|class )?(\w+)(?:\s*=)?', ...), then import the module and intersect those names with its __dict__.

Is there something cleaner ?

Upvotes: 7

Views: 496

Answers (5)

Padraic Cunningham
Padraic Cunningham

Reputation: 180481

mod = "foo"
import ast
import inspect
import importlib

mod = importlib.import_module(mod)
p = ast.parse(inspect.getsource(mod))

from collections import defaultdict


def _bound_names(target):
    """Yield every plain name bound by an assignment target.

    Handles chained and unpacking assignments (``a, (b, *c) = ...``).
    Attribute and subscript targets (``obj.x = 1``, ``d[k] = 1``) bind no
    new module-level name and therefore yield nothing.
    """
    if isinstance(target, ast.Name):
        yield target.id
    elif isinstance(target, (ast.Tuple, ast.List)):
        for element in target.elts:
            yield from _bound_names(element)
    elif isinstance(target, ast.Starred):
        yield from _bound_names(target.value)


# Maps "classes" / "functions" / "assignments" to {name: live object}.
data = defaultdict(dict)

for node in p.body:
    # Only definitions made by the module itself are of interest; imports
    # and every other statement type are skipped by the final ``else``.
    if isinstance(node, ast.ClassDef):
        kind = "classes"
        names = [node.name]
    elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
        kind = "functions"
        names = [node.name]
    elif isinstance(node, ast.Assign):
        kind = "assignments"
        names = []
        for trg in node.targets:
            names.extend(_bound_names(trg))
    else:
        continue

    for name in names:
        # Take the value from the imported module rather than from the
        # literal in the source: it reflects later rebinding, and a name
        # removed again with ``del`` is simply not defined anymore.
        if name in mod.__dict__:
            data[kind][name] = mod.__dict__[name]

Output:

The visitor below is based on a nice explanation of the ast node types, which lists what each type does and which attributes it carries:

class Nodes(ast.NodeVisitor):
    """Collect module-level definitions while tracing the nodes visited.

    Functions, classes and names assigned from literal-like values
    (constants, dicts, lists, list comprehensions) are stored in
    ``self.data`` as ``{name: object}``; the object is looked up in the
    namespace of the imported module.  The remaining ``visit_*`` hooks only
    print what they see, to show which node types the parser produces.

    Requires Python 3.8+, where every literal is an ``ast.Constant``.

    :param module: Imported module whose parsed source is visited.  When
        omitted, the global ``mod`` is used.
    """

    # Value node types whose assignment targets get recorded.  ast.Constant
    # stands in for ast.Num / ast.Str / ast.NameConstant, which are
    # deprecated since Python 3.8 and removed in 3.14.
    _RECORDED_VALUES = (ast.Constant, ast.Dict, ast.List, ast.ListComp)

    def __init__(self, module=None):
        self.data = {}
        self._module = module
        super(Nodes, self).__init__()

    def _record(self, name):
        """Store the live object bound to *name*, if the module still has it."""
        module = mod if self._module is None else self._module
        namespace = module.__dict__
        # A name removed again with ``del`` is not defined by the module.
        if name in namespace:
            self.data[name] = namespace[name]

    @staticmethod
    def _target_names(target):
        """Yield every plain name bound by an assignment target.

        Attribute and subscript targets (``a.b = 1``, ``d[k] = 1``) bind no
        new name and yield nothing.
        """
        if isinstance(target, ast.Name):
            yield target.id
        elif isinstance(target, (ast.Tuple, ast.List)):
            for element in target.elts:
                yield from Nodes._target_names(element)
        elif isinstance(target, ast.Starred):
            yield from Nodes._target_names(target.value)

    def visit_FunctionDef(self, node):
        """Record a function; its body is deliberately not visited."""
        self._record(node.name)
        print("In FunctionDef  with funcion {}".format(node.name))

    # ``async def`` needs the same hook: without it generic_visit() would
    # descend into the body and treat local assignments as module names.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_ClassDef(self, node):
        """Record a class; its body is deliberately not visited."""
        self._record(node.name)

    def visit_Assign(self, node):
        """Record the names assigned from a literal-like value, then recurse."""
        if isinstance(node.value, self._RECORDED_VALUES):
            for trg in node.targets:
                for name in self._target_names(trg):
                    self._record(name)
        self.generic_visit(node)

    def visit_Name(self, node):
        """
        class Name(id, ctx)
        A variable name. id holds the name as a string
        and ctx is one of Load, Store or Del.
        """
        print("In Name with {}\n".format(node.id))

    def visit_Dict(self, node):
        """
        class Dict(keys, values)
        A dictionary. keys and values
        hold lists of nodes with matching order.
        """
        print("In Dict  keys = {}, values = {}\n".format(node.keys,node.values))

    def visit_Set(self, node):
        """
        class Set(elts)
        A set. elts holds a list of
        nodes representing the elements.
        """
        print("In Set  elts = {}\n".format(node.elts))

    def visit_List(self, node):
        """
        class List(elts, ctx)
        elts holds a list of nodes representing the elements.
        ctx is Store if the container
        is an assignment target
        (i.e. [x, y] = pt), and Load otherwise.
        """
        print("In List  elts = {}\nctx = {}\n".format(node.elts,node.ctx))

    def visit_Tuple(self, node):
        """
        class Tuple(elts, ctx)
        elts holds a list of nodes representing the elements.
        ctx is Store if the container
        is an assignment target
        (i.e. (x, y) = pt), and Load otherwise.
        """
        print("In Tuple  elts = {}\nctx = {}\n".format(node.elts,node.ctx))

    def visit_Constant(self, node):
        """
        class Constant(value)
        A literal. value holds the Python object it represents:
        True, False or None, a number, a string, bytes or Ellipsis.
        """
        value = node.value
        # bool is a subclass of int, so it has to be tested before numbers.
        if value is None or isinstance(value, bool):
            print("In NameConstant getting value {}\n".format(value))
        elif isinstance(value, (int, float, complex)):
            print("In Num getting value {}\n".format(value))
        elif isinstance(value, str):
            print("In Str getting value {}\n".format(value))

    def visit_Load(self, node):
        """
        class Load
        Expression context marking a value that is read.
        It carries no attributes of its own.
        """
        print("In Load with node {}\n".format(node))

    def visit_Call(self, node):
        """
        class Call(func, args, keywords)
        A function call. func is the function,
        which will often be a Name or Attribute object. Of the arguments:
        args holds a list of the arguments passed by position.
        keywords holds a list of keyword objects representing arguments
        passed by keyword.
        """
        print("In Call with node {}\n".format(node.func))
# Walk the parsed module `p` and show every name the visitor collected.
f = Nodes()
f.visit(p)
print(f.data)

Upvotes: 3

Bite code
Bite code

Reputation: 597173

While I accepted an answer, it can't hurt to post the solution I ended up using. It's a mix between the other proposals :

import ast
import inspect
import importlib

from types import ModuleType

def _assigned_names(target):
    """Yield every plain name bound by an assignment target.

    Unpacking targets are walked recursively (``a, (b, *c) = ...``).
    Attribute and subscript targets (``obj.x = 1``, ``d[k] = 1``) bind no
    new module-level name and yield nothing.
    """
    if isinstance(target, ast.Name):
        yield target.id
    elif isinstance(target, (ast.Tuple, ast.List)):
        for element in target.elts:
            yield from _assigned_names(element)
    elif isinstance(target, ast.Starred):
        yield from _assigned_names(target.value)


def extract_definitions(module):
    """ Returns the name and value of objects defined at the top level of the given module.

        Imports are ignored.  Only statements directly in the module body are
        inspected, so definitions nested in ``if``/``try``/``for`` blocks are
        not reported.  Names that the module removes again with ``del`` are
        left out.

        :param module: A module object or the name of the module to import.
        :return: A dict {'classes': {}, 'functions': {}, 'assignments': {}} containing defined objects in the module.
        :raises ImportError: If a module name is given and cannot be imported.
        :raises OSError: If the module's source code cannot be retrieved.
    """

    if not isinstance(module, ModuleType):
        module = importlib.import_module(module)

    tree = ast.parse(inspect.getsource(module))

    definitions = {'classes': {}, 'functions': {}, 'assignments': {}}
    missing = object()

    def record(kind, name):
        # The value always comes from the live module, so it reflects any
        # rebinding that happened after the statement being inspected.
        value = getattr(module, name, missing)
        if value is not missing:
            definitions[kind][name] = value

    for node in tree.body:

        if isinstance(node, ast.ClassDef):
            record("classes", node.name)
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            record("functions", node.name)
        elif isinstance(node, ast.Assign):
            # node.targets has one entry per chained target (a = b = 1);
            # each entry may itself be an unpacking pattern (a, b = 1, 2).
            for target in node.targets:
                for name in _assigned_names(target):
                    record("assignments", name)
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            # ``x: int = 1`` binds x; a bare ``x: int`` binds nothing.
            for name in _assigned_names(node.target):
                record("assignments", name)

    return definitions

I added the ability to import from a string or a module object, then removed the parsing of values and replaced it by a simple getattr from the original module.

Upvotes: 1

Ashwini Chaudhary
Ashwini Chaudhary

Reputation: 251096

A bytecode hack for Python 3.4+. Possible due to dis.get_instructions.

import dis
import importlib
from itertools import islice
import marshal
import os


def consume_iterator(it, n=1):
    """Advance the iterator *it* by *n* items, stopping early if it runs out."""
    # An empty slice that starts at n: asking it for one item makes islice
    # discard the first n items of ``it`` and then produce nothing.
    empty_tail = islice(it, n, n)
    next(empty_tail, None)


def get_defined_names(module_path):
    """Return the names a module binds itself, found by scanning its bytecode.

    The source file is compiled and every module-level ``STORE_NAME`` that
    does not belong to an import statement is reported, together with the
    value the name has in the imported module.  Names bound inside function
    or class bodies live in nested code objects and are not reported; names
    the module removes again with ``del`` are left out.

    :param module_path: Path to the module's source file.  The module is
        imported by its bare name, so its directory must be importable
        (on ``sys.path`` or the current working directory).
    :return: A dict ``{name: value}``.
    :raises ImportError: If the module cannot be imported by name.
    :raises OSError: If the source file cannot be read.
    """
    # splitext() copes with any suffix instead of assuming a 3-char ".py".
    module_name = os.path.splitext(os.path.basename(module_path))[0]
    module_object = importlib.import_module(module_name)

    # Compile the source directly rather than reading the cached .pyc: the
    # cache file name embeds the interpreter tag and its header length
    # differs between Python versions.
    with open(module_path, 'rb') as f:
        code = compile(f.read(), module_path, 'exec')
    # dis.disassemble(code)  # see the byte code

    # Number of following instructions to drop so that the STORE_NAME
    # emitted for an import binding is never seen by the loop below.
    skip_after = {'IMPORT_NAME': 2, 'IMPORT_FROM': 1}

    instructions = dis.get_instructions(code)
    objects = {}
    missing = object()

    for instruction in instructions:
        if instruction.opname == 'STORE_NAME':
            value = getattr(module_object, instruction.argval, missing)
            if value is not missing:
                objects[instruction.argval] = value
        else:
            for _ in range(skip_after.get(instruction.opname, 0)):
                next(instructions, None)
    return objects


# Example run against the sample file shown below.
print(get_defined_names('/Users/ashwini/py/so.py'))

For a file like:

#/Users/ashwini/py/so.py
# Plain, from- and star-imports: none of these names are reported.
import os
from sys import argv, modules
from math import *
from itertools import product


CST = True

# Imports interleaved with definitions are skipped as well.
from itertools import permutations, combinations
from itertools import chain
E = 100
from itertools import starmap

def func(): pass

# The loop variable is a module-level name too, so `x` is reported (final value 9).
for x in range(10):
    pass

# `C` is reported; its attribute `a` is not a module-level name.
class C:
    a = 100

d = 1

The output will be:

{'d': 1, 'E': 100, 'CST': True, 'x': 9, 'func': <function func at 0x10efd0510>, 'C': <class 'so.C'>}

A much better way, as someone already mentioned in the comments, is to parse the source code with the ast module and collect the variable names from there.

Upvotes: 2

Vadim Landa
Vadim Landa

Reputation: 2844

Here is something for you to start with using ast. Note that this code does not cover all possible cases, although it should handle e.g. multiple assignment properly. Consider investigating ast's data structures and API more closely if you would like to get access to compiled code, for example.

import ast

# Marker for assigned expressions this script cannot describe.
_UNSUPPORTED = object()


def _callee_name(func):
    """Return the dotted name of a call target (``a.b.c``), or None if it is not a plain name."""
    parts = []
    while isinstance(func, ast.Attribute):
        parts.append(func.attr)
        func = func.value
    if not isinstance(func, ast.Name):
        return None
    parts.append(func.id)
    return '.'.join(reversed(parts))


def _describe_value(value):
    """Return a printable stand-in for an assigned expression, or _UNSUPPORTED."""
    if isinstance(value, ast.Call):
        callee = _callee_name(value.func)
        return _UNSUPPORTED if callee is None else callee + '()'
    if isinstance(value, ast.Name):
        return value.id
    if isinstance(value, ast.Constant):
        # Numbers, strings, True/False/None, ... -- the literal itself.
        return value.value
    return _UNSUPPORTED


with open('module.py') as f:
    data = f.read()

tree = ast.parse(data)
# Only top-level assignments, functions and classes are considered.
elements = [el for el in tree.body if isinstance(el, (ast.Assign, ast.FunctionDef, ast.ClassDef))]

result = {}

for el in elements:
    if isinstance(el, ast.FunctionDef):
        result[el.name] = '<function %s>' % el.name
    elif isinstance(el, ast.ClassDef):
        result[el.name] = '<class %s>' % el.name
    else:
        description = _describe_value(el.value)
        if description is _UNSUPPORTED:
            continue
        # One entry per chained target (a = b = 1).  Unpacking, attribute
        # and subscript targets are skipped: the source text alone does not
        # give a per-name value for them.
        for t in el.targets:
            if isinstance(t, ast.Name):
                result[t.id] = description

print(result)
#

Upvotes: 3

Hammerite
Hammerite

Reputation: 22340

Untested

def unexported_names(module):
    """Return the names in *module*'s namespace that it does not export.

    With an ``__all__`` attribute, every name missing from it is returned;
    without one, the names starting with an underscore are returned.
    """
    namespace = module.__dict__
    try:
        exported = module.__all__
    except AttributeError:
        return [name for name in namespace if name.startswith('_')]
    return [name for name in namespace if name not in exported]

Upvotes: -3

Related Questions