Reputation: 5949
I'm creating a tool that analyses the code used within the definition of a user-defined function, for example:
def help_fun(a, b): return a * b  # user-defined helper; test1 calls it as a plain function
def test1(numbers):  # sample function whose calls the tool should report
    r1, r2 = np.multiply.reduce(numbers), sum(numbers)/len(numbers)  # dotted call np.multiply.reduce plus calls to sum and len
    r = math.sin(help_fun(r1, r2))  # call to help_fun nested inside the dotted call math.sin
    return math.sqrt(r)  # dotted call math.sqrt
I do, however, understand how to interpret the results of dis.dis(test1):
3 0 LOAD_GLOBAL 0 (np)
2 LOAD_ATTR 1 (multiply)
4 LOAD_METHOD 2 (reduce)
6 LOAD_FAST 0 (numbers)
8 CALL_METHOD 1
10 LOAD_GLOBAL 3 (sum)
...
5 46 LOAD_GLOBAL 5 (math)
48 LOAD_METHOD 8 (sqrt)
50 LOAD_FAST 3 (r)
52 CALL_METHOD 1
54 RETURN_VALUE
My expected output is:
{0: {'functions': ['sum', 'len', 'help_fun'],
'methods': ['np.multiply.reduce', 'math.sin', 'math.sqrt']}} #frame 0: scope of test1
In order to collect function and method names, I implemented a wrapper that tracks the contents of the disassembler's stack, and I use a specific way to extract these names from the recorded stack history.
import types
import dis
def get_frames(code):
    '''Yield ``code`` followed by every code object nested inside it.

    Nested code objects are found through ``co_consts`` and visited
    depth-first, in the order they appear there, so the first item yielded
    is always ``code`` itself.

    code -- a code object, e.g. ``<function>.__code__``.

    >>> [c.co_name for c in get_frames(test1.__code__)]
    ['test1']
    '''
    yield code
    for c in code.co_consts:
        if isinstance(c, types.CodeType):
            # Deliberately no ``break``: one scope can hold several sibling
            # code objects and every one of them has to be visited.
            yield from get_frames(c)
def get_calls(instructions, stack):
    '''Collect the callables invoked by CALL_FUNCTION and CALL_METHOD.

    instructions -- iterable of objects exposing ``opname`` and ``arg``.
    stack        -- sequence with one stack snapshot per instruction, in the
                    same order as ``instructions``.

    For a call at position ``idx`` the callable is read from the snapshot
    taken after the previous instruction, ``arg`` slots below its top item.
    Returns a dict with the keys 'functions' and 'methods', each a list in
    call order.
    '''
    found = {'functions': [], 'methods': []}
    bucket_for = {'CALL_FUNCTION': 'functions', 'CALL_METHOD': 'methods'}
    for position, op in enumerate(instructions):
        bucket = bucket_for.get(op.opname)
        if bucket is None:
            continue
        before_call = stack[position - 1]
        found[bucket].append(before_call[-1 - op.arg])
    return found
def get_stack(instructions):
    '''Yield a snapshot of a simulated value stack after each instruction.

    Every stack item is a string rendering of the expression that produced
    it, e.g. ``np.multiply.reduce(numbers)``. Only the opnames handled below
    change the stack; any other opname leaves it untouched, so the
    simulation is only meaningful for code restricted to these opnames.

    instructions -- iterable of objects exposing ``opname``, ``arg`` and
                    ``argrepr`` (e.g. ``dis.Bytecode(code)``).

    Yields one independent copy of the stack per instruction, in order.
    Raises IndexError when an instruction needs more items than the
    simulated stack currently holds.
    '''
    stack = []
    for n in instructions:
        if n.opname in ('LOAD_FAST', 'LOAD_GLOBAL', 'LOAD_CONST'):
            # push the name (or constant repr) being loaded
            stack.append(n.argrepr)
        elif n.opname in ('LOAD_METHOD', 'LOAD_ATTR'):
            # attribute access extends the dotted path of the top item
            stack[-1] = f'{stack[-1]}.{n.argrepr}'
        elif n.opname in ('CALL_FUNCTION', 'CALL_METHOD'):
            argc = n.arg
            # stack[-0:] is the whole list, so a zero-argument call must not
            # go through the negative slice or it would wipe the stack
            args = stack[-argc:] if argc else []
            if argc:
                del stack[-argc:]
            # the callable is replaced by the rendered call expression
            stack[-1] = f'{stack[-1]}({", ".join(args)})'
        elif n.opname == 'BINARY_TRUE_DIVIDE':
            stack[-2:] = [' / '.join(stack[-2:])]
        elif n.opname == 'STORE_FAST':
            del stack[-1]
        elif n.opname == 'ROT_TWO':
            stack[-1], stack[-2] = stack[-2], stack[-1]
        elif n.opname == 'GET_ITER':
            stack[-1] = f'iter({stack[-1]})'
        yield stack.copy()
# test1's own code object followed by any code objects nested inside it
code = list(get_frames(test1.__code__))
out = dict()
for i, c in enumerate(code):
    # the Bytecode object is iterated twice: by get_stack and by get_calls
    instructions = dis.Bytecode(c)
    # one stack snapshot per instruction, in instruction order
    stack = list(get_stack(instructions))
    # keyed by the frame's position in ``code``
    out[i] = get_calls(instructions, stack)
out  # bare expression so an interactive session displays the result
>>> {0: {'functions': ['sum', 'len', 'help_fun'], 'methods': ['np.multiply.reduce', 'math.sin', 'math.sqrt']}}
In my approach, the names of functions and methods are extracted from the stack column of the following table:
| line | opname | arg | argrepr | stack |
|--------|--------------------|-------|-----------|----------------------------------------------------------|
| 3 | LOAD_GLOBAL | 0 | np | np |
| | LOAD_ATTR | 1 | multiply | np.multiply |
| | LOAD_METHOD | 2 | reduce | np.multiply.reduce |
| | LOAD_FAST | 0 | numbers | np.multiply.reduce, numbers |
| | CALL_METHOD | 1 | | np.multiply.reduce(numbers) |
| | LOAD_GLOBAL | 3 | sum | np.multiply.reduce(numbers), sum |
| | LOAD_FAST | 0 | numbers | np.multiply.reduce(numbers), sum, numbers |
| | CALL_FUNCTION | 1 | | np.multiply.reduce(numbers), sum(numbers) |
| | LOAD_GLOBAL | 4 | len | np.multiply.reduce(numbers), sum(numbers), len |
| | LOAD_FAST | 0 | numbers | np.multiply.reduce(numbers), sum(numbers), len, numbers |
| | CALL_FUNCTION | 1 | | np.multiply.reduce(numbers), sum(numbers), len(numbers) |
| | BINARY_TRUE_DIVIDE | | | np.multiply.reduce(numbers), sum(numbers) / len(numbers) |
| | ROT_TWO | | | sum(numbers) / len(numbers), np.multiply.reduce(numbers) |
| | STORE_FAST | 1 | r1 | sum(numbers) / len(numbers) |
| | STORE_FAST | 2 | r2 | |
| 4 | LOAD_GLOBAL | 5 | math | math |
| | LOAD_METHOD | 6 | sin | math.sin |
| | LOAD_GLOBAL | 7 | help_fun | math.sin, help_fun |
| | LOAD_FAST | 1 | r1 | math.sin, help_fun, r1 |
| | LOAD_FAST | 2 | r2 | math.sin, help_fun, r1, r2 |
| | CALL_FUNCTION | 2 | | math.sin, help_fun(r1, r2) |
| | CALL_METHOD | 1 | | math.sin(help_fun(r1, r2)) |
| | STORE_FAST | 3 | r | |
| 5 | LOAD_GLOBAL | 5 | math | math |
| | LOAD_METHOD | 8 | sqrt | math.sqrt |
| | LOAD_FAST | 3 | r | math.sqrt, r |
| | CALL_METHOD | 1 | | math.sqrt(r) |
| | RETURN_VALUE | | | math.sqrt(r) |
However, things get more complicated when other kinds of opnames appear in the instructions. For instance, I'm not sure how the stack behaves when list comprehensions are used. Extracting the names of methods and functions can then give incorrect results (<listcomp> is not a function, but my wrapper thinks it is):
def test2(x):  # sample with two nested list comprehensions
    return [[math.sqrt(m) for m in list(n)] for n in x]  # list(n) is called in the outer comprehension, math.sqrt in the inner one
Expected output:
{0: {'functions': [], 'methods': []}, #frame 0: scope of test2
1: {'functions': ['list'], 'methods': []}, #frame 1: scope of <listcomp>
2: {'functions': [], 'methods': ['math.sqrt']}} #frame 2: scope of <listcomp>
Output of current code:
{0: {'functions': ["'test6.<locals>.<listcomp>'"], 'methods': []},
1: {'functions': ['list', "'test6.<locals>.<listcomp>.<listcomp>'"], 'methods': []},
2: {'functions': [], 'methods': ['math.sqrt']}}
It might also crash sometimes because I am not sure what's happening with stack:
def test3(x,y):  # sample whose comprehension unpacks every item into two names
    return [pow(i,j) for i,j in zip(x,y)]  # zip is called in test3's own scope, pow inside the comprehension
Expected output:
{0: {'functions': ['zip'], 'methods': []},
1: {'functions': ['pow'], 'methods': []}}
It crashes when the second STORE_FAST instruction tries to pop an item from an empty stack. The instructions of the second scope look like this:
| line | opname | arg | argrepr | stack |
|--------|-----------------|-------|-----------|---------|
| 104 | BUILD_LIST | 0 | | |
| | LOAD_FAST | 0 | .0 | .0 |
| | FOR_ITER | 18 | to 24 | .0 |
| | UNPACK_SEQUENCE | 2 | | .0 |
| | STORE_FAST | 1 | i | |
| | STORE_FAST | 2 | j | ??? | ### stuck here
| | LOAD_GLOBAL | 0 | pow | ??? |
| | LOAD_FAST | 1 | i | ??? |
| | LOAD_FAST | 2 | j | ??? |
| | CALL_FUNCTION | 2 | | ??? |
| | LIST_APPEND | 2 | | ??? |
| | JUMP_ABSOLUTE | 4 | | ??? |
| | RETURN_VALUE | | | ??? |
Is there an easier way to get the names of the methods and functions used inside a caller? Are there better ways to record the history of the stack? I know my implementation of get_stack is poor at the moment; I'm looking for a different approach or for better documentation of how the stack is manipulated.
Remarks
ls.append
Upvotes: 3
Views: 392
Reputation: 3601
If you want your code to be more portable across Python versions, you should consider using ast. Although the AST does change across versions, it usually does so in ways that cause easy-to-understand errors.
To fix your code, you will need to implement a lot more operations. I did this by ignoring most things and just applying their stack effect (the number of elements placed or removed from the stack).
import dis
import inspect
def _pop_args(stack, count):
    """Remove and return the top ``count`` stack slots (``[]`` for zero)."""
    if not count:
        # stack[-0:] is the whole list, so zero must not reach the slice
        return []
    args = stack[-count:]
    del stack[-count:]
    return args


def iterate_stack(instructions):
    """Simulate the value stack and collect the callables that get called.

    Each stack slot holds either a dotted-name string describing the value
    or ``None`` when the value is unknown. Opnames without an explicit
    branch are modelled only by their net stack effect, with any new slots
    filled with ``None``. The branches use the opnames documented for
    CPython 3.10.

    Args:
        instructions: iterable of ``dis.Instruction``-like objects exposing
            ``opname``, ``opcode``, ``arg``, ``argrepr`` and ``argval``.

    Returns:
        List of strings in call order. ``CALL_FUNCTION`` entries are
        rendered as ``name(arg, ...)`` with ``?`` standing in for arguments
        that have no known name; the other call opnames contribute the
        callable's name only. Calls whose callable is unknown are skipped.
    """
    stack = []
    called = []
    for instruction in instructions:
        opname = instruction.opname
        old_stack_len = len(stack)
        if opname == "ROT_TWO":
            stack[-1], stack[-2] = stack[-2], stack[-1]
        elif opname == "ROT_THREE":
            stack[-1], stack[-2], stack[-3] = stack[-2], stack[-3], stack[-1]
        elif opname == "ROT_FOUR":
            stack[-1], stack[-2], stack[-3], stack[-4] = stack[-2], stack[-3], stack[-4], stack[-1]
        elif opname == "DUP_TOP":
            stack.append(stack[-1])
        elif opname == "DUP_TOP_TWO":
            stack.extend(stack[-2:])
        elif opname == "LOAD_ASSERTION_ERROR":
            stack.append("AssertionError")
        elif opname in ("LOAD_NAME", "LOAD_GLOBAL"):
            stack.append(instruction.argrepr)
        elif opname in ("LOAD_ATTR", "LOAD_METHOD"):
            # an unknown receiver stays unknown; LOAD_METHOD's extra slot is
            # added by the stack-effect fix-up at the end of the loop body
            if stack[-1]:
                stack[-1] = stack[-1] + "." + instruction.argrepr
        elif opname in ("LOAD_FAST", "LOAD_CLOSURE", "LOAD_DEREF", "LOAD_CLASSDEREF"):
            if inspect.iscode(instruction.argval):
                stack.append(None)
            else:
                stack.append(instruction.argrepr)
        elif opname == "CALL_FUNCTION":
            # layout: callable, arg1 .. argN (top of stack)
            args = _pop_args(stack, instruction.arg)
            callee = stack.pop()
            if callee is not None:
                # results of earlier calls and constants are None slots
                rendered = ", ".join("?" if a is None else a for a in args)
                called.append(f"{callee}({rendered})")
            stack.append(None)
        elif opname == "CALL_FUNCTION_KW":
            # layout: callable, arg1 .. argN, tuple of keyword names (top)
            # TODO get the names of keyword arguments
            stack.pop()
            _pop_args(stack, instruction.arg)
            callee = stack.pop()
            if callee is not None:
                called.append(callee)
            stack.append(None)
        elif opname == "CALL_FUNCTION_EX":
            # layout: callable, positional iterable, optional kwargs mapping
            # TODO get the arguments
            if instruction.arg & 0x01:
                stack.pop()
            stack.pop()
            callee = stack.pop()
            if callee is not None:
                called.append(callee)
            stack.append(None)
        elif opname == "CALL_METHOD":
            # layout: dotted method name, self/NULL slot, arg1 .. argN (top)
            # TODO get the arguments
            _pop_args(stack, instruction.arg)
            stack.pop()
            callee = stack.pop()
            if callee is not None:
                called.append(callee)
            stack.append(None)
        elif opname == "ROT_N":
            tos = stack.pop()
            stack.insert(1 - instruction.arg, tos)
        # Whatever the branch above did (or did not do), force the stack to
        # the depth the interpreter would have after this instruction.
        stack_effect = dis.stack_effect(instruction.opcode, instruction.arg)
        while old_stack_len + stack_effect < len(stack):
            stack.pop()
        while old_stack_len + stack_effect > len(stack):
            stack.append(None)
    return called
def get_frames(code):
    """Yield ``code`` and then, depth-first, every code object nested in it.

    Nested code objects are looked up in ``co_consts`` and visited in the
    order they appear there, each one followed by its own nested objects.
    """
    pending = [code]
    while pending:
        current = pending.pop()
        yield current
        nested = [const for const in current.co_consts if inspect.iscode(const)]
        # reversed so the first nested object is the next one popped
        pending.extend(reversed(nested))
def help_fun(a, b):
    """Return ``a * b``."""
    product = a * b
    return product
# Sample analysed by main(): plain calls (sum, len, help_fun) mixed with
# dotted calls (np.multiply.reduce, math.sin, math.sqrt).
def test1(numbers):
    r1, r2 = np.multiply.reduce(numbers), sum(numbers)/len(numbers)
    r = math.sin(help_fun(r1, r2))  # help_fun call nested inside math.sin
    return math.sqrt(r)
# Sample with two nested list comprehensions: list(n) is called in the outer
# one, math.sqrt in the inner one.
def test2(x):
    return [[math.sqrt(m) for m in list(n)] for n in x]
# Sample whose comprehension unpacks every item from zip(x, y) into two names
# before calling pow.
def test3(x,y):
    return [pow(i,j) for i,j in zip(x,y)]
def main():
    """Print a dict mapping each frame index of test1 to its detected calls."""
    frames = get_frames(test1.__code__)
    out = {
        index: iterate_stack(dis.get_instructions(frame))
        for index, frame in enumerate(frames)
    }
    print(out)


main()
This gives the expected result for all three of your examples, under Python 3.10.8 (main, Nov 14 2022, 00:00:00) [GCC 12.2.1 20220819 (Red Hat 12.2.1-2)] on linux. It will not work on Python 3.11 because bytecode changes every version.
I would really recommend against using this code, because it will break all the time. You should probably just build a pylint checker, or at least use a library like astroid.
Upvotes: 2