Michael Petrochuk
Michael Petrochuk

Reputation: 497

Low-overhead tracing function in Python by modify the code object

sys.settrace is inefficient, to say the least. It adds a lot of overhead to every function call in Python.

Instead, I'm looking for a way to trace the "call" event for only a couple hundred functions in Python, without incurring additional overhead. I'm thinking of modifying the code object for each function so that they all call a tracing function.

import sys


def trace():
    frame = sys._getframe(1)
    ...


def func():
    trace()  # TODO: Add programmatically
    ...

Does anyone know how to do this? So far, I've found these resources...

I can't be the only person interested in low overhead localized tracing? Thank you for any help you can provide me!

Upvotes: 1

Views: 458

Answers (1)

Michael Petrochuk
Michael Petrochuk

Reputation: 497

Okay, so following the approach outlined in Modifying Code of Function at Runtime, I took a stab at it...

# boo.py
import inspect
import sys
import types


def ___trace():
    """
    NOTE: This function needs a unique name, so, it doesn't interfere with globals.
    """
    frame = sys._getframe(1)
    print("Tracing:", frame.f_code.co_name)
    print("Locals:", frame.f_locals)


def _unwrap(fn):
    while hasattr(fn, "__wrapped__"):
        fn = fn.__wrapped__
    return fn


_orig_code = "___trace_orig_code"
_closure = "___closure"


def set_trace(fn):
    """Call `trace` at the beginning of `fn`."""
    fn = _unwrap(fn)
    if getattr(fn, _orig_code, None) is None:
        setattr(fn, _orig_code, fn.__code__)
    else:
        raise ValueError("Function is already being traced.")
    lines = inspect.getsourcelines(fn)[0]
    lines = lines[next(i for i, l in enumerate(lines) if "@" != l[0]) :]
    init_indent = len(lines[0]) - len(lines[0].lstrip())
    lines = ["    " + l[init_indent:] for l in lines]
    whitespace = lines[1][: len(lines[1]) - len(lines[1].lstrip())]
    # NOTE: So that the line numbers remain the name, the trace function is added to the first
    # line.
    lines[1] = f"{whitespace}{___trace.__name__}(); {lines[1].strip()}\n"
    free_vars = " ".join([f"    {var} = None;" for var in fn.__code__.co_freevars])
    code = "".join(lines)
    code = f"""
def {_closure}():
{free_vars}

{code}

    return {fn.__name__}.__code__
"""
    module = fn.__globals__.copy()
    try:
        exec(code, module)
    except SyntaxError:
        raise SyntaxError("Unable to add `___trace` to function definition.")
    new = module[_closure]()
    fn.__code__ = types.CodeType(
        fn.__code__.co_argcount,
        fn.__code__.co_posonlyargcount,
        fn.__code__.co_kwonlyargcount,
        fn.__code__.co_nlocals,
        fn.__code__.co_stacksize,
        fn.__code__.co_flags,
        new.co_code,
        fn.__code__.co_consts,
        tuple([___trace.__name__] + list(fn.__code__.co_names)),
        fn.__code__.co_varnames,
        fn.__code__.co_filename,
        fn.__code__.co_name,
        fn.__code__.co_firstlineno,
        new.co_lnotab,
        fn.__code__.co_freevars,
        fn.__code__.co_cellvars,
    )

    if ___trace.__name__ in fn.__globals__:
        if fn.__globals__[___trace.__name__] is not ___trace:
            raise RuntimeError()
    else:
        fn.__globals__[___trace.__name__] = ___trace


def unset_trace(fn):
    """Remove `trace` from the beginning of `fn`."""
    if hasattr(fn, _orig_code):
        fn.__code__ = getattr(fn, _orig_code)

And I tested this on classes, closures, decorators, line numbers, traceback, globals...

import functools
import sys
import traceback
import typing

from boo import set_trace, unset_trace


def testing(*args, **kwargs):
    print("Inside `testing`...")


def testing_closure(*args, **kwargs):
    variable = "hello!"

    def func(*args, **kwargs):
        print(f"Inside `testing_closure`... {variable}")

    func()
    set_trace(func)
    func()


def test_lineno(*args, **kwargs):
    print("Inside `test_lineno`...")
    print("Line no", sys._getframe(0).f_lineno)


def test_args(a: typing.List, b: typing.List):
    print("Inside `test_args`...")


def test_traceback(*args, **kwargs):
    print("".join(traceback.format_stack()))


def test_cellvars():
    print("Inside `test_cellvars`...")

    a = 10

    def func():
        return a

    return func()


def test_funky_first_line():
    def func():
        return


@functools.lru_cache()
@functools.lru_cache()
def testing_decorator(*args, **kwargs):
    print("Inside `testing_decorator`...")


class Test:
    def __init__(self, *args, **kwargs):
        print("Inside `__init__`...")


if __name__ == "__main__":
    set_trace(testing)
    testing()
    unset_trace(testing)
    testing()

    Test()
    set_trace(Test.__init__)
    Test()
    set_trace(testing_decorator)
    testing_decorator()
    testing_closure()
    set_trace(test_lineno)
    test_lineno()
    set_trace(test_traceback)
    test_traceback()
    set_trace(test_cellvars)
    test_cellvars()
    set_trace(test_args)
    test_args(["a"], ["b"])
    try:
        set_trace(test_funky_first_line)
    except SyntaxError:
        print("Unable to modify handle.")

Let me know if I'm missing anything, please! I don't have much experience with code editing.

Here is my fully working library implementation (tested in a production code base!): https://github.com/PetrochukM/HParams/blob/master/config/trace.py

Upvotes: 1

Related Questions