Nadav
Nadav

Reputation: 2727

dataclasses: how to ignore default values using asdict()?

I would like to ignore the default values after calling asdict()

# Example dataclass from the question: `a` is required, `b` defaults to True.
@dataclass
class A:
    a: str
    b: bool = True

so if I call

# Desired behaviour the question asks for: `b` is left at its default, so it
# should not appear in the resulting dict.
a = A("1")
result = asdict(a, ignore_default=True) 
assert {"a": "1"} == result  # the "b": True should be deleted

Upvotes: 8

Views: 2793

Answers (3)

JL Peyret
JL Peyret

Reputation: 12204

Here's a stab at it relying on some introspection of A.__init__ signature.

It uses the dict_factory argument from S.B's answer, with functools.lru_cache so that the factory for each class is computed only once.

from dataclasses import dataclass, asdict
from functools import lru_cache, partial
from typing import Any

# Sample dataclass used by the demo below: `a` is required, `b` defaults to True.
@dataclass
class A:
    a: str
    b: bool = True

def build_set(di: dict[str, Any]) -> set[tuple[str, Any]]:
    """Return the ``(key, value)`` pairs of ``di`` as a set.

    Every value must be hashable, because each pair becomes a set member.
    """
    return set(di.items())

def get_defaults(cls_: type) -> set[tuple[str, Any]]:
    """Build the set of ``(name, default)`` pairs declared by ``cls_.__init__``.

    The defaults are read from the constructor's signature, so positional and
    keyword-only parameters are both covered, and a constructor that declares
    no defaults at all simply yields an empty set.

    Args:
        cls_: The class whose ``__init__`` is inspected.

    Returns:
        One ``(parameter_name, default_value)`` tuple for every ``__init__``
        parameter that has a default.

    Raises:
        TypeError: If a default value is unhashable (it cannot be a set member).
    """
    # Local import: the file's import lines are outside this block.
    import inspect

    parameters = inspect.signature(cls_.__init__).parameters.values()
    return {
        (param.name, param.default)
        for param in parameters
        # ``Parameter.empty`` marks "no default"; ``None`` is a valid default.
        if param.default is not inspect.Parameter.empty
    }

@lru_cache
def calc_asdict(cls, ignore_default=False):
    """Return the ``asdict`` callable to use for instances of ``cls``.

    The result is memoised by ``lru_cache``, so the defaults of a class are
    looked up only once per ``(cls, ignore_default)`` combination.

    Args:
        cls: The dataclass whose ``__init__`` defaults should be stripped.
        ignore_default: When falsy, plain ``dataclasses.asdict`` is returned.

    Returns:
        ``dataclasses.asdict`` itself, or a ``functools.partial`` of it whose
        ``dict_factory`` drops every pair equal to a declared default.

    Note:
        ``asdict`` applies the same ``dict_factory`` to nested dataclass
        instances, so the defaults of ``cls`` are matched by field name there
        as well.
    """
    print(f"***calc_asdict({cls=},{ignore_default=})***")
    # Truthiness rather than ``is False``: the cache already treats ``0`` and
    # ``False`` as the same key, so they must select the same branch.
    if not ignore_default:
        return asdict
    # name -> default; a mapping lets us compare values without hashing them.
    defaults = dict(get_defaults(cls))

    def remove_defaults(args):
        """Build a dict from ``args``, skipping pairs equal to a default.

        Field order is preserved, and unhashable values (lists, or nested
        dataclasses that were already converted to dicts) are supported.
        """
        return {
            name: value
            for name, value in args
            if name not in defaults or defaults[name] != value
        }

    return partial(asdict, dict_factory=remove_defaults)

def asdict2(obj, *, ignore_default=False, dict_factory=dict):
    """Convert dataclass instance ``obj`` like ``asdict``, optionally dropping defaults.

    Args:
        obj: The dataclass instance to convert.
        ignore_default: When true, fields whose value equals the default
            declared on the class are left out of the result.
        dict_factory: Callable that receives the list of ``(name, value)``
            pairs and builds the mapping; defaults to ``dict``.

    Returns:
        Whatever ``dict_factory`` builds from the (possibly filtered) pairs.
    """
    _asdict = calc_asdict(obj.__class__, ignore_default=ignore_default)
    if dict_factory is dict:
        return _asdict(obj)

    if isinstance(_asdict, partial):
        # ``calc_asdict`` returned ``partial(asdict, dict_factory=<filter>)``.
        # Passing ``dict_factory`` directly would replace that filter, so run
        # the filter first and hand its surviving pairs to the caller's factory.
        strip_defaults = _asdict.keywords["dict_factory"]

        def chained_factory(pairs):
            return dict_factory(list(strip_defaults(pairs).items()))

        return _asdict(obj, dict_factory=chained_factory)

    return _asdict(obj, dict_factory=dict_factory)

# Three cases: default left implicit, non-default value, default passed explicitly.
a1 = A("a1")
a2 = A("a2",False)
a3 = A("a3",True)

# Iterate over a list snapshot of the matching globals rather than
# globals().items() directly, since the loop itself binds the module-level
# names `name` and `v` while it runs.
for name, v in [(name, v) for name, v in globals().items() if isinstance(v, A)]:
    print(f"{name:4.4} base=>{str(asdict(v)):30.30} / asdict2(I)=>{str(asdict2(v,ignore_default=True)):30.30} / asdict2=>{str(asdict2(v)):30.30} \n")


output:

***calc_asdict(cls=<class '__main__.A'>,ignore_default=True)***
***calc_asdict(cls=<class '__main__.A'>,ignore_default=False)***
a1   base=>{'a': 'a1', 'b': True}         / asdict2(I)=>{'a': 'a1'}                    / asdict2=>{'a': 'a1', 'b': True}         

a2   base=>{'a': 'a2', 'b': False}        / asdict2(I)=>{'a': 'a2', 'b': False}        / asdict2=>{'a': 'a2', 'b': False}        

a3   base=>{'a': 'a3', 'b': True}         / asdict2(I)=>{'a': 'a3'}                    / asdict2=>{'a': 'a3', 'b': True}         

And... no, it cannot differentiate between defaults that were simply not passed in and defaults that arise because you called A(a=1, b=True), i.e. you explicitly provided a value equal to the default.


Not going to repost everything, but changing the classes to use inheritance resulted in exactly the same, correct, results. Not too surprising as __init__ is a generated method on dataclasses.

# Base dataclass holding only the required field.
@dataclass
class A0:
    a: str

# Subclass of A0 that adds the defaulted field `b`.
@dataclass
class A(A0):
    b: bool = True


Upvotes: 1

Wizard.Ritvik
Wizard.Ritvik

Reputation: 11662

The dataclasses module doesn't appear to have support for detecting default values in asdict(), however the dataclass-wizard library does -- via skip_defaults argument.

Example:

from dataclasses import dataclass
from dataclass_wizard import asdict

# Same example dataclass as in the question: `b` defaults to True.
@dataclass
class A:
    a: str
    b: bool = True

# `asdict` here is the one imported from dataclass_wizard, called with
# skip_defaults=True so the defaulted `b` is omitted.
a = A("1")
result = asdict(a, skip_defaults=True)
assert {"a": "1"} == result  # the "b": True should be deleted

Further, results show it is close to 2x faster than an approach with dataclasses.asdict(). I've added the benchmark code I used for testing below.

from dataclasses import dataclass, asdict as asdict_orig, MISSING
from timeit import timeit

from dataclass_wizard import asdict

# Dataclass under benchmark: `a` is required, `b` defaults to True.
@dataclass
class A:
    a: str
    b: bool = True


def asdict_factory(cls):
    """Build a ``dict_factory`` for ``asdict`` that omits default-valued fields.

    Args:
        cls: The dataclass whose declared field defaults decide what is omitted.

    Returns:
        A callable taking the list of ``(name, value)`` pairs and returning a
        dict without the pairs whose value equals the field's declared default.

    Note:
        Only ``Field.default`` is consulted, so a field without a plain default
        is always kept. A value passed explicitly but equal to the default is
        dropped too, because it cannot be told apart from an omitted argument.
    """
    # Resolve the defaults once instead of on every pair. Fields whose default
    # is ``MISSING`` are left out of the mapping, so they are always kept.
    defaults = {
        name: spec.default
        for name, spec in cls.__dataclass_fields__.items()
        if spec.default is not MISSING
    }

    def factory(obj: list[tuple]) -> dict:
        # The same factory is applied to nested dataclasses, whose field names
        # may be unknown to ``cls``: keep such pairs instead of raising KeyError.
        return {k: v for k, v in obj if k not in defaults or defaults[k] != v}

    return factory

# Build the instance and the stdlib dict_factory once, outside the timed code.
a = A("1")
A_fact = asdict_factory(A)

# Time both approaches on the same instance; the timed snippets resolve their
# names through this module's globals.
print('dataclass_wizard.asdict():  ', timeit('asdict(a, skip_defaults=True)', globals=globals()))
print('dataclasses.asdict():       ', timeit('asdict_orig(a, dict_factory=A_fact)', globals=globals()))

result1 = asdict(a, skip_defaults=True)
result2 = asdict_orig(a, dict_factory=A_fact)

# Both approaches must drop the defaulted `b`.
assert {"a": "1"} == result1 == result2

# Both approaches must also agree when `b` is passed explicitly, whether it
# equals the default (a2) or not (a3).
a2 = A("1", True)
a3 = A("1", False)
assert asdict(a2, skip_defaults=True) == asdict_orig(a2, dict_factory=A_fact)
assert asdict(a3, skip_defaults=True) == asdict_orig(a3, dict_factory=A_fact)

Disclaimer: I am the creator and maintainer of this library.

Upvotes: 3

S.B
S.B

Reputation: 16536

You can pass a factory function to asdict() which gives you control over what you want to return from the passed object which is basically a list of key-value pair tuples.

A dataclass stores its fields in a __dataclass_fields__ attribute, which maps each field name to a Field instance. If a field's default is MISSING, the field has no default value, so keep it. Likewise, if the value differs from the field's default, the user provided that parameter, so keep it too. The caveat is that a parameter that was not passed cannot be distinguished from one passed with the same value as the default:

from dataclasses import dataclass, asdict, MISSING


def asdict_factory(cls):
    """Build a ``dict_factory`` for ``asdict`` that omits default-valued fields.

    Args:
        cls: The dataclass whose declared field defaults decide what is omitted.

    Returns:
        A callable taking the list of ``(name, value)`` pairs and returning a
        dict without the pairs whose value equals the field's declared default.

    Note:
        Only ``Field.default`` is consulted, so a field without a plain default
        is always kept. A value passed explicitly but equal to the default is
        dropped too, because it cannot be told apart from an omitted argument.
    """
    # Resolve the defaults once instead of on every pair. Fields whose default
    # is ``MISSING`` are left out of the mapping, so they are always kept.
    defaults = {
        name: spec.default
        for name, spec in cls.__dataclass_fields__.items()
        if spec.default is not MISSING
    }

    def factory(obj: list[tuple]) -> dict:
        # The same factory is applied to nested dataclasses, whose field names
        # may be unknown to ``cls``: keep such pairs instead of raising KeyError.
        return {k: v for k, v in obj if k not in defaults or defaults[k] != v}

    return factory


# Example dataclass: `a` has no default, `b` defaults to True.
@dataclass
class A:
    a: str
    b: bool = True


# Three cases: default left implicit, default passed explicitly, non-default value.
a1 = A("1")
a2 = A("1", True)
a3 = A("1", False)
# A fresh factory is built for each call; all three are built from the same class A.
print(asdict(a1, dict_factory=asdict_factory(A)))
print(asdict(a2, dict_factory=asdict_factory(A)))
print(asdict(a3, dict_factory=asdict_factory(A)))

output:

{'a': '1'}
{'a': '1'}
{'a': '1', 'b': False}

Upvotes: 2

Related Questions