Reputation: 365
I have a dataclass with 5 attributes. When I pass these attributes via a dictionary, it works well. But when the dictionary has more keys than the class has attributes, the class raises a TypeError. I want the class to simply ignore any extra values. How can I do that?
from dataclasses import dataclass


@dataclass
class Employee(object):
    """Employee record with five fields, as written in the question.

    The explicit ``__new__`` below accepts exactly the five field names, so
    constructing an instance with any additional keyword argument raises
    ``TypeError`` before the dataclass-generated ``__init__`` runs.
    """

    name: str
    lastname: str
    # ``int or None`` is an ordinary expression that evaluates to ``int``.
    age: int or None
    salary: int
    department: str

    def __new__(cls, name, lastname, age, salary, department):
        # The arguments are only accepted here, not used; the instance is
        # populated afterwards by the generated ``__init__``.
        return object.__new__(cls)

    def __post_init__(self):
        """Convert a string ``age`` to ``int``; a result of 0 becomes ``None``."""
        if type(self.age) == str:
            self.age = int(self.age) or None

    def __str__(self):
        """Return ``"<name>, <lastname>, <age>"``."""
        return f'{self.name}, {self.lastname}, {self.age}'
# Sample input: the five Employee fields plus four keys the class does not declare.
dic = {"name":"abdülmutallip",
"lastname":"uzunkavakağacıaltındauzanıroğlu",
"age":"24", "salary":2000, "department":"İK",
"city":"istanbul", "country":"tr", "adres":"yok", "phone":"0033333"}
# Unpacking passes the undeclared keys as keyword arguments too; this is the
# call that fails with the TypeError reported below.
a = Employee(**dic)
print(a)
The error is:
TypeError: __new__() got an unexpected keyword argument 'city'
I want the class to work properly in this situation, without any error. I don't want to add these extra attributes to the class.
Upvotes: 5
Views: 12867
Reputation: 1122172
If you want the dataclass to accept arbitrary extra keyword arguments then you either have to define your own `__init__` method, or provide a custom `__call__` method on a metaclass. If you define a custom `__init__` method, the `dataclass` decorator won't generate one for you; at this point there is no need to use `__post_init__` any more either, since you are already writing an `__init__` method.
Side notes:
- `__new__` can't alter what arguments are passed to `__init__`. The metaclass's `__call__` will normally first call `cls.__new__(<arguments>)`, then call `instance.__init__(<arguments>)` on the instance return value from `__new__`; see the datamodel documentation.
- You can't use `int or None`; that's an expression that just returns `int`, and it won't let you omit the `age` parameter. Give the field a default value instead, or use a `Union` type hint if `None` is only used to indicate age=0 or a failed `int()` conversion.
- Fields with a default value must come after fields without one, so if you give `age` a default, put `age` at the end.
- If `age` is meant to be an optional field, then use `typing.Optional` to properly mark the type of the `age` field as optional. `Optional[int]` is equivalent to `Union[int, None]`; personally I prefer the latter in constructors when there is no default value set and omitting `age` is not acceptable.
- Use `isinstance()` to determine if an object is a string. Or just don't test, since `int(self.age)` just returns `self.age` unchanged if it already is set to an integer.
- Only use `or None` in the `__post_init__` method if it is okay for an age set to `0` to be set to `None`.
- If `age` is to be set to `None` only if `int(age)` fails, then you have to use `try:...except` to handle the `ValueError` or `TypeError` exceptions that `int()` can raise in that case, not `or None`.

Assuming that you meant for `age` to be set to `None` only if conversion fails:
from dataclasses import dataclass
from typing import Any, Union


@dataclass
class Employee(object):
    """Employee record whose constructor ignores unknown extra arguments.

    A hand-written ``__init__`` replaces the one ``@dataclass`` would
    generate; the decorator still supplies ``__repr__`` and ``__eq__``.
    """

    name: str
    lastname: str
    age: Union[int, None]  # set to None if conversion fails
    salary: int
    department: str

    def __init__(
        self,
        name: str,
        lastname: str,
        age: Union[int, None],
        salary: int,
        department: str,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Store the five fields and discard any surplus arguments.

        Args:
            name: First name.
            lastname: Family name.
            age: Value passed to ``int()``; stored as ``None`` when the
                conversion raises ``ValueError`` or ``TypeError``.
            salary: Salary, stored as given.
            department: Department, stored as given.
            *args: Extra positional arguments; accepted and ignored.
            **kwargs: Extra keyword arguments; accepted and ignored.
        """
        self.name = name
        self.lastname = lastname
        try:
            self.age = int(age)
        except (ValueError, TypeError):
            # could not convert age to an integer
            self.age = None
        self.salary = salary
        self.department = department

    def __str__(self):
        """Return ``"<name>, <lastname>, <age>"``."""
        return f'{self.name}, {self.lastname}, {self.age}'
If you want to go the metaclass route, then you can create one that ignores all extra arguments for almost any class, by introspecting the __init__
or __new__
method call signature:
from inspect import signature, Parameter
class _ArgTrimmer:
    """Pick out the call arguments that a given signature can accept.

    One handler per ``inspect.Parameter`` kind copies the matching argument,
    when the caller supplied it, into ``new_args`` or ``new_kw``. Arguments
    that no handler copies are absent from the result of :meth:`trim`.
    """

    def __init__(self):
        self.new_args, self.new_kw = [], {}
        # Parameter kind -> bound handler method.
        self.dispatch = {
            Parameter.POSITIONAL_ONLY: self.pos_only,
            Parameter.KEYWORD_ONLY: self.kw_only,
            Parameter.POSITIONAL_OR_KEYWORD: self.pos_or_kw,
            Parameter.VAR_POSITIONAL: self.starargs,
            Parameter.VAR_KEYWORD: self.starstarkwargs,
        }

    def pos_only(self, p, i, args, kwargs):
        """Keep the positional value at index *i* if one was supplied."""
        if i < len(args):
            self.new_args.append(args[i])

    def kw_only(self, p, i, args, kwargs):
        """Move the keyword named after *p* from *kwargs* into the result."""
        if p.name in kwargs:
            self.new_kw[p.name] = kwargs.pop(p.name)

    def pos_or_kw(self, p, i, args, kwargs):
        """Keep *p* from the positional slot if filled, else from *kwargs*."""
        if i < len(args):
            self.new_args.append(args[i])
            # drop if also in kwargs, otherwise parameters collide
            # if there's a VAR_KEYWORD parameter to capture it
            kwargs.pop(p.name, None)
        elif p.name in kwargs:
            self.new_kw[p.name] = kwargs[p.name]

    def starargs(self, p, i, args, kwargs):
        """Keep every positional value from index *i* onwards."""
        self.new_args.extend(args[i:])

    def starstarkwargs(self, p, i, args, kwargs):
        """Keep every keyword still present in *kwargs*."""
        self.new_kw.update(kwargs)

    def trim(self, params, args, kwargs):
        """Return ``(new_args, new_kw)`` filtered against *params*.

        Args:
            params: Ordered mapping of name -> ``inspect.Parameter`` whose
                first entry is the ``self``/``cls`` parameter.
            args: Positional arguments of the original call.
            kwargs: Keyword arguments of the original call; entries may be
                popped from this dict.
        """
        for i, p in enumerate(params.values()):
            if i:  # skip first (self or cls) arg of unbound function
                # i - 1 is the index into args once self/cls is excluded.
                self.dispatch[p.kind](p, i - 1, args, kwargs)
        return self.new_args, self.new_kw


class IgnoreExtraArgsMeta(type):
    """Metaclass that drops call arguments the class's constructor can't take."""

    def __call__(cls, *args, **kwargs):
        """Trim *args* / *kwargs* to the constructor signature, then instantiate.

        The signature is read from ``cls.__new__`` when that is not
        ``object.__new__``, otherwise from ``cls.__init__``.
        """
        if cls.__new__ is not object.__new__:
            func = cls.__new__
        else:
            func = getattr(cls, '__init__', None)
        if func is not None:
            sig = signature(func)
            args, kwargs = _ArgTrimmer().trim(sig.parameters, args, kwargs)
        return super().__call__(*args, **kwargs)
This metaclass will work for any Python class, but if you were to subclass a built-in type then the `__new__` or `__init__` methods may not be introspectable. That is not the case here, but it is a caveat you need to know about if you use the above metaclass in other situations.
Then use the above as a `metaclass` parameter on your dataclass:
from dataclasses import dataclass
from typing import Union


@dataclass
class Employee(metaclass=IgnoreExtraArgsMeta):
    """Employee record; surplus constructor arguments are dropped by the metaclass.

    ``__init__`` is generated by ``@dataclass``, so the field list is written
    only once.
    """

    name: str
    lastname: str
    age: Union[int, None]
    salary: int
    department: str

    def __post_init__(self):
        """Coerce ``age`` to ``int``; store ``None`` when that fails."""
        try:
            self.age = int(self.age)
        except (ValueError, TypeError):
            # could not convert age to an integer
            self.age = None

    def __str__(self):
        """Return ``"<name>, <lastname>, <age>"``."""
        return f'{self.name}, {self.lastname}, {self.age}'
The advantage of using a metaclass should be clear here; no need to repeat all the fields in the __init__
method.
Demo of the first approach:
>>> from dataclasses import dataclass
>>> from typing import Any, Union
>>> @dataclass
... class Employee(object):
...     name: str
...     lastname: str
...     age: Union[int, None]  # set to None if conversion fails
...     salary: int
...     department: str
...     def __init__(self,
...         name: str,
...         lastname: str,
...         age: Union[int, None],
...         salary: int,
...         department: str,
...         *args: Any,
...         **kwargs: Any,
...     ) -> None:
...         self.name = name
...         self.lastname = lastname
...         try:
...             self.age = int(age)
...         except (ValueError, TypeError):
...             # could not convert age to an integer
...             self.age = None
...         self.salary = salary
...         self.department = department
...     def __str__(self):
...         return f'{self.name}, {self.lastname}, {self.age}'
...
>>> dic = {"name":"abdülmutallip",
... "lastname":"uzunkavakağacıaltındauzanıroğlu",
... "age":"24", "salary":2000, "department":"İK",
... "city":"istanbul", "country":"tr", "adres":"yok", "phone":"0033333"}
>>> a = Employee(**dic)
>>> a
Employee(name='abdülmutallip', lastname='uzunkavakağacıaltındauzanıroğlu', age=24, salary=2000, department='İK')
>>> print(a)
abdülmutallip, uzunkavakağacıaltındauzanıroğlu, 24
>>> a.age
24
>>> Employee(name="Eric", lastname="Idle", age="too old to tell", salary=123456, department="Silly Walks")
Employee(name='Eric', lastname='Idle', age=None, salary=123456, department='Silly Walks')
and of the second approach:
>>> @dataclass
... class Employee(metaclass=IgnoreExtraArgsMeta):
...     name: str
...     lastname: str
...     age: Union[int, None]
...     salary: int
...     department: str
...     def __post_init__(self):
...         try:
...             self.age = int(self.age)
...         except (ValueError, TypeError):
...             # could not convert age to an integer
...             self.age = None
...     def __str__(self):
...         return f'{self.name}, {self.lastname}, {self.age}'
...
>>> a = Employee(**dic)
>>> print(a)
abdülmutallip, uzunkavakağacıaltındauzanıroğlu, 24
>>> a
Employee(name='abdülmutallip', lastname='uzunkavakağacıaltındauzanıroğlu', age=24, salary=2000, department='İK')
>>> Employee("Michael", "Palin", "annoyed you asked", salary=42, department="Complaints", notes="Civil servants should never be asked for their salary, either")
Employee(name='Michael', lastname='Palin', age=None, salary=42, department='Complaints')
If `age` is meant to be optional (so, have a default value), then move it to the end of the fields, give it `Optional[int]` as the type, and assign `None` to it. You'll have to do the same in the `__init__` method if you specify your own:
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class Employee(object):
    """Employee record with an optional ``age`` that ignores extra arguments."""

    name: str
    lastname: str
    salary: int
    department: str
    # Fields with a default must follow the fields without one; otherwise
    # @dataclass raises TypeError when the class is created.
    age: Optional[int] = None

    def __init__(
        self,
        name: str,
        lastname: str,
        salary: int,
        department: str,
        age: Optional[int] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Store the fields and discard any surplus arguments.

        Args:
            name: First name.
            lastname: Family name.
            salary: Salary, stored as given.
            department: Department, stored as given.
            age: Optional value passed to ``int()``; stored as ``None`` when
                omitted or when the conversion fails.
            *args: Extra positional arguments; accepted and ignored.
            **kwargs: Extra keyword arguments; accepted and ignored.
        """
        self.name = name
        self.lastname = lastname
        self.salary = salary
        self.department = department
        try:
            self.age = int(age)
        except (ValueError, TypeError):
            # age omitted (None) or not convertible to an integer
            self.age = None
Upvotes: 7