Reputation: 917
I would like to know how custom attributes of numpy arrays can be propagated, even when the array passes through functions like `np.fromfunction`.
For example, my class `ExampleTensor` defines an attribute `attr` that is set to 1 by default.
import numpy as np
class ExampleTensor(np.ndarray):
    """ndarray subclass that carries one custom attribute, ``attr``.

    ``attr`` defaults to 1 and is copied from the source array whenever
    numpy derives a new ``ExampleTensor`` from an existing one (slicing,
    view-casting, arithmetic between instances).
    """

    def __new__(cls, input_array):
        # Convert any array-like to an ndarray, then view-cast it to this
        # class; the view-cast is what triggers __array_finalize__.
        return np.asarray(input_array).view(cls)

    def __array_finalize__(self, obj) -> None:
        # This attribute should be maintained!
        # ``obj`` is the array this instance was derived from.  It is None
        # when the instance is created directly through ndarray.__new__;
        # getattr() then falls back to the default, so ``attr`` always exists.
        self.attr = getattr(obj, 'attr', 1)
Slicing and basic operations between `ExampleTensor` instances will maintain the attributes, but using other numpy functions will not (probably because they create regular numpy arrays instead of `ExampleTensor` instances). My question: Is there a solution that persists the custom attributes when a regular numpy array is constructed out of subclassed numpy array instances?
Example to reproduce problem:
ex1 = ExampleTensor([[3, 4], [5, 6]])
ex1.attr = "some val"

# Slicing an ExampleTensor keeps the custom attribute.
first_row = ex1[0]
print(first_row.attr)  # correctly outputs "some val"

# Arithmetic between two ExampleTensor instances keeps it as well.
doubled = ex1 + ex1
print(doubled.attr)  # correctly outputs "some val"

# Summing a *list* of tensors yields a plain ndarray, so the lookup fails.
summed = np.sum([ex1, ex1], axis=0)
summed.attr  # AttributeError: 'numpy.ndarray' object has no attribute 'attr'
Upvotes: 23
Views: 6092
Reputation: 805
Here is an attempt that works for operands that are not arrays, and even when our subclass is specified as the output of a numpy ufunc (explanations are in the comments):
import numpy as np
class ArraySubclass(np.ndarray):
    """ndarray subclass that carries two custom attributes, ``a`` and ``b``.

    The first constructor argument is converted with ``np.asarray`` and
    view-cast to this class, so it must be an array or array-like.

    Parameters
    ----------
    input_array : array_like
        Data to wrap.
    a : object, optional
        Custom attribute, defaults to ``None``.
    b : object, optional
        Custom attribute, defaults to ``1``.
    """

    def __new__(cls, input_array, a=None, b=1):
        obj = np.asarray(input_array).view(cls)
        obj.a = a
        obj.b = b
        return obj

    def __array_finalize__(self, obj):
        # Every attribute that __new__ sets must be set here again (with the
        # same defaults); otherwise numpy's view-casting and new-from-template
        # mechanisms would produce instances without them.
        # ``obj`` is None when the instance comes straight from
        # ndarray.__new__; getattr() then simply yields the defaults.
        self.a = getattr(obj, 'a', None)
        self.b = getattr(obj, 'b', 1)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """Apply ``ufunc`` and keep the custom attributes on the result.

        Called by numpy whenever a ufunc is applied to an instance.  Inputs
        and ``out`` arrays of this class are passed on as plain ``ndarray``
        views to prevent infinite recursion.  Fresh results are cast back to
        the class of ``self`` with its attributes copied over; arrays given
        through ``out`` are returned as the caller passed them.
        """
        args = tuple(
            i.view(np.ndarray) if isinstance(i, ArraySubclass) else i
            for i in inputs
        )

        outputs = kwargs.pop('out', None)
        if outputs:
            kwargs['out'] = tuple(
                o.view(np.ndarray) if isinstance(o, ArraySubclass) else o
                for o in outputs
            )
        else:
            outputs = (None,) * ufunc.nout

        # Call numpy's own implementation of __array_ufunc__.
        results = super().__array_ufunc__(ufunc, method, *args, **kwargs)  # pylint: disable=no-member
        if results is NotImplemented:
            return NotImplemented

        if method == 'at':
            # 'at' operates in place and has no return value, so we are done.
            return None

        # Outputs that were specified with the 'out' argument are returned
        # unchanged; everything else is wrapped.
        if ufunc.nout == 1:
            results = (results,)
        results = tuple(
            self._copy_attrs_to(result) if output is None else output
            for result, output in zip(results, outputs)
        )
        return results[0] if len(results) == 1 else results

    def _copy_attrs_to(self, target):
        """Return ``target`` viewed as ``type(self)`` carrying self's attributes.

        Results that are not ndarrays (e.g. the scalar produced by a full
        reduction) cannot hold attributes and are returned unchanged.
        """
        if not isinstance(target, np.ndarray):
            return target
        # type(self) rather than ArraySubclass, so derived classes keep
        # their own type after a ufunc.
        target = target.view(type(self))
        target.__dict__.update(self.__dict__)
        return target
and here are the corresponding unittests:
import unittest
class TestArraySubclass(unittest.TestCase):
    """Unit tests for attribute propagation in ``ArraySubclass``."""

    def setUp(self):
        self.shape = (10, 2, 5)
        self.subclass = ArraySubclass(np.zeros(self.shape))

    def test_instantiation(self):
        """The constructor wraps the data and applies attribute defaults."""
        self.assertIsInstance(self.subclass, np.ndarray)
        self.assertIs(self.subclass.a, None)
        self.assertEqual(self.subclass.b, 1)
        self.assertEqual(self.subclass.shape, self.shape)
        self.assertTrue(np.array_equal(self.subclass, np.zeros(self.shape)))
        sub2 = ArraySubclass(np.zeros(self.shape), a=2)
        self.assertEqual(sub2.a, 2)

    def test_view_casting(self):
        """View-casting a plain ndarray yields an ArraySubclass."""
        self.assertIsInstance(
            np.zeros(self.shape).view(ArraySubclass), ArraySubclass
        )

    def test_new_from_template(self):
        """Slicing keeps both the type and the attributes."""
        self.subclass.a = 5
        bla = self.subclass[3, :]
        self.assertIsInstance(bla, ArraySubclass)
        # Compare by value; identity of ints is an implementation detail.
        self.assertEqual(bla.a, 5)
        self.assertEqual(bla.b, 1)

    def test_np_min(self):
        """Full reductions still work."""
        self.assertEqual(np.min(self.subclass), 0)

    def test_ufuncs(self):
        """In-place, binary and ``ufunc.at`` operations keep attributes."""
        self.subclass.b = 2
        self.subclass += 2
        self.assertTrue(np.all(self.subclass == 2))
        self.subclass = self.subclass + np.ones(self.shape)
        self.assertTrue(np.all(self.subclass == 3))
        np.multiply.at(self.subclass, slice(0, 2), 2)
        self.assertTrue(np.all(self.subclass[:2] == 6))
        self.assertTrue(np.all(self.subclass[2:] == 3))
        self.assertEqual(self.subclass.b, 2)

    def test_output(self):
        """Using the subclass as ``out`` writes into it and keeps attributes."""
        self.subclass.a = 3
        bla = np.ones(self.shape)
        bla *= 2
        np.multiply(bla, bla, out=self.subclass)
        # 2 * 2 == 4
        self.assertTrue(np.all(self.subclass == 4))
        self.assertEqual(self.subclass.a, 3)
P.S. tempname123 got it almost right. However, his answer fails for operands that are not arrays and when his class is specified as the output of a ufunc (in the examples below, `ExampleTensor` stands for an instance of the class):
>>> ExampleTensor += 1
AttributeError: 'int' object has no attribute 'view'
>>> np.multiply(np.ones((5)), np.ones((5)), out=ExampleTensor)
RecursionError: maximum recursion depth exceeded in comparison
Upvotes: 3
Reputation: 369
import numpy as np
class ExampleTensor(np.ndarray):
    """ndarray subclass whose custom attributes survive views and slices."""

    def __new__(cls, input_array):
        # Convert any array-like to an ndarray, then view-cast it to this
        # class; the view-cast is what triggers __array_finalize__.
        return np.asarray(input_array).view(cls)

    def __array_finalize__(self, obj) -> None:
        # This attribute should be maintained!
        # Each attribute is taken from the array this instance was derived
        # from (``obj``).  The default is used when ``obj`` does not carry
        # it, or when ``obj`` is None (instance created directly through
        # ndarray.__new__).
        default_attributes = {"attr": 1}
        for name, default in default_attributes.items():
            setattr(self, name, getattr(obj, name, default))
Implement the `__array_ufunc__` method like this:
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """Apply ``ufunc`` and carry the custom attributes over to the result.

    numpy calls this whenever a ufunc (or one of its methods: ``reduce``,
    ``accumulate``, ``reduceat``, ``outer``, ``at``) is applied to an
    instance.

    Inputs and ``out`` arrays of this class are passed on as plain
    ``ndarray`` views to prevent recursion; other operands (Python scalars,
    lists, index objects) are passed through untouched.  Every freshly
    created result is cast back to ``type(self)`` and receives a *copy* of
    ``self.__dict__``, so later attribute changes on the result do not leak
    back into the operand.  Arrays supplied through ``out`` are returned as
    the caller passed them.  ``ufunc.at`` works in place and returns None.
    """
    cls = type(self)

    def as_plain(value):
        # Only instances of this class need unwrapping.
        return value.view(np.ndarray) if isinstance(value, cls) else value

    args = tuple(as_plain(i) for i in inputs)

    outputs = kwargs.pop("out", None)
    if outputs:
        kwargs["out"] = tuple(as_plain(o) for o in outputs)
    else:
        outputs = (None,) * ufunc.nout

    # "__call__" resolves to the ufunc itself, every other name to the
    # corresponding ufunc method.
    results = getattr(ufunc, method)(*args, **kwargs)

    if method == "at":
        return None

    if ufunc.nout == 1:
        results = (results,)

    wrapped = []
    for result, output in zip(results, outputs):
        if output is not None:
            wrapped.append(output)
            continue
        tensor = np.asarray(result).view(cls)
        tensor.__dict__.update(self.__dict__)  # carry forward attributes
        wrapped.append(tensor)
    return wrapped[0] if len(wrapped) == 1 else tuple(wrapped)
Test
tensor = ExampleTensor(np.array([1, 2, 3]))
tensor.attr = 2

# Plain ufunc call.
elementwise = np.add(tensor, tensor)
print(elementwise, elementwise.attr)

# Ufunc *method* call (np.add.outer rather than np.add itself).
outer = np.add.outer(tensor, tensor)
print(outer, outer.attr)  # works even if called with method
[2 4 6] 2
[[2 3 4]
[3 4 5]
[4 5 6]] 2
Explanation in the comments.
Upvotes: 11
Reputation: 1484
Which value should "propagate" if `ex1.attr != ex2.attr` for `np.sum([ex1, ex2], axis=0).attr`?
Please note that this question is more fundamental than it might first appear: how could the large variety of numpy functions possibly work out your intention by themselves? You probably cannot avoid writing an overloaded version of each "attr-aware" function, like this:
def sum(a, **kwargs):
    """Sum ``a`` with ``np.sum`` and propagate ``attr`` from its first element.

    Note that this deliberately has the same name as the builtin ``sum``.

    Parameters
    ----------
    a : array_like
        Input forwarded to ``np.sum``.
    **kwargs
        Forwarded to ``np.sum`` unchanged (e.g. ``axis``).

    Returns
    -------
    The result of ``np.sum``.  If ``a[0]`` is an ndarray (subclass) instance
    with an ``attr`` attribute, the result is view-cast to that element's
    class and receives the element's ``attr``.
    """
    sa = np.sum(a, **kwargs)
    try:
        first = a[0]
    except (IndexError, KeyError, TypeError):
        # Empty or non-indexable input: nothing to propagate from.
        return sa
    if isinstance(first, np.ndarray) and hasattr(first, 'attr'):
        # np.sum over a list of tensors returns a plain ndarray, which does
        # not accept new attributes; cast it to the element's class first.
        sa = np.asarray(sa).view(type(first))
        sa.attr = first.attr
    return sa
I am sure this is not general enough to handle any np.sum() input but should work for your example.
Upvotes: 0
Reputation: 4326
I think your example is incorrect:
>>> type(ex1)
<class '__main__.ExampleTensor'>
but
>>> type([ex1, ex1])
<class 'numpy.ndarray'>
for which your overloaded `__new__` and `__array_finalize__` are not called, as you are in fact building a plain array and not your subclass. However, they are called if you do:
>>> ExampleTensor([ex1, ex1])
which sets `attr = 1`, as you haven't defined how to propagate the attribute when building an `ExampleTensor` out of a list of `ExampleTensor` instances. You would need to define this behaviour in your subclass by overloading the relevant operations. As suggested in the comments above, it's worth taking a look at the code for `np.matrix` for inspiration.
Upvotes: 0