Reputation: 333
In numpy, is there a way to zero pad entries if I'm slicing past the end of the array, such that I get something that is the size of the desired slice?
For example,
>>> x = np.ones((3,3,))
>>> x
array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]])
>>> x[1:4, 1:4] # would behave as x[1:3, 1:3] by default
array([[ 1., 1., 0.],
[ 1., 1., 0.],
[ 0., 0., 0.]])
>>> x[-1:2, -1:2]
array([[ 0., 0., 0.],
[ 0., 1., 1.],
[ 0., 1., 1.]])
Visually, I'd like the out-of-bounds areas to be zero padded:
I'm dealing with images and would like to zero pad to signify moving off the image for my application.
My current plan is to use np.pad to make the entire array larger prior to slicing, but indexing seems to be a bit tricky. Is there a potentially easier way?
Upvotes: 23
Views: 7397
Reputation: 3721
This problem has a ton of edge-cases. My solution covers a fairly restricted problem-space: regular slices (i.e. slice objects, no masks etc.), with slice.start <= slice.stop and slice.step == 1 per dimension, ideally n-dimensional.
Prerequisites:
from typing import Optional, NamedTuple
import numpy as np
from numpy.typing import ArrayLike
class _Intv(NamedTuple):
    """Index bookkeeping for one dimension of a padded slice."""
    length: int  # number of elements requested along this dimension (stop - start)
    buff: slice  # region of the padded output buffer that receives real data
    data: slice  # region of the source array that is copied into the buffer
I want to slice from data into a buffer. A buff array shall be created, filled with padding values, before data is copied into it from a data array. For this, the correct indices/slices must first be computed for each array in every dimension. The following helper does this, where shape_n is data.shape[n]. Note that it only computes intervals and does not yet interact with the data:
def _intervals_1d(intv: slice, shape_n: int) -> _Intv:
    """Compute matching buffer/data slices for one dimension of a padded slice.

    Negative bounds are taken literally (positions *before* the data), not as
    "counted from the end".  No array is touched here; only indices are
    computed.

    Args:
        intv: Requested interval.  ``start``/``stop`` may be ``None`` (meaning
            ``0`` and ``shape_n``) and may lie outside ``[0, shape_n]``.
        shape_n: Length of the data along this dimension.

    Returns:
        An ``_Intv`` holding the requested length, the slice into the padded
        buffer and the slice into the data.  Both slices always select the
        same number of elements.

    Raises:
        ValueError: If the slice has a step other than 1, or start > stop.
    """
    # Explicit exceptions instead of ``assert``: asserts vanish under ``-O``.
    if intv.step not in (None, 1):
        raise ValueError("only slices with a step of 1 are supported")
    start = 0 if intv.start is None else intv.start
    stop = shape_n if intv.stop is None else intv.stop
    if start > stop:
        raise ValueError("slice start must not be greater than slice stop")
    length = stop - start

    # Clamp BOTH ends into the data so that an interval lying completely
    # outside of it collapses to an empty copy instead of mismatched slices.
    data_start = min(max(start, 0), shape_n)
    data_stop = min(max(stop, 0), shape_n)
    if data_start == data_stop:
        # Nothing overlaps the data: the buffer stays pure padding.
        return _Intv(length, slice(0, 0), slice(0, 0))

    # Shift the overlapping region into buffer coordinates.
    buff_start = data_start - start
    buff_stop = data_stop - start
    return _Intv(length, slice(buff_start, buff_stop), slice(data_start, data_stop))
It can be applied to the 1D case:
def paddedslice_1d(data: ArrayLike, intv: slice, fill_value: Optional[float] = np.nan) -> ArrayLike:
    """Slice a 1-D array, padding positions that fall outside of it.

    Args:
        data: One-dimensional array-like (lists and tuples are converted).
        intv: Requested interval; bounds may lie outside the data.
        fill_value: Value written to padded positions.  NaN is not
            representable in integer dtypes, so pass an explicit value for
            integer data.

    Returns:
        A new array with ``data``'s dtype and the length requested by ``intv``.

    Raises:
        ValueError: If ``data`` is not one-dimensional.
    """
    # The signature promises ArrayLike, so convert before reading .ndim/.dtype.
    data = np.asanyarray(data)
    if data.ndim != 1:
        raise ValueError("data must be one-dimensional")
    bounds = _intervals_1d(intv, data.shape[0])
    # Start from a buffer of pure padding, then paste the overlapping data.
    buff = np.full((bounds.length,), fill_value=fill_value, dtype=data.dtype)
    buff[bounds.buff] = data[bounds.data]
    return buff
The following tests are passing:
# Table-driven checks: each entry pairs a requested interval with the
# expected result; -99 marks padded positions.
data_1d = np.array([10, 11, 12, 13])
for intv, expected in (
    (slice(None), data_1d),
    (slice(1, 3), np.array([11, 12])),
    (slice(-2, 2), np.array([-99, -99, 10, 11])),
    (slice(2, 6), np.array([12, 13, -99, -99])),
    (slice(-2, 6), np.array([-99, -99, 10, 11, 12, 13, -99, -99])),
):
    test = paddedslice_1d(data_1d, intv=intv, fill_value=-99)
    assert np.all(test == expected)
Based on the initial helper function, the solution can be generalized to n dimensions:
def paddedslice_nd(data: ArrayLike, *intvs: slice, fill_value: Optional[float] = np.nan) -> ArrayLike:
    """Slice an n-dimensional array, padding positions that fall outside of it.

    Args:
        data: Array-like with one dimension per entry in ``intvs``.
        *intvs: One slice per dimension; bounds may lie outside the data.
        fill_value: Value written to padded positions.  NaN is not
            representable in integer dtypes, so pass an explicit value for
            integer data.

    Returns:
        A new array with ``data``'s dtype whose shape is given by the
        requested interval lengths.

    Raises:
        ValueError: If the number of slices differs from ``data.ndim``.
    """
    # The signature promises ArrayLike, so convert before reading .ndim/.dtype.
    data = np.asanyarray(data)
    if data.ndim != len(intvs):
        raise ValueError("exactly one slice per data dimension is required")
    bounds = [_intervals_1d(intv, shape_n) for intv, shape_n in zip(intvs, data.shape)]
    # Start from a buffer of pure padding, then paste the overlapping data.
    buff = np.full([b.length for b in bounds], fill_value=fill_value, dtype=data.dtype)
    buff[tuple(b.buff for b in bounds)] = data[tuple(b.data for b in bounds)]
    return buff
The following tests are passing:
# Table-driven checks: each entry pairs the per-dimension intervals with the
# expected result; -99 marks padded positions.
data_2d = np.arange(11, 20).reshape(3, 3)
for intvs, expected in (
    ((slice(None), slice(None)), data_2d),
    (
        (slice(-1, None), slice(-1, None)),
        np.array([[-99, -99, -99, -99], [-99, 11, 12, 13], [-99, 14, 15, 16], [-99, 17, 18, 19]]),
    ),
    (
        (slice(-1, 2), slice(-1, 2)),
        np.array([[-99, -99, -99], [-99, 11, 12], [-99, 14, 15]]),
    ),
):
    test = paddedslice_nd(data_2d, *intvs, fill_value=-99)
    assert np.all(test == expected)
More complicated edge-cases can be addressed relatively easily by altering the helper function _intervals_1d, which is left as an exercise to the reader ;)
Upvotes: 0
Reputation: 1171
For the simplest case of rank 2 or 3 images, here is an example of how to implement zero-padded "slicing" with out-of-bounds indices:
def padded_slice(img, sl):
    """Crop the first two axes of ``img`` to a window, zero-filling outside.

    Args:
        img: Array of rank 2 or higher; any axes after the first two are
            kept in full.
        sl: ``[row_start, row_stop, col_start, col_stop]``.  Values may be
            negative or exceed the image size; negative values mean
            positions before the image, not "counted from the end".

    Returns:
        A new array of shape ``(row_stop - row_start, col_stop - col_start,
        ...)`` with ``img``'s dtype.  Positions outside the image are zero.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]

    output_shape = np.asarray(img.shape)
    output_shape[0] = sl[1] - sl[0]
    output_shape[1] = sl[3] - sl[2]
    output = np.zeros(output_shape, dtype=img.dtype)

    # Clamp BOTH ends of each range into the image.  Clamping only one side
    # lets a window that lies completely outside the image produce source and
    # destination regions of different sizes.
    src = [min(max(sl[0], 0), n_rows),
           min(max(sl[1], 0), n_rows),
           min(max(sl[2], 0), n_cols),
           min(max(sl[3], 0), n_cols)]
    # Same region expressed in output coordinates.
    dst = [src[0] - sl[0], src[1] - sl[0],
           src[2] - sl[2], src[3] - sl[2]]
    output[dst[0]:dst[1], dst[2]:dst[3]] = img[src[0]:src[1], src[2]:src[3]]
    return output
For example, call this function with padded_slice(img, [-10, 150, -10, 150])
on a 100x100 image and it will return a 160x160 zero-padded image.
Upvotes: 1
Reputation: 5918
import numpy as np
def fill_crop(img, pos, crop):
    """Copy the part of ``img`` covered by a window into ``crop``, in place.

    The window starts at ``pos`` and has the shape of ``crop``.  Wherever the
    window hangs off an edge of ``img``, the corresponding part of ``crop``
    keeps whatever values it already had.

    Note: negative entries in ``pos`` are literal positions before the start
    of ``img``; they are not interpreted as "from the end".
    """
    img_extent = np.array(img.shape)
    origin = np.array(pos)
    crop_extent = np.array(crop.shape)
    far_corner = origin + crop_extent

    # Destination window inside ``crop``: skip what lies before the image
    # start and cut off what lies beyond the image end.
    dst_low = np.clip(0 - origin, a_min=0, a_max=crop_extent)
    overshoot = np.clip(far_corner - img_extent, a_min=0, a_max=crop_extent)
    dst_high = crop_extent - overshoot
    dst_window = tuple(slice(lo, hi) for lo, hi in zip(dst_low, dst_high))

    # Source window inside ``img``: the requested window clamped to the image.
    src_low = np.clip(origin, a_min=0, a_max=img_extent)
    src_high = np.clip(far_corner, a_min=0, a_max=img_extent)
    src_window = tuple(slice(lo, hi) for lo, hi in zip(src_low, src_high))

    crop[dst_window] = img[src_window]
If memory is a concern, then copying the image into a padded version might not be good. This also works well for higher dimensional inputs, and it's clear how to return indices/slices if you needed those.
To indicate the padded value, we can instead create the memory for the crop ahead of time with np.zeros/np.full, then fill in the part that we need. The difficulty then isn't working out where to copy from, but instead, where to paste inside the crop.
Let's look at a 1D case:
If you think about it a little bit, you can see that:
- crop_low is as far above 0 as pos is below 0, but if pos >= 0, then crop_low == 0
- crop_high is as far below crop.shape as end is above img.shape, but if end <= img.shape, then crop_high == crop.shape
If we put this into normal python code, it would look like this:
# Scalar (1-D) sketch of the clipping done in fill_crop; crop.shape and
# img.shape stand for the lengths along that single axis here.
crop_low = max(-pos, 0)
crop_high = crop.shape - max(end-img.shape, 0)
The rest of the code above is just for indexing.
# Examples in 1 dimension
img = np.arange(10, 20)
# Normal
pos = np.array([1,])
crop = np.full([5,], 0)
fill_crop(img, pos, crop)
assert crop.tolist() == [11, 12, 13, 14, 15]
# Off end
pos = np.array([8,])
crop = np.full([5,], 0)
fill_crop(img, pos, crop)
assert crop.tolist() == [18, 19, 0, 0, 0]
# Off start
pos = np.array([-2,])
crop = np.full([5,], 0)
fill_crop(img, pos, crop)
assert crop.tolist() == [ 0, 0, 10, 11, 12]
# Example in 2 dimensions (y,x)
img = np.arange(10, 10+10*10)\
.reshape([10, 10])
# Off Top right
pos = np.array([-2, 8])
crop = np.full([5, 5], 0)
fill_crop(img, pos, crop)
assert np.all(crop[:2] == 0) # That is, the top two rows are 0s
assert np.all(crop[:, 3:] == 0) # That is, the right 3 rows are 0s
assert np.all(crop[2:, :2] == img[:3, 8:])
# That is, the rows 2-5 and columns 0-1 in the crop
# are the same as the top 3 rows and columns 8 and 9 (the last two columns)
And there we have it. The over-engineered answer to the original question.
Upvotes: 3
Reputation: 2875
For the 1D case I did the following; it may be useful to anyone who lands here...
def getPaddedSlice(npArray, pos, lenSegment, center=False):
    """Return ``lenSegment`` items of a 1-D array, zero-padded out of bounds.

    Args:
        npArray: One-dimensional array (array-likes are converted).
        pos: Index of the first item of the window, or of its centre item
            when ``center`` is true.  May be negative or past the end;
            negative values mean positions before the array, not "counted
            from the end".
        lenSegment: Number of items to return.
        center: If true, centre the window on ``pos``.  For an even
            ``lenSegment`` the window holds one more item after ``pos``
            than before it.

    Returns:
        An array of exactly ``lenSegment`` items with the input's dtype.
        When the window lies fully inside the array the plain slice (a view)
        is returned; otherwise a new zero-padded array.
    """
    data = np.asarray(npArray)
    size = len(data)

    if center:
        startIndex = int(pos - lenSegment // 2)
        if lenSegment % 2 == 0:
            # No exact middle for an even length: shift the window right by one.
            startIndex += 1
    else:
        startIndex = pos
    # Deriving the end from the start keeps the length at lenSegment for
    # odd and even windows alike.
    lastIndex = startIndex + lenSegment

    if startIndex >= 0 and lastIndex <= size:
        return data[startIndex:lastIndex]

    # Clamp the window to the data and paste that part into a zero buffer;
    # this covers padding before, after, on both sides, and no overlap at all.
    src_start = min(max(startIndex, 0), size)
    src_stop = min(max(lastIndex, 0), size)
    padded_slice = np.zeros(max(lenSegment, 0), dtype=data.dtype)
    padded_slice[src_start - startIndex:src_stop - startIndex] = data[src_start:src_stop]
    return padded_slice
Usage
a = np.asarray([2,2,3,1,7,6,5,4])
lenSegment = 4  # window length; was left undefined in the snippet
for i in range(len(a)):
    # Centre a window of lenSegment items on every position in turn.
    b = getPaddedSlice(a, i, lenSegment, True)
    print(b)
Display
[0 2 2 3]
[2 2 3 1]
[2 3 1 7]
[3 1 7 6]
[1 7 6 5]
[7 6 5 4]
[6 5 4 0]
[5 4 0 0]
Upvotes: 0
Reputation: 152667
As far as I know there is no numpy solution (nor one in any package I know) for such a problem. You could do it yourself, but it will be really, really complicated even if you only want basic slicing. I would suggest you manually np.pad your array and simply offset your start/stop/step before you actually slice it.
However if all you need to support are integers and slices without step I have some "working code" for this:
import numpy as np
class FunArray(np.ndarray):
    """ndarray subclass whose out-of-bounds slices are zero-padded.

    Only integers and slices are supported as indices.  A slice's ``step``
    is ignored.  Padding is added where a slice start is negative or a slice
    stop exceeds the axis length.
    """

    def __getitem__(self, item):
        """Index the array, zero-padding slices that run out of bounds.

        Args:
            item: A single integer, a single slice, or a sequence holding at
                most one integer/slice per dimension.  Missing trailing
                dimensions are taken in full.

        Returns:
            For a single integer, whatever plain numpy indexing returns.
            Otherwise the padded result of ``np.pad``.  An integer inside a
            sequence keeps its axis with length one and is never padded.

        Raises:
            IndexError: If more indices than dimensions are given, or an
                index is neither an integer nor a slice.
        """
        # A lone integer is handed straight to numpy.
        if isinstance(item, (int, np.integer)):
            return super().__getitem__(item)

        # Anything without a length (e.g. a single slice) addresses only the
        # first axis; the remaining axes are filled in by the loop below.
        try:
            n_items = len(item)
        except TypeError:
            item = (item,)
            n_items = 1
        if n_items > self.ndim:
            raise IndexError("too many indices for array")

        all_in_slices = []
        pad = []
        for dim in range(self.ndim):
            size = self.shape[dim]

            # We're out of items, just take the whole axis without padding.
            if dim >= n_items:
                all_in_slices.append(slice(0, size))
                pad.append((0, 0))
                continue

            index = item[dim]
            if isinstance(index, (int, np.integer)):
                # Integer: select that single position, no padding even if
                # it is out of bounds.  For -1 the stop would be 0 and the
                # slice empty, so leave the stop open instead.
                stop = index + 1
                all_in_slices.append(slice(index, stop if stop != 0 else None))
                pad.append((0, 0))
            elif isinstance(index, slice):
                pad_before = pad_after = 0
                start = 0 if index.start is None else index.start
                if start < 0:
                    # A negative start means padding in front of the axis.
                    pad_before, start = -start, 0
                stop = size if index.stop is None else index.stop
                if stop > size:
                    # A stop beyond the axis means padding behind it.
                    pad_after, stop = stop - size, size
                all_in_slices.append(slice(start, stop))
                pad.append((pad_before, pad_after))
            else:
                # Skipping the entry silently would misalign the padding.
                raise IndexError("only integers and slices are supported as indices")

        # Let numpy deal with the in-bounds slicing ...
        ret = super().__getitem__(tuple(all_in_slices))
        # ... and with the padding.
        ret = np.pad(ret, tuple(pad), mode='constant', constant_values=0)
        return ret
This can be used as follows:
>>> x = np.arange(9).reshape(3, 3)
>>> x = x.view(FunArray)
>>> x[0:2]
array([[0, 1, 2],
[3, 4, 5]])
>>> x[-3:2]
array([[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 1, 2],
[3, 4, 5]])
>>> x[-3:2, 2]
array([[0],
[0],
[0],
[2],
[5]])
>>> x[-1:4, -1:4]
array([[0, 0, 0, 0, 0],
[0, 0, 1, 2, 0],
[0, 3, 4, 5, 0],
[0, 6, 7, 8, 0],
[0, 0, 0, 0, 0]])
Note that this may contain bugs and "not cleanly coded" parts; I've never used it except in trivial cases.
Upvotes: 6
Reputation: 12913
This class can handle your first test (x[1:4, 1:4]) and can be modified to handle your other test (i.e. prepending zeros at the start) if you so desire.
class CustomArray():
    """Wrapper around a numpy array whose slices are zero-padded at the end.

    Only padding past the end of the first two axes is handled; slice starts
    are passed to numpy unchanged.
    """

    def __init__(self, numpy_array):
        # The wrapped array stays reachable through ``_array``.
        self._array = numpy_array

    def __getitem__(self, val):
        """Slice the wrapped array, zero-padding past the end of axes 0 and 1.

        Args:
            val: Sequence whose first two entries are slices.  A ``stop`` of
                ``None`` means "to the end of the axis" and needs no padding.

        Returns:
            The plain numpy slice when it already has the requested size,
            otherwise a new array of the array's dtype in which the slice
            sits at the low-index corner and the remainder is zero.
        """
        get = self._array[val]

        # Grow each of the two axes up to the length the slice asked for.
        out_shape = list(get.shape)
        for axis in range(2):
            requested = val[axis]
            if requested.stop is None:
                continue
            start = requested.start if requested.start else 0
            out_shape[axis] = max(out_shape[axis], requested.stop - start)

        if tuple(out_shape) == get.shape:
            return get

        # Allocate once and paste, rather than concatenating row by row;
        # using get.dtype keeps integer data from being upcast to float.
        padded = np.zeros(out_shape, dtype=get.dtype)
        padded[tuple(slice(0, n) for n in get.shape)] = get
        return padded
Here is an example of its usage:
a = CustomArray(np.ones((3, 3)))
print(a[:2, :2])
[[ 1. 1.]
[ 1. 1.]]
print(a[:4, 1:6])
[[ 1. 1. 0. 0. 0.]
[ 1. 1. 0. 0. 0.]
[ 1. 1. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]]
# The actual numpy array is stored in the _array attribute
actual_numpy_array = a._array
Upvotes: 4