Reputation: 21470
I was wondering if there was a syntactically simple way of checking if each element in a numpy array lies between two numbers.
In other words, just as numpy.array([1,2,3,4,5]) < 5
will return array([True, True, True, True, False])
, I was wondering if it was possible to do something akin to this:
1 < numpy.array([1,2,3,4,5]) < 5
... to obtain ...
array([False, True, True, True, False])
I understand that I can obtain this through logical chaining of boolean tests, but I'm working through some rather complex code and I was looking for a syntactically clean solution.
Any tips?
Upvotes: 83
Views: 126537
Reputation: 69092
One solution would be:
import numpy as np
a = np.array([1, 2, 3, 4, 5])
# Combine the two element-wise comparisons with `&`. The parentheses are
# required because `&` binds more tightly than the comparison operators.
(1 < a) & (a < 5)  # 1 < element < 5?
# array([False, True, True, True, False])
Upvotes: 115
Reputation: 26956
It is interesting to compare the NumPy-based approach against a Numba-accelerated loop:
import numpy as np
import numba as nb
def between(arr, a, b):
    """Return an element-wise mask that is True where ``a < arr < b``.

    Both bounds are exclusive. The two comparisons are evaluated separately
    and combined with the ``&`` operator.
    """
    above_lower = arr > a
    below_upper = arr < b
    return above_lower & below_upper
@nb.njit(fastmath=True)
def between_nb(arr, a, b):
    """Return a boolean mask of the elements strictly between ``a`` and ``b``.

    Numba-compiled, explicit-loop counterpart of ``(arr > a) & (arr < b)``.

    Parameters
    ----------
    arr : numpy.ndarray
        Input array; its shape is restored on the returned mask.
    a, b : scalar
        Exclusive lower and upper bounds.

    Returns
    -------
    numpy.ndarray
        Boolean array shaped like ``arr``; an entry is True where
        ``a < arr < b``.
    """
    shape = arr.shape
    # Flatten so that one loop covers every element, whatever the shape.
    arr = arr.ravel()
    n = arr.size
    result = np.empty_like(arr, dtype=np.bool_)
    for i in range(n):
        # Both bounds must hold. Joining them with `or` would accept any
        # value above `a` or below `b`, i.e. nearly every element.
        result[i] = arr[i] > a and arr[i] < b
    return result.reshape(shape)
The benchmarks are computed and plotted with:
import pandas as pd
import matplotlib.pyplot as plt
def benchmark(
funcs,
args=None,
kws=None,
ii=range(4, 24),
m=2 ** 15,
is_equal=np.allclose,
seed=0,
unit="ms",
verbose=True
):
labels = [func.__name__ for func in funcs]
units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
args = tuple(args) if args else ()
kws = dict(kws) if kws else {}
assert unit in units
np.random.seed(seed)
timings = {}
for i in ii:
n = 2 ** i
k = 1 + m // n
if verbose:
print(f"i={i}, n={n}, m={m}, k={k}")
arrs = np.random.random((k, n))
base = np.array([funcs[0](arr, *args, **kws) for arr in arrs])
timings[n] = []
for func in funcs:
res = np.array([func(arr, *args, **kws) for arr in arrs])
is_good = is_equal(base, res)
timed = %timeit -n 8 -r 8 -q -o [func(arr, *args, **kws) for arr in arrs]
timing = timed.best / k
timings[n].append(timing if is_good else None)
if verbose:
print(
f"{func.__name__:>24}"
f" {is_good!s:5}"
f" {timing * (10 ** units[unit]):10.3f} {unit}"
f" {timings[n][0] / timing:5.1f}x")
return timings, labels
def plot(timings, labels, title=None, xlabel="Input Size / #", unit="ms"):
    """Draw benchmark results as three side-by-side line plots.

    The panels show, from left to right: the timings scaled to ``unit``,
    each timing as a percentage of the first label's timing (log x-axis),
    and the first label's timing divided by each timing.

    Parameters
    ----------
    timings : dict
        Maps each input size to a list of timings, one per label.
        Presumably in seconds, as produced by ``benchmark``.
    labels : list of str
        Names of the timed functions; ``labels[0]`` is the baseline.
    title : str, optional
        Figure title; omitted when falsy.
    xlabel : str
        Label for the x-axis of every panel.
    unit : {"s", "ms", "µs", "ns"}
        Unit for the first panel's y-axis.
    """
    n_rows = 1
    n_cols = 3
    fig, axs = plt.subplots(
        n_rows, n_cols, figsize=(8 * n_cols, 6 * n_rows), squeeze=False)
    units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
    # One row per input size, one column per function.
    df = pd.DataFrame(data=timings, index=labels).transpose()
    # Kept as a single column so it broadcasts across all function columns.
    base = df[[labels[0]]].to_numpy()
    (df * 10 ** units[unit]).plot(
        marker="o", xlabel=xlabel, ylabel=f"Best timing / {unit}",
        ax=axs[0, 0])
    # Label follows the "Quantity / unit" pattern of the other axes.
    (df / base * 100).plot(
        marker="o", xlabel=xlabel, ylabel="Relative speed / %", logx=True,
        ax=axs[0, 1])
    (base / df).plot(
        marker="o", xlabel=xlabel, ylabel="Speed Gain / x", ax=axs[0, 2])
    if title:
        fig.suptitle(title)
    fig.patch.set_facecolor("white")
# Compare the plain NumPy version against the Numba-compiled loop, testing
# for values strictly between 0.25 and 0.75.
funcs = (between, between_nb)
timings, labels = benchmark(
    funcs,
    args=(0.25, 0.75),
    unit="µs",
    verbose=False,
)
plot(timings, labels, unit="µs")
indicate that (under my testing conditions):
Upvotes: 1
Reputation: 1411
In multi-dimensional arrays you could use the np.any() option suggested or comparison operators, while using `and` will raise an error (as will `&` unless each comparison is wrapped in parentheses).
import numpy as np
# 4x3 sample matrix used by the range checks that follow.
arr = np.array([
    [1, 5, 1],
    [0, 1, 0],
    [0, 0, 0],
    [2, 2, 2],
])
Now use == if you want to check if the array values are inside a range, i.e. A < arr < B, or != if you want to check if the array values are outside a range, i.e. arr < A or arr > B:
# Outside the range: arr < 1 or arr > 3. The two conditions can never both
# be True, so `!=` is True exactly when one of them holds.
(arr < 1) != (arr > 3)
# array([[False,  True, False],
#        [ True, False,  True],
#        [ True,  True,  True],
#        [False, False, False]])
# Inside the range: 1 < arr < 4. The two conditions can never both be
# False, so `==` is True exactly when both of them hold.
(arr > 1) == (arr < 4)
# array([[False, False, False],
#        [False, False, False],
#        [False, False, False],
#        [ True,  True,  True]])
Upvotes: 1
Reputation: 742
You can also center the matrix and use the distance to 0
# Bounds of the open interval (lower_limit, upper_limit).
upper_limit = 5
lower_limit = 1
a = np.array([1, 2, 3, 4, 5])
# An element is inside the interval when its distance from the midpoint is
# smaller than half of the interval's width.
midpoint = 0.5 * (upper_limit + lower_limit)
half_width = 0.5 * (upper_limit - lower_limit)
your_mask = np.abs(a - midpoint) < half_width
One thing to keep in mind is that the comparison will be symmetric on both sides, so it can do 1<x<5
or 1<=x<=5
, but not 1<=x<5
Upvotes: 1
Reputation: 4601
Another option would be to use numpy.any. Here is an example:
import numpy as np
a = np.array([1, 2, 3, 4, 5])
# Element-wise mask of the values below 1 or above 5.
out_of_range = (a < 1) | (a > 5)
# Collapses the mask to one boolean: True if any element is out of range.
np.any(out_of_range)
Upvotes: 11