Reputation: 294546
Consider the array `a`:
# Sample 5x2 integer matrix used throughout the examples below.
a = np.array([[5, 4], [4, 5], [2, 2], [6, 1], [3, 7]])
I can find where the minimums are with
a.argmin(0)
array([2, 3])
How do I find the maximum of column 0 among the values before index 2, and likewise the maximum of column 1 among the values before index 3? And, more importantly, where are those maxima located?
If I do
a.max(0)
array([6, 7])
but I need
# max values
array([5, 5])
# argmax before mins
array([0, 1])
Upvotes: 4
Views: 170
Reputation: 294546
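I knew I could answer this with a vectorized version of a cumulative argmax.
@ajcr answered that question for me; his approach is the `ajcr` function below, and the broadcasting approach is wrapped as `divakar` for comparison.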
def ajcr(a):
    """Return, per column, the row index of the max at or before the column's min.

    Builds a cumulative argmax down the rows of the 2-D array ``a`` and then
    reads it off at each column's ``argmin`` row.

    Parameters
    ----------
    a : numpy.ndarray
        2-D array; each column is processed independently.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one row index per column of ``a``. On ties
        the first row that reached the running maximum is reported.
    """
    # Running maximum down each column.
    m = np.maximum.accumulate(a, axis=0)
    # Every cell starts out holding its own row index.
    x = np.repeat(np.arange(a.shape[0])[:, None], a.shape[1], axis=1)
    # Keep a row index only where the running maximum strictly increased;
    # all other cells are zeroed so the next step can forward-fill them.
    x[1:] *= m[:-1] < m[1:]
    # Forward-fill the last "new maximum" row index: x is now the
    # cumulative argmax of a along axis 0.
    np.maximum.accumulate(x, axis=0, out=x)
    # Read the cumulative argmax at each column's argmin row.
    return x[a.argmin(0), np.arange(a.shape[1])]
def divakar(a):
    """Return, per column, the row index of the max at or before the column's min.

    Rows that come after a column's ``argmin`` row are replaced with NaN via
    broadcasting, and ``np.nanargmax`` then picks the largest remaining
    value in each column.

    Parameters
    ----------
    a : numpy.ndarray
        2-D array; each column is processed independently. Because NaN is
        used as the fill value, integer input is upcast to float for the
        intermediate array.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one row index per column of ``a``.
    """
    # keep[r, c] is True while row r is at or above column c's argmin row.
    keep = a.argmin(0) >= np.arange(a.shape[0])[:, None]
    b = np.where(keep, a, np.nan)
    return np.nanargmax(b, axis=0)
comparison
a = np.random.randn(10000, 1000)
(ajcr(a) == divakar(a)).all()
True
timing
import itertools
import timeit

import matplotlib.pyplot as plt

# Benchmark grid: one row per row-count, one column per
# (method, column-count) pair.
row_counts = [10, 100, 1000, 10000]
col_counts = [10, 100, 1000]
methods = ['divakar', 'ajcr']

# Built with an explicit float dtype so the timings are stored (and later
# plotted) as numbers rather than generic objects.
results = pd.DataFrame(
    index=row_counts,
    columns=pd.MultiIndex.from_product([methods, col_counts]),
    dtype=float)

# Visit every (rows, cols) combination directly instead of going through
# results.stack(dropna=False).index, whose dropna argument is deprecated
# in recent pandas releases.
for i, j in itertools.product(row_counts, col_counts):
    a = np.random.randn(i, j)
    for method in methods:
        # timeit imports the function and the current `a` from __main__,
        # so this must run as a script or in an interactive session.
        results.loc[i, (method, j)] = timeit.timeit(
            '{}(a)'.format(method),
            setup='from __main__ import {}, a'.format(method),
            number=10)

# One horizontal bar chart per row-count, laid out on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(10, 5))
for k, (name, group) in enumerate(results.stack().groupby(level=0)):
    r, c = divmod(k, 2)
    group.xs(name).plot.barh(ax=axes[r, c], title=name)
fig.tight_layout()

# Display the timing table (notebook / REPL echo).
results
Upvotes: 2
Reputation: 221754
Here's one approach using broadcasting -
# Replace every row past each column's minimum with NaN so it cannot win.
b = np.where(np.arange(a.shape[0])[:, None] <= a.argmin(0), a, np.nan)
# Row index of the largest surviving value in each column.
idx = np.nanargmax(b, axis=0)
# Pull the corresponding maxima out of the original array.
out = a[idx, np.arange(a.shape[1])]
Sample run -
In [38]: a
Out[38]:
array([[5, 4],
[4, 5],
[2, 2],
[6, 1],
[3, 7]])
In [39]: b = np.where(a.argmin(0) >= np.arange(a.shape[0])[:,None],a,np.nan)
...: idx = np.nanargmax(b,axis=0)
...: out = a[idx,np.arange(a.shape[1])]
...:
In [40]: idx
Out[40]: array([0, 1])
In [41]: out
Out[41]: array([5, 5])
Alternatively, if `a` has positive numbers only, we could get `idx` simply with -
# True for rows at or above each column's minimum.
mask = np.arange(a.shape[0])[:, None] <= a.argmin(0)
# Masked-out rows become 0, which cannot beat a positive entry.
idx = (mask * a).argmax(0)
Upvotes: 4