Luke
Luke

Reputation: 43

Return index from closest number from sorted arrays in Python

I have two sorted arrrays with timestamps (float values) and I want to find the closest value for each value and then get the index to this value.

This is my code so far but I was looking for something faster. I've checked also other solutions but none were faster so far....

t = array 01 with all timestamps

    for index, time in enumerate(timestamps_array02):   
        #find closest timestamp
        i = bisect_left(t, float (time))
        value = min(t[max(0, i-1): i+2], key=lambda t: abs(float(time) - float(t)))
        idx = np.where(t==value) #Getting the index
        x.append(gps[idx[0][0]])

Upvotes: 0

Views: 116

Answers (1)

Paul Panzer
Paul Panzer

Reputation: 53029

np.searchsorted is a vectorized version of bisection. In my limited testing it gives the same result as your code but quite a bit faster:

m, n = 100, 100
OP                    1.39766530 ms
pp0                   0.00914090 ms
m, n = 2000, 100
OP                    2.14626830 ms
pp0                   0.01182020 ms
m, n = 100000, 10000
OP                  798.53475470 ms
pp0                   0.48502260 ms

Code:

import numpy as np
from bisect import bisect_left
from heapq import merge
from timeit import timeit
import types

def mock_data(m, n):
    t0 = np.cumsum(np.random.randint(1, 3*np.maximum(1, n//m), (m,)))
    t1 = np.cumsum(np.random.randint(1, 3*np.maximum(1, m//n), (n,)))
    return t0, t1

def f_OP(t, timestamps_array02):
    x = []
    for index, time in enumerate(timestamps_array02):   
        #find closest timestamp
        i = bisect_left(t, float (time))
        value = min(t[max(0, i-1): i+2], key=lambda t: abs(float(time) - float(t)))
        idx = np.where(t==value) #Getting the index
        x.append(idx[0][0])
    return x

def f_pp0(t0, t1):
    idx = t0.searchsorted(t1)
    over = idx.searchsorted(len(t0)-1)
    under = idx.searchsorted(0, 'right')
    idx[:under] = 1
    idx[over:] = len(t0)-1
    idx -= ((t1-t0[idx-1]) <= (t0[idx]-t1))
    return idx

for m, n in[(100, 100), (2000, 100), (100000, 10000)]:
    data = mock_data(m, n)
    ref = f_OP(*data)
    print(f'm, n = {m}, {n}')
    for name, func in list(globals().items()):
        if not name.startswith('f_') or not isinstance(func, types.FunctionType):
            continue
        try:
            assert np.all(ref==func(*data))
            print("{:16s}{:16.8f} ms".format(name[2:], timeit(
                'f(*data)', globals={'f':func, 'data':data}, number=10)*100))
        except:
            print("{:16s} apparently failed".format(name[2:]))

Upvotes: 1

Related Questions