MBV
MBV

Reputation: 630

Numba function works randomly with same given input, is this a bug?

I wrote a function called not_test in Numba that takes a list of 2D arrays representing a drainage network and traces the route an imaginary water drop would follow, as in the figure below. The point of the code is to get the path of the drop for every possible drainage stream.

enter image description here

Results

These are the results I am getting: the routing streams a water drop would take if it falls at the start of each stream, e.g. if it falls at point 1, the routing stream is [16, 15, 2, 1].

[[16, 3], 
[16, 15, 2, 0], 
[16, 15, 2, 1], 
[16, 15, 14, 13], 
[16, 15, 14, 12, 4], 
[16, 15, 14, 12, 11, 6], 
[16, 15, 14, 12, 11, 10, 9], 
[16, 15, 14, 12, 11, 10, 8, 5], 
[16, 15, 14, 12, 11, 10, 8, 7]]

Problem

The code works in plain Python, and it also works when it is compiled with Numba. The problem appears when you run the Numba-compiled code several times: sometimes it crashes and sometimes it works.

I have not been able to debug the code in Numba, and it gives no error in Python mode. It does not show any particular error in the Python console or in a PyCharm run; it just stops.

The commented-out code is definitely not part of the issue I am experiencing.

I would really like to be able to use Numba on this function because it gives a 653X speed-up, and this function will run around 5k times. This would mean:

with Numba:  0.0015003681182861328s per run -> 7.5s total time
with Python: 0.9321613311767578s per run -> 1.3 hours total time

Using Numba is a BIG help in this particular issue, so I would appreciate any help, because normal python would not work for the application usage.

'Error example'

in Pycharm error:

        Now
        0.0
        0.2295396327972412
        [16]
        [ 3 15]
        [ 3 15]
        [ 2 14]
        [0 1]
        [12 13]
        
        Process finished with exit code -1073740940 (0xC0000374)
    
    
    in Pycharm no error:

    Now
    0.0
    0.2430422306060791
    [16]
    [ 3 15]
    [ 3 15]
    [ 2 14]
    [0 1]
    [12 13]
    [ 4 11]
    [ 4 11]
    [ 4 11]
    [ 6 10]
    [ 6 10]
    [8 9]
    [5 7]
    [[16, 3], [16, 15, 2, 0], [16, 15, 2, 1], [16, 15, 14, 13], [16, 15, 14, 12, 4], [16, 15, 14, 12, 11, 6], [16, 15, 14, 12, 11, 10, 9], [16, 15, 14, 12, 11, 10, 8, 5], [16, 15, 14, 12, 11, 10, 8, 7]]
    0.0016527080535889
    
    Process finished with exit code 0

Code

link to file: https://drive.google.com/file/d/1guAe1C2sKZyy2U2_qXAhMA1v46PfeKnN/view

import numpy as np
#from pypiper import RUT_5
import numba   

def convert2(x, dtype=np.float64):
    """Convert a nested sequence into NumPy arrays wrapped in Numba typed lists.

    The function first tries to turn ``x`` into a single NumPy array of the
    requested ``dtype``. If that fails, every element of ``x`` is converted
    recursively and the results are collected in a ``numba.typed.List``.

    Parameters
    ----------
    x : array-like or nested sequence
        Data to convert.
    dtype : data-type, optional
        Target dtype handed to ``np.asarray`` (default ``np.float64``).

    Returns
    -------
    numpy.ndarray or numba.typed.List
        An array when ``x`` converts directly, otherwise a typed list holding
        the recursively converted elements.
    """
    try:
        # Try and convert x to a NumPy array. If this succeeds
        # then we have reached the end of the nesting-depth.
        y = np.asarray(x, dtype=dtype)
    except (TypeError, ValueError):
        # Only conversion failures are handled here; a bare ``except`` would
        # also swallow KeyboardInterrupt, SystemExit and RecursionError.
        #
        # The conversion can fail because not all elements of x can be
        # converted to the given dtype. There is currently no way to
        # distinguish whether x is a nested list or just a list of simple
        # elements with incompatible types.

        # Recursively call this function on all elements of x.
        y = [convert2(x_, dtype=dtype) for x_ in x]

        # Convert Python list to Numba list.
        y = numba.typed.List(y)

    return y
  

@numba.njit('(ListType(float64[:, ::1]), float64[:])')
def not_test(branches, outlet):
    """Trace paths of branch indices outward from a fixed starting branch.

    Beginning at branch ``index_branch`` (currently hard-coded to 16), the
    function repeatedly searches the not-yet-visited branches for ones that
    match either end row of the current branch and extends the recorded paths
    with the indices found.

    Parameters
    ----------
    branches : numba.typed.List of C-contiguous 2-D float64 arrays
        One array per branch (type fixed by the ``njit`` signature). The first
        and last rows are copied into a (2, 2) buffer, so two columns per row
        are presumably expected -- TODO confirm against the caller.
    outlet : 1-D float64 array
        Only referenced by the commented-out code below; unused at present.

    Returns
    -------
    list of list of int
        ``paths``: each inner list is a sequence of branch indices that starts
        with the starting branch.
    """
    # get len of branches
    _len_branches = len(branches)
    # The commented-out section below would pick the branch closest to the
    # outlet as the starting branch; it is disabled and replaced by a constant.
    # # empty array
    # d_array = np.empty(shape=_len_branches, dtype=np.float64)
    # # set outlet coordinates as arrays
    # x_outlet, y_outlet = outlet
    # x_outlet, y_outlet = np.array([x_outlet]), np.array([y_outlet])
    #
    # # get min distance from branches
    # for pos in numba.prange(_len_branches):
    #     # get current branch
    #     branch = branches[pos]
    #     # get min distance from outlet point
    #     d_min = RUT_5.nb_cdist(branch, x_outlet, y_outlet).min()
    #     # add to array
    #     d_array[pos] = d_min
    #
    # #get index for minimun distance
    # index_branch = np.argmin(d_array)
    index_branch = 16

    # remove initial branch
    # NOTE(review): update_branches is never read after this point.
    update_branches = branches.copy()
    del update_branches[index_branch]

    # define arrays
    # not_read: indices of branches already visited (excluded from matching).
    not_read = np.empty(shape=0, dtype=np.int64)
    # The comprehensions over range(0) produce empty lists; presumably they
    # exist only so Numba can infer the element type.
    # NOTE(review): np.int is a deprecated alias that was removed in
    # NumPy 1.24 -- confirm against the NumPy/Numba versions in use.
    paths_update = [[np.int(x)] for x in range(0)]
    # points: buffer for the first and last row of the current branch.
    points = np.empty(shape=(2, 2))
    # a_list: indices of branches found connected to the current branch.
    a_list = [np.int(x) for x in range(0)]

    # avoid from loop: the starting branch counts as already visited
    not_read = np.append(index_branch, not_read)
    # iterable in loop: branch indices to expand next
    iterable = not_read.copy()

    # conditions
    cond = 0  # set to 1 once every branch index has been visited
    cont = 0  # 0 only while the starting branch is being processed

    while cond == 0:
        # `iterable` is rebound inside the loop body; the for loop in
        # progress keeps iterating over the array it started with.
        for pos_idx in iterable:
            print(iterable)
            if cont > 0:
                # snapshot of the paths as they stand before this expansion
                paths = paths_update.copy()

            branch = branches[pos_idx]
            # the two end rows of the current branch
            points[0] = branch[0]
            points[1] = branch[-1]

            for point in points:
                for pos_j in range(_len_branches):
                    if pos_j not in not_read:
                        # Per-row sum of the coordinate differences between
                        # the end point and every row of the candidate branch.
                        diff = np.sum(point - branches[pos_j], axis=1)
                        # NOTE(review): this tests the *sum* of the
                        # differences, so a row whose differences cancel out
                        # would also match -- confirm this is intended.
                        if 0 in diff:
                            a_list.append(pos_j)

            if cont == 0:
                # First pass: one two-element path per connected branch.
                paths = [[pos_idx] + [i] for i in a_list]
                paths_update = paths.copy()
                cont = cont + 1

                # mark the new branches as visited and queue them
                not_read = np.append(not_read, a_list)
                iterable = np.array(a_list)
                a_list = [np.int(x) for x in range(0)]

            else:
                if len(a_list):
                    # paths that contain the branch being expanded
                    path_arr = [_i for _i in paths if pos_idx in _i]
                    for path in path_arr:
                        for conexion in a_list:
                            # extended copy: old path plus one connection
                            temp_list = path.copy()
                            temp_list.append(conexion)
                            paths_update.append(temp_list)
                        # drop the path that has just been extended
                        # NOTE(review): list.remove under njit is the
                        # suspected source of the intermittent crash
                        # reported for this function -- confirm.
                        paths_update.remove(path)

                    # mark the new branches as visited and queue them
                    not_read = np.append(not_read, a_list)
                    iterable = np.array(a_list)
                    a_list = [np.int(x) for x in range(0)]
                else:
                    # nothing connected: skip the termination check below
                    continue

            # stop once the number of distinct visited indices equals the
            # number of branches
            if len(branches) == len(np.unique(not_read)):
                cond = 1
    # `paths` is the snapshot taken at the start of the last loop iteration.
    return paths




if __name__ == '__main__':
    import time

    print('Now')

    # Snap point coordinates handed to not_test as the outlet.
    x_snap, y_snap = 717110.7843995667, 9669749.115011858
    # The .npy file stores a pickled object; .item() unwraps it.
    branches = np.load('test.npy', allow_pickle=True).item()

    # Step 1: collect the coordinate sequence of every feature (timed).
    started = time.time()
    arr = [list(feature.geometry.coordinates) for feature in branches.features]
    print(time.time() - started)

    # Step 2: convert the nested Python lists for Numba (timed).
    started = time.time()
    arr = convert2(arr)
    print(time.time() - started)

    # Step 3: run the compiled routine and report its result (timed).
    started = time.time()
    outlet = np.array([x_snap, y_snap])
    result = not_test(branches=arr, outlet=outlet)
    print(result)
    print(time.time() - started)

Upvotes: 2

Views: 261

Answers (1)

MBV
MBV

Reputation: 630

This is not a real answer, as it does not address the actual problem of the potential bug in the Numba code, but it gets the job done.

It seems to be an issue when using the pop or remove list method in the code while using the @numba.njit decorator, this issue was reported and the developers are debugging it.

I ended up avoiding these methods. It is surely not ideal, as it iterates over some paths it should not, but it is still much faster than plain Python.

Code

@numba.njit('(ListType(float64[:, ::1]), float64[:])')
def not_test(branches, outlet):
    """Trace paths of branch indices outward from a fixed starting branch.

    Variant that never calls ``list.remove``: instead of deleting a path once
    it has been extended, its position is recorded in ``paths_remove`` and the
    recorded positions are filtered out once, just before returning.

    Parameters
    ----------
    branches : numba.typed.List of C-contiguous 2-D float64 arrays
        One array per branch (type fixed by the ``njit`` signature). The first
        and last rows are copied into a (2, 2) buffer, so two columns per row
        are presumably expected -- TODO confirm against the caller.
    outlet : 1-D float64 array
        Only referenced by the commented-out code below; unused at present.

    Returns
    -------
    list of list of int
        The entries of ``paths`` whose position was not recorded in
        ``paths_remove``; each inner list starts with the starting branch.
    """
    # get len of branches
    _len_branches = len(branches)
    # The commented-out section below would pick the branch closest to the
    # outlet as the starting branch; it is disabled and replaced by a constant.
    # # empty array
    # d_array = np.empty(shape=_len_branches, dtype=np.float64)
    # # set outlet coordinates as arrays
    # x_outlet, y_outlet = outlet
    # x_outlet, y_outlet = np.array([x_outlet]), np.array([y_outlet])
    #
    # # get min distance from branches
    # for pos in numba.prange(_len_branches):
    #     # get current branch
    #     branch = branches[pos]
    #     # get min distance from outlet point
    #     d_min = RUT_5.nb_cdist(branch, x_outlet, y_outlet).min()
    #     # add to array
    #     d_array[pos] = d_min
    #
    # #get index for minimun distance
    # index_branch = np.argmin(d_array)
    index_branch = 16

    # remove initial branch
    # NOTE(review): update_branches is never read after this point.
    update_branches = branches.copy()
    del update_branches[index_branch]

    # define arrays
    # not_read: indices of branches already visited (excluded from matching).
    not_read = np.empty(shape=0, dtype=np.int64)
    # The comprehensions over range(0) produce empty lists; presumably they
    # exist only so Numba can infer the element type.
    # NOTE(review): np.int is a deprecated alias that was removed in
    # NumPy 1.24 -- confirm against the NumPy/Numba versions in use.
    paths_update = [[np.int(_)] for _ in range(0)]
    # paths_remove: positions of paths that have been extended and must be
    # dropped from the final result.
    paths_remove = [np.int(_) for _ in range(0)]
    # points: buffer for the first and last row of the current branch.
    points = np.empty(shape=(2, 2))
    # a_list: indices of branches found connected to the current branch.
    a_list = [np.int(x) for x in range(0)]
    # NOTE(review): diff is declared as an int64 array here but later rebound
    # to the result of np.sum over float data -- confirm the intended dtype.
    diff = np.empty(shape=0, dtype=np.int64)

    # avoid from loop: the starting branch counts as already visited
    not_read = np.append(index_branch, not_read)
    # iterable in loop: branch indices to expand next
    iterable = not_read.copy()

    # conditions
    cond = 0  # set to 1 once every branch index has been visited
    cont = 0  # 0 only while the starting branch is being processed

    while cond == 0:
        # `iterable` is rebound inside the loop body; the for loop in
        # progress keeps iterating over the array it started with.
        for pos_idx in iterable:
            if cont > 0:
                # snapshot of the paths as they stand before this expansion
                paths = paths_update.copy()

            branch = branches[pos_idx]
            # the two end rows of the current branch
            points[0] = branch[0]
            points[1] = branch[-1]

            for point in points:
                # NOTE(review): the decorator does not pass parallel=True, so
                # prange presumably behaves like range here -- confirm.
                for pos_j in numba.prange(_len_branches):
                    if pos_j not in not_read:
                        # Per-row sum of the coordinate differences between
                        # the end point and every row of the candidate branch.
                        diff = np.sum(point - branches[pos_j], axis=1)
                        # NOTE(review): this tests the *sum* of the
                        # differences, so a row whose differences cancel out
                        # would also match -- confirm this is intended.
                        if len(diff[diff == 0]) > 0:
                            a_list.append(pos_j)

            if cont == 0:
                # First pass: one two-element path per connected branch.
                paths = [[pos_idx] + [i] for i in a_list]
                paths_update = paths.copy()
                cont = 1

                # mark the new branches as visited and queue them
                not_read = np.append(not_read, a_list)
                iterable = np.array(a_list)
                a_list = [np.int(x) for x in range(0)]

            else:
                if len(a_list):
                    for pos, path in enumerate(paths):
                        if pos_idx in path:
                            for conexion in a_list:
                                # extended copy: old path plus one connection
                                temp_list = path.copy()
                                temp_list.append(conexion)
                                paths_update.append(temp_list)
                            # Record the position instead of removing the
                            # path; paths_update is only appended to from here
                            # on, so recorded positions stay valid.
                            paths_remove.append(pos)

                    # mark the new branches as visited and queue them
                    not_read = np.append(not_read, a_list)
                    iterable = np.array(a_list)
                    a_list = [np.int(x) for x in range(0)]

            # stop once the number of distinct visited indices equals the
            # number of branches
            if len(branches) == len(np.unique(not_read)):
                cond = 1

    # Drop every path whose position was recorded as already extended.
    # `paths` is the snapshot taken at the start of the last loop iteration.
    paths = [_ for _i, _ in enumerate(paths) if _i not in paths_remove]

    return paths

Upvotes: 1

Related Questions