Shantanu
Shantanu

Reputation: 867

Pandas, Numpy: How to Speed up row iteration with inner loop?

I have the following data set, and I am calculating the Net Forecast column based on the rest.

The logic implemented is,

   Part  Week  Gross Forecast  Orders  Net Forecast
0     A     1              10       0            10
1     A     2               5       0             0
2     A     3              30       0             0
3     A     4              20       0             0
4     A     5              10     -70             0
5     A     6              50       0             0
6     A     7               5     -60             0
7     A     8              30       0            20
8     Z     1              10       0            10
9     Z     2               5       0           -15
10    Z     3              10       0             0
11    Z     4              30       0             0
12    Z     5              30     -90             0

I was able to recreate the logic, but it is really slow using standard iterrows. Is it possible to vectorize this solution using Pandas and Numpy?

import pandas as pd
import numpy as np

data = {
    "Part": ["A", "A", "A", "A", "A", "A", "A", "A", "Z", "Z", "Z", "Z", "Z"],
    "Week": [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5],
    "Gross Forecast": [10, 5, 30, 20, 10, 50, 5, 30, 10, 5, 10, 30, 30],
    "Orders": [0, 0, 0, 0, -70, 0, -60, 0, 0, 0, 0, 0, -90],
}

df = pd.DataFrame(data)
print(df)

# Create Net Forecast column
df["Net Forecast"] = df["Gross Forecast"]

for i, row in df.iterrows():

    k = 0
    order = 0
    inventory = 0
    index_list = [0, -1, -2, -3, 1, 2, 3]

    if df.loc[i, "Orders"] != 0:
        order = df.loc[i, "Orders"]

        for j in index_list:

            try:

                if order < 0 and (df.loc[i, "Part"] == df.loc[i + j, "Part"]):
                    order = order + df.loc[i + j, "Net Forecast"]
                    df.loc[i + j, "Net Forecast"] = 0
                    k = j
                else:
                    break

            except KeyError:
                break

        df.loc[i + k, "Net Forecast"] = order

print(df)

Upvotes: 1

Views: 134

Answers (1)

Tls Chris
Tls Chris

Reputation: 3824

Often where balances like inventory are involved 'vectorisation' can be achieved using cumulative for the flows.

iterative balance[t] = balance[t-1] + in[t] - out[t] becomes vectorised balance = in.cumsum() - out.cumsum()

import numpy as np

in_ = np.array( [10, 5, 30, 20, 10, 50, 5, 30, 0, 0, 0, 0] )
orders = np.array( [0, 0, 0,  0, 70, 0, 60, 0, 0, 0, 0, 0] )
# 4 extra periods to handle the out of date range.

out_of_date = np.zeros_like( in_ )
out_of_date[ 4: ] = in_[ :-4 ]
# Parts fall out of date after 4 weeks if not already delivered.

# Create cumulatives to work with
cum_in = in_.cumsum()           # Constant through calculation
cum_orders = orders.cumsum()    # Constant through calculation
cum_ood = out_of_date.cumsum()  # Amended at each iteration
cum_deliveries = np.zeros_like( cum_in ) # One period filled each iteration 
available = np.zeros_like( in_ )         # One period filled each iteration
deliveries = np.zeros_like( cum_in )     # One period filled each iteration

def decum( in_, axis=-1 ):
    """ Take differences in_[t] - in_[t-1] fill first period with in_[0] """
    res = in_.copy()
    res[ 1: ] = np.diff(in_, 1, axis = axis)
    return res

def cum_dels( week ):
    """ Calcultes cumulative deliveries at the week specified.
        Also calculates the value of deliveries in the week """
    available[ week ] = cum_in[ week ] - cum_ood[ week ]
    cum_deliveries[ week ] = np.minimum( cum_orders[ week ], available[ week ] )
    if week:
        deliveries[ week ] = cum_deliveries[ week ] - cum_deliveries[ week-1 ]
    else:
        deliveries[ week ] = cum_deliveries[ week ]  # If week == 0 no difference to take

def amend_ood( week ):
    """ Amend the cum_ood for deliveries in the week. """
    min_cum_ood = cum_ood[ week ]    # Don't subtract to below the cum_ood in this week.
    available_notused = available[ week ] - cum_deliveries[ week ] 
    # Don't subtract any available that's not delivered. 
    # This has the effect of taking deliveries from the most recent in_

    max_subtract = np.maximum( cum_ood[ week: ] - min_cum_ood - available_notused, 0)
    # The maximum to subtract is the cum_ood less the fixed bands and never less than zero.

    to_subtract = np.minimum( max_subtract, deliveries[ week ] ) # max_subtract clipped at the weeks deliveries
    cum_ood[ week: ] -= to_subtract

week_range = range(8)

# Iterate the above functions by week.  
# This can be rewritten to calculate all part numbers for each week.
for week in week_range:
    cum_dels( week )
    amend_ood( week )

print(deliveries)
print(decum(cum_ood)[4:])

The functions need to be rewritten to work with 2d arrays, part_number x week. Then each iteration by week calculates all part numbers for that week.

I'll look at making it 2d once I've some time but this may help as is. There's also certain to be scope to optimise the code. It's written to help me understand what I was doing.

**Edit Changes to run a 2D version **

out_of_date = np.zeros_like( in_ )
out_of_date[ :, 4: ] = in_[ :, :-4 ]
# Parts fall out of date after 4 weeks if not already delivered.

# Create cumulatives to work with
cum_in = in_.cumsum(axis=1)           # Constant through calculation
cum_orders = orders.cumsum(axis=1)    # Constant through calculation
cum_ood = out_of_date.cumsum(axis=1)  # Amended at each iteration
cum_deliveries = np.zeros_like( cum_in ) # One period filled each iteration 
available = np.zeros_like( in_ )         # One period filled each iteration
deliveries = np.zeros_like( cum_in )     # One period filled each iteration
def decum( in_, axis=-1 ):
    """ Take differences in_[t] - in_[t-1] fill first period with in_[0] """
    res = in_.copy()
    res[ :, 1: ] = np.diff(in_, 1, axis = axis)
    return res

def cum_dels( week ):
    """ Calcultes cumulative deliveries at the week specified.
        Also calculates the value of deliveries in the week """
    available[ :, week ] = cum_in[ :, week ] - cum_ood[ :, week ]
    cum_deliveries[ :, week ] = np.minimum( cum_orders[ :, week ], available[ :, week ] )
    if week:
        deliveries[ :, week ] = cum_deliveries[ :, week ] - cum_deliveries[ :, week-1 ]
    else:
        deliveries[ :, week ] = cum_deliveries[ :, week ]  # If week == 0 no difference to take

def amend_ood( week ):
    """ Amend the cum_ood for deliveries in the week. """
    min_cum_ood = cum_ood[ :, week ]    # Don't subtract to below the cum_ood in this week.
    available_notused = available[ :, week ] - cum_deliveries[ :, week ] 
    # Don't subtract any available that's not delivered. 
    # This has the effect of taking deliveries from the most recent in_

    max_subtract = np.maximum( cum_ood[ :, week: ] - min_cum_ood[:,None] - available_notused[:,None], 0)
    # The maximum to subtract is the cum_ood less the fixed bands and never less than zero.

    to_subtract = np.minimum( max_subtract, deliveries[ :, week ].reshape(-1,1) ) # max_subtract clipped at the weeks deliveries
    cum_ood[ :, week: ] -= to_subtract

This doesn't give the same results as your version for part number Z.

What results are expected with the following scenarios?

data = {                                                                                
    "Part": ["Z", "Z", "Z", "Z", "Z", "Z"],                                                  
    "Week": [1, 2, 3, 4, 5, 6],                                                            
    "Gross Forecast": [10, 5, 10, 30, 30, 0],                                 
    "Orders":          [ 0, 0, 0, 0, -90, 0]                                  
}    

Or this

data = {                                             
    "Part": ["Z", "Z", "Z", "Z", "Z", "Z", "Z", "Z"],
    "Week": [1, 2, 3, 4, 5, 6,7,8],                  
    "Gross Forecast": [10, 5, 10, 30, 30, 0, 0, 100],
    "Orders":          [ 0,-90, 0, 0,  0, 0, 0, -50]
}     

Upvotes: 1

Related Questions