Reputation: 867
I have the following data set, and I am calculating the Net Forecast
column based on the rest.
The logic implemented is,
Order
< 0 for a Part, we add it with Gross Forecast
in the same row, i.e., 0
.Order
is not yet positive from the above calculation, we add it with Gross Forecast
of previous week, i.e, -1
.[0, -1, -2, -3, 1, 2, 3]
, i.e, go back 3 Weeks and go forward 3 weeks, until the Order
is 0.Gross Forecast
column to the Net Forecast
column. Part Week Gross Forecast Orders Net Forecast
0 A 1 10 0 10
1 A 2 5 0 0
2 A 3 30 0 0
3 A 4 20 0 0
4 A 5 10 -70 0
5 A 6 50 0 0
6 A 7 5 -60 0
7 A 8 30 0 20
8 Z 1 10 0 10
9 Z 2 5 0 -15
10 Z 3 10 0 0
11 Z 4 30 0 0
12 Z 5 30 -90 0
I was able to recreate the logic, but it is really slow using standard iterrows
. Is it possible to vectorize this solution using Pandas and Numpy?
import pandas as pd
import numpy as np
data = {
"Part": ["A", "A", "A", "A", "A", "A", "A", "A", "Z", "Z", "Z", "Z", "Z"],
"Week": [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5],
"Gross Forecast": [10, 5, 30, 20, 10, 50, 5, 30, 10, 5, 10, 30, 30],
"Orders": [0, 0, 0, 0, -70, 0, -60, 0, 0, 0, 0, 0, -90],
}
df = pd.DataFrame(data)
print(df)
# Create Net Forecast column
df["Net Forecast"] = df["Gross Forecast"]
for i, row in df.iterrows():
k = 0
order = 0
inventory = 0
index_list = [0, -1, -2, -3, 1, 2, 3]
if df.loc[i, "Orders"] != 0:
order = df.loc[i, "Orders"]
for j in index_list:
try:
if order < 0 and (df.loc[i, "Part"] == df.loc[i + j, "Part"]):
order = order + df.loc[i + j, "Net Forecast"]
df.loc[i + j, "Net Forecast"] = 0
k = j
else:
break
except KeyError:
break
df.loc[i + k, "Net Forecast"] = order
print(df)
Upvotes: 1
Views: 134
Reputation: 3824
Often where balances like inventory are involved 'vectorisation' can be achieved using cumulative for the flows.
iterative balance[t] = balance[t-1] + in[t] - out[t]
becomes vectorised balance = in.cumsum() - out.cumsum()
import numpy as np
in_ = np.array( [10, 5, 30, 20, 10, 50, 5, 30, 0, 0, 0, 0] )
orders = np.array( [0, 0, 0, 0, 70, 0, 60, 0, 0, 0, 0, 0] )
# 4 extra periods to handle the out of date range.
out_of_date = np.zeros_like( in_ )
out_of_date[ 4: ] = in_[ :-4 ]
# Parts fall out of date after 4 weeks if not already delivered.
# Create cumulatives to work with
cum_in = in_.cumsum() # Constant through calculation
cum_orders = orders.cumsum() # Constant through calculation
cum_ood = out_of_date.cumsum() # Amended at each iteration
cum_deliveries = np.zeros_like( cum_in ) # One period filled each iteration
available = np.zeros_like( in_ ) # One period filled each iteration
deliveries = np.zeros_like( cum_in ) # One period filled each iteration
def decum( in_, axis=-1 ):
""" Take differences in_[t] - in_[t-1] fill first period with in_[0] """
res = in_.copy()
res[ 1: ] = np.diff(in_, 1, axis = axis)
return res
def cum_dels( week ):
""" Calcultes cumulative deliveries at the week specified.
Also calculates the value of deliveries in the week """
available[ week ] = cum_in[ week ] - cum_ood[ week ]
cum_deliveries[ week ] = np.minimum( cum_orders[ week ], available[ week ] )
if week:
deliveries[ week ] = cum_deliveries[ week ] - cum_deliveries[ week-1 ]
else:
deliveries[ week ] = cum_deliveries[ week ] # If week == 0 no difference to take
def amend_ood( week ):
""" Amend the cum_ood for deliveries in the week. """
min_cum_ood = cum_ood[ week ] # Don't subtract to below the cum_ood in this week.
available_notused = available[ week ] - cum_deliveries[ week ]
# Don't subtract any available that's not delivered.
# This has the effect of taking deliveries from the most recent in_
max_subtract = np.maximum( cum_ood[ week: ] - min_cum_ood - available_notused, 0)
# The maximum to subtract is the cum_ood less the fixed bands and never less than zero.
to_subtract = np.minimum( max_subtract, deliveries[ week ] ) # max_subtract clipped at the weeks deliveries
cum_ood[ week: ] -= to_subtract
week_range = range(8)
# Iterate the above functions by week.
# This can be rewritten to calculate all part numbers for each week.
for week in week_range:
cum_dels( week )
amend_ood( week )
print(deliveries)
print(decum(cum_ood)[4:])
The functions need to be rewritten to work with 2d arrays, part_number x week. Then each iteration by week calculates all part numbers for that week.
I'll look at making it 2d once I've some time but this may help as is. There's also certain to be scope to optimise the code. It's written to help me understand what I was doing.
**Edit Changes to run a 2D version **
out_of_date = np.zeros_like( in_ )
out_of_date[ :, 4: ] = in_[ :, :-4 ]
# Parts fall out of date after 4 weeks if not already delivered.
# Create cumulatives to work with
cum_in = in_.cumsum(axis=1) # Constant through calculation
cum_orders = orders.cumsum(axis=1) # Constant through calculation
cum_ood = out_of_date.cumsum(axis=1) # Amended at each iteration
cum_deliveries = np.zeros_like( cum_in ) # One period filled each iteration
available = np.zeros_like( in_ ) # One period filled each iteration
deliveries = np.zeros_like( cum_in ) # One period filled each iteration
def decum( in_, axis=-1 ):
""" Take differences in_[t] - in_[t-1] fill first period with in_[0] """
res = in_.copy()
res[ :, 1: ] = np.diff(in_, 1, axis = axis)
return res
def cum_dels( week ):
""" Calcultes cumulative deliveries at the week specified.
Also calculates the value of deliveries in the week """
available[ :, week ] = cum_in[ :, week ] - cum_ood[ :, week ]
cum_deliveries[ :, week ] = np.minimum( cum_orders[ :, week ], available[ :, week ] )
if week:
deliveries[ :, week ] = cum_deliveries[ :, week ] - cum_deliveries[ :, week-1 ]
else:
deliveries[ :, week ] = cum_deliveries[ :, week ] # If week == 0 no difference to take
def amend_ood( week ):
""" Amend the cum_ood for deliveries in the week. """
min_cum_ood = cum_ood[ :, week ] # Don't subtract to below the cum_ood in this week.
available_notused = available[ :, week ] - cum_deliveries[ :, week ]
# Don't subtract any available that's not delivered.
# This has the effect of taking deliveries from the most recent in_
max_subtract = np.maximum( cum_ood[ :, week: ] - min_cum_ood[:,None] - available_notused[:,None], 0)
# The maximum to subtract is the cum_ood less the fixed bands and never less than zero.
to_subtract = np.minimum( max_subtract, deliveries[ :, week ].reshape(-1,1) ) # max_subtract clipped at the weeks deliveries
cum_ood[ :, week: ] -= to_subtract
This doesn't give the same results as your version for part number Z.
What results are expected with the following scenarios?
data = {
"Part": ["Z", "Z", "Z", "Z", "Z", "Z"],
"Week": [1, 2, 3, 4, 5, 6],
"Gross Forecast": [10, 5, 10, 30, 30, 0],
"Orders": [ 0, 0, 0, 0, -90, 0]
}
Or this
data = {
"Part": ["Z", "Z", "Z", "Z", "Z", "Z", "Z", "Z"],
"Week": [1, 2, 3, 4, 5, 6,7,8],
"Gross Forecast": [10, 5, 10, 30, 30, 0, 0, 100],
"Orders": [ 0,-90, 0, 0, 0, 0, 0, -50]
}
Upvotes: 1