Reputation: 79
I have a below df where below criteria needs to accomplished,
assume: today's month is Dec 2023
Criteria:
Input dataframe is below,
import pandas as pd
input_data = [
{"submissionmonth": "Jan-2023", "sku": "A1","location": "sup1","forecastmonth": "Jan-2023","cost": 100},
{"submissionmonth": "Jan-2023", "sku": "A1","location": "sup1","forecastmonth": "Feb-2023","cost": 105},
{"submissionmonth": "Jan-2023", "sku": "A1","location": "sup1","forecastmonth": "Mar-2023","cost": 108},
{"submissionmonth": "Jan-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 106},
{"submissionmonth": "Apr-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Apr-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Apr-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Apr-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Oct-2023", "sku": "A2","location": "sup2","forecastmonth": "Oct-2023","cost": 101},
{"submissionmonth": "Oct-2023", "sku": "A2","location": "sup2","forecastmonth": "Nov-2023","cost": 102},
{"submissionmonth": "Oct-2023", "sku": "A2","location": "sup2","forecastmonth": "Dec-2023","cost": 109},
{"submissionmonth": "Oct-2023", "sku": "A2","location": "sup2","forecastmonth": "Jan-2024","cost": 104},
]
# Create the DataFrame
input_data = pd.DataFrame(input_data)
# Print the DataFrame (optional)
print(input_data.shape)
input_data
Expected result,
import pandas as pd
output_data = [
{"submissionmonth": "Jan-2023", "sku": "A1","location": "sup1","forecastmonth": "Jan-2023","cost": 100},
{"submissionmonth": "Jan-2023", "sku": "A1","location": "sup1","forecastmonth": "Feb-2023","cost": 105},
{"submissionmonth": "Jan-2023", "sku": "A1","location": "sup1","forecastmonth": "Mar-2023","cost": 108},
{"submissionmonth": "Jan-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 106},
{"submissionmonth": "Feb-2023", "sku": "A1","location": "sup1","forecastmonth": "Jan-2023","cost": 100},
{"submissionmonth": "Feb-2023", "sku": "A1","location": "sup1","forecastmonth": "Feb-2023","cost": 105},
{"submissionmonth": "Feb-2023", "sku": "A1","location": "sup1","forecastmonth": "Mar-2023","cost": 108},
{"submissionmonth": "Feb-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 106},
{"submissionmonth": "Mar-2023", "sku": "A1","location": "sup1","forecastmonth": "Jan-2023","cost": 100},
{"submissionmonth": "Mar-2023", "sku": "A1","location": "sup1","forecastmonth": "Feb-2023","cost": 105},
{"submissionmonth": "Mar-2023", "sku": "A1","location": "sup1","forecastmonth": "Mar-2023","cost": 108},
{"submissionmonth": "Mar-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 106},
{"submissionmonth": "Apr-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Apr-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Apr-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Apr-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "May-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "May-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "May-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "May-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Jun-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Jun-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Jun-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Jun-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Jul-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Jul-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Jul-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Jul-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Aug-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Aug-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Aug-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Aug-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Sept-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Sept-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Sept-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Sept-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Oct-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Oct-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Oct-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Oct-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Oct-2023", "sku": "A2","location": "sup2","forecastmonth": "Oct-2023","cost": 101},
{"submissionmonth": "Oct-2023", "sku": "A2","location": "sup2","forecastmonth": "Nov-2023","cost": 102},
{"submissionmonth": "Oct-2023", "sku": "A2","location": "sup2","forecastmonth": "Dec-2023","cost": 109},
{"submissionmonth": "Oct-2023", "sku": "A2","location": "sup2","forecastmonth": "Jan-2024","cost": 104},
{"submissionmonth": "Nov-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Nov-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Nov-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Nov-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Nov-2023", "sku": "A2","location": "sup2","forecastmonth": "Oct-2023","cost": 101},
{"submissionmonth": "Nov-2023", "sku": "A2","location": "sup2","forecastmonth": "Nov-2023","cost": 102},
{"submissionmonth": "Nov-2023", "sku": "A2","location": "sup2","forecastmonth": "Dec-2023","cost": 109},
{"submissionmonth": "Nov-2023", "sku": "A2","location": "sup2","forecastmonth": "Jan-2024","cost": 104},
{"submissionmonth": "Dec-2023", "sku": "A1","location": "sup1","forecastmonth": "Apr-2023","cost": 101},
{"submissionmonth": "Dec-2023", "sku": "A1","location": "sup1","forecastmonth": "May-2023","cost": 102},
{"submissionmonth": "Dec-2023", "sku": "A1","location": "sup1","forecastmonth": "Jun-2023","cost": 109},
{"submissionmonth": "Dec-2023", "sku": "A1","location": "sup1","forecastmonth": "Jul-2023","cost": 104},
{"submissionmonth": "Dec-2023", "sku": "A2","location": "sup2","forecastmonth": "Oct-2023","cost": 101},
{"submissionmonth": "Dec-2023", "sku": "A2","location": "sup2","forecastmonth": "Nov-2023","cost": 102},
{"submissionmonth": "Dec-2023", "sku": "A2","location": "sup2","forecastmonth": "Dec-2023","cost": 109},
{"submissionmonth": "Dec-2023", "sku": "A2","location": "sup2","forecastmonth": "Jan-2024","cost": 104},
]
# Create the DataFrame
output_data = pd.DataFrame(output_data)
# Print the DataFrame (optional)
print(output_data.shape)
output_data
Upvotes: -1
Views: 71
Reputation: 2787
You can use a combination of .reindex
, .stack
and .ffill
. The data is pivoted such that the submission months are unique per row, so that forward filling works. Then the data is returned back into the form of the input_data
.
# convert existing months to datetime
submission_months = pd.to_datetime(
input_data.submissionmonth,
format="%b-%Y",
)
# all months between min and max
full_months = pd.date_range(
submission_months.min(),
submission_months.max(),
freq="M"
)
output_data = (
input_data
# set the index of the columns
.set_index(["submissionmonth", "forecastmonth"])
# unstack, so single-level index of submission month
.unstack("forecastmonth")
# reindex with all months, with same formatting
.reindex(
full_months.strftime("%b-%Y"),
axis=0,
)
# forward fill data
.ffill()
# stack the forecast month back into a column, as before
.stack("forecastmonth")
# reset index, providing names
.reset_index(
names=["submissionmonth", "forecastmonth"]
)
)
If you want to keep the same column order as before:
output_data.reindex(columns=input_data.columns, inplace=True)
Upvotes: 0