Reputation: 135
I have a DataFrame
that looks like so:
dput(head(f))
structure(list(`2021-04-01` = c(18.75, 18.8047425327496, 19.044178996207,
19.1751973213021, 19.1689792828825, 19.5836158822698), `2021-05-01` = c(18.68,
18.6054569055353, 18.8313151379181, 19.0204171560939, 19.0789341312536,
19.5202197400974), `2021-06-01` = c(18.7, 18.6167441350011, 18.8507363020792,
19.0240964484369, 19.0811699993501, 19.5312231113571), `2021-07-01` = c(18.74,
18.6232367708829, 18.8951394692142, 19.0857033425782, 19.1976392153446,
19.6658807214903), `2021-08-01` = c(18.77, 18.644593579267, 18.9246150091022,
19.1032894987568, 19.2156342675263, 19.6849727955065), `2021-09-01` = c(18.84,
18.7266061665193, 19.0136981423636, 19.2080462372435, 19.3252661741782,
19.7976573506291), `2021-10-01` = c(19.16, 19.0429325189963,
19.2868666432121, 19.5302122400335, 19.6246896164923, 20.0574681959085
), `2021-11-01` = c(19.61, 19.4885636931401, 19.7281581945936,
19.9833632850547, 20.1125365664892, 20.5253519746824), `2021-12-01` = c(20.49,
20.3817436029844, 20.615343551369, 20.8552877862877, 20.9919455460469,
21.3954384388117), `2022-01-01` = c(20.48, 20.3740323095769,
20.6359913586072, 20.8916278650574, 21.0735209988655, 21.4729020165058
), `2022-02-01` = c(20.61, 20.5005105056777, 20.7692639443274,
21.0278974843904, 21.2088372694747, 21.6274189250732), `2022-03-01` = c(20.01,
19.8989327908198, 20.160435596222, 20.4165374436547, 20.5841189948464,
20.9775903556065), `2022-04-01` = c(17.33, 17.2371485625473,
17.4547245416883, 17.5005672257715, 17.6708376845902, 18.1363806857805
), `2022-05-01` = c(16.98, 16.8871485729785, 17.1047245517176,
17.1505672343433, 17.320837688241, 17.7863806850577), `2022-06-01` = c(16.93,
16.8371485710296, 17.0547245439537, 17.100567227402, 17.270837681622,
17.7363806838834), `2022-07-01` = c(16.55, 16.4602929774512,
16.679186983992, 16.72087791148, 16.902375852915, 17.3788582533972
), `2022-08-01` = c(16.55, 16.4602929762459, 16.6791869839035,
16.7208779112979, 16.902375853404, 17.3788582563241), `2022-09-01` = c(16.63,
16.5371485669301, 16.7547245454607, 16.8005672316863, 16.9708376873097,
17.4363806848043), `2022-10-01` = c(17.55, 17.4565144059899,
17.6932969707138, 17.7610505877454, 17.9658483820753, 18.3789383274342
), `2022-11-01` = c(17.95, 17.8565144105445, 18.0932969766868,
18.1610505955585, 18.3658483847519, 18.7789383271028), `2022-12-01` = c(18.83,
18.746506350021, 18.9775786073436, 19.0456532330963, 19.247708755873,
19.6618961429746), `2023-01-01` = c(18.85, 18.7565144062112,
18.9932969711003, 19.0610505896545, 19.2658483811053, 19.6789383251607
), `2023-02-01` = c(18.9, 18.8065144004764, 19.0432969669948,
19.1110505845328, 19.3158483743015, 19.7289383179684), `2023-03-01` = c(18.48,
18.3965063458801, 18.6275786028996, 18.695653229726, 18.8977087489342,
19.3118961401579), `2023-04-01` = c(16.45, 16.3946104511309,
16.6014635966272, 16.5836894226683, 16.7780479423448, 17.1192896129417
), `2023-05-01` = c(16.1, 16.0446104494817, 16.2514635937641,
16.2336894172376, 16.4280479394591, 16.7692896149847), `2023-06-01` = c(16.05,
15.9946104513846, 16.2014635962793, 16.1836894234373, 16.3780479477376,
16.7192896151668), `2023-07-01` = c(15.68, 15.6196427933565,
15.8264798722289, 15.8067013586247, 15.9929523803558, 16.3360049618321
), `2023-08-01` = c(15.68, 15.6196427956788, 15.8264798738901,
15.8067013650241, 15.9929523892458, 16.3360049695611), `2023-09-01` = c(15.75,
15.6946104485846, 15.9014635974283, 15.8836894236607, 16.0780479442838,
16.4192896139878), `2023-10-01` = c(16.55, 16.4859287021049,
16.6972792014036, 16.6652734109894, 16.9082634547469, 17.2600362319714
), `2023-11-01` = c(16.95, 16.88772137434, 17.1001895977296,
17.0660375905701, 17.3119906079332, 17.6598028036498), `2023-12-01` = c(17.83,
17.7629779733904, 17.9769203281466, 17.9457670813362, 18.1919876279738,
18.5322466212441), `2024-01-01` = c(17.85, 17.7877213736662,
18.0001895966288, 17.9660375873591, 18.21199060611, 18.5598028001363
), `2024-02-01` = c(17.9, 17.8377213696917, 18.0501895967882,
18.0160375825238, 18.2619906037143, 18.609802799337), `2024-03-01` = c(17.48,
17.412977978269, 17.6269203306822, 17.5957670842671, 17.8419876327419,
18.182246625778), `2024-04-01` = c(15.58, 15.6320820495314, 15.9040941973079,
16.0011191104772, 16.1344022617966, 16.2335337117727), `2024-05-01` = c(15.23,
15.282082046754, 15.5540941922692, 15.6511191067781, 15.7844022615365,
15.8835337091116), `2024-06-01` = c(15.18, 15.2320820475369,
15.5040941967701, 15.6011191055654, 15.7344022621257, 15.8335337059841
), `2024-07-01` = c(14.8, 14.8510913081696, 15.1150995970647,
15.2182582392206, 15.34485846568, 15.4485814541912), `2024-08-01` = c(14.8,
14.8510913086602, 15.1150995968972, 15.2182582375734, 15.3448584628037,
15.4485814528923), `2024-09-01` = c(14.88, 14.9320820482739,
15.2040941957414, 15.3011191040817, 15.434402262001, 15.5335337090296
), `2024-10-01` = c(15.48, 15.4841139854442, 15.5870431340295,
15.6417314142646, 15.8049227959112, 16.098072929477), `2024-11-01` = c(15.88,
15.8841139866634, 15.9870431368691, 16.0417314145131, 16.2049227962264,
16.4980729322541), `2024-12-01` = c(16.75, 16.7489599312631,
16.8369242831981, 16.8966205174003, 17.071500700515, 17.3712327127006
), `2025-01-01` = c(16.78, 16.6562480763831, 16.6057902890351,
16.4748384688119, 16.6420566625675, 16.8092903223808), `2025-02-01` = c(16.83,
16.7062480758168, 16.6557902869362, 16.5248384644428, 16.69205666146,
16.8592903220325), `2025-03-01` = c(16.4, 16.2779097817421, 16.2307811943544,
16.1021665690717, 16.2687265649343, 16.4311314730022), `2025-04-01` = c(15.73,
15.7087537504016, 15.6075842903385, 15.5299411837982, 15.7125519597493,
15.8306868005935), `2025-05-01` = c(15.38, 15.3587537511295,
15.257584289102, 15.1799411833166, 15.3625519604448, 15.480686801248
), `2025-06-01` = c(15.33, 15.3087537532081, 15.2075842896311,
15.129941184956, 15.312551964791, 15.4306868025988), `2025-07-01` = c(14.95,
14.9219729698033, 14.8130638959243, 14.7435847446705, 14.9181912547184,
15.0389778390791), `2025-08-01` = c(14.95, 14.9219729696512,
14.8130638953738, 14.7435847412341, 14.9181912530557, 15.0389778362589
), `2025-09-01` = c(15.03, 15.0087537477099, 14.9075842830852,
14.8299411740066, 15.0125519532892, 15.1306867930961), `2025-10-01` = c(15.75,
15.7563564789682, 15.6174493875532, 15.5606783126084, 15.7533723756061,
15.8531515873088), `2025-11-01` = c(16.15, 16.1563564821382,
16.0174493913382, 15.9606783194065, 16.1533723777501, 16.2531515879125
), `2025-12-01` = c(17.03, 17.0272458979667, 16.8737455455617,
16.8203287006477, 17.0225669443998, 17.1255973103374), Date = c("2021-04-01",
"2021-04-02", "2021-04-03", "2021-04-04", "2021-04-05", "2021-04-06"
)), row.names = c(NA, 6L), class = "data.frame")
As you can see I have a column that is called Date with Dates from 2021-04-01
until 2025-12-31
. I also have columns, one for each month between these Dates. So a column for April 2021, May 2021, ..., December 2025.
What I want is to optimize the nested for loop
that I am using (maybe with dplyr
) below:
for(i in 1:nrow(f)):
for(j in 1:ncol(f)):
if(as.character(f$Date[i]) != colnames(f)[j]):
f[i, j] <- 0
I tried to do it using the comment's suggestions and it worked, but somehow it is slower:
require(tidyr)
long <- f %>%
gather(Month_Product, Price, -c(Date)) %>%
filter(Month_Product == as.character(floor_date(ymd(Date), unit = "month")))%>%
spread(Month_Product, Price) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
mutate(SPOT = rowSums(across(where(is.numeric)))) %>%
dplyr::select(Date, SPOT)
So if I were to take only the head of the result DataFrame
it would look have the column of 2021-04-01
with values since the first 6 Date Values are in the month of 2021-04-01
and the other columns
values would all be equal to 0.
I hope this is clear I could provide further clarification if needed.
Thank you.
Upvotes: 1
Views: 183
Reputation: 56219
Base solution, with the help of lubridate package to get the floor date.
library(lubridate) # floor_date
# get index where floor date of "Date" doesn't match column name
ix <- matrix(
rep(head(colnames(f), -1), each = nrow(f)),
nrow = nrow(f)) != as.character(floor_date(ymd(f$Date), unit = "month"))
# assign 0, based on index, excluding the last "Date" column
f[, -ncol(f)][ ix ] <- 0
# then row sums
data.frame(f$Date,
SPOT = rowSums(f[, -ncol(f)]))
# Date SPOT
# 1 2021-04-01 18.75000
# 2 2021-04-02 18.80474
# 3 2021-04-03 19.04418
# 4 2021-04-04 19.17520
# 5 2021-04-05 19.16898
# 6 2021-04-06 19.58362
A bit more clarification: get column names excluding last "Date" column, then repeat each as many as number of rows, then convert to matrix. Then compare this matrix's each column with the floor of "Date" column. This will give us logical (TRUE/FALSE) matrix, which we then use as index to assign zero. And lastly, sum by rows, excluding last "Date" column.
Upvotes: 2