Reputation: 3760
i do have quite complicated case for me to solve. Let me explain you on the base of an example..So we start with the table below:
Datum Urlaub_geplannt
1 2018-10 1410
2 2018-11 940
3 2018-12 470
structure(list(Datum = structure(1:3, .Label = c("2018-10", "2018-11",
"2018-12"), class = "factor"), Urlaub_geplannt = c(1410, 940,
470)), .Names = c("Datum", "Urlaub_geplannt"), row.names = c(NA,
-3L), class = "data.frame")
I would like to get the new rows added to this table till January next year (Datum column) and all the other columns should be filled with 0. The final table should look in this case like that:
Datum Urlaub_geplannt
1 2018-10 1410
2 2018-11 940
3 2018-12 470
4 2019-01 0
However it is very important to make it somehow automatically the "end year", as my data changes (it is actually in Shiny
).
What i mean by that if i would have new data with rows from 2019, i would like automatically get the "end date" as January 2020. Thanks for help!
Upvotes: 1
Views: 276
Reputation: 388982
A base R approach
get_date_till_Jan <- function(df) {
#Convert the character dates to actual Date objects
max_Date <- max(as.Date(paste0(df$Datum, "-01")))
#Get the date for next year January
next_Jan <- as.Date(paste0(as.numeric(format(max_Date, "%Y")) + 1, "-01-01"))
#Create a monthly sequence from the max date to next Jan date
new_date <- format(seq(max_Date, next_Jan, by = "month")[-1], "%Y-%m")
#Create a new dataframe with all values as 0 and change only the Datum
#column with new_date and rbind it to original dataframe
rbind(df, transform(data.frame(matrix(0, nrow = length(new_date),
ncol = ncol(df), dimnames = list(NULL, names(df)))),
Datum = new_date))
}
df <- get_date_till_Jan(df)
df
# Datum Urlaub_geplannt
#1 2018-10 1410
#2 2018-11 940
#3 2018-12 470
#4 2019-01 0
This would work for any number of columns
df['another_col'] = 1:4
get_date_till_Jan(df)
# Datum Urlaub_geplannt another_col
#1 2018-10 1410 1
#2 2018-11 940 2
#3 2018-12 470 3
#4 2019-01 0 4
#5 2019-02 0 0
#6 2019-03 0 0
#7 2019-04 0 0
#8 2019-05 0 0
#9 2019-06 0 0
#10 2019-07 0 0
#11 2019-08 0 0
#12 2019-09 0 0
#13 2019-10 0 0
#14 2019-11 0 0
#15 2019-12 0 0
#16 2020-01 0 0
Upvotes: 1
Reputation: 6325
df <- structure(list(Datum = structure(1:3, .Label = c("2018-10", "2018-11",
"2018-12"), class = "factor"), Urlaub_geplannt = c(1410, 940,
470)), .Names = c("Datum", "Urlaub_geplannt"), row.names = c(NA,
-3L), class = "data.frame")
Datum <- format(seq.Date(as.Date(paste0(df$Datum[nrow(df)],"-01")),
as.Date(paste0(substring(seq.Date(as.Date(paste0(as.character(df$Datum[1]),"-01")),
length = 2,
by = 'year')[2],1,4),"-01-01")),
by = "month"
),"%Y-%m")
new_df <- data.frame(Datum = Datum, Urlaub_geplannt = rep(0,length(Datum)))
total_df <- rbind(df,new_df)
total_df
#> Datum Urlaub_geplannt
#> 1 2018-10 1410
#> 2 2018-11 940
#> 3 2018-12 470
#> 4 2018-12 0
#> 5 2019-01 0
Upvotes: 1
Reputation: 8364
Solution with dplyr
and a full_join
:
library(dplyr)
library(lubridate) # for ymd() function
d <- d %>%
mutate(Datum = paste0(Datum,"-01"),
Datum = ymd(Datum)) # correct Date format
min_year <- year(min(d$Datum))
min_date <- min(d$Datum)
# create a data.frame of possible dates
fill_dates <- data.frame(Datum = seq.Date(
min_date, # min date avaiable
as.Date(paste0(min_year+1,"-01-01")), # until first Jan next year
by = "month"))
Now we can join the two data.frames
:
d %>%
full_join(fill_dates, by="Datum") %>% # full_join of the two tables
# the full_join will add all new row not present in d originally, with NA
mutate(Urlaub_geplannt = ifelse(is.na(Urlaub_geplannt), 0, Urlaub_geplannt))
# Datum Urlaub_geplannt
# 1 2018-10-01 1410
# 2 2018-11-01 940
# 3 2018-12-01 470
# 4 2019-01-01 0
Data:
d <- structure(list(Datum = structure(c("2018-10", "2018-11",
"2018-12"), class = "character"), Urlaub_geplannt = c(1410, 940,
470)), .Names = c("Datum", "Urlaub_geplannt"), row.names = c(NA,
-3L), class = "data.frame")
Upvotes: 1