Mal_a
Mal_a

Reputation: 3760

Add rows with dates till January next year

i do have quite complicated case for me to solve. Let me explain you on the base of an example..So we start with the table below:

 Datum Urlaub_geplannt
1 2018-10            1410
2 2018-11             940
3 2018-12             470


structure(list(Datum = structure(1:3, .Label = c("2018-10", "2018-11", 
"2018-12"), class = "factor"), Urlaub_geplannt = c(1410, 940, 
470)), .Names = c("Datum", "Urlaub_geplannt"), row.names = c(NA, 
-3L), class = "data.frame")

I would like to get the new rows added to this table till January next year (Datum column) and all the other columns should be filled with 0. The final table should look in this case like that:

 Datum Urlaub_geplannt
1 2018-10            1410
2 2018-11             940
3 2018-12             470
4 2019-01             0

However it is very important to make it somehow automatically the "end year", as my data changes (it is actually in Shiny).

What i mean by that if i would have new data with rows from 2019, i would like automatically get the "end date" as January 2020. Thanks for help!

Upvotes: 1

Views: 276

Answers (3)

Ronak Shah
Ronak Shah

Reputation: 388982

A base R approach

get_date_till_Jan <- function(df) {
  #Convert the character dates to actual Date objects
  max_Date <- max(as.Date(paste0(df$Datum, "-01")))

  #Get the date for next year January
  next_Jan <- as.Date(paste0(as.numeric(format(max_Date, "%Y")) + 1, "-01-01"))

  #Create a monthly sequence from the max date to next Jan date
  new_date <- format(seq(max_Date, next_Jan, by = "month")[-1], "%Y-%m")

  #Create a new dataframe with all values as 0 and change only the Datum 
  #column with new_date and rbind it to original dataframe
  rbind(df, transform(data.frame(matrix(0, nrow = length(new_date), 
      ncol = ncol(df), dimnames = list(NULL, names(df)))), 
      Datum = new_date))
}

df <- get_date_till_Jan(df)
df

#    Datum Urlaub_geplannt
#1 2018-10            1410
#2 2018-11             940
#3 2018-12             470
#4 2019-01               0

This would work for any number of columns

df['another_col'] = 1:4
get_date_till_Jan(df)


#     Datum Urlaub_geplannt another_col
#1  2018-10            1410           1
#2  2018-11             940           2
#3  2018-12             470           3
#4  2019-01               0           4
#5  2019-02               0           0
#6  2019-03               0           0
#7  2019-04               0           0
#8  2019-05               0           0
#9  2019-06               0           0
#10 2019-07               0           0
#11 2019-08               0           0
#12 2019-09               0           0
#13 2019-10               0           0
#14 2019-11               0           0
#15 2019-12               0           0
#16 2020-01               0           0

Upvotes: 1

amrrs
amrrs

Reputation: 6325

df <- structure(list(Datum = structure(1:3, .Label = c("2018-10", "2018-11", 
                                                       "2018-12"), class = "factor"), Urlaub_geplannt = c(1410, 940, 
                                                                                                          470)), .Names = c("Datum", "Urlaub_geplannt"), row.names = c(NA, 
                                                                                                                                                                       -3L), class = "data.frame")




Datum <- format(seq.Date(as.Date(paste0(df$Datum[nrow(df)],"-01")),
                         as.Date(paste0(substring(seq.Date(as.Date(paste0(as.character(df$Datum[1]),"-01")), 
                                                           length = 2,
                                                           by = 'year')[2],1,4),"-01-01")),
                         by = "month"

),"%Y-%m")


new_df <- data.frame(Datum  = Datum, Urlaub_geplannt = rep(0,length(Datum)))


total_df <- rbind(df,new_df)

total_df
#>     Datum Urlaub_geplannt
#> 1 2018-10            1410
#> 2 2018-11             940
#> 3 2018-12             470
#> 4 2018-12               0
#> 5 2019-01               0

Upvotes: 1

RLave
RLave

Reputation: 8364

Solution with dplyr and a full_join:

library(dplyr)
library(lubridate) # for ymd() function


d <- d %>% 
  mutate(Datum = paste0(Datum,"-01"),
         Datum = ymd(Datum)) # correct Date format

min_year <- year(min(d$Datum))
min_date <- min(d$Datum)

# create a data.frame of possible dates
fill_dates <- data.frame(Datum = seq.Date(
  min_date, # min date avaiable
  as.Date(paste0(min_year+1,"-01-01")), # until first Jan next year
  by = "month"))

Now we can join the two data.frames:

d %>% 
  full_join(fill_dates, by="Datum") %>% # full_join of the two tables
  # the full_join will add all new row not present in d originally, with NA
  mutate(Urlaub_geplannt = ifelse(is.na(Urlaub_geplannt), 0, Urlaub_geplannt))

#       Datum Urlaub_geplannt
# 1 2018-10-01            1410
# 2 2018-11-01             940
# 3 2018-12-01             470
# 4 2019-01-01               0

Data:

d <- structure(list(Datum = structure(c("2018-10", "2018-11", 
                                                      "2018-12"), class = "character"), Urlaub_geplannt = c(1410, 940, 
                                                                                                         470)), .Names = c("Datum", "Urlaub_geplannt"), row.names = c(NA, 
                                                                                                                                                                      -3L), class = "data.frame")

Upvotes: 1

Related Questions