Ic3fr0g
Ic3fr0g

Reputation: 1219

Inserting rows into a table

I have this table (visit_ts) -

Year  Month  Number_of_visits
2011    4        1
2011    6        3
2011    7        23
2011    12       32
2012    1        123
2012    11       3200
  1. The aim is to insert rows with Number_of_visits as 0, for months which are missing in the table.
  2. Do not insert rows for 2011 where month is 1,2,3 or 2012 where month is 12.

The following code works correctly -

vec_month=c(1,2,3,4,5,6,7,8,9,10,11,12)
vec_year=c(2011,2012,2013,2014,2015,2016)
i=1
startyear=head(visit_ts$Year,n=1)
endyear=tail(visit_ts$Year,n=1)
x=head(visit_ts$Month,n=1)
y=tail(visit_ts$Month,n=1)
for (year in vec_year)
{
  if(year %in% visit_ts$Year)
  {
    a=subset(visit_ts,visit_ts$Year==year)
    index= which(!vec_month %in% a$Month)
    for (j in index)
    {
      if((year==startyear & j>x )|(year==endyear & j<y))
      visit_ts=rbind(visit_ts,c(year,j,0))
      else
      {
        if(year!=startyear & year!=endyear)
        visit_ts=rbind(visit_ts,c(year,j,0))
      }
    }}
  else
  {
    i=i+1
  }}

As I am new to R I am looking for an alternative/better solution to the problem which would not involve hard-coding the year and month vectors. Also please feel free to point out best programming practices.

Upvotes: 0

Views: 91

Answers (1)

akrun
akrun

Reputation: 887951

We can use expand.grid with merge or left_join

library(dplyr)
expand.grid(Year = min(df1$Year):max(df1$Year), Month = 1:12) %>% 
  filter(!(Year == min(df1$Year) & Month %in% 1:3| 
           Year == max(df1$Year) & Month == 12)) %>% 
  left_join(., df1) %>%
  mutate(Number_of_visits=replace(Number_of_visits, is.na(Number_of_visits), 0))
#   Year Month Number_of_visits
#1  2012     1              123
#2  2012     2                0
#3  2012     3                0
#4  2011     4                1
#5  2012     4                0
#6  2011     5                0
#7  2012     5                0
#8  2011     6                3
#9  2012     6                0
#10 2011     7               23
#11 2012     7                0
#12 2011     8                0
#13 2012     8                0
#14 2011     9                0
#15 2012     9                0
#16 2011    10                0
#17 2012    10                0
#18 2011    11                0
#19 2012    11             3200
#20 2011    12               32

We can make it more dynamic by grouping by 'Year', get the sequence of 'Month' from minimum to maximum in a list, unnest the column, join with the original dataset (left_join) and replace the NA values with 0.

library(tidyr)
df1 %>%
     group_by(Year) %>% 
     summarise(Month = list(min(Month):max(Month))) %>% 
     unnest(Month) %>%
     left_join(., df1) %>% 
     mutate(Number_of_visits=replace(Number_of_visits, is.na(Number_of_visits), 0))
#     Year Month Number_of_visits
#    <int> <int>            <dbl>
#1   2011     4                1
#2   2011     5                0
#3   2011     6                3
#4   2011     7               23
#5   2011     8                0
#6   2011     9                0
#7   2011    10                0
#8   2011    11                0
#9   2011    12               32
#10  2012     1              123
#11  2012     2                0
#12  2012     3                0
#13  2012     4                0
#14  2012     5                0
#15  2012     6                0
#16  2012     7                0
#17  2012     8                0
#18  2012     9                0
#19  2012    10                0
#20  2012    11             3200

Or another option is data.table. Convert the 'data.frame' to 'data.table' (setDT(df1)), grouped by 'Year', we get the sequence of min to max 'Month', join with the original dataset on 'Year' and 'Month', replace the NA values to 0.

library(data.table)
setDT(df1)
df1[df1[, .(Month=min(Month):max(Month)), Year], 
  on = c("Year", "Month")][is.na(Number_of_visits), Number_of_visits := 0][]
#    Year Month Number_of_visits
# 1: 2011     4                1
# 2: 2011     5                0
# 3: 2011     6                3
# 4: 2011     7               23
# 5: 2011     8                0
# 6: 2011     9                0
# 7: 2011    10                0
# 8: 2011    11                0
# 9: 2011    12               32
#10: 2012     1              123
#11: 2012     2                0
#12: 2012     3                0
#13: 2012     4                0
#14: 2012     5                0
#15: 2012     6                0
#16: 2012     7                0
#17: 2012     8                0
#18: 2012     9                0
#19: 2012    10                0
#20: 2012    11             3200

Upvotes: 4

Related Questions