m_c
m_c

Reputation: 516

Subset and change values of grouped data frame

I need to:

Data frame:

             year var1 var2  #Desired output:
        1     1   10    1    #change first five values in  var1 to twofor year 1
        2     1   11    1
        3     1   12    1
        4     1   13    1
        5     1   14    1
        6     1   15    1
        7     1   16    1
        8     1   17    1
        9     1   18    1
        10    1   19    1
        11    2   20    1     #change first five values in  var1 to 2 for year 2
        12    2   21    1     #var2 stays the same since it is below threshold
        13    2   22    1
        14    2   23    1
        15    2   24    1
        16    2   25    1
        17    2   26    1
        18    2   27    1
        19    2   28    1
        20    2   29    1

Data frame code:

threshold <- 2        
df <- data.frame(year = c(rep(1, 10,), rep(2,10)),
                     var1 = seq(10,29, 1),
                     var2 = rep(1,20))
    df_out <- data.frame(year = c(rep(1, 10,), rep(2,10)),
                     var1 = c(rep(2,5), seq(16,20, 1), rep(2,5),seq(26,30)),
                     var2 = rep(1,20))

Upvotes: 1

Views: 317

Answers (2)

Gopala
Gopala

Reputation: 10483

You can use something like this in dplyr (I am adding a 3rd column with sample to show it works on many columns):

df$var3 <- sample(1:100, nrow(df))
head(df)
  year var1 var2 var3
1    1   10    1   54
2    1   11    1   60
3    1   12    1   26
4    1   13    1   28
5    1   14    1    7
6    1   15    1  100

df %>%
  group_by(year) %>%
  mutate_each(funs(ifelse(row_number() <= 5 & . >= threshold, threshold, .)))
Source: local data frame [20 x 4]
Groups: year [2]

    year  var1  var2  var3
   <dbl> <dbl> <dbl> <dbl>
1      1     2     1     2
2      1     2     1     2
3      1     2     1     2
4      1     2     1     2
5      1     2     1     2
6      1    15     1   100
7      1    16     1    25
8      1    17     1     1
9      1    18     1    55
10     1    19     1    48
11     2     2     1     2
12     2     2     1     2
13     2     2     1     2
14     2     2     1     2
15     2     2     1     2
16     2    25     1     9
17     2    26     1    63
18     2    27     1     2
19     2    28     1     5
20     2    29     1    61

Upvotes: 1

Mike H.
Mike H.

Reputation: 14360

I think using data.table and an ifelse would work:

setDT(df)[,var1 := ifelse(var1 >= threshold & seq_len(.N) <= 5, threshold, var1), by = year]

df
 #   year var1 var2
 #1:    1    2    1
 #2:    1    2    1
 #3:    1    2    1
 #4:    1    2    1
 #5:    1    2    1
 #6:    1   15    1
 #7:    1   16    1
 #8:    1   17    1
 #9:    1   18    1
#10:    1   19    1
#11:    2    2    1
#12:    2    2    1
#13:    2    2    1
#14:    2    2    1
#15:    2    2    1
#16:    2   25    1
#17:    2   26    1
#18:    2   27    1
#19:    2   28    1
#20:    2   29    1

Upvotes: 1

Related Questions