user3022875
user3022875

Reputation: 9018

unable to add column with dplyr mutate

I currently have this data frame

    dat = data.frame(time= c("Q1","Q2","Q3"), 
                     measure1 = c(1,2,9   ),
                     measure2 = c(4,5,6   ))
    dat2 = dat %>% gather(key= Metric, value = Value, c(measure1, measure2)) %>% group_by(Metric, time)
dat2 = as.data.frame(dat2)

time   Metric Value
1   Q1 measure1     1
2   Q2 measure1     2
3   Q3 measure1     9
4   Q1 measure2     4
5   Q2 measure2     5
6   Q3 measure2     6

I can add this "test" column like this

dat2$test=  ifelse(  dat2$Metric =="measure1" &  dat2$Value >   dat2$Value[ dat2$Metric=="measure2"] ,1,
                               ifelse(  dat2$Metric == "measure2" &   dat2$Value >  dat2$Value[ dat2$Metric=="measure1"] ,1,-1)
                               ) #end ifelse

 time   Metric Value test
1   Q1 measure1     1   -1
2   Q2 measure1     2   -1
3   Q3 measure1     9    1
4   Q1 measure2     4    1
5   Q2 measure2     5    1
6   Q3 measure2     6   -1

BUT I would like to use dplyr and mutate like this

dat = data.frame(time= c("Q1","Q2","Q3"), 
                 measure1 = c(1,2,9   ),
                 measure2 = c(4,5,6   ))
dat %>% gather(key= Metric, value = Value, c(measure1, measure2)) %>% group_by(Metric, time) %>%
     mutate(test= ifelse( Metric =="measure1" &  Value > Value[Metric=="measure2"] ,1,
                               ifelse( Metric =="measure2" &  Value > Value[Metric=="measure1"] ,1,-1)
                               ) #end ifelse
                        )#end mutate
dat2 = as.data.frame(dat2)
dat2

but the "test" column is all NA

 time   Metric Value test
1   Q1 measure1     1   NA
2   Q2 measure1     2   NA
3   Q3 measure1     9   NA
4   Q1 measure2     4   NA
5   Q2 measure2     5   NA
6   Q3 measure2     6   NA

why can't you use mutate to add the column? does it have something to do with using as.data.frame and adding it manually...mutate doesn't recognize the column name?

Thank you.

Upvotes: 1

Views: 1091

Answers (1)

akuiper
akuiper

Reputation: 214927

The problem lies in the fact that when you group by Metric, for groups where Metric is measure1, Metric == "measure2" will always return FALSE, and Value[FALSE] == numeric(0), zero length objects are transformed to NA after the mutate:

dat %>% 
        gather(key= Metric, value = Value, c(measure1, measure2)) %>% 
        group_by(time) %>%        # <<<<<<<<<<<< here
        mutate(test= ifelse(Metric =="measure1" & Value > Value[Metric=="measure2"] ,1,
                            ifelse(Metric =="measure2" & Value > Value[Metric=="measure1"],1,-1)
                            ) #end ifelse
              )

# Source: local data frame [6 x 4]
# Groups: time [3]

#     time   Metric Value  test
#   <fctr>    <chr> <dbl> <dbl>
# 1     Q1 measure1     1    -1
# 2     Q2 measure1     2    -1
# 3     Q3 measure1     9     1
# 4     Q1 measure2     4     1
# 5     Q2 measure2     5     1
# 6     Q3 measure2     6    -1

You can see more clearly what is happening when you group by Metric with this example:

dat %>% 
        gather(key= Metric, value = Value, c(measure1, measure2)) %>% 
        group_by(Metric, time) %>%
        mutate(test= Value[Metric == "measure2"])

# Source: local data frame [6 x 4]
# Groups: Metric, time [6]

#     time   Metric Value  test
#   <fctr>    <chr> <dbl> <dbl>
# 1     Q1 measure1     1    NA
# 2     Q2 measure1     2    NA
# 3     Q3 measure1     9    NA
# 4     Q1 measure2     4     4
# 5     Q2 measure2     5     5
# 6     Q3 measure2     6     6

Upvotes: 2

Related Questions