Doug Fir
Doug Fir

Reputation: 21212

mutate_at & vars: Can I tell R / dplyr to overwrite existing features instead of creating new ones?

[If downvoting, feedback would be helpful so I can try to update the post.]

library(tidyverse)

# Build the example data: store disp, wt and qsec as text, then plant the
# literal string "NULL" in a few rows of each of those columns.
example_mtcars <- mtcars %>%
  mutate_at(vars(disp, wt, qsec), as.character)

example_mtcars[c(2, 4, 8), "disp"] <- "NULL"
example_mtcars[c(10, 12), "wt"] <- "NULL"
example_mtcars[c(2, 3, 4), "qsec"] <- "NULL"

# Attempt to clean disp, wt and qsec: replace "NULL" with "0", then convert
# the text to numeric.
# Two separate functions are passed to funs(). As the glimpse() output below
# shows, each one is applied to the ORIGINAL column and stored in a new,
# suffixed column (<col>_str_replace, <col>_as.numeric); the original columns
# are left unchanged, so as.numeric still sees "NULL" and yields NA.
processed_mtcars <- example_mtcars %>% 
  mutate_at(vars(c(disp, wt:qsec)),
            funs(str_replace(., "NULL", "0"),
                 as.numeric))

The new data frame processed_mtcars contains additional, suffixed features (e.g. disp_str_replace and disp_as.numeric), whereas I would like the feature names to be exactly the same as in example_mtcars, with the transformations applied to them in place.

> glimpse(processed_mtcars)
Observations: 32
Variables: 17
$ mpg              <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, ...
$ cyl              <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4
$ disp             <chr> "160", "NULL", "108", "NULL", "360", "225", "360", "NULL", "140.8", "167.6", "167.6", "275.8", "...
$ hp               <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97...
$ drat             <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3.00, ...
$ wt               <chr> "2.62", "2.875", "2.32", "3.215", "3.44", "3.46", "3.57", "3.19", "3.15", "NULL", "3.44", "NULL"...
$ qsec             <chr> "16.46", "NULL", "NULL", "NULL", "17.02", "20.22", "15.84", "20", "22.9", "18.3", "18.9", "17.4"...
$ vs               <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1
$ am               <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1
$ gear             <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4
$ carb             <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2
$ disp_str_replace <chr> "160", "0", "108", "0", "360", "225", "360", "0", "140.8", "167.6", "167.6", "275.8", "275.8", "...
$ wt_str_replace   <chr> "2.62", "2.875", "2.32", "3.215", "3.44", "3.46", "3.57", "3.19", "3.15", "0", "3.44", "0", "3.7...
$ qsec_str_replace <chr> "16.46", "0", "0", "0", "17.02", "20.22", "15.84", "20", "22.9", "18.3", "18.9", "17.4", "17.6",...
$ disp_as.numeric  <dbl> 160.0, NA, 108.0, NA, 360.0, 225.0, 360.0, NA, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472.0, ...
$ wt_as.numeric    <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, NA, 3.440, NA, 3.730, 3.780, 5.25...
$ qsec_as.numeric  <dbl> 16.46, NA, NA, NA, 17.02, 20.22, 15.84, 20.00, 22.90, 18.30, 18.90, 17.40, 17.60, 18.00, 17.98, ...

For example, the second function passed to funs is as.numeric. However, it is applied to the original, untransformed features, not to the ones where "NULL" has just been replaced with "0", so the results contain multiple NA values.

Upvotes: 2

Views: 499

Answers (1)

JasonAizkalns
JasonAizkalns

Reputation: 20463

Perhaps one of the following is what you're after:

# Combine both steps into ONE funs() expression so the selected columns keep
# their names: "NULL" is replaced with "0" first, then the text is converted
# to numeric.
example_mtcars %>%
  mutate_at(
    vars(disp, wt:qsec),
    funs(as.numeric(str_replace(., "NULL", "0")))
  )

or this:

# Variant that treats "NULL" as missing: any value matching "NULL" becomes
# NA before the numeric conversion, again as ONE funs() expression so the
# selected columns keep their names.
example_mtcars %>%
  mutate_at(
    vars(disp, wt:qsec),
    funs(as.numeric(str_replace(., "NULL", NA_character_)))
  )

Upvotes: 2

Related Questions