AW27
AW27

Reputation: 505

Changing Values in Dataframe based on Specific Columns

I currently have a consisting of lower and upper bounds of confidence intervals and point estimates. I want to create a new dataframe so I can plot the variables where point estimates are not equal to zero. For each value of n, is it possible to change the values of the upper and lower bounds to if the point estimate value is 0? For example, in the dataframe provided when n = 205, y_pe = 0 so I would like to change the corresponding y_lo and y_up to NA.

X1_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_up <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_pe <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_up <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_pe <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
y_lo <- c(-24.71177, -25.13779, -16.19142, -15.63819 ,-15.42051 ,-16.11342, -17.10603 ,-18.00848 ,-19.59877, -12.91438)
y_up <- c(14.074116, 14.051209, 13.417954, 12.187319 ,13.602022, 12.943939,  1.317839 ,11.891103, 15.165398,  1.365459)
y_pe <- c(-2.984101, -2.867680, -2.695838 ,-2.583140, -2.416878 , 0.000000,  0.000000 , 0.000000 , 0.000000,  0.000000)


test.df <- cbind(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe) 

Thanks in advance.

Upvotes: 2

Views: 47

Answers (2)

akrun
akrun

Reputation: 887108

Specify a logical vector in i, and j with the column names, then assign the columns where the 'y_pe' is 0 to NA

test.df[test.df[, "y_pe"] == 0, c("y_lo", "y_up")] <- NA

If the data is data.frame and wants to be applied to all sets of columns except the first column ('n'), then we can split into a group of data.frames and do the replacement

nm1 <- sub("_.*", "", colnames(test.df)[-1])
out <- do.call(cbind, unname(lapply(split.default(test.df[-1], nm1),
       function(x) {
         i1 <- endsWith(names(x), "pe")
         x[!x[,i1], !i1] <- NA
        x})))
out
#   x_lo x_up x_pe X1_lo X1_up X1_pe      y_lo     y_up      y_pe
#1    NA   NA    0    NA    NA     0 -24.71177 14.07412 -2.984101
#2    NA   NA    0    NA    NA     0 -25.13779 14.05121 -2.867680
#3    NA   NA    0    NA    NA     0 -16.19142 13.41795 -2.695838
#4    NA   NA    0    NA    NA     0 -15.63819 12.18732 -2.583140
#5    NA   NA    0    NA    NA     0 -15.42051 13.60202 -2.416878
#6    NA   NA    0    NA    NA     0        NA       NA  0.000000
#7    NA   NA    0    NA    NA     0        NA       NA  0.000000
#8    NA   NA    0    NA    NA     0        NA       NA  0.000000
#9    NA   NA    0    NA    NA     0        NA       NA  0.000000
#10   NA   NA    0    NA    NA     0        NA       NA  0.000000

test.df[names(out)] <-  out
test.df
#     n X1_lo X1_up x_lo x_up      y_lo     y_up X1_pe x_pe      y_pe
#1  205    NA    NA   NA   NA -24.71177 14.07412     0    0 -2.984101
#2  205    NA    NA   NA   NA -25.13779 14.05121     0    0 -2.867680
#3  205    NA    NA   NA   NA -16.19142 13.41795     0    0 -2.695838
#4  205    NA    NA   NA   NA -15.63819 12.18732     0    0 -2.583140
#5  205    NA    NA   NA   NA -15.42051 13.60202     0    0 -2.416878
#6  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#7  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#8  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#9  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#10 205    NA    NA   NA   NA        NA       NA     0    0  0.000000

Or using tidyverse

library(dplyr)
library(tidyr)
library(stringr)
test.df %>%
   mutate(rn = row_number()) %>% 
   pivot_longer(cols = -c(n, rn), names_sep="_", 
        names_to = c("group", ".value")) %>%
   mutate_at(vars(lo, up), ~ replace(., pe == 0, NA)) %>%
   pivot_wider(names_from = "group", values_from = c('lo', 'up', 'pe'),
          names_repair = ~ str_replace(., "(.*)_(.*)", "\\2_\\1")) %>%
   select(-rn)
# A tibble: 10 x 10
#       n X1_lo  x_lo  y_lo X1_pe  x_pe  y_pe X1_up  x_up  y_up
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1   205    NA    NA -24.7     0     0 -2.98    NA    NA  14.1
# 2   205    NA    NA -25.1     0     0 -2.87    NA    NA  14.1
# 3   205    NA    NA -16.2     0     0 -2.70    NA    NA  13.4
# 4   205    NA    NA -15.6     0     0 -2.58    NA    NA  12.2
# 5   205    NA    NA -15.4     0     0 -2.42    NA    NA  13.6
# 6   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 7   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 8   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 9   205    NA    NA  NA       0     0  0       NA    NA  NA  
#10   205    NA    NA  NA       0     0  0       NA    NA  NA  

data

test.df <- data.frame(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)

Upvotes: 3

Chris
Chris

Reputation: 1668

I prefer akrun's answer, but here's an alternative using tidyr's mutate:

library(tidyr)

test.df %>% mutate(y_up = ifelse(y_pe == 0, NA, y_up),
                   y_lo = ifelse(y_pe == 0, NA, y_lo))

Upvotes: 2

Related Questions