Changing Values in Dataframe based on Specific Columns

Question

I currently have a dataframe consisting of lower and upper bounds of confidence intervals and point estimates. I want to create a new dataframe so I can plot the variables where point estimates are not equal to zero. For each value of n, is it possible to change the values of the upper and lower bounds to na if the point estimate value is 0? For example, in the dataframe provided when n = 205, y_pe = 0 so I would like to change the corresponding y_lo and y_up to NA.

X1_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_up <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_pe <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_up <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_pe <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
y_lo <- c(-24.71177, -25.13779, -16.19142, -15.63819 ,-15.42051 ,-16.11342, -17.10603 ,-18.00848 ,-19.59877, -12.91438)
y_up <- c(14.074116, 14.051209, 13.417954, 12.187319 ,13.602022, 12.943939,  1.317839 ,11.891103, 15.165398,  1.365459)
y_pe <- c(-2.984101, -2.867680, -2.695838 ,-2.583140, -2.416878 , 0.000000,  0.000000 , 0.000000 , 0.000000,  0.000000)


test.df <- cbind(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)

Thanks in advance.

akrun · Accepted Answer

Specify a logical vector in i, and j with the column names, then assign the columns where the 'y_pe' is 0 to NA

test.df[test.df[, "y_pe"] == 0, c("y_lo", "y_up")] <- NA

If the data is data.frame and wants to be applied to all sets of columns except the first column ('n'), then we can split into a group of data.frames and do the replacement

nm1 <- sub("_.*", "", colnames(test.df)[-1])
out <- do.call(cbind, unname(lapply(split.default(test.df[-1], nm1),
       function(x) {
         i1 <- endsWith(names(x), "pe")
         x[!x[,i1], !i1] <- NA
        x})))
out
#   x_lo x_up x_pe X1_lo X1_up X1_pe      y_lo     y_up      y_pe
#1    NA   NA    0    NA    NA     0 -24.71177 14.07412 -2.984101
#2    NA   NA    0    NA    NA     0 -25.13779 14.05121 -2.867680
#3    NA   NA    0    NA    NA     0 -16.19142 13.41795 -2.695838
#4    NA   NA    0    NA    NA     0 -15.63819 12.18732 -2.583140
#5    NA   NA    0    NA    NA     0 -15.42051 13.60202 -2.416878
#6    NA   NA    0    NA    NA     0        NA       NA  0.000000
#7    NA   NA    0    NA    NA     0        NA       NA  0.000000
#8    NA   NA    0    NA    NA     0        NA       NA  0.000000
#9    NA   NA    0    NA    NA     0        NA       NA  0.000000
#10   NA   NA    0    NA    NA     0        NA       NA  0.000000

test.df[names(out)] <-  out
test.df
#     n X1_lo X1_up x_lo x_up      y_lo     y_up X1_pe x_pe      y_pe
#1  205    NA    NA   NA   NA -24.71177 14.07412     0    0 -2.984101
#2  205    NA    NA   NA   NA -25.13779 14.05121     0    0 -2.867680
#3  205    NA    NA   NA   NA -16.19142 13.41795     0    0 -2.695838
#4  205    NA    NA   NA   NA -15.63819 12.18732     0    0 -2.583140
#5  205    NA    NA   NA   NA -15.42051 13.60202     0    0 -2.416878
#6  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#7  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#8  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#9  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#10 205    NA    NA   NA   NA        NA       NA     0    0  0.000000

Or using tidyverse

library(dplyr)
library(tidyr)
library(stringr)
test.df %>%
   mutate(rn = row_number()) %>% 
   pivot_longer(cols = -c(n, rn), names_sep="_", 
        names_to = c("group", ".value")) %>%
   mutate_at(vars(lo, up), ~ replace(., pe == 0, NA)) %>%
   pivot_wider(names_from = "group", values_from = c('lo', 'up', 'pe'),
          names_repair = ~ str_replace(., "(.*)_(.*)", "\2_\1")) %>%
   select(-rn)
# A tibble: 10 x 10
#       n X1_lo  x_lo  y_lo X1_pe  x_pe  y_pe X1_up  x_up  y_up
#            
# 1   205    NA    NA -24.7     0     0 -2.98    NA    NA  14.1
# 2   205    NA    NA -25.1     0     0 -2.87    NA    NA  14.1
# 3   205    NA    NA -16.2     0     0 -2.70    NA    NA  13.4
# 4   205    NA    NA -15.6     0     0 -2.58    NA    NA  12.2
# 5   205    NA    NA -15.4     0     0 -2.42    NA    NA  13.6
# 6   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 7   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 8   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 9   205    NA    NA  NA       0     0  0       NA    NA  NA  
#10   205    NA    NA  NA       0     0  0       NA    NA  NA

data

test.df <- data.frame(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)

Changing Values in Dataframe based on Specific Columns

Answers (2)

data

Related Questions