user3354212
user3354212

Reputation: 1112

how to replace specific characters in a data frame by the value in a variable in r

I have a data frame that looks like this:

# Example genotype table. header = TRUE is required so that the first line
# supplies the column names (chr, pos, Ref, Alt, sample IDs); without it the
# header would be read as a data row and df$Ref / df$Alt would not exist.
df <- read.table(text="chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
Chr1 1462191   T   C     1/1     0/1     1/1     0/0     1/1     1/1
Chr1 1463534   G   C     0/0     1/1     0/0     0/1     0/0     0/0
Chr1 1463881   T   A     0/1     0/0     1/1     0/0     1/1     1/1
Chr1 1464091   G   A     0/0     0/0     1/1     0/0     1/1     1/1
Chr1 1464651   T   C     1/1     0/0     1/1     0/1    1/1     1/1",
  header = TRUE, stringsAsFactors = FALSE)

The expected result:

  chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
Chr1 1464651   T   C     C/C     T/T     C/C     T/C    C/C     C/C

The replacements should follow this rule: in df[5:10], "0" should be replaced by the character in df$Ref, and "1" by the character in df$Alt. I checked the question in this link [Replace specific characters in a variable in data frame in R], but it didn't work for my situation. I would appreciate any help.

Upvotes: 5

Views: 261

Answers (3)

Veerendra Gadekar
Veerendra Gadekar

Reputation: 4472

Using data.table

# Within each (chr, pos) group, swap the allele codes in every remaining
# column: "1" becomes the group's Alt base first, then "0" becomes its Ref
# base. NOTE(review): this relies on (chr, pos) identifying a single row so
# that Ref and Alt are length-one replacements -- confirm for real data.
setDT(df)[
  ,
  lapply(.SD, function(column) {
    alt_filled <- gsub("1", Alt, column)
    gsub("0", Ref, alt_filled)
  }),
  by = .(chr, pos)
]

#    chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1: Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
#2: Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
#3: Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
#4: Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
#5: Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

Using dplyr

library(dplyr)

# Rewrite every sample column (names matching "^D04.") one row at a time.
# rowwise() makes Ref and Alt length-one inside mutate(), which gsub() needs
# for its replacement argument. across() replaces the deprecated
# mutate_each()/funs() pair, and ungroup() drops the row-wise grouping so the
# result behaves like an ordinary data frame afterwards.
df %>%
  rowwise() %>%
  mutate(across(
    matches("^D04."),
    ~ gsub("0", Ref, gsub("1", Alt, .x))
  )) %>%
  ungroup()

#   chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
#2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
#3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
#4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
#5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

Another option

library(dplyr)
library(tidyr)

# Reshape to one row per (variant, sample), translate the genotype string,
# then reshape back. pivot_longer()/pivot_wider() replace the superseded
# gather()/spread(); pivot_wider() keeps the sample columns in the order in
# which they first appear. The row-wise grouping is only needed so that Ref
# and Alt are length-one for gsub(), and is dropped before widening.
df %>%
  pivot_longer(
    cols = -c(chr, pos, Ref, Alt),
    names_to = "key",
    values_to = "value"
  ) %>%
  rowwise() %>%
  mutate(value = gsub("0", Ref, gsub("1", Alt, value))) %>%
  ungroup() %>%
  pivot_wider(names_from = key, values_from = value)

#Source: local data frame [5 x 10]

#   chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
#2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
#3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
#4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
#5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

base R option using apply

# Apply-family version that only touches the genotype columns (5 to 10).
# Calling apply() on the whole data frame would first coerce it to a
# character matrix, so `pos` would come back as text; here the first four
# columns keep their original types. local() keeps the working copy out of
# the global environment and returns the converted data frame.
local({
  out <- as.data.frame(df)
  out[5:10] <- lapply(out[5:10], function(genotype) {
    vapply(
      seq_along(genotype),
      function(i) {
        # "1" -> Alt first, then "0" -> Ref, using the bases of row i.
        with_alt <- gsub("1", out[["Alt"]][i], genotype[i])
        gsub("0", out[["Ref"]][i], with_alt)
      },
      character(1)
    )
  })
  out
})

#   chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
#2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
#3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
#4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
#5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

Upvotes: 4

Pierre L
Pierre L

Reputation: 28461

Here is a function that you can use to fill in the values in this case, and that you can adapt for future cases too.

#' Replace allele codes in genotype columns with each row's bases
#'
#' Every column of `df` that is not listed in `reference_cols` is treated as
#' a genotype column. In each of its values, matches of `First` are replaced
#' by that row's `Ref` value and matches of `Second` by that row's `Alt`
#' value.
#'
#' @param df A data frame containing `Ref` and `Alt` columns.
#' @param reference_cols Names of the columns that must be left untouched.
#' @param First Pattern (regular expression, as used by `gsub()`) that stands
#'   for the reference allele. Defaults to `"0"`.
#' @param Second Pattern that stands for the alternate allele. Defaults to
#'   `"1"`.
#' @return `df` with the genotype columns rewritten as character vectors;
#'   the reference columns are returned unchanged.
convert_val <- function(df,
                        reference_cols = c("chr", "pos", "Ref", "Alt"),
                        First = "0",
                        Second = "1") {
  stopifnot(all(c("Ref", "Alt") %in% names(df)))

  # Translate one genotype column (`vec` is its position in DF).
  morph <- function(DF, vec, First = "0", Second = "1") {
    genotypes <- as.character(DF[[vec]])
    ref <- as.character(DF[["Ref"]])
    alt <- as.character(DF[["Alt"]])
    # vapply() guarantees a plain, unnamed character vector, including for a
    # data frame with zero rows.
    vapply(
      seq_along(genotypes),
      function(i) {
        # Same order as before: First -> Ref, then Second -> Alt.
        with_ref <- gsub(First, ref[i], genotypes[i])
        gsub(Second, alt[i], with_ref)
      },
      character(1)
    )
  }

  nums <- which(!names(df) %in% reference_cols)
  if (length(nums) == 0L) {
    # Nothing to convert.
    return(df)
  }
  df[nums] <- lapply(nums, function(x) morph(df, x, First, Second))
  df
}

# Run the conversion on the example data frame and print the returned copy.
convert_val(df)
#    chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
# 1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
# 2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
# 3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
# 4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
# 5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

In the future, you can change the First and Second arguments for the internal function morph to whatever the new values to look for are (default is "0" and "1"). Or if your column names change, you can adjust the line reference_cols.

Upvotes: 2

Carlos Cinelli
Carlos Cinelli

Reputation: 11617

Creating data:

# Example genotype table; the first line of the text supplies the column
# names. Arguments are spelled out in full (header, TRUE/FALSE) rather than
# relying on partial matching and the reassignable T/F shortcuts.
df <- read.table(text="chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
                 Chr1 1462191   T   C     1/1     0/1     1/1     0/0     1/1     1/1
                 Chr1 1463534   G   C     0/0     1/1     0/0     0/1     0/0     0/0
                 Chr1 1463881   T   A     0/1     0/0     1/1     0/0     1/1     1/1
                 Chr1 1464091   G   A     0/0     0/0     1/1     0/0     1/1     1/1
                 Chr1 1464651   T   C     1/1     0/0     1/1     0/1    1/1     1/1",
  header = TRUE, stringsAsFactors = FALSE)

Using gsub:

# Vectorize() wraps gsub() so that its arguments are iterated in parallel;
# SIMPLIFY = FALSE keeps the result as a list with one element per iteration.
vgsub<- Vectorize(gsub, SIMPLIFY = FALSE)
# Transposing the genotype columns turns each original row into one column
# of the new data frame, so the i-th list element (row i's genotypes) is
# paired with df$Ref[i] as the replacement for "0".
new <- vgsub("0", df$Ref, as.data.frame(t(df[5:10])))
# Second pass over the same per-row list: "1" becomes that row's Alt base.
new <- vgsub("1", df$Alt, new)
# Stack the per-row character vectors back into a rows-by-samples matrix and
# write it over the genotype columns.
df[5:10] <- do.call("rbind", new)
# Print the converted data frame.
df
  chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

Upvotes: 4

Related Questions