Reputation: 53
This is my cn0 file
sample_name chr_no copy_no Freq
gw6.0001_normal chr1: cn=0 2
gw6.0001_normal chr15 cn=0 1
gw6.0001_normal chr17 cn=0 1
gw6.0001_normal chr4: cn=0 1
gw6.0001_normal chr8: cn=0 1
This is my code in R
# Question's reproducer: builds one row of per-chromosome frequencies for each
# sample. Kept exactly as posted; it stops with the error quoted below when a
# sample has no row for one of the requested chromosomes.
# NOTE(review): the file header shown above lists sample_name / chr_no / Freq,
# while this code reads Sample_Name / Chr_No / Frequence - confirm the real
# column names in cn0.csv.
cn0 <- read.csv("/home/results/cn0.csv",header=TRUE,stringsAsFactor=FALSE)
# Distinct sample identifiers; the loop below visits each one once.
sample_name <- unique(cn0$Sample_Name)
# Accumulator: one row is appended per sample.
abc <- data.frame()
for(i in 1:length(sample_name)){
name <- sample_name[i]
# Rows of cn0 that belong to the current sample.
a <- cn0[which(cn0$Sample_Name==name),]
x <- NULL;
# Frequency for chromosomes 1, 2 and 3. If the sample has no row for a
# chromosome, the right-hand side has length zero and the assignment fails
# with "replacement has length zero".
x[1]<-a[which(a$Chr_No=="chr1:"),]$Frequence
x[2]<-a[which(a$Chr_No=="chr2:"),]$Frequence
x[3]<-a[which(a$Chr_No=="chr3:"),]$Frequence
# Append this sample's values as a new row of the result.
abc <-rbind(abc,x)
# NOTE(review): the closing brace of the for loop is missing from this snippet.
It gives the following error:
in x[2] <- a[which(a$chr_no == "chr2:"), ]$Freq :
replacement has length zero
This happens because the cn0 file contains no information about chromosome 2. What do I need to change in my script? I am new to R.
Desired output: if there is no information about a chromosome, put 0 in its place and move on to the next one.
Upvotes: 1
Views: 702
Reputation: 76402
This is a bit intricate and I believe it could be made simpler but it does what you want using base R.
I have created a test dataset with one column fewer: the column `copy_no`, which is not directly related to the problem, is left out.
# Build a wide frequency table from the long `cn0` data: one row per sample,
# one column per chromosome 1..N, holding 0 wherever a sample has no row for
# that chromosome. Reads the columns sample_name, Chr_no and Frequence.
sp <- split(cn0, cn0$sample_name)
N <- 22

# Column labels for chromosomes 1..N. The textual prefix (e.g. "chr") is taken
# once from the first label instead of being recycled row by row, and
# four-character labels get a trailing ":" so they match the input spelling
# ("chr1:" ... "chr9:", "chr10" ... "chr22").
chr_prefix <- sub("[[:digit:]]+[^[:digit:]]*$", "",
                  as.character(cn0$Chr_no[1]))
chr_labels <- paste0(chr_prefix, seq_len(N))
chr_labels <- ifelse(nchar(chr_labels) == 4,
                     paste0(chr_labels, ":"), chr_labels)

sp <- lapply(sp, function(DF) {
  # Chromosome number of every row, e.g. "chr1:" -> 1, "chr15" -> 15.
  # Labels without digits become NA (with a coercion warning).
  M <- as.numeric(sub("[^[:digit:]]+([[:digit:]]+)[^[:digit:]]*", "\\1",
                      as.character(DF$Chr_no)))
  # Only rows whose chromosome number is one of 1..N can be placed.
  known <- M %in% seq_len(N)

  res <- data.frame(
    sample_name = rep(DF$sample_name[1], N),
    Chr_no = chr_labels
  )
  res$Frequence <- 0
  # Index by the chromosome number itself so each frequency lands in its own
  # slot regardless of row order. A logical mask such as `1:N %in% M` would
  # fill the slots in ascending order and misplace values whenever the rows
  # are not sorted numerically (e.g. chr1:, chr15, chr17, chr4:, chr8:).
  res$Frequence[M[known]] <- DF$Frequence[known]
  res
})

# One row per sample, one column per chromosome; vapply pins the result to
# exactly N numeric values per sample.
abc <- as.data.frame(t(vapply(sp, function(res) res$Frequence, numeric(N))))
names(abc) <- chr_labels
abc
Data creation code.
# Reproducible example data: m samples, each with one row per chromosome
# 1..n except chromosome 2, and a random frequency between 1 and 10 per row.
set.seed(1234)
m <- 6
n <- 22
# Labels for every chromosome but the second; four-character labels get a
# trailing ":" ("chr1:", "chr3:", ..., "chr9:", "chr10", ..., "chr22").
Chr_no <- paste0("chr", seq_len(n)[-2])
Chr_no[nchar(Chr_no) == 4] <- paste0(Chr_no[nchar(Chr_no) == 4], ":")
# Repeat the label set once per sample, and each sample name once per label.
Chr_no <- rep(Chr_no, times = m)
sample_name <- rep(letters[seq_len(m)], each = n - 1)
Frequence <- sample(10, size = m * (n - 1), replace = TRUE)
cn0 <- data.frame(
  sample_name = sample_name,
  Chr_no = Chr_no,
  Frequence = Frequence
)
Upvotes: 1