Rochi Saurabh
Rochi Saurabh

Reputation: 53

How to fix the error "replacement has length zero" in R

This is my cn0 file

sample_name   chr_no  copy_no Freq
gw6.0001_normal  chr1:    cn=0    2
gw6.0001_normal  chr15    cn=0    1
gw6.0001_normal  chr17    cn=0    1
gw6.0001_normal  chr4:    cn=0    1
gw6.0001_normal  chr8:    cn=0    1

This is my code in R

# Read the cn=0 table and collect the distinct sample names.
# NOTE(review): the columns referenced below (Sample_Name, Chr_No, Frequence)
# do not match the header shown for the file (sample_name, chr_no, Freq) --
# confirm the real column names.
cn0 <- read.csv("/home/results/cn0.csv",header=TRUE,stringsAsFactor=FALSE)
sample_name <- unique(cn0$Sample_Name)
# Accumulator: one row of per-chromosome values is appended per sample.
abc <- data.frame()

# NOTE(review): the closing brace of this loop is not part of the snippet.
for(i in 1:length(sample_name)){
name <- sample_name[i]
# Rows of cn0 that belong to the current sample.
a <- cn0[which(cn0$Sample_Name==name),]
x <- NULL;

# Each assignment takes the value from the row whose label matches. When the
# sample has no row for that chromosome, the right-hand side has length zero
# and R stops with "replacement has length zero" (reported for "chr2:").
x[1]<-a[which(a$Chr_No=="chr1:"),]$Frequence
x[2]<-a[which(a$Chr_No=="chr2:"),]$Frequence
x[3]<-a[which(a$Chr_No=="chr3:"),]$Frequence


# Append this sample's values as a new row of the result.
abc <-rbind(abc,x)

It gives error

in x[2] <- a[which(a$chr_no == "chr2:"), ]$Freq : 
replacement has length zero

This happens because the cn0 file contains no information about chromosome 2. What improvement is required in my script? I am new to R.

Desired output: if there is no information about a chromosome, use 0 for it and move on to the next chromosome.

Upvotes: 1

Views: 702

Answers (1)

Rui Barradas
Rui Barradas

Reputation: 76402

This is a bit intricate and I believe it could be made simpler but it does what you want using base R.
I have created a test dataset that omits one column, copy_no, because it is not directly related to the problem.

# Build a sample-by-chromosome table of frequencies in which chromosomes that
# a sample has no row for are reported as 0.
# Expects `cn0` to have the columns sample_name, Chr_no and Frequence.
N <- 22  # chromosomes 1..N are reported for every sample
sp <- split(cn0, cn0$sample_name)
sp <- lapply(sp, function(DF) {
  labels <- as.character(DF$Chr_no)
  # Chromosome number embedded in labels such as "chr1:" or "chr15". Labels
  # that carry no number are left unchanged by sub() and end up as NA.
  digits <- sub("[^[:digit:]]+([[:digit:]]+)[^[:digit:]]*", "\\1", labels)
  M <- rep(NA_integer_, length(digits))
  is_num <- grepl("^[[:digit:]]+$", digits)
  M[is_num] <- as.integer(digits[is_num])
  # Full label set: reuse the prefix found in the data and keep the input's
  # convention of a trailing ":" on single-digit chromosomes.
  prefix <- sub("[[:digit:]]+[^[:digit:]]*$", "", labels[1])
  Chr_no <- paste0(prefix, seq_len(N))
  Chr_no <- ifelse(nchar(Chr_no) == 4, paste0(Chr_no, ":"), Chr_no)
  res <- data.frame(
    sample_name = rep(DF$sample_name[1], N),
    Chr_no = Chr_no
  )
  res$Frequence <- 0
  # Place each frequency at its own chromosome position so the result does
  # not depend on the row order of DF. Rows whose number is missing or
  # outside 1..N are ignored; if a chromosome repeats, the last row wins.
  ok <- !is.na(M) & M >= 1 & M <= N
  res$Frequence[M[ok]] <- DF$Frequence[ok]
  res
})

# One row per sample, one column per chromosome label.
abc <- as.data.frame(t(vapply(sp, function(DF) DF$Frequence, numeric(N))))
names(abc) <- sp[[1]]$Chr_no
abc

Data creation code.

# Synthetic test data: m samples, each with one row for every chromosome
# 1..n except chromosome 2, and a random frequency between 1 and 10.
set.seed(1234)  # fixed seed so the sampled frequencies are reproducible
m <- 6
n <- 22

# One block of n - 1 identical sample names per sample.
sample_name <- rep(letters[seq_len(m)], each = n - 1)

# Chromosome labels without chromosome 2; four-character labels (the
# single-digit chromosomes) get a trailing ":".
chr_labels <- paste0("chr", seq_len(n)[-2])
is_short <- nchar(chr_labels) == 4
chr_labels[is_short] <- paste0(chr_labels[is_short], ":")
Chr_no <- rep(chr_labels, times = m)

Frequence <- sample(10, m * (n - 1), replace = TRUE)

cn0 <- data.frame(
  sample_name = sample_name,
  Chr_no = Chr_no,
  Frequence = Frequence
)

Upvotes: 1

Related Questions