MycoP
MycoP

Reputation: 137

conditionally add rows to data frames in a list of data frames

I have a list of data frames, where each data frame has either 1 or 2 rows named "mis" or "syn" (from a column named cat) and a second column with a numeric frequency. I want to fill in each data frame so that if the "mis" row is missing, a "mis" row with frequency = 0 is added, and if the "syn" row is missing, a "syn" row with frequency = 1 is added:

### example:
#example list of dataframes:
# Build each example data frame directly with data.frame(). Going through
# cbind() first creates a character matrix, which silently turns `freq` into
# text instead of the numeric frequency the question describes.
df1 <- data.frame(cat = c("mis", "syn"), freq = c(4, 2), stringsAsFactors = FALSE)
df2 <- data.frame(cat = "mis", freq = 1, stringsAsFactors = FALSE)
df3 <- data.frame(cat = "syn", freq = 2, stringsAsFactors = FALSE)
# Named list so that each data frame can be referred to by name.
df_list <- list(df1 = df1, df2 = df2, df3 = df3)

looks like:

> df_list
$df1
cat freq
1 mis    4
2 syn    2

$df2
cat freq
1 mis    1

$df3
cat freq
1 syn    2

Expected output:

> df_list
$df1
cat freq
mis    4
syn    2

$df2
cat freq
mis    1
syn    1

$df3
cat freq
syn    2
mis    0

what I've tried: first I change the row names so that I can search by them

# Use each row's `cat` value as its row name so rows can be looked up by name.
df_list_named <- lapply(df_list, function(d) {
  row.names(d) <- as.character(d$cat)
  d
})

df_list_named
$df1
cat freq
mis mis    4
syn syn    2

$df2
cat freq
mis mis    1

$df3
cat freq
syn syn    2

Then I've been trying to use ifelse() inside lapply() to append the missing rows to the data frames that need them, but I can't get it to work:

# NOTE(review): this attempt does not run as written, for several reasons:
#  - lapply() passes each data frame itself as `x`, so `df_list_named[[x]]`
#    indexes the list with a data frame rather than with a name or position;
#  - ifelse() is vectorised and returns a value shaped like its test, so it is
#    not suited to returning a whole data frame (plain if/else is);
#  - the inner ifelse() has no `no` argument, and the closing parenthesis of
#    the lapply() call is missing.
test<- lapply(df_list_named, function (x) ifelse(!row.names(df_list_named[[x]]) %in% "mis", rbind(df_list_named[[x]], c(cat = "mis", freq= 0)), 
                                                                                               ifelse(!row.names(df_list_named[[x]]) %in% "syn", rbind(df_list_named[[x]], c(cat = "syn", freq= 1))))

Upvotes: 1

Views: 57

Answers (2)

jay.sf
jay.sf

Reputation: 72828

You could use a "base" data frame and merge it with every data frame in the list using Map. The duplicated rows this creates in the already complete data frames can be safely excluded with !duplicated(), as they are always placed at the end.

# Default rows: one per category, holding the frequency to use when a data
# frame lacks that category. Both columns are factors, like the columns of
# the data frames in `df_list` (see Data below).
# The surrounding parentheses print the value as well as assigning it.
(base <- data.frame(cat = factor(c("syn", "mis")), freq = factor(1:0)))
#   cat freq
# 1 syn    1
# 2 mis    0

# Full outer merge of every data frame with the defaults, then keep only the
# first row encountered for each category.
Map(function(x) {
  merged <- merge(x, base, all = TRUE)
  merged[!duplicated(merged$cat), ]
}, df_list)
# $df1
#   cat freq
# 1 mis    4
# 3 syn    2
# 
# $df2
#   cat freq
# 1 mis    1
# 3 syn    1
# 
# $df3
#   cat freq
# 1 syn    2
# 3 mis    0

Data

# Example input written out as a literal: a named list of three data frames
# (df1, df2, df3). In every data frame both `cat` and `freq` are stored as
# factors, so `freq` holds labels such as "4" and "2" rather than numbers.
df_list <- list(df1 = structure(list(cat = structure(1:2, .Label = c("mis", 
"syn"), class = "factor"), freq = structure(2:1, .Label = c("2", 
"4"), class = "factor")), class = "data.frame", row.names = c(NA, 
-2L)), df2 = structure(list(cat = structure(c(cat = 1L), .Label = "mis", class = "factor"), 
    freq = structure(c(freq = 1L), .Label = "1", class = "factor")), class = "data.frame", row.names = c(NA, 
-1L)), df3 = structure(list(cat = structure(c(cat = 1L), .Label = "syn", class = "factor"), 
    freq = structure(c(freq = 1L), .Label = "2", class = "factor")), class = "data.frame", row.names = c(NA, 
-1L)))

Upvotes: 0

Ronak Shah
Ronak Shah

Reputation: 388982

Here is one way to do it with lapply

# For every data frame in the list, append one row for each category that is
# absent from its `cat` column, using that category's default frequency.
# Data frames that already contain every category are returned unchanged.
lapply(df_list, function(x) {
  # Default frequency for each category that must be present.
  default_freq <- c(mis = 0, syn = 1)

  # Categories not yet present; this also covers a data frame with no rows,
  # which then receives both default rows.
  missing_cat <- setdiff(names(default_freq), x$cat)
  if (length(missing_cat) == 0) {
    return(x)
  }

  # unname() stops data.frame() from turning the vector names into row names.
  rbind(
    x,
    data.frame(cat = missing_cat, freq = unname(default_freq[missing_cat]))
  )
})

#$df1
#  cat freq
#1 mis    4
#2 syn    2

#$df2
#  cat freq
#1 mis    1
#2 syn    1

#$df3
#  cat freq
#1 syn    2
#2 mis    0

data

# Example input: three data frames with a character `cat` column and a
# numeric `freq` column, collected into a named list.
df1 <- data.frame(
  cat = c("mis", "syn"),
  freq = c(4, 2),
  stringsAsFactors = FALSE
)
df2 <- data.frame(cat = "mis", freq = 1, stringsAsFactors = FALSE)
df3 <- data.frame(cat = "syn", freq = 2, stringsAsFactors = FALSE)
df_list <- list(df1 = df1, df2 = df2, df3 = df3)

Upvotes: 2

Related Questions