Reputation: 137
I have a list of data frames, where each data frame has either 1 or 2 rows labelled "mis" or "syn" (in a column named cat) and a second column with a numeric frequency. I want to fill in each data frame such that if the "mis" row is missing, a "mis" row with frequency = 0 is added, and if the "syn" row is missing, a "syn" row with frequency = 1 is added:
### example:
#example list of dataframes:
# Build three small example data frames and collect them in a named list.
# NOTE(review): cbind() first combines `cat` and `freq` into a single
# character matrix, so after as.data.frame() the `freq` column is not stored
# as numeric (it is character or factor, depending on the R version).
df1<- as.data.frame(cbind(cat = c("mis", "syn"), freq= c(4, 2)))
df2<- as.data.frame(cbind(cat = "mis", freq= 1))
df3<- as.data.frame(cbind(cat = "syn", freq= 2))
df_list<- list(df1 = df1, df2 = df2, df3= df3)
looks like:
> df_list
$df1
cat freq
1 mis 4
2 syn 2
$df2
cat freq
1 mis 1
$df3
cat freq
1 syn 2
Expected output:
> df_list
$df1
cat freq
mis 4
syn 2
$df2
cat freq
mis 1
syn 1
$df3
cat freq
syn 2
mis 0
What I've tried: first, I change the row names so that I can search by them:
# Use each data frame's `cat` values as its row names so that rows can be
# looked up by name afterwards.
df_list_named <- lapply(df_list, function(df) {
  `row.names<-`(df, as.character(df$cat))
})
df_list_named
$df1
cat freq
mis mis 4
syn syn 2
$df2
cat freq
mis mis 1
$df3
cat freq
syn syn 2
Then I've been trying to use an ifelse loop to append the rows to the data frames that need them, but I can't get it to work:
# NOTE(review): this attempt cannot work as written, for several reasons:
#  - lapply() passes each data frame itself as `x`, so `df_list_named[[x]]`
#    indexes the list with a data frame instead of a name or position;
#  - ifelse() is vectorised and returns a value shaped like its test, so it
#    cannot hand back the whole data frame produced by rbind();
#  - the inner ifelse() has no `no` argument, and the opening `lapply(` is
#    never closed.
test<- lapply(df_list_named, function (x) ifelse(!row.names(df_list_named[[x]]) %in% "mis", rbind(df_list_named[[x]], c(cat = "mis", freq= 0)),
ifelse(!row.names(df_list_named[[x]]) %in% "syn", rbind(df_list_named[[x]], c(cat = "syn", freq= 1))))
Upvotes: 1
Views: 57
Reputation: 72828
You could use a "base" data frame and `merge` it with all data frames in the list using `Map`. The `duplicated` rows created in the already complete data frames can be safely excluded with `!`, as they are always placed at the end.
# Default rows: the frequency to fall back on for each category.
# Both columns are factors.
base <- data.frame(
  cat = factor(c("syn", "mis")),
  freq = factor(c(1L, 0L))
)
base
# cat freq
# 1 syn 1
# 2 mis 0
# Complete every data frame in `df_list` with the default rows from `base`.
#
# merge(all = TRUE) sorts its result on the common columns, so which of two
# rows with the same `cat` comes first depends on how `freq` happens to sort
# (factor codes vs. numeric/character values). To make the outcome independent
# of that, each row is tagged with its origin (`.src`: 1 = original data,
# 2 = base), the original rows are ordered first explicitly, and only then are
# duplicated categories dropped. The helper column is removed again by
# selecting the original column names.
Map(function(x) {
  y <- merge(cbind(x, .src = 1L), cbind(base, .src = 2L), all = TRUE)
  y <- y[order(y$.src), ]
  y[!duplicated(y$cat), names(x), drop = FALSE]
}, df_list)
# $df1
# cat freq
# 1 mis 4
# 3 syn 2
#
# $df2
# cat freq
# 1 mis 1
# 3 syn 1
#
# $df3
# cat freq
# 1 syn 2
# 3 mis 0
# Reproducible input for the example above (this looks like dput() output of
# the list). Note that both `cat` and `freq` are stored as factors here, so
# the frequencies are factor labels rather than numbers.
df_list <- list(df1 = structure(list(cat = structure(1:2, .Label = c("mis",
"syn"), class = "factor"), freq = structure(2:1, .Label = c("2",
"4"), class = "factor")), class = "data.frame", row.names = c(NA,
-2L)), df2 = structure(list(cat = structure(c(cat = 1L), .Label = "mis", class = "factor"),
freq = structure(c(freq = 1L), .Label = "1", class = "factor")), class = "data.frame", row.names = c(NA,
-1L)), df3 = structure(list(cat = structure(c(cat = 1L), .Label = "syn", class = "factor"),
freq = structure(c(freq = 1L), .Label = "2", class = "factor")), class = "data.frame", row.names = c(NA,
-1L)))
Upvotes: 0
Reputation: 388982
Here is one way to do it with lapply
# Append a default row for every category a data frame lacks:
# "mis" defaults to frequency 0, "syn" to frequency 1.
lapply(df_list, function(x) {
  # Defaults are looked up by category name, so any combination of missing
  # categories (including a data frame that has neither) goes through the
  # same code path.
  default_freq <- c(mis = 0, syn = 1)
  missing_cat <- setdiff(names(default_freq), x$cat)
  if (length(missing_cat) == 0) {
    return(x)
  }
  # unname() keeps data.frame() from turning the vector names into row names.
  rbind(
    x,
    data.frame(cat = missing_cat, freq = unname(default_freq[missing_cat]))
  )
})
#$df1
# cat freq
#1 mis 4
#2 syn 2
#$df2
# cat freq
#1 mis 1
#2 syn 1
#$df3
# cat freq
#1 syn 2
#2 mis 0
data
# Example input: three data frames with a character `cat` column and a
# numeric `freq` column, gathered in a named list.
df1 <- data.frame(
  cat = c("mis", "syn"),
  freq = c(4, 2),
  stringsAsFactors = FALSE
)
df2 <- data.frame(
  cat = "mis",
  freq = 1,
  stringsAsFactors = FALSE
)
df3 <- data.frame(
  cat = "syn",
  freq = 2,
  stringsAsFactors = FALSE
)
df_list <- list(
  df1 = df1,
  df2 = df2,
  df3 = df3
)
Upvotes: 2