Reputation: 2259
Intro: Working in R, I often need to reorganize information from lists of data.frames to create a summary table. In this example, I start with a single data.frame, and I show my function that converts key information from the data.frame into a single row. Bearing in mind that my desired output requires the sorting of a mixture of numeric and character data, I can’t help wondering if there is an easier technique to do this kind of thing.
My question: Can anyone provide advice, or better yet a solution, for a simpler technique to convert data.frames like these into rows, while respecting the specific sorting of the data?
# sample data: the first column is named after the model (M1) and lists the
# variables; every other column holds the index values for one group
input_df <- data.frame(
  M1    = c("Age", "Weight", "Speed", "Range"),
  dogs  = c(100, 120, 85, 105),
  cats  = c(115, 89, 80, 111),
  birds = c(100, 90, 100, 104)
)
# desired summary row: Model is a factor holding the model name; every other
# column is a plain character string listing the reported variables as
# "Label (value)", separated by ", "
desired_row <- data.frame(
  Model = factor("M1"),
  dogs = "Weight (120)",
  cats = "Age (115), Range (111)",
  birds = "Range (104)",
  # spelled out: the shorthand F is an ordinary variable that can be reassigned
  stringsAsFactors = FALSE
)
# my function
# Summarise one model's data.frame as a single-row data.frame.
#
# dat1: data.frame whose first column holds the variable labels (the name of
#   that column is used as the model name) and whose remaining columns hold
#   numeric values, one column per group.
#
# Returns a one-row data.frame with a factor column `Model` followed by one
# character column per group. Each cell lists the reported variables as
# "Label (value)", separated by ", " and ordered from largest to smallest.
makeRow <- function(dat1) {
  group_names <- names(dat1)[-1]
  labels <- as.character(dat1[[1]])

  # criterion to report only top indexes: the column maximum, anything of at
  # least 110, or anything above 102 that covers more than 0.33 of the
  # distance from 100 to the column maximum
  is_top <- function(x) {
    x == max(x) | x >= 110 | (x > 102 & (x - 100) / (max(x) - 100) > 0.33)
  }

  # build the cell text for one group column
  summarise_group <- function(group) {
    # named vector of values, largest-to-smallest
    values <- sort(setNames(dat1[[group]], labels), decreasing = TRUE)
    tops <- values[is_top(values)]
    # collapsing with ", " leaves no trailing comma to strip afterwards
    paste0(names(tops), " (", tops, ")", collapse = ", ")
  }

  cells <- lapply(setNames(group_names, group_names), summarise_group)

  # Model is made a factor explicitly: data.frame() only converts strings to
  # factors by default before R 4.0.0, so relying on the default makes the
  # column type depend on the R version
  mod <- data.frame(Model = factor(names(dat1)[1]))

  # construct a row from the list
  cbind(mod, as.data.frame(cells, stringsAsFactors = FALSE))
}
# run the function on the sample data and show the resulting row
row_output <- makeRow(input_df)
row_output
# compare the whole result with the desired row
identical(row_output, desired_row)
Upvotes: 0
Views: 55
Reputation: 1384
I'm not sure whether this is more efficient, but it is slightly less code and, in my opinion, a more direct approach.
# Summarise one model's data.frame as a single-row data.frame.
#
# dat1: data.frame whose first column holds the variable labels (the name of
#   that column is used as the model name) and whose remaining columns hold
#   numeric values, one column per group.
#
# Returns a one-row data.frame with a factor column `Model` followed by one
# character column per value column of dat1 (same names, same order). Each
# cell lists the kept variables as "Label (value)", separated by ", " and
# ordered from largest to smallest value.
makeRow <- function(dat1) {
  model_name <- names(dat1)[1]
  labels <- as.character(dat1[[1]])
  value_cols <- names(dat1)[-1]

  # criterion to report only top indexes -> TRUE for the values to keep:
  # the column maximum, anything of at least 110, or anything above 102 that
  # covers more than 0.33 of the distance from 100 to the column maximum
  keep_tops <- function(x) {
    top <- max(x, na.rm = TRUE)
    x == top | x >= 110 | (x > 102 & (x - 100) / (top - 100) > 0.33)
  }

  # build the cell text for one value column
  format_tops <- function(col) {
    values <- dat1[[col]]
    # which() drops NA comparisons, so missing values are never reported
    kept <- which(keep_tops(values))
    # report the kept values largest-to-smallest, not in input row order
    kept <- kept[order(values[kept], decreasing = TRUE)]
    paste0(labels[kept], " (", values[kept], ")", collapse = ", ")
  }

  # iterate over the column names directly: no 2:ncol() sequence and no
  # vector grown inside a loop
  cells <- lapply(setNames(value_cols, value_cols), format_tops)

  # the first column is called Model and holds the model name as a factor;
  # optional = TRUE keeps the value column names exactly as they are in dat1
  as.data.frame(
    c(list(Model = factor(model_name)), cells),
    optional = TRUE,
    stringsAsFactors = FALSE
  )
}
# run the function on the sample data
row_output <- makeRow(input_df)
# compare the birds column of the result with the desired value
identical(row_output[["birds"]], desired_row[["birds"]])
With your 'input_df', the identical() check on the birds column shown above returned TRUE.
Upvotes: 1