Reputation: 1845
I'm trying to generalize the code here for more than one repeating column: rename list of dataframe columns to mimic joined suffixes
I have a list of data frames in which some column names are the same among several of the data frames.
I want to use the same pattern as reduce(left_join, suffix = c("_x", "_y"), by="inAll")
to create new names for every column except the one I exclude
("inAll").
library(dplyr)
library(purrr)
library(stringr)
# Example data for the question, kept together in one list:
#   dd$data  - the input list of data frames, with column names that repeat
#              across the data frames;
#   dd$data2 - the expected result, with join-style "_x"/"_y" suffixes on the
#              repeated column names.
dd <- list(
  # Input list.
  data = list(
    ONE = data.frame(
      inAll = c(1.1, 1.2, 1.3),
      inAll_2 = c(1.4, 1.5, 1.6),
      inSome = c(1.7, 1.8, 1.9),
      only_one = c(1.10, 1.11, 1.12)
    ),
    TWO = data.frame(
      inAll = c(2.1, 2.2, 2.3),
      inAll_2 = c(2.4, 2.5, 2.6),
      inOthers = c(2.7, 2.8, 2.9)
    ),
    THREE = data.frame(
      inAll = c(3.1, 3.2, 3.3),
      inAll_2 = c(3.4, 3.5, 3.6)
    ),
    FOUR = data.frame(
      inAll = c(4.1, 4.2, 4.3),
      inAll_2 = c(4.4, 4.5, 4.6),
      inOthers = c(4.10, 4.11, 4.12),
      inSome = c(4.7, 4.8, 4.9)
    ),
    FIVE = data.frame(
      inAll = c(5.1, 5.2, 5.3),
      inAll_2 = c(5.4, 5.5, 5.6)
    ),
    SIX = data.frame(
      inAll = c(6.1, 6.2, 6.3),
      inAll_2 = c(6.4, 6.5, 6.6),
      inOthers = c(6.7, 6.8, 6.8)
    )
  ),
  # Expected output: same values, renamed columns.
  data2 = list(
    ONE = data.frame(
      inAll = c(1.1, 1.2, 1.3),
      inAll_2_x = c(1.4, 1.5, 1.6),
      inSome_x = c(1.7, 1.8, 1.9),
      only_one = c(1.10, 1.11, 1.12)
    ),
    TWO = data.frame(
      inAll = c(2.1, 2.2, 2.3),
      inAll_2_y = c(2.4, 2.5, 2.6),
      inOthers_x = c(2.7, 2.8, 2.9)
    ),
    THREE = data.frame(
      inAll = c(3.1, 3.2, 3.3),
      inAll_2_x_x = c(3.4, 3.5, 3.6)
    ),
    FOUR = data.frame(
      inAll = c(4.1, 4.2, 4.3),
      inAll_2_y_y = c(4.4, 4.5, 4.6),
      inOthers_y = c(4.10, 4.11, 4.12),
      inSome_y = c(4.7, 4.8, 4.9)
    ),
    FIVE = data.frame(
      inAll = c(5.1, 5.2, 5.3),
      inAll_2_x_x_x = c(5.4, 5.5, 5.6)
    ),
    SIX = data.frame(
      inAll = c(6.1, 6.2, 6.3),
      inAll_2_y_y_y = c(6.4, 6.5, 6.6),
      inOthers = c(6.7, 6.8, 6.8)
    )
  )
)
Open to entirely different ideas!!!
# Build candidate suffixed names for the column names found in a list of data
# frames, leaving out the names given in `toExclude`.
#
# data:      list of data frames; their column names are pooled together.
# toExclude: character vector of column names to drop from the result.
#
# Returns a list with one character vector per remaining column name, each
# made by pasting that name onto its generated "_x"/"_y" suffixes. The names
# are not yet applied to the data frames (see the trailing note).
new_names <- function(data, toExclude) {
  # Pool the column names of every data frame into a one-column tibble.
  pooled <- unlist(map(data, colnames))
  name_tbl <- rename(tibble(pooled), "names" = 1)

  # Count the occurrences of each name, then drop the excluded names.
  name_counts <- name_tbl %>%
    group_by(names) %>%
    count() %>%
    filter(!names %in% toExclude)

  # For a count k: repeat the "_x"/"_y" pair k / 2 times and lengthen the
  # suffix with each successive pair ("_x", "_y", "_x_x", "_y_y", ...).
  # k / 2 is passed on unrounded, so odd counts depend on how rep() and
  # seq_len() treat fractional arguments.
  build_suffixes <- function(k) {
    pair_count <- k / 2
    strrep(rep(c("_x", "_y"), pair_count), rep(seq_len(pair_count), each = 2))
  }
  suffix_sets <- map(name_counts$n, build_suffixes)

  # Paste each column name onto its suffixes; this list is the return value.
  map2(name_counts$names, suffix_sets, paste0)
  # .....somehow apply these to the right columns?
}
Calling the function with the list of data frames and the column to exclude should yield the desired output:
# Call new_names() on the example list, passing "inAll" as the column name to exclude.
new_names(dd$data, "inAll")
Any help, even with just what logic to use to get my desired end result would be really appreciated, thank you!
Upvotes: 2
Views: 296
Reputation: 886948
Here is one option: extract the column names from the `list`, change the names based on how they are duplicated after `split`ting by common names, then `relist` them back into a `list` and change the column names of the original `list` using `map2` with the changed names.
library(purrr)
library(dplyr)
library(stringr)
# Rename the repeated column names in a list of data frames so that they carry
# join-style suffixes (_x, _y, _x_x, _y_y, ...); the result is stored in out2.
inp <- dd$data
# Column names of each data frame, kept as a list; reused below as the
# skeleton for relist().
lst1 <- map(inp, names)
# All column names flattened into one character vector.
nm1 <- unlist(lst1)
# Mask of the names that may be renamed: everything except 'inAll'.
i1 <- nm1 != 'inAll'
# Group the renameable names by their own value, so each element holds all
# occurrences of one column name.
lst2 <- split(nm1[i1], nm1[i1])
# Only names that occur more than once are modified.
i2 <- lengths(lst2) > 1
lst2[i2] <- map(lst2[i2], ~ {
# Pair index of each occurrence: 1, 1, 2, 2, 3, 3, ...
i3 <- (seq_along(.x)-1) %/% 2 + 1
# TRUE for occurrences whose pair has two members. names() yields the pair
# indices as strings, which %in% matches against the numeric i3. A trailing
# unpaired occurrence stays FALSE and keeps its original name.
i4 <- i3 %in% names(which(table(i3) == 2))
# Number of complete pairs.
n <- ceiling(length(.x[i4])/2)
# Suffix the k-th pair with '_x' / '_y' repeated k times:
# _x, _y, _x_x, _y_y, ...
.x[i4] <- str_c(.x[i4], strrep(rep(c('_x', '_y'), n),
rep(seq_len(n), each = 2)))
.x
})
# Put the renamed values back at their original positions in the flat vector.
nm1[i1] <- unsplit(lst2, nm1[i1])
# Restore the per-data-frame grouping of the names and apply them to each
# data frame.
out2 <- map2(inp, relist(nm1, skeleton = lst1), set_names)
Checking against the OP's expected output:
# Compare the computed list (out2) with the expected list from the question.
out <- dd$data2
identical(out, out2)
#[1] TRUE
Upvotes: 2