Creating new columns in multiple dataframes in R

Question

Following my previous question, I work with a large number of dataframes in R, each of which has a different number of columns. I want to harmonize these datasets so that all of them have the same number of columns, with NA values in the newly added columns. I have written a loop, but I am not sure how to update the real dataframes.

# Three sample data frames, each filled with 20 random normal draws but laid
# out with a different number of columns (2, 5 and 4 respectively).
first_df <- data.frame(matrix(rnorm(20), ncol = 2))
second_df <- data.frame(matrix(rnorm(20), ncol = 5))
third_df <- data.frame(matrix(rnorm(20), ncol = 4))

library(tidyverse)

# Find the objects with the fewest and the most columns.
# ls(pattern = "_df") lists every object whose name contains "_df", and mget()
# fetches them into a named list. The pipeline then counts the columns of each
# object, turns the named counts into a two-column tibble (name, value), sorts
# it by column count in ascending order, and keeps only the first and last
# rows: row 1 is the narrowest object, row 2 the widest.
# NOTE(review): assumes every object matching "_df" is a data frame.
min_max <- mget(ls(pattern = "_df")) %>%
  map_dbl(ncol) %>%
  enframe() %>%
  arrange(value) %>%
  slice(1, n())

# Print the two-row summary.
min_max

# A tibble: 2 x 2
#  name      value
#  <chr>     <dbl>
#1 first_df      2
#2 second_df     5

# Column names found in the widest data frame but not in the narrowest one
# (row 2 and row 1 of min_max, respectively).
diff <- setdiff(names(get(min_max$name[2])), names(get(min_max$name[1])))

# List the "_df" objects once, up front, instead of re-running ls() on every
# iteration. seq_along() below also keeps the loop from running when the
# listing is empty, which 1:length() would not.
frame_names <- ls(pattern = "_df")

for (col_name in diff) {
  # all dataframes whose names contain "_df"
  for (df_index in seq_along(frame_names)) {
    # capturing the dataframe; `data` is a separate value, so changing it
    # leaves the object it was read from untouched
    data <- get(frame_names[df_index])

    # add the column, filled with NA, when this data frame does not have it
    if (!(col_name %in% names(data))) {
      data[, col_name] <- NA
    }
    # I don't know how to update the real datasets
    #     get(frame_names[df_index]) <- data
  }
}

Fabi_Nutri · Accepted Answer

I looked it up quickly, and the solution is the assign() function.

So here is your reprex with assign(). I have also read that it would be useful to gather your dataframes into one list; then you could change the name of the list position, I think.

# Three sample data frames, each filled with 20 random normal draws but laid
# out with a different number of columns (2, 5 and 4 respectively).
first_df <- data.frame(matrix(rnorm(20), ncol = 2))
second_df <- data.frame(matrix(rnorm(20), ncol = 5))
third_df <- data.frame(matrix(rnorm(20), ncol = 4))

library(tidyverse)

# Find the objects with the fewest and the most columns.
# ls(pattern = "_df") lists every object whose name contains "_df", and mget()
# fetches them into a named list. The pipeline then counts the columns of each
# object, turns the named counts into a two-column tibble (name, value), sorts
# it by column count in ascending order, and keeps only the first and last
# rows: row 1 is the narrowest object, row 2 the widest.
# NOTE(review): assumes every object matching "_df" is a data frame.
min_max <- mget(ls(pattern = "_df")) %>%
  map_dbl(ncol) %>%
  enframe() %>%
  arrange(value) %>%
  slice(1, n())

# Print the two-row summary.
min_max

# Column names found in the widest data frame but not in the narrowest one
# (row 2 and row 1 of min_max, respectively).
diff <- setdiff(names(get(min_max$name[2])), names(get(min_max$name[1])))

# List the "_df" objects once, up front, instead of re-running ls() several
# times per iteration. Iterating over the names directly also means nothing
# runs when the listing is empty, which 1:length() would not guarantee.
frame_names <- ls(pattern = "_df")

for (frame_name in frame_names) {
  # get() hands back a separate value, so it has to be written back below.
  data <- get(frame_name)

  # Columns from `diff` that this data frame is still missing, in `diff` order.
  missing_cols <- setdiff(diff, names(data))

  if (length(missing_cols) > 0) {
    # Add all missing columns in one step, each filled with NA.
    data[missing_cols] <- NA

    # assign() stores the modified copy under the original name, which is
    # what `get(name) <- data` cannot do.
    assign(frame_name, data)
  }
}

Creating new columns in multiple dataframes in R

Answers (2)

Related Questions