pdeli
pdeli

Reputation: 534

Append data.frame objects from an environment to corresponding data.frame objects in GlobalEnv (or another env) in R

I have several existing data.frame objects that need to be updated from the Internet. However, as the updates have the same names as the mentioned existing objects, I put the updates in a separate environment also as data.frame objects.

Then, the idea is to append the updates to the existing data.frame objects. But I don't see how I can do that iteratively (i.e., in a loop?) with rbind from one environment to GlobalEnv (or another environment, for that matter).

Also, I did not put them here, but there will be several other data.frame objects (with other names) that will be in the GlobalEnv (or the environment where they will be loaded).

Here below is a piece of code that should be reproducible (with comments and links to the sources):

library(quantmod)

# Download the baseline price history (2020-01-01 till 2021-02-02) of every
# ticker directly into the global environment
tickers <- c("NKLA", "MPNGF", "RMO", "JD", "MSFT")
getSymbols.yahoo(
  tickers,
  env = globalenv(),
  auto.assign = TRUE,
  from = "2020-01-01",
  to = "2021-02-02"
)

# Precaution: close any Internet connection that is still open
# https://stackoverflow.com/a/52758758/2950721
closeAllConnections()

# Names of the xts objects currently bound in the global environment
xtsObjects <- names(Filter(isTRUE, eapply(.GlobalEnv, is.xts)))

# Overwrite each xts object with its data.frame equivalent
# https://stackoverflow.com/a/69246047/2950721
for (objName in xtsObjects) {
  assign(objName, fortify.zoo(get(objName)))
}


# Same download / convert steps for the update window (from 2021-02-03 on),
# this time inside a dedicated environment because the updates carry the
# same names as the objects already in the global environment
symbolUpdates.env <- new.env()

getSymbols.yahoo(
  tickers,
  env = symbolUpdates.env,
  auto.assign = TRUE,
  from = "2021-02-03"
)

closeAllConnections()

symbolUpdatesXtsObjects <- names(
  Filter(isTRUE, eapply(symbolUpdates.env, is.xts))
)

for (objName in symbolUpdatesXtsObjects) {
  assign(
    objName,
    fortify.zoo(get(objName, envir = symbolUpdates.env)),
    envir = symbolUpdates.env
  )
}

# Names of the data.frame objects held in .GlobalEnv and in
# symbolUpdates.env, respectively
globalEnvDataframeObjects <- names(
  Filter(isTRUE, eapply(.GlobalEnv, is.data.frame))
)
symbolUpdatesDataframeObjects <- names(
  Filter(isTRUE, eapply(symbolUpdates.env, is.data.frame))
)


# This rbind definitely does not work!!!
# Why it fails: rbind() treats every argument, including the two `envir =`
# ones, as data to bind; it does not look objects up in an environment.
# The other two arguments are the object *names* (character strings taken
# from the name vectors), not the data.frames those names refer to.
# In addition, the value returned by rbind() is never assigned, so nothing
# in either environment would change even if the call succeeded.
for (i in seq_along(globalEnvDataframeObjects)) {
  rbind(envir = .GlobalEnv, globalEnvDataframeObjects[i], envir =
  symbolUpdates.env, symbolUpdatesDataframeObjects[i])
}

My questions:

Thanks in advance.


Systems used:

Upvotes: 0

Views: 437

Answers (2)

hello_friend
hello_friend

Reputation: 5788

If it is required to store the data.frames in multiple environments, use the following:

# Packages this script depends on: necessary_packages => character vector
necessary_packages <- c("quantmod")

# Names of the required packages that are not available yet:
# new_packages => character vector
# requireNamespace() is used as the presence check instead of scanning the
# whole library with installed.packages().
new_packages <- necessary_packages[
  !vapply(necessary_packages, requireNamespace, logical(1), quietly = TRUE)
]

# If any package is missing, install it together with its dependencies:
if (length(new_packages) > 0) {
  install.packages(
    new_packages,
    dependencies = TRUE
  )
}

# Attach the packages to the session. library() stops with an error when a
# package cannot be loaded, whereas require() would only return FALSE and
# let the script fail later at the first quantmod call.
for (pkg in necessary_packages) {
  library(pkg, character.only = TRUE)
}

# Tickers to download: tickers => character vector
tickers <- c("NKLA", "MPNGF", "RMO", "JD", "MSFT")

# Environment that receives the update data:
# symbolUpdates.env => environment
symbolUpdates.env <- new.env()

# First date of each download window: from_dates => Date vector
# The update window starts the day after the baseline window ends
# (2021-02-03, as in the question), so the two windows do not overlap and
# binding them later does not duplicate rows.
from_dates <- as.Date(c("2020-01-01", "2021-02-03"))

# Last date of each download window; the update window runs until today:
# to_dates => Date vector
to_dates <- c(as.Date("2021-02-02"), Sys.Date())

# Target environment of each window, in the same order as the date vectors:
# env_vec => list of environments
env_vec <- list(.GlobalEnv, symbolUpdates.env)

# Function to retrieve each ticker as a data.frame:
# retrieve_ticker_df => function()
#
# Arguments:
#   ticker_vec: character vector of ticker symbols
#   from_date:  start of the window, forwarded to getSymbols.yahoo(from = )
#   to_date:    end of the window, forwarded to getSymbols.yahoo(to = )
#
# Returns: a list with one data.frame per ticker, named after ticker_vec.
# Each data.frame has a leading `date` column (built from the row names of
# the downloaded data) followed by the downloaded columns, and automatic
# (sequential integer) row names.
retrieve_ticker_df <- function(ticker_vec, from_date, to_date){

  # Download every ticker in turn: df_list => list of data.frames
  df_list <- lapply(
    ticker_vec,
    function(ticker){
      # Retrieve the data.frame: tmp => data.frame
      tmp <- getSymbols.yahoo(
        ticker,
        auto.assign = FALSE,
        from = from_date,
        to = to_date,
        return.class = "data.frame"
      )

      # Close all Internet connections as a precaution
      # https://stackoverflow.com/a/52758758/2950721
      closeAllConnections()

      # Move the dates from the row names into a proper column and
      # revert the index to sequential integers
      data.frame(
        cbind(
          date = as.Date(row.names(tmp)),
          tmp
        ),
        row.names = NULL
      )
    }
  )

  # Name each element after its ticker; the named list is the return value
  setNames(df_list, ticker_vec)
}

# The i-th date window is downloaded for the i-th environment, so the three
# vectors must line up
stopifnot(
  length(from_dates) == length(env_vec),
  length(to_dates) == length(env_vec)
)

# Download one list of data.frames per window:
# ticker_df_list_list => list of list of data.frames
ticker_df_list_list <- lapply(
  seq_along(env_vec),
  function(i){
    retrieve_ticker_df(
      tickers,
      from_dates[i],
      to_dates[i]
    )
  }
)

# Push each of the lists to the appropriate environment:
# data.frames => env
# A for loop is used because only the side effect matters; at top level
# lapply() would also print the list of environments it returns.
for (i in seq_along(ticker_df_list_list)) {
  list2env(
    ticker_df_list_list[[i]],
    envir = env_vec[[i]]
  )
}

# Fetch the ticker data.frames from every environment and flatten them into
# one list whose names are the tickers (each ticker occurs once per
# environment): ir_list => list of data.frames
ir_list <- unlist(
  lapply(
    env_vec,
    function(x){
      mget(
        tickers,
        envir = x
      )
    }
  ),
  recursive = FALSE
)

# Split-apply-combine based on the data.frame names:
# bound_df_list => list of data.frames (one per ticker)
# make.row.names = FALSE keeps the sequential integer index instead of row
# names prefixed with the ticker name.
bound_df_list <- lapply(
  split(
    ir_list,
    names(ir_list)
  ),
  function(x){
    do.call(
      rbind,
      c(unname(x), make.row.names = FALSE)
    )
  }
)

# Clear up the intermediate objects:
rm(ticker_df_list_list, ir_list, env_vec)
invisible(gc())

If it is not mandatory to use multiple environments, use the following instead:

# Packages this script depends on: necessary_packages => character vector
necessary_packages <- c("quantmod")

# Names of the required packages that are not available yet:
# new_packages => character vector
# requireNamespace() is used as the presence check instead of scanning the
# whole library with installed.packages().
new_packages <- necessary_packages[
  !vapply(necessary_packages, requireNamespace, logical(1), quietly = TRUE)
]

# If any package is missing, install it together with its dependencies:
if (length(new_packages) > 0) {
  install.packages(
    new_packages,
    dependencies = TRUE
  )
}

# Attach the packages to the session. library() stops with an error when a
# package cannot be loaded, whereas require() would only return FALSE and
# let the script fail later at the first quantmod call.
for (pkg in necessary_packages) {
  library(pkg, character.only = TRUE)
}

# Tickers to download: tickers => character vector
tickers <- c("NKLA", "MPNGF", "RMO", "JD", "MSFT")

# Create a new environment: environment => symbolUpdates.env
# NOTE(review): nothing in this single-environment snippet reads this
# object; it is kept so the snippet still creates the same objects.
symbolUpdates.env <- new.env()

# First date of each download window: from_dates => Date vector
# The update window starts the day after the baseline window ends
# (2021-02-03, as in the question), so the two windows do not overlap and
# binding them later does not duplicate rows.
from_dates <- as.Date(c("2020-01-01", "2021-02-03"))

# Last date of each download window; the update window runs until today:
# to_dates => Date vector
to_dates <- c(as.Date("2021-02-02"), Sys.Date())

# Function to retrieve each ticker as a data.frame:
# retrieve_ticker_df => function()
#
# Arguments:
#   ticker_vec: character vector of ticker symbols
#   from_date:  start of the window, forwarded to getSymbols.yahoo(from = )
#   to_date:    end of the window, forwarded to getSymbols.yahoo(to = )
#
# Returns: a list with one data.frame per ticker, named after ticker_vec.
# Each data.frame has a leading `date` column (built from the row names of
# the downloaded data) followed by the downloaded columns, and automatic
# (sequential integer) row names.
retrieve_ticker_df <- function(ticker_vec, from_date, to_date){

  # Download every ticker in turn: df_list => list of data.frames
  df_list <- lapply(
    ticker_vec,
    function(ticker){
      # Retrieve the data.frame: tmp => data.frame
      tmp <- getSymbols.yahoo(
        ticker,
        auto.assign = FALSE,
        from = from_date,
        to = to_date,
        return.class = "data.frame"
      )

      # Close all Internet connections as a precaution
      # https://stackoverflow.com/a/52758758/2950721
      closeAllConnections()

      # Move the dates from the row names into a proper column and
      # revert the index to sequential integers
      data.frame(
        cbind(
          date = as.Date(row.names(tmp)),
          tmp
        ),
        row.names = NULL
      )
    }
  )

  # Name each element after its ticker; the named list is the return value
  setNames(df_list, ticker_vec)
}

# Every window needs both a start and an end date
stopifnot(length(from_dates) == length(to_dates))

# Download one list of data.frames per window:
# ticker_df_list_list => list of list of data.frames
ticker_df_list_list <- lapply(
  seq_along(from_dates),
  function(i){
    retrieve_ticker_df(
      tickers,
      from_dates[i],
      to_dates[i]
    )
  }
)

# Flatten to a single list whose names are the tickers (each ticker occurs
# once per window): ir_list => list of data.frames
ir_list <- unlist(
  ticker_df_list_list,
  recursive = FALSE
)

# Split-apply-combine: bound_df_list => list of data.frames (one per ticker)
# make.row.names = FALSE keeps the sequential integer index instead of row
# names prefixed with the ticker name.
bound_df_list <- lapply(
  split(
    ir_list,
    names(ir_list)
  ),
  function(x){
    do.call(
      rbind,
      c(unname(x), make.row.names = FALSE)
    )
  }
)

# Clear up the intermediate objects:
rm(ticker_df_list_list, ir_list)
invisible(gc())

Upvotes: 0

akrun
akrun

Reputation: 887008

We may need `intersect` here, so that only the objects present in both environments are paired up:

# Names of the data.frames that exist in both environments; only these can
# be updated. Objects are paired by name below, so the order of the two
# name vectors does not matter.
interObj <- intersect(globalEnvDataframeObjects, symbolUpdatesDataframeObjects)

# Row count of the first object before appending
# (console output of the original run is shown as a comment)
nrow(get(interObj[1]))
# [1] 273

# Append each update to the same-named data.frame in .GlobalEnv
for (i in seq_along(interObj)) {
  assign(
    interObj[i],
    rbind(
      get(interObj[i], envir = .GlobalEnv),
      get(interObj[i], envir = symbolUpdates.env)
    ),
    envir = .GlobalEnv
  )
}

Upvotes: 1

Related Questions