Reputation: 28
I am trying to write a function to run several concurrent cell-wise calculations on a raster. My hope is to use the raster package's built-in parallel processing code to speed up processing.
I am getting an error: "Error in get(name, envir = envir) : object 'mn' not found". This error occurs at the ##### ERROR HERE ##### marker below, where the function is attempting to run raster::overlay.
I see that this error comes from the `get` function, and it indicates that the `mn` object is not accessible to all of the cluster's worker nodes.
How can I write this code so that objects will be in the appropriate environments? Thanks.
Here is a basic version of my code:
# Compute a z-score raster from a multi-layer raster, using the raster
# package's cluster helpers for the cell-wise steps.
#
# Steps as written: standard deviation via calc(), a per-layer mean computed
# from the other layers, then (data - mean) and a division by the standard
# deviation via overlay().
#
# Arguments:
#   file   - passed to raster::brick() (presumably a path to a raster file)
#   na.rm  - forwarded to the raster::calc() and raster::overlay() calls
#   ncores - passed to raster::beginCluster() as the first argument
#
# NOTE(review): as posted this function cannot be parsed or run to completion;
# see the NOTE(review) comments below.
my_func <- function(file = NULL, na.rm = TRUE, ncores = 2){
# begin parallel processing
# NOTE(review): no on.exit() is registered, so raster::endCluster() at the
# bottom is not reached if any call in between signals an error.
raster::beginCluster(ncores, type='SOCK')
# load data from file as raster brick
data <- raster::brick(file)
# calculate the standard deviation of the brick
# `fun = sd` names the function handed to calc(); the result is then stored
# in a local variable that is also called `sd`.
sd <- raster::clusterR(x = data, fun = function(data){ raster::calc(x = data, fun = sd, na.rm = na.rm) })
# calculate mean - for layer i, mean is the mean of all layers but i
set <- 1:raster::nbands(data) # marker to pass to for loop
mn <- data # creates raster object to be filled with results
for(i in set){
# indices of every layer except i
subset <- set[-i]
wrk <- raster::stack(x = data, bands = subset) # make stack of all but i
mn_i <- raster::clusterR(x = wrk, fun = function(wrk){ raster::calc(x = wrk, fun = mean, na.rm = na.rm) }) # calculate mean of stack
mn[[i]] <- mn_i # set values of appropriate band with results
}
# calculate z score
##### ERROR HERE #####
# NOTE(review): the raster::overlay( call below is never closed - there is no
# `)` before the `}` that ends the anonymous function - so this line does not
# parse as written.
# NOTE(review): `export` receives the object `mn` itself; the clusterR
# documentation appears to expect a character vector of variable names -
# TODO confirm.
z <- raster::clusterR(x = data, fun = function(data, mn){ raster::overlay(x = data, y = mn, fun = function(data, mn){ data - mn }, na.rm = na.rm }, export = mn)
# normalize z score
##### I assume the error would also occur here #####
# NOTE(review): `x = data` is passed here, so the `z` computed in the previous
# step is not the input of this call even though the anonymous function's
# first parameter is named `z` - confirm whether `x = z` was intended.
z <- raster::clusterR(x = data, fun = function(z, sd){ raster::overlay(x = z, y = sd, fun = function(z, sd){ z / sd }, na.rm = na.rm) }, export = sd)
# end parallel processing
raster::endCluster()
# return result
# NOTE(review): `result` is never assigned inside this function; the last
# value computed above is stored in `z`.
return(result)
}
Upvotes: 0
Views: 1467
Reputation: 28
To work with multi-band rasters the clusterR function can be rolled into a for loop.
# Worked example: z-score every layer of a multi-layer raster against the mean
# of the *other* layers, then divide by the standard deviation. The cell-wise
# arithmetic is run through raster::clusterR(), one layer at a time.

# example data
# NOTE(review): assumes the package that ships the `Rlogo` dataset is attached.
data("Rlogo")
data <- raster::brick(Rlogo)
layer_ids <- seq_len(raster::nlayers(data))

# calculate sd
# stats::sd is named explicitly: after this line `sd` is bound to a raster, so
# a bare `sd` argument would pass that raster (not the function) if the script
# were run a second time in the same session.
sd <- raster::calc(data, stats::sd)

# calculate mean (mean of layer x is mean of all layers but x)
mn <- data # template with the same layers, overwritten below
for (i in layer_ids) {
  others <- raster::dropLayer(data, i) # every layer except i
  mn[[i]] <- raster::calc(x = others, fun = mean)
}

# cell-wise operations handed to raster::overlay() on the worker nodes
fun1 <- function(x, y) {
  x - y
}
fun2 <- function(x, y) {
  x / y
}

z <- data # template for the result, overwritten layer by layer
raster::beginCluster()
tryCatch(
  {
    # calculate z score
    for (i in layer_ids) {
      pair <- raster::stack(data[[i]], mn[[i]])
      z[[i]] <- raster::clusterR(
        x = pair,
        fun = raster::overlay,
        args = list(fun = fun1)
      )
    }

    # normalize z score
    for (i in layer_ids) {
      pair <- raster::stack(z[[i]], sd)
      z[[i]] <- raster::clusterR(
        x = pair,
        fun = raster::overlay,
        args = list(fun = fun2)
      )
    }
  },
  # always shut the cluster down, even if one of the clusterR() calls fails
  finally = raster::endCluster()
)
Upvotes: 0
Reputation: 1228
You are setting `x` and `y` in `overlay()` but using `data`, `mn`, `z` and `sd` as parameters. Use:
# Suggested replacement for the two failing calls: the functions given to
# raster::overlay() take `x` and `y`, matching the `x =` / `y =` arguments.
# `data`, `mn`, `sd` and `na.rm` come from the enclosing function in the
# question.
z <- raster::clusterR(
  x = data,
  fun = function(data, mn){
    raster::overlay(x = data, y = mn, fun = function(x, y){ x - y }, na.rm = na.rm)
  },
  export = mn
)
z <- raster::clusterR(
  x = data,
  fun = function(z, sd){
    raster::overlay(x = z, y = sd, fun = function(x, y){ x / y }, na.rm = na.rm)
  },
  export = sd
)
Proposed solution (always provide example data):
library(raster)

# Reproducible toy inputs built on the default raster() grid.
set.seed(123)
template <- raster()
n_cells <- ncell(template)
template <- setValues(template, seq_len(n_cells))

# "mean" layer: random values, one per cell
mean_layer <- setValues(template, rnorm(n_cells))

# "data" layer: the cell indices, with the first 1000 cells blanked out
value_layer <- template
value_layer[seq_len(1000)] <- NA # to force some NA testing

# "sd" layer: twice the cell index
sd_layer <- template * 2

# cell-wise operations applied by overlay() on the cluster nodes
subtract <- function(x, y) {
  x - y
}
divide <- function(x, y) {
  x / y
}

beginCluster()
# step 1: data - mean, on a two-layer stack
centred <- clusterR(
  stack(value_layer, mean_layer),
  overlay,
  args = list(fun = subtract)
)
# step 2: divide the difference by the sd layer
z <- clusterR(
  stack(centred, sd_layer),
  overlay,
  args = list(fun = divide)
)
endCluster()

plot(z)
Upvotes: 0