Reputation: 39
Please, I need help creating a loop that runs the computation shown in the code below over an HDF file list containing 483 files in R. I have added a link to two .hdf files and the shapefiles for trial. The code works just fine for a single .hdf file, but I'm still struggling with the looping. Thank you.
Download the files from here:
https://beardatashare.bham.ac.uk/getlink/fi2gNzWbuv5H8Gp7Qg2aemdM/
# load the required packages
library(gdalUtils)
library(raster)
library(rgdal)

# import .hdf file into R using get_subdatasets to access the subsets in the file
sub <- get_subdatasets("MOD13Q1.A2020353.h18v08.006.2021003223721.hdf")

# convert the red and NIR subsets and save them as rasters
gdalwarp(sub[4], 'red_c.tif')
gdalwarp(sub[5], 'NIR_c.tif')

# import the red and NIR rasters back into R,
# scaling them while at it
r_r <- raster('red_c.tif') * 0.0001
r_N <- raster('NIR_c.tif') * 0.0001

# calculate sigma using 0.5 * (NIR + red)
sigma <- 0.5 * (r_N + r_r)

# calculate knr using exp(-(NIR - red)^2 / (2 * sigma^2))
knr <- exp(-(r_N - r_r)^2 / (2 * sigma^2))

# calculate kndvi using (1 - knr) / (1 + knr)
kndvi <- (1 - knr) / (1 + knr)

# import the shapefile into R
shp <- readOGR(".", "National_Parks")
options(stringsAsFactors = FALSE)

# change the crs of the shapefile to the crs of one of the rasters
shp2 <- spTransform(shp, crs(kndvi))

# use extent to crop/clip the raster
## set the extent
e <- extent(910000, 980000, 530000, 650000)
## clip using the crop function
crop_kndvi <- crop(kndvi, e)

# mask the raster using the shapefile
kndvi_mask <- mask(crop_kndvi, shp2)
I then need to save kndvi_mask as a raster, for all 483 files.
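For a single file, the save step would be something like this (the output name is just an example):

# save the masked kndvi raster to disk
writeRaster(kndvi_mask, "kndvi_mask.tif", overwrite = TRUE)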
Upvotes: 0
Views: 373
Reputation: 47491
Here is how you can do that with terra. terra is the replacement for raster; it is much faster and more versatile. For example, with terra you can skip the gdalwarp step.

You can write one big for-loop, but I prefer to use functions and then call these in a loop or lapply.

Also, instead of your raster-algebra approach, it could be more efficient to wrap the kndvi computation into its own function and use it with lapp. I think that is a better approach, as the code is clearer and it allows you to re-use the kndvi function.
library(terra)
parks <- vect("National_Parks.shp")
parks <- project(parks, "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m")
e <- ext(910000,980000, 530000, 650000)
The kndvi function to be used by lapp:
kndvi <- function(red, NIR) {
    red <- red * 0.0001
    NIR <- NIR * 0.0001
    sigma <- 0.5 * (NIR + red)
    knr <- exp(-(NIR - red)^2 / (2 * sigma^2))
    (1 - knr) / (1 + knr)
}
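As a quick sanity check, the same function works on plain numbers; the reflectance values here are made up for illustration:

kndvi(red = 1200, NIR = 3400)
# ~0.22 (the 0.0001 scaling is applied inside the function)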
The main function. Note that I use crop before the other functions; that saves a lot of unnecessary processing.
fun <- function(f) {
    outf <- gsub(".hdf$", "_processed.tif", f)
    # to skip files that have already been processed:
    # if (file.exists(outf)) return(rast(outf))
    r <- rast(f)[[4:5]]
    # or r <- sds(f)[4:5]
    r <- crop(r, e)
    kn <- lapp(r, kndvi)
    name <- substr(basename(f), 9, 16)
    mask(kn, parks, filename=outf, overwrite=TRUE, names=name)
}
Get the filenames and use the function with a loop, or with lapply as shown by Elia.
ff <- list.files(pattern="hdf$", full=TRUE)
x <- list()
for (i in seq_along(ff)) {
    print(ff[i]); flush.console()
    x[[i]] <- fun(ff[i])
}
z <- rast(x)
z
#class : SpatRaster
#dimensions : 518, 302, 2 (nrow, ncol, nlyr)
#resolution : 231.6564, 231.6564 (x, y)
#extent : 909946.2, 979906.4, 530029.7, 650027.7 (xmin, xmax, ymin, ymax)
#coord. ref. : +proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m +no_defs
#sources : MOD13Q1.A2020337.h18v08.006.2020358165204_processed.tif
# MOD13Q1.A2020353.h18v08.006.2021003223721_processed.tif
#names : A2020337, A2020353
#min values : 0.0007564131, 0.0028829363
#max values : 0.7608207, 0.7303495
This takes about 1 second per file on my computer.
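Equivalently, you can replace the loop with lapply, using the same fun:

x <- lapply(ff, fun)
z <- rast(x)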
Or, as the one big for-loop that you asked for:
ff <- list.files(pattern="hdf$", full=TRUE)
for (f in ff) {
    print(f); flush.console()
    outf <- gsub(".hdf$", "_processed.tif", f)
    r <- rast(f)[[4:5]]
    r <- crop(r, e)
    kn <- lapp(r, kndvi)
    name <- substr(basename(f), 9, 16)
    mask(kn, parks, filename=outf, overwrite=TRUE, names=name)
}
outf <- list.files(pattern="_processed.tif$", full=TRUE)
x <- rast(outf)
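If you also want the combined layers saved as one multi-layer file, something like this should work (the output filename is just an example):

writeRaster(x, "kndvi_all.tif", overwrite=TRUE)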
Upvotes: 1
Reputation: 2584
You can wrap your code in a function and then lapply over the hdf paths. This way, if your loop is too slow, it will be easy to parallelize. You could try this:
library(gdalUtils)
library(raster)
library(rgdal)
#set the directory where you have the .hdf files. In my case I downloaded your data to "D:/download"
setwd("D:/download")
#function to save the masked index in your current working directory
#the final file names will depend on the names of the input hdf files
myfun <- function(path){
    name <- basename(tools::file_path_sans_ext(path))
    sub <- get_subdatasets(path)
    gdalwarp(sub[4], paste0(name, '_red_c.tif'))
    gdalwarp(sub[5], paste0(name, '_NIR_c.tif'))
    r_r <- raster(paste0(name, '_red_c.tif')) * 0.0001
    r_N <- raster(paste0(name, '_NIR_c.tif')) * 0.0001
    sigma <- 0.5 * (r_N + r_r)
    knr <- exp(-(r_N - r_r)^2 / (2 * sigma^2))
    kndvi <- (1 - knr) / (1 + knr)
    #e and shp2 are defined below, before myfun is called
    crop_kndvi <- crop(kndvi, e)
    kndvi_mask <- mask(crop_kndvi, shp2, filename=paste0(name, "_kndvi_mask.tif"))
}
#list the hdf files in your current working directory. Thanks to setwd("D:/download") there is no need to specify the path argument of list.files(),
#however, for peace of mind:
hdf <- list.files(path = getwd(), pattern = "hdf", full.names = TRUE)
#since your shapefile is always the same, you can keep this part out of the function
shp <- readOGR(".", "National_Parks")
options(stringsAsFactors = FALSE)
shp2 <- spTransform(shp, "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs")
e <- extent(910000, 980000, 530000, 650000)
#now run your function across the hdf files path
lapply(hdf, myfun)
In your working directory you will now find all the saved tif files:
list.files(pattern = "tif")
[1] "MOD13Q1.A2020337.h18v08.006.2020358165204_kndvi_mask.tif"
[2] "MOD13Q1.A2020337.h18v08.006.2020358165204_red_c.tif"
[3] "MOD13Q1.A2020337.h18v08.006.2020358165204NIR_c.tif"
[4] "MOD13Q1.A2020353.h18v08.006.2021003223721_kndvi_mask.tif"
[5] "MOD13Q1.A2020353.h18v08.006.2021003223721_red_c.tif"
[6] "MOD13Q1.A2020353.h18v08.006.2021003223721NIR_c.tif"
With lapply, the function ran in 45 seconds on my PC.
You could easily parallelize lapply by replacing it with sfLapply from the snowfall package, for example. For just 2 files it's not worth it, but if you have hundreds of files you can speed up the process a lot:
library(snowfall)
#open a cluster with as many nodes as hdf files
sfInit(parallel=TRUE, cpus=length(hdf))
# Load the required packages inside the cluster
sfLibrary(raster)
sfLibrary(rgdal)
sfLibrary(gdalUtils)
sfExportAll()
system.time(sfLapply(hdf, myfun))
sfStop()
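Note that cpus=length(hdf) is fine for a couple of files, but with 483 files you should cap the number of workers at the number of available cores, for example:

#use at most the available cores, minus one for the rest of the system
ncpu <- min(parallel::detectCores() - 1, length(hdf))
sfInit(parallel=TRUE, cpus=ncpu)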
With sfLapply the function took 20 seconds to run, which is a good improvement.
Upvotes: 1
Reputation: 1
hdf_files <- list.files("foldername", pattern = "\\.hdf$", full.names = TRUE)
for (f in hdf_files) { ... }
For the save step, you can use the value of f to build a unique name for each output file, so they don't save on top of each other; see the sketch below.
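A minimal sketch of that idea (the "_kndvi_mask" suffix is just an example):

for (f in hdf_files) {
    #derive a unique output name from the input file name
    outf <- paste0(tools::file_path_sans_ext(basename(f)), "_kndvi_mask.tif")
    #... compute kndvi_mask for f as in the other answers, then:
    #writeRaster(kndvi_mask, outf, overwrite = TRUE)
}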
Upvotes: 0