Reputation: 43
I have five data frames (home.dat2013, home.dat2015, home.dat2016, home.dat2017, home.dat2018)
A sample data frame looks like this.
Note: all five data frames have the same four columns.
# Sample of the 2013 data: four observations with the four shared columns
# (Yield, Latitude, Longitude, TimeStamp). TimeStamp is kept as character.
home.dat2013 <- data.frame(
  Yield = c(43.5773588, 47.4013604, 46.3847655, 49.1999453),
  Latitude = c(399.412927, 397.4667224, 395.5014969, 393.5341704),
  Longitude = c(3.27465, 3.257958, 3.216063, 3.257626),
  TimeStamp = c(
    "2013-09-30 18:47:00", "2013-09-30 18:47:01",
    "2013-09-30 18:47:02", "2013-09-30 18:47:03"
  ),
  stringsAsFactors = FALSE
)
I wrote code to divide a farming field into 120 grid cells (6 columns, 20 rows).
This is the code that divides each year's data into 120 grid cells and then calculates the mean Yield for each grid cell.
Here is my code for one year (2013):
# Dividing the field into grid cells for 2013.
# The data frame is called home.dat2013 (as defined above), so that name is
# used consistently throughout this snippet.

# Range of latitude (the grid is anchored at 0, so the range equals the max)
minlatitude120 <- 0
maxlatitude120 <- max(home.dat2013$Latitude)
rangelat120 <- maxlatitude120 - minlatitude120

# Range of longitude (anchored at 0 as well)
minlong120 <- 0
maxlong120 <- max(home.dat2013$Longitude)
rangelong120 <- maxlong120 - minlong120

# Exploratory checks of the scaled latitude; each pair is overwritten by the
# next and none of these values is used further down.
min120 <- min(home.dat2013$Latitude / rangelat120)
max120 <- max(home.dat2013$Latitude / rangelat120)
min120 <- min(2 * home.dat2013$Latitude / rangelat120)
max120 <- max(2 * home.dat2013$Latitude / rangelat120)

# Creating unique identifiers for our data (exploratory as well: the second
# assignment overwrites the first and unique_i is not used afterwards)
unique_i <- unique(2 * ceiling(home.dat2013$Latitude / rangelat120))
unique_i <- unique(2 * ceiling(home.dat2013$Longitude / rangelong120))

# Creating col and row: 20 rows along Latitude, 6 columns along Longitude
home.dat2013$row <- ceiling(20 * home.dat2013$Latitude / rangelat120)
home.dat2013$col <- ceiling(6 * home.dat2013$Longitude / rangelong120)

# Cell id combines both indices, e.g. row 20 / col 6 -> 20006
home.dat2013$cell <- 1000 * (home.dat2013$row) + home.dat2013$col
uniquecombo120 <- unique(home.dat2013$cell)
length(uniquecombo120)

# Calculating mean estimates for yield per grid cell
means2013 <- tapply(home.dat2013$Yield, home.dat2013$cell, mean)
# NOTE(review): cell.number is never defined in this snippet, so the next line
# errors as posted; the cell ids are available as names(means2013).
yield13 <- data.frame(cell.number, means2013)
yield13
I want to avoid repeating this code five times since it is long. My question is: how can I write a function that divides the field into 120 grid cells and can be applied to all years (2013, 2015, 2016, 2017, 2018)?
sample output
grid cell means2013 means2015 means2016 means2017 means2018
1001 50 80 100 117 20
1002 55 88 102 120 17
Upvotes: 0
Views: 36
Reputation: 123818
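This could be achieved like so: wrap your code in a function, put the data frames in a list, apply the function to each data frame with `lapply`, which gives a list of the results, and finally combine the results into one table using `Reduce` and `merge`.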
BTW: Running your code resulted in an error because `cell.number` was not defined. Therefore I had to guess what `cell.number` is and set it to `names(means)`.
# Sample data for 2013 (four observations; TimeStamp kept as character)
home.dat2013 <- data.frame(
  Yield = c(43.5773588, 47.4013604, 46.3847655, 49.1999453),
  Latitude = c(399.412927, 397.4667224, 395.5014969, 393.5341704),
  Longitude = c(3.27465, 3.257958, 3.216063, 3.257626),
  TimeStamp = c(
    "2013-09-30 18:47:00", "2013-09-30 18:47:01",
    "2013-09-30 18:47:02", "2013-09-30 18:47:03"
  ),
  stringsAsFactors = FALSE
)

# The other years simply reuse the 2013 sample as stand-in data
home.dat2015 <- home.dat2013
home.dat2016 <- home.dat2013
home.dat2017 <- home.dat2013
home.dat2018 <- home.dat2013

# One list element per year, named by the year
home.dat <- list(
  "2013" = home.dat2013,
  "2015" = home.dat2015,
  "2016" = home.dat2016,
  "2017" = home.dat2017,
  "2018" = home.dat2018
)
#' Mean yield per grid cell for one year's data
#'
#' Lays a grid of `n_rows` x `n_cols` cells over the field, spanning from 0 to
#' the largest Latitude / Longitude found in `x`, assigns every observation to
#' a cell and averages Yield within each cell.
#'
#' @param x A data frame with numeric columns `Yield`, `Latitude` and
#'   `Longitude`.
#' @param n_rows Number of grid rows along Latitude (default 20).
#' @param n_cols Number of grid columns along Longitude (default 6).
#' @return A data frame with one row per occupied grid cell and two columns:
#'   `cell.number` (character id, `1000 * row + col`) and `means` (mean Yield
#'   of the observations in that cell). A zero-row `x` gives a zero-row result
#'   with the same two columns.
my_analysis <- function(x, n_rows = 20, n_cols = 6) {
  stopifnot(
    is.data.frame(x),
    all(c("Yield", "Latitude", "Longitude") %in% names(x))
  )

  # Nothing to aggregate: return an empty, correctly shaped result instead of
  # letting max() run on an empty vector.
  if (nrow(x) == 0L) {
    return(data.frame(cell.number = character(0), means = numeric(0)))
  }

  # The grid is anchored at 0 in both directions, so each range is the maximum
  lat_range <- max(x$Latitude) - 0
  long_range <- max(x$Longitude) - 0

  # Row / column index of the cell each observation falls into
  x$row <- ceiling(n_rows * x$Latitude / lat_range)
  x$col <- ceiling(n_cols * x$Longitude / long_range)

  # Single id per cell, e.g. row 20 / col 6 -> 20006
  x$cell <- 1000 * x$row + x$col

  # Mean yield per cell; the names of the tapply() result are the cell ids
  means <- tapply(x$Yield, x$cell, mean)
  data.frame(cell.number = names(means), means)
}
# Apply the function to each df; the result is a list named by year
results <- lapply(home.dat, my_analysis)
# Rename the columns of each df so that the means column includes the year
results <- lapply(
  names(home.dat),
  function(x) setNames(results[[x]], c("cell.number", paste0("means", x)))
)
# Merge the five df's on the cell id. all = TRUE keeps a grid cell even when it
# has no observations in some year (that year's mean is then NA); the default
# inner join would silently drop such cells from the combined table.
Reduce(function(x, y) merge(x, y, by = "cell.number", all = TRUE), results)
#> cell.number means2013 means2015 means2016 means2017 means2018
#> 1 20006 46.64086 46.64086 46.64086 46.64086 46.64086
Upvotes: 1