Reputation: 11
I have dataframe with latitudes and longitude with counts
I will like to create grid cells of 10 x10 km across the sweden data and generating Grid cell Ids with number of sampling points within this cells.
The lines of code below does not seem to solve the issue
Taxon_id,validScientificName,individualCount,longitude,latitude, coordinatePrecision,County,Municipality,County2_Parrish
200816,Volucella pellucens,1,17.4133208,62.6939581, 100, Västernorrland Timrå,Medelpad,Ljustorp
100395, Arctophila superbiens,6,13.9191707,55.6894566,50, Skåne Sjöbo,Skåne,Lövestad
100395,Arctophila superbiens,5, 13.9476204,55.6882818,50, Skåne Tomelilla, Skåne, Andrarum
100395, Arctophila superbiens,8,13.9478629,55.6885198,50,Skåne Tomelilla,Skåne,Andrarum
100395, Arctophila superbiens,1,13.9480606,55.6887375,50, Skåne Tomelilla,Skåne,Andrarum
200816,Volucella pellucens,2,16.2606655,58.2811607,241, Östergötland Åtvidaberg,Östergötland,Yxnerum
200814, Volucella bombylans,1,18.6925487,60.1198292,10,Stockholm Norrtälje,Uppland,Häverö
library(sf)
# Convert Sweden_MM4 to an sf object
my_data_sf <- st_as_sf(Sweden_MM4, coords = c("longitude", "latitude"), crs = 4326)
my_data_sf <- st_transform(my_data_sf, crs = "+proj=utm +zone=33 +datum=WGS84") # Set the CRS to an appropriate projection that uses meters
# Create a grid of 10x10 km using st_make_grid
grid_size_km <- 10
grid_size_m <- grid_size_km * 1000
grid_polygons <- st_make_grid(my_data_sf, cellsize = grid_size_m, what = "polygons")
# Set the CRS of the grid to match your data
grid_polygons <- st_set_crs(grid_polygons, st_crs(my_data_sf))
# Perform a spatial overlay to associate each data point with the corresponding grid cell ID
grid_overlay <- st_intersection(grid_polygons, my_data_sf)
str(my_data_sf)
# Plot the data points
data_plot <- ggplot(Sweden_MM4, aes(x = longitude, y = latitude)) +
geom_point()
# Plot the grid overlay
grid_plot <- ggplot(grid_overlay) +
geom_sf(fill = "transparent", color = "blue", size = 0.5)
# Combine the data plot and the grid plot
# Combine the data plot and the grid plot side by side
combined_plot <- plot_grid(data_plot, grid_plot, labels = c("Data Points", "Grid Overlay"), ncol = 2)
# Display the combined plot
print(combined_plot)
Taxon_id, validScientificName, individualCount, longitude, latitude,coordinatePrecision, Gridcell_REFID, County, Municipality, County2_Parrish
200816, Volucella pellucens, 1, 17.4133208, 62.6939581, 100, ,Västernorrland Timrå, Medelpad, Ljustorp
100395, Arctophila superbiens, 6, 13.9191707, 55.6894566, 50, ,Skåne Sjöbo, Skåne, Lövestad
100395, Arctophila superbiens, 5, 13.9476204, 55.6882818, 50, ,Skåne Tomelilla, Skåne, Andrarum
100395, Arctophila superbiens, 8, 13.9478629, 55.6885198, 50, ,Skåne Tomelilla, Skåne, Andrarum
100395, Arctophila superbiens, 1, 13.9480606, 55.6887375, 50, ,Skåne Tomelilla, Skåne, Andrarum
200816, Volucella pellucens, 2, 16.2606655, 58.2811607, 241, ,Östergötland Åtvidaberg,Östergötland, Yxnerum
200814, Volucella bombylans, 1, 18.6925487, 60.1198292, 10, ,Stockholm Norrtälje, Uppland, Häverö
Upvotes: 0
Views: 257
Reputation: 17434
You also need to generate IDs for grid cells and build an actual sf
object, the return value of st_make_grid()
is just a geometry list column. For assigning grid IDs to your points, you probably want to use st_join()
, by default it uses st_intersects
predicate.
Also note the difference between st_intersects
(binary predicate) and st_intersection
(geometric operation).
Assuming the desired output is a dataframe / tibble with original columns + cell ID, we'll use st_as_sf(..., remove = FALSE)
to keep WGS84 longitude / latitude columns and later just drop the sf
geometry -- no need to transform UTM back to WGS84.
library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(readr)
library(dplyr, warn.conflicts = FALSE)
Sweden_MM4 <- read_csv(
"Taxon_id,validScientificName,individualCount,longitude,latitude, coordinatePrecision,County,Municipality,County2_Parrish
200816,Volucella pellucens,1,17.4133208,62.6939581, 100, Västernorrland Timrå,Medelpad,Ljustorp
100395, Arctophila superbiens,6,13.9191707,55.6894566,50, Skåne Sjöbo,Skåne,Lövestad
100395,Arctophila superbiens,5, 13.9476204,55.6882818,50, Skåne Tomelilla, Skåne, Andrarum
100395, Arctophila superbiens,8,13.9478629,55.6885198,50,Skåne Tomelilla,Skåne,Andrarum
100395, Arctophila superbiens,1,13.9480606,55.6887375,50, Skåne Tomelilla,Skåne,Andrarum
200816,Volucella pellucens,2,16.2606655,58.2811607,241, Östergötland Åtvidaberg,Östergötland,Yxnerum
200814, Volucella bombylans,1,18.6925487,60.1198292,10,Stockholm Norrtälje,Uppland,Häverö"
, show_col_types = FALSE)
Sweden_MM4 %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4326, remove = FALSE) %>%
st_transform("+proj=utm +zone=33 +datum=WGS84") %>%
st_join(st_sf(geometry = st_make_grid(.,units::set_units(10, km))) %>%
mutate(Gridcell_REFID = row_number())
) %>%
st_drop_geometry() %>%
relocate(Gridcell_REFID, .after = latitude)
#> # A tibble: 7 × 10
#> Taxon_id validScientificName individualCount longitude latitude Gridcell_REFID
#> <dbl> <chr> <dbl> <dbl> <dbl> <int>
#> 1 200816 Volucella pellucens 1 17.4 62.7 2204
#> 2 100395 Arctophila superbi… 6 13.9 55.7 1
#> 3 100395 Arctophila superbi… 5 13.9 55.7 1
#> 4 100395 Arctophila superbi… 8 13.9 55.7 1
#> 5 100395 Arctophila superbi… 1 13.9 55.7 1
#> 6 200816 Volucella pellucens 2 16.3 58.3 799
#> 7 200814 Volucella bombylans 1 18.7 60.1 1400
#> # ℹ 4 more variables: coordinatePrecision <dbl>, County <chr>,
#> # Municipality <chr>, County2_Parrish <chr>
Created on 2023-07-29 with reprex v2.0.2
Sample random points from Sweden to build a input dataset for reproducible example.
library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)
library(ggplot2)
swe <- giscoR::gisco_get_countries(country = "Sweden")
# sample points
set.seed(123)
my_data_sf <- st_geometry(swe) %>%
st_sample(20) %>%
st_as_sf() %>%
st_set_geometry("geometry") %>%
mutate(random_point_id = row_number(), .before = 1) %>%
st_transform("+proj=utm +zone=33 +datum=WGS84")
print(my_data_sf, n = 3)
#> Simple feature collection with 20 features and 1 field
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 291587.2 ymin: 6421365 xmax: 850403.5 ymax: 7480341
#> Projected CRS: +proj=utm +zone=33 +datum=WGS84
#> First 3 features:
#> random_point_id geometry
#> 1 1 POINT (493032.3 6437133)
#> 2 2 POINT (585428 6482855)
#> 3 3 POINT (836136.5 7424094)
sf
object from grid, add grid_id
column# grid, 200x200km for better visualization
grid_size_km <- 200
grid_size_m <- grid_size_km * 1000
grid_polygons_sfc <- st_make_grid(my_data_sf, cellsize = grid_size_m, what = "polygons")
# st_make_grid() does not return sf object but a geometry column:
print(grid_polygons_sfc, n = 3)
#> Geometry set for 18 features
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 291587.2 ymin: 6421365 xmax: 891587.2 ymax: 7621365
#> Projected CRS: +proj=utm +zone=33 +datum=WGS84
#> First 3 geometries:
#> POLYGON ((291587.2 6421365, 491587.2 6421365, 4...
#> POLYGON ((491587.2 6421365, 691587.2 6421365, 6...
#> POLYGON ((691587.2 6421365, 891587.2 6421365, 8...
# create sf object, add grid_id attribute column:
grid_polygons_sf <- st_sf(grid_id = seq_along(grid_polygons_sfc),
geometry = grid_polygons_sfc)
print(grid_polygons_sf, n = 3)
#> Simple feature collection with 18 features and 1 field
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 291587.2 ymin: 6421365 xmax: 891587.2 ymax: 7621365
#> Projected CRS: +proj=utm +zone=33 +datum=WGS84
#> First 3 features:
#> grid_id geometry
#> 1 1 POLYGON ((291587.2 6421365,...
#> 2 2 POLYGON ((491587.2 6421365,...
#> 3 3 POLYGON ((691587.2 6421365,...
# spatial join, join grid_polygons_sf$grid_id values to my_data_sf
my_data_sf <- st_join(my_data_sf, grid_polygons_sf)
my_data_sf
#> Simple feature collection with 20 features and 2 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 291587.2 ymin: 6421365 xmax: 850403.5 ymax: 7480341
#> Projected CRS: +proj=utm +zone=33 +datum=WGS84
#> First 10 features:
#> random_point_id grid_id geometry
#> 1 1 2 POINT (493032.3 6437133)
#> 2 2 2 POINT (585428 6482855)
#> 3 3 18 POINT (836136.5 7424094)
#> 4 4 14 POINT (637972.7 7301930)
#> 5 5 14 POINT (595969.3 7221551)
#> 6 6 18 POINT (713168.4 7480341)
#> 7 7 6 POINT (701661.1 6638757)
#> 8 8 10 POINT (375548.9 7076110)
#> 9 9 4 POINT (463189.1 6647020)
#> 10 10 1 POINT (307018.8 6498219)
ggplot() +
geom_sf(data = swe) +
geom_sf(data = grid_polygons_sf, alpha = .2) +
geom_sf_text(data = grid_polygons_sf, aes(label = grid_id)) +
geom_sf(data = my_data_sf, alpha = .8, color = "gold") +
geom_sf_label(data = my_data_sf, aes(label = grid_id), size = 3, alpha = .5) +
coord_sf(crs = "+proj=utm +zone=33")
Created on 2023-07-28 with reprex v2.0.2
Upvotes: 2