GeoDude
GeoDude

Reputation: 33

How can i rowwise create polygons from records in a dataframe using sf, similar to Postgis

I've got a table in Postgis which contains the field beginpunt, a geometry field containing a single point. Next I'll create a 100*100m square which contains the point. In Postgis I'll use the query below and it works great.

select 
*
,st_transform(
    ST_MakeEnvelope(floor(st_x(st_transform(beginpunt,28992))/100) *100,
                floor(st_y(st_transform(beginpunt,28992))/100) *100, 
                floor(st_x(st_transform(beginpunt,28992))/100) *100 +100,
                floor(st_y(st_transform(beginpunt,28992))/100) *100 +100, 28992),4326)  as cbs_begin          
from cbs

For a specific reason I want to achieve the same without using Postgis, but do the whole excersise in R. This is where I get stuck. I can create a single polygon, but my problem is doing it rowwise in a dataframe.

I have the same table (cbs) in R

for simplicity i created a few extra fields:

    dput(cbs)  #this is the input dataframe
    
    cbs$beginpunt_rd<-st_transform(cbs$beginpunt,28992) #transform crs
    cbs$begincbsx<-floor(st_coordinates(cbs$beginpunt_rd)[,1]/100)*100 #define origin x
    cbs$begincbsy<-floor(st_coordinates(cbs$beginpunt_rd)[,2]/100)*100 #define origin y
    
    cbs$pol<-NA #create an extra empty field in the dataframe
    tbldata<-st_sfc()  #create an empty table 
    for(i in 1:nrow(cbs)){
      cbs_begin <- st_polygon(
        list(
          cbind(
            rbind(cbs$begincbsx[i],cbs$begincbsx[i]+100,cbs$begincbsx[i]+100 , cbs$begincbsx[i],cbs$begincbsx[i]),
            rbind(cbs$begincbsy[i],cbs$begincbsy[i],cbs$begincbsy[i]+100 , cbs$begincbsy[i]+100,cbs$begincbsy[i])
          )
        ))
      
      if(i<2){tbldata<-cbs[FALSE,]} #create empty dataframe
      tbldata[nrow(tbldata) + 1, ] <- c(cbs[i,1:5],st_sfc(st_polygon(cbs_begin),crs=28992)) #add row to dataframe
    }

dput(tbldata) #this is the output dataframe

The code above gives me a (new) dataframe with a field "pol" containing a list with the right coordinates, but it's missing the step to make it a polygon or sf object.

I've seen some examples of how it could be done, but I can't get it to work

Edit: the input-table cbs dput(cbs) result is:

structure(list(id = c(9594L, 9520L, 9492L, 83859L, 
                              9438L, 9490L), beginpunt = structure(list(structure(c(5.0499337, 
                                                                                          51.5676501), class = c("XY", "POINT", "sfg")), structure(c(5.0146573, 
                                                                                                                                                     51.5818484), class = c("XY", "POINT", "sfg")), structure(c(5.08459, 
                                                                                                                                                                                                                51.557595), class = c("XY", "POINT", "sfg")), structure(c(5.0129685, 
                                                                                                                                                                                                                                                                          51.5865527), class = c("XY", "POINT", "sfg")), structure(c(5.0548874, 
                                                                                                                                                                                                                                                                                                                                     51.5164541), class = c("XY", "POINT", "sfg")), structure(c(5.0647628, 
                                                                                                                                                                                                                                                                                                                                                                                                51.5812475), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                          "sfc"), precision = 0, bbox = structure(c(xmin = 5.0129685, ymin = 51.5164541, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    xmax = 5.08459, ymax = 51.5865527), class = "bbox"), crs = structure(list(
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"geodetic latitude (Lat)\",north,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        AXIS[\"geodetic longitude (Lon)\",east,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n    USAGE[\n        SCOPE[\"Horizontal component of 3D system.\"],\n        AREA[\"World.\"],\n        BBOX[-90,-180,90,180]],\n    ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                6L), class = "data.frame")

and the output/result table: tbldata dput(tbldata)

structure(list(id = c(9594L, 9520L, 9492L, 83859L, 
9438L, 9490L), beginpunt = structure(list(structure(c(5.0499337, 
51.5676501), class = c("XY", "POINT", "sfg")), structure(c(5.0146573, 
51.5818484), class = c("XY", "POINT", "sfg")), structure(c(5.08459, 
51.557595), class = c("XY", "POINT", "sfg")), structure(c(5.0129685, 
51.5865527), class = c("XY", "POINT", "sfg")), structure(c(5.0548874, 
51.5164541), class = c("XY", "POINT", "sfg")), structure(c(5.0647628, 
51.5812475), class = c("XY", "POINT", "sfg"))), precision = 0, bbox = structure(c(xmin = 5.0499337, 
ymin = 51.5676501, xmax = 5.0499337, ymax = 51.5676501), class = "bbox"), crs = structure(list(
    input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"geodetic latitude (Lat)\",north,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        AXIS[\"geodetic longitude (Lon)\",east,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n    USAGE[\n        SCOPE[\"Horizontal component of 3D system.\"],\n        AREA[\"World.\"],\n        BBOX[-90,-180,90,180]],\n    ID[\"EPSG\",4326]]"), class = "crs"), classes = character(0), n_empty = 0L, class = c("sfc_POINT", 
"sfc")), beginpunt_rd = structure(list(structure(c(131616.180272543, 
397688.855374183), class = c("XY", "POINT", "sfg")), structure(c(129178.446528786, 
399280.343643684), class = c("XY", "POINT", "sfg")), structure(c(134014.355256952, 
396559.663404012), class = c("XY", "POINT", "sfg")), structure(c(129064.081985984, 
399804.302704657), class = c("XY", "POINT", "sfg")), structure(c(131933.668374674, 
391991.666781811), class = c("XY", "POINT", "sfg")), structure(c(132651.016139436, 
399196.932221466), class = c("XY", "POINT", "sfg"))), precision = 0, bbox = structure(c(xmin = 131616.180272543, 
ymin = 397688.855374183, xmax = 131616.180272543, ymax = 397688.855374183
), class = "bbox"), crs = structure(list(input = "EPSG:28992", 
    wkt = "PROJCRS[\"Amersfoort / RD New\",\n    BASEGEOGCRS[\"Amersfoort\",\n        DATUM[\"Amersfoort\",\n            ELLIPSOID[\"Bessel 1841\",6377397.155,299.1528128,\n                LENGTHUNIT[\"metre\",1]]],\n        PRIMEM[\"Greenwich\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        ID[\"EPSG\",4289]],\n    CONVERSION[\"RD New\",\n        METHOD[\"Oblique Stereographic\",\n            ID[\"EPSG\",9809]],\n        PARAMETER[\"Latitude of natural origin\",52.1561605555556,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8801]],\n        PARAMETER[\"Longitude of natural origin\",5.38763888888889,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8802]],\n        PARAMETER[\"Scale factor at natural origin\",0.9999079,\n            SCALEUNIT[\"unity\",1],\n            ID[\"EPSG\",8805]],\n        PARAMETER[\"False easting\",155000,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8806]],\n        PARAMETER[\"False northing\",463000,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8807]]],\n    CS[Cartesian,2],\n        AXIS[\"easting (X)\",east,\n            ORDER[1],\n            LENGTHUNIT[\"metre\",1]],\n        AXIS[\"northing (Y)\",north,\n            ORDER[2],\n            LENGTHUNIT[\"metre\",1]],\n    USAGE[\n        SCOPE[\"Engineering survey, topographic mapping.\"],\n        AREA[\"Netherlands - onshore, including Waddenzee, Dutch Wadden Islands and 12-mile offshore coastal zone.\"],\n        BBOX[50.75,3.2,53.7,7.22]],\n    ID[\"EPSG\",28992]]"), class = "crs"), classes = character(0), n_empty = 0L, class = c("sfc_POINT", 
"sfc")), begincbsx = c(131600, 129100, 134000, 129000, 131900, 
132600), begincbsy = c(397600, 399200, 396500, 399800, 391900, 
399100), pol = list(structure(c(131600, 131700, 131700, 131600, 
131600, 397600, 397600, 397700, 397700, 397600), .Dim = c(5L, 
2L)), structure(c(129100, 129200, 129200, 129100, 129100, 399200, 
399200, 399300, 399300, 399200), .Dim = c(5L, 2L)), structure(c(134000, 
134100, 134100, 134000, 134000, 396500, 396500, 396600, 396600, 
396500), .Dim = c(5L, 2L)), structure(c(129000, 129100, 129100, 
129000, 129000, 399800, 399800, 399900, 399900, 399800), .Dim = c(5L, 
2L)), structure(c(131900, 132000, 132000, 131900, 131900, 391900, 
391900, 392000, 392000, 391900), .Dim = c(5L, 2L)), structure(c(132600, 
132700, 132700, 132600, 132600, 399100, 399100, 399200, 399200, 
399100), .Dim = c(5L, 2L)))), row.names = c(NA, 6L), class = "data.frame")

The expected output isenter image description here

Upvotes: 1

Views: 476

Answers (2)

lovalery
lovalery

Reputation: 4652

You just have to enter the following line of code and it should work :

cbs <- st_sf(cbs, sf_column_name = "beginpunt", crs = 4326)

Output :

#> Simple feature collection with 6 features and 1 field
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 5.012968 ymin: 51.51645 xmax: 5.08459 ymax: 51.58655
#> Geodetic CRS:  WGS 84
#>      id                 beginpunt
#> 1  9594 POINT (5.049934 51.56765)
#> 2  9520 POINT (5.014657 51.58185)
#> 3  9492  POINT (5.08459 51.55759)
#> 4 83859 POINT (5.012969 51.58655)
#> 5  9438 POINT (5.054887 51.51645)
#> 6  9490 POINT (5.064763 51.58125)

Created on 2021-09-28 by the reprex package (v2.0.1)

Edit 1

Based on @skaqqs' comment, I complete my answer without taking into account his suggestion which normally works very well... but only if the points are arranged in the right order to produce a valid polygon (which is normally the most frequent case).

The original order of the points in your dataframe does not produce a valid polygon: keeping the original order, we get intersecting lines. So, if we choose the classical solution proposed by @skaqqs and draw the polygon, here is the result:


cbs_standard <- st_combine(cbs)
(cbs_standard <- st_cast(cbs_standard, "POLYGON"))
#> Geometry set for 1 feature 
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 5.012968 ymin: 51.51645 xmax: 5.08459 ymax: 51.58655
#> Geodetic CRS:  WGS 84
#> POLYGON ((5.049934 51.56765, 5.014657 51.58185,...
plot(cbs_standard)

To circumvent this difficulty, I suggest another solution by installing and loading the "concaveman" package.

So, please find below the (very simple) procedure to obtain a topologically valid polygon.

install.packages("concaveman")
library(concaveman)

(cbs_concaveman <- concaveman(cbs))
#> Simple feature collection with 1 feature and 0 fields
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 5.013 ymin: 51.5165 xmax: 5.0846 ymax: 51.5866
#> Geodetic CRS:  WGS 84
#>                         polygons
#> 1 POLYGON ((5.0147 51.5818, 5...
plot(cbs_concaveman)

Created on 2021-09-28 by the reprex package (v2.0.1)

Edit 2

O.K. we did not understand each other @GeoDude. To follow up on your comment, here is (I hope!) the solution to your problem. You just have to type these few lines of code and it should work:

z <- st_polygon()
for (i in seq(length(tbldata$pol))){
  z[i] <-  st_polygon(list(tbldata$pol[[i]]))
}

sfc <- st_sfc(z, crs = 28992)
tbldata <- st_sf(tbldata, geom = sfc, row.names = 1:length(z))
tbldata$pol <- NULL

tbldata
#> Simple feature collection with 6 features and 3 fields
#> Active geometry column: geom
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 129000 ymin: 391900 xmax: 134100 ymax: 399900
#> Projected CRS: Amersfoort / RD New
#>      id                 beginpunt              beginpunt_rd begincbsx begincbsy
#> 1  9594 POINT (5.049934 51.56765) POINT (131616.2 397688.9)    131600    397600
#> 2  9520 POINT (5.014657 51.58185) POINT (129178.4 399280.3)    129100    399200
#> 3  9492  POINT (5.08459 51.55759) POINT (134014.4 396559.7)    134000    396500
#> 4 83859 POINT (5.012969 51.58655) POINT (129064.1 399804.3)    129000    399800
#> 5  9438 POINT (5.054887 51.51645) POINT (131933.7 391991.7)    131900    391900
#> 6  9490 POINT (5.064763 51.58125)   POINT (132651 399196.9)    132600    399100
#>                             geom
#> 1 POLYGON ((131600 397600, 13...
#> 2 POLYGON ((131600 397600, 13...
#> 3 POLYGON ((131600 397600, 13...
#> 4 POLYGON ((131600 397600, 13...
#> 5 POLYGON ((131600 397600, 13...
#> 6 POLYGON ((131600 397600, 13...

Created on 2021-09-29 by the reprex package (v2.0.1)

And the expected output:

library(tmap)

tmap_mode("view")
#> tmap mode set to interactive viewing
tmap::tm_shape(tbldata) +
  tm_basemap(server = "OpenStreetMap")+
  tm_fill(NA) +
  tm_borders(col="black")

Created on 2021-09-29 by the reprex package (v2.0.1)

Edit 3

As a follow-up to your comment @Geodude, please find the fix below. Besides, I removed the for loop to take into account @nniloc's remark (For this, I built the function polygon_list() that I have applied to tbldata$pol with sapply()).

polygon_list <-  function(x){
  st_polygon(list(x))
}

h <- st_polygon()
h <- sapply(tbldata$pol, polygon_list, simplify = FALSE)
sfc <- st_sfc(h, crs = 28992)
tbldata <- st_sf(tbldata, geom = sfc)
tbldata$pol <- NULL
tbldata
#> Simple feature collection with 6 features and 3 fields
#> Active geometry column: geom
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 129000 ymin: 391900 xmax: 134100 ymax: 399900
#> Projected CRS: Amersfoort / RD New
#>      id                 beginpunt              beginpunt_rd begincbsx begincbsy
#> 1  9594 POINT (5.049934 51.56765) POINT (131616.2 397688.9)    131600    397600
#> 2  9520 POINT (5.014657 51.58185) POINT (129178.4 399280.3)    129100    399200
#> 3  9492  POINT (5.08459 51.55759) POINT (134014.4 396559.7)    134000    396500
#> 4 83859 POINT (5.012969 51.58655) POINT (129064.1 399804.3)    129000    399800
#> 5  9438 POINT (5.054887 51.51645) POINT (131933.7 391991.7)    131900    391900
#> 6  9490 POINT (5.064763 51.58125)   POINT (132651 399196.9)    132600    399100
#>                             geom
#> 1 POLYGON ((131600 397600, 13...
#> 2 POLYGON ((129100 399200, 12...
#> 3 POLYGON ((134000 396500, 13...
#> 4 POLYGON ((129000 399800, 12...
#> 5 POLYGON ((131900 391900, 13...
#> 6 POLYGON ((132600 399100, 13...

And by checking one of the polygons, the problem appears to be solved:

tbldata$geom[[3]]
#> POLYGON ((134000 396500, 134100 396500, 134100 396600, 134000 396600, 134000 396500))

Created on 2021-10-01 by the reprex package (v2.0.1)

Upvotes: 4

nniloc
nniloc

Reputation: 4243

A tidyverse solution. More or less the same as @lovalery but skips the for loops.

library(tidyverse)
library(sf)
library(ggspatial)

df_sf <- tbldata %>%
  mutate(pol2 = st_sfc(st_multipolygon(list(pol)))) %>%
  select(-beginpunt, -beginpunt_rd) %>%
  st_as_sf(crs = 28992)

# plot to verify
ggplot(df_sf) +
  annotation_map_tile(zoom = 13) +
  geom_sf(col = 'blue', fill = NA)

enter image description here

Upvotes: 2

Related Questions