nate

Reputation: 1244

Writing SF Dataframe to Microsoft SQL Server Crashes R and Fails to Write Data

Using the sample data provided, I am trying to write an sf dataframe out to a Microsoft SQL Server table, following the instructions found here. However, whenever I do so, the R session crashes with the error shown below.

Has anyone else seen this problem, and if so, what did you do to resolve it?

As always, thank you in advance.

-nate

Error From Terminal:

Note: method with signature ‘DBIObject#sf’ chosen for function ‘dbDataType’,
 target signature ‘Microsoft SQL Server#sf’.
 "OdbcConnection#ANY" would also be valid

 *** caught segfault ***
address 0x21, cause 'memory not mapped'

Traceback:
 1: result_insert_dataframe(rs@ptr, values, batch_rows)
 2: tryCatchList(expr, classes, parentenv, handlers)
 3: tryCatch(result_insert_dataframe(rs@ptr, values, batch_rows),     finally = dbClearResult(rs))
 4: .local(conn, name, value, ...)
 5: dbWriteTable(conn, name, to_postgis(conn, value, binary), ...,     row.names = row.names, overwrite = overwrite, append = append,     field.types = field.types)
 6: dbWriteTable(conn, name, to_postgis(conn, value, binary), ...,     row.names = row.names, overwrite = overwrite, append = append,     field.types = field.types)
 7: .local(conn, name, value, ...)
 8: DBI::dbWriteTable(conn = con, name = the_table_name, value = polygon_db,     row.names = FALSE, overwrite = TRUE, binary = TRUE)
 9: DBI::dbWriteTable(conn = con, name = the_table_name, value = polygon_db,     row.names = FALSE, overwrite = TRUE, binary = TRUE)
An irrecoverable exception occurred. R is aborting now ...
Segmentation fault: 11

Session Info:

R version 3.6.3 (2020-02-29)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.4

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] odbc_1.2.2   DBI_1.1.0    magrittr_1.5 sf_0.9-3    

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.4.6       class_7.3-16       packrat_0.5.0      grid_3.6.3         e1071_1.7-3        units_0.6-6        KernSmooth_2.23-16 rlang_0.4.6        blob_1.2.1         vctrs_0.2.4        tools_3.6.3        bit64_0.9-7       
[13] bit_1.1-15.2       hms_0.5.3          compiler_3.6.3     pkgconfig_2.0.3    classInt_0.4-3  

Sample Data (Apologies for long lines):

polygon_sf_df <- structure(
  list(leaf_id = c(1234, 2222),
       db_label = c("Middle of Nothing", "Somewhere in Florida"),
       division = c("Mountain", "South Atlantic"),
       geometry = structure(list(
         structure(list(structure(c(-114.806444, -114.326789, -114.08946, -114.898877, -114.806444, 39.236959, 39.219554, 38.961837, 38.829708, 39.236959), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
         structure(list(structure(c(-81.806444, -81.326789, -81.08946, -81.898877, -81.806444, 29.236959, 29.219554, 28.961837, 28.829708, 29.236959), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg"))),
         class = c("sfc_POLYGON", "sfc"), precision = 0,
         bbox = structure(c(xmin = -114.898877, ymin = 28.829708, xmax = -81.08946, ymax = 39.236959), class = "bbox"),
         crs = structure(list(input = "EPSG:4326", wkt = "GEOGCS[\"WGS 84\",\n    DATUM[\"WGS_1984\",\n        SPHEROID[\"WGS 84\",6378137,298.257223563,\n            AUTHORITY[\"EPSG\",\"7030\"]],\n        AUTHORITY[\"EPSG\",\"6326\"]],\n    PRIMEM[\"Greenwich\",0,\n        AUTHORITY[\"EPSG\",\"8901\"]],\n    UNIT[\"degree\",0.0174532925199433,\n        AUTHORITY[\"EPSG\",\"9122\"]],\n    AUTHORITY[\"EPSG\",\"4326\"]]"), class = "crs"), n_empty = 0L)),
  sf_column = "geometry", agr = structure(c(leaf_id = NA_integer_, db_label = NA_integer_, division = NA_integer_), .Label = c("constant", "aggregate", "identity"), class = "factor"),
  row.names = 1:2, class = c("sf", "data.frame"))

Quick Package Installs:

options(scipen = 999)
ipak <- function(pkg){
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg)) 
    install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
}
packages<- c("sf", "magrittr","DBI", "odbc")
ipak(packages)

Connection & Writing Method:

con <- DBI::dbConnect(odbc::odbc(),
                      Driver   = "ODBC Driver 17 for SQL Server",
                      Server   = "some_database_server.windows.net",
                      Database = "dev_db",
                      UID      = "<User ID Here>",
                      PWD      = "<PW HERE>",
                      Port     = 1433, 
                      maxvarcharsize = 0)
# Fails Here
DBI::dbWriteTable(conn = con, name = "the_sf_polygon_table_name", value = polygon_sf_df, row.names=FALSE, overwrite=TRUE, binary=TRUE)

The schema of the table (as per the comments below):

structure(list(TABLE_CATALOG = c("analytics_dev", "analytics_dev", 
"analytics_dev", "analytics_dev"), TABLE_SCHEMA = c("dbo", "dbo", 
"dbo", "dbo"), TABLE_NAME = c("test_polygon_table", "test_polygon_table", 
"test_polygon_table", "test_polygon_table"), COLUMN_NAME = c("leaf_id", 
"db_label", "division", "geometry"), ORDINAL_POSITION = 1:4, 
    COLUMN_DEFAULT = c(NA_character_, NA_character_, NA_character_, 
    NA_character_), IS_NULLABLE = c("YES", "YES", "YES", "YES"
    ), DATA_TYPE = c("float", "varchar", "varchar", "geometry"
    ), CHARACTER_MAXIMUM_LENGTH = c(NA, 255L, 255L, -1L), CHARACTER_OCTET_LENGTH = c(NA, 
    255L, 255L, -1L), NUMERIC_PRECISION = c(53L, NA, NA, NA), 
    NUMERIC_PRECISION_RADIX = c(2L, NA, NA, NA), NUMERIC_SCALE = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_), DATETIME_PRECISION = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_), CHARACTER_SET_CATALOG = c(NA_character_, 
    NA_character_, NA_character_, NA_character_), CHARACTER_SET_SCHEMA = c(NA_character_, 
    NA_character_, NA_character_, NA_character_), CHARACTER_SET_NAME = c(NA, 
    "iso_1", "iso_1", NA), COLLATION_CATALOG = c(NA_character_, 
    NA_character_, NA_character_, NA_character_), COLLATION_SCHEMA = c(NA_character_, 
    NA_character_, NA_character_, NA_character_), COLLATION_NAME = c(NA, 
    "SQL_Latin1_General_CP1_CI_AS", "SQL_Latin1_General_CP1_CI_AS", 
    NA), DOMAIN_CATALOG = c(NA_character_, NA_character_, NA_character_, 
    NA_character_), DOMAIN_SCHEMA = c(NA_character_, NA_character_, 
    NA_character_, NA_character_), DOMAIN_NAME = c(NA_character_, 
    NA_character_, NA_character_, NA_character_)), class = "data.frame", row.names = c(NA, 
-4L))

Upvotes: 0

Views: 863

Answers (2)

nate

Reputation: 1244

This is the workaround I came up with. It is basically the same thing @r2evans suggested; I am very grateful for his help.

Steps:

  1. Convert the polygon (the geometry column) into its well-known text (WKT) string representation
  2. Drop the old geometry column from the existing dataframe
  3. Write the new data with the string/character column (I call it geo) out to the DB
  4. Read it back from the DB (select * from whatever)
  5. Convert it back to a geometry for use in R

Code:

# Converting to string/character, writing to DB:
polygon_sf_df$geo <- sf::st_as_text(polygon_sf_df$geometry)
polygon_sf_df <- polygon_sf_df %>% sf::st_set_geometry(NULL)

the_table_name <- "test_polygon_table"
DBI::dbWriteTable(conn = con, name = the_table_name, value = polygon_sf_df, row.names = FALSE, overwrite = TRUE, binary = TRUE)

## Reading back in:
tmp <- DBI::dbSendQuery(conn = con, statement = paste0("SELECT * FROM dbo.", the_table_name))
polygon_df_ret <- DBI::dbFetch(tmp)
DBI::dbClearResult(tmp)  # release the pending result set
polygon_df_ret$geometry <- sf::st_as_sfc(polygon_df_ret$geo)
polygon_df_ret$geo <- NULL
polygon_df_ret <- sf::st_as_sf(polygon_df_ret)
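
One caveat: WKT does not carry the CRS, so the round trip drops it. If the data are EPSG:4326 (as in the sample above), it can be re-attached after converting back:

# re-attach the CRS, which is not stored in the WKT text (assumes EPSG:4326)
polygon_df_ret <- sf::st_set_crs(polygon_df_ret, 4326)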

Upvotes: 0

r2evans

Reputation: 160447

This is a hack, to be honest, since I'm not familiar or proficient with sf (R package) or geometry (sql server data type). Up front, I wonder if geography (sql server data type) might be more appropriate for you, since it alleges to be for GPS-like coordinates (datums, etc) ... but I think this process might translate just as well.
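
If geography does turn out to be a better fit, the same text-based approach should translate; here is a minimal, untested sketch of that variant (the table name SpatialTableGeog is made up, and SRID 4326 is assumed since the data look like WGS 84 lon/lat):

# hypothetical geography variant of the geometry example further down
# (assumes WGS 84 lon/lat, i.e. SRID 4326; WKT coordinate order is lon lat)
DBI::dbExecute(con, "
CREATE TABLE SpatialTableGeog
    ( id int IDENTITY (1,1),
    GeogCol1 geography,
    GeogCol2 AS GeogCol1.STAsText() )")

DBI::dbExecute(con, "
INSERT INTO SpatialTableGeog (GeogCol1)
VALUES (geography::STGeomFromText('POINT (-114.3268 39.2196)', 4326))")
# note: geography is pickier than geometry about polygon ring orientation,
# so polygons that load fine as geometry may need reorienting first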

Either way, I first show how R and SQL Server agree on the shapes (spoiler: character), and then how to read and push those to the database in proper data types.

Query

If you step through the examples in https://learn.microsoft.com/en-us/sql/t-sql/spatial-geometry/spatial-types-geometry-transact-sql and then run a query, notice what it returns:

# con <- DBI::dbConnect(...) # sql server
DBI::dbExecute(con, "
CREATE TABLE SpatialTable   
    ( id int IDENTITY (1,1),  
    GeomCol1 geometry,   
    GeomCol2 AS GeomCol1.STAsText() )")

DBI::dbExecute(con, "
INSERT INTO SpatialTable (GeomCol1)  
VALUES (geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0));  
INSERT INTO SpatialTable (GeomCol1)  
VALUES (geometry::STGeomFromText('POLYGON ((0 0, 150 0, 150 150, 0 150, 0 0))', 0));  ")

ret <- DBI::dbGetQuery(con, "select * from SpatialTable")
str(ret)
# 'data.frame': 2 obs. of  3 variables:
#  $ id      : int  1 2
#  $ GeomCol1: chr  "" ""
#  $ GeomCol2: chr  "LINESTRING (100 100, 20 180, 180 180)" "POLYGON ((0 0, 150 0, 150 150, 0 150, 0 0))"

Not a surprise (to me): DBI treats this like a string. Note, though, that it returns something for GeomCol2, which is a text translation of the actual blob-like field GeomCol1, and nothing for GeomCol1. Okay, let's retrieve only what we need, and since the "id" of a geometry is not intended to be numerical, let's convert it to a string and then confirm it plots:

ret <- DBI::dbGetQuery(con, "select id, GeomCol2 from SpatialTable")
ret$id <- as.character(ret$id)
plot(sf::st_as_sf(ret, wkt="GeomCol2"))

simple sf sample plot

Upload

In order to upload your data to SQL Server, we need to character-ize it.

tempdat <- as.data.frame(polygon_sf_df)[,c("leaf_id", "geometry")]
tempdat$geometry <- sapply(tempdat$geometry, format, width = 0)
names(tempdat)[1] <- "id"
str(tempdat)
# 'data.frame': 2 obs. of  2 variables:
#  $ id      : num  1234 2222
#  $ geometry: chr  "POLYGON ((-114.8064 39.23696, -114.3268 39.21955, -114.0895 38.96184, -114.8989 38.82971, -114.8064 39.23696))" "POLYGON ((-81.80644 29.23696, -81.32679 29.21955, -81.08946 28.96184, -81.89888 28.82971, -81.80644 29.23696))"

Now we can upload it to a temp table:

### write *character* shapes to a temp table
DBI::dbWriteTable(con, "temptable", tempdat, create = TRUE)
### convert those *character* shapes to real *geometries*
DBI::dbExecute(con, "
insert into SpatialTable (GeomCol1)
select geometry::STGeomFromText(geometry, 0) as GeomCol1
from temptable")
DBI::dbExecute(con, "drop table temptable")

Now we can query those shapes:

ret2 <- DBI::dbGetQuery(con, "select id, GeomCol2 from SpatialTable where id > 2")
ret2$id <- as.character(ret2$id)
ret2 <- sf::st_as_sf(ret2, wkt="GeomCol2")
ret2
# Simple feature collection with 2 features and 1 field
# geometry type:  POLYGON
# dimension:      XY
# bbox:           xmin: -114.8989 ymin: 28.82971 xmax: -81.08946 ymax: 39.23696
# CRS:            NA
#   id                       GeomCol2
# 1  3 POLYGON ((-114.8064 39.2369...
# 2  4 POLYGON ((-81.80644 29.2369...

plot(ret2)

OP shapes through the database

Note about data size

Without extensive testing, my guess is that this text field (when queried) will be "large" in the SQL Server sense; this is a known issue, in that Microsoft's ODBC driver errors out when "large" fields are not last in the selection of columns. So make sure the geometry-text field is among the last columns in your query (after all of the not-so-large fields).
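
A quick sketch of what I mean by ordering, using the SpatialTable from above (the commented-out form is the one that tends to trip up the driver):

# risky: the potentially-large text column comes before the small id column
# ret_bad <- DBI::dbGetQuery(con, "select GeomCol2, id from SpatialTable")

# safer: small columns first, the (potentially large) geometry-text column last
ret_ok <- DBI::dbGetQuery(con, "select id, GeomCol2 from SpatialTable")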


Upvotes: 1
