Reputation: 1932
I have grouped connected lines to form one continuous MULTILINESTRING
object and now would like to split this into smaller groups based on points. I have found a couple of examples of how to do this with LINESTRING
geometries but not with MULTILINESTRING
geometries. See here and here. Using either of these methods I get an sf object containing LINESTRING
's. Of course, I can aggregate these back to a MULTILINESTRING
based on my group column but this only serves to get me back to where I started. I need all the lines on each side of the intersecting point grouped into a MULTILINESTRING
with a unique group number. I am looking to do this splitting in the most efficient way possible as my real-world dataset is very large. See my sample dataset below.
library(ggplot2)
library(sf)
testArea<-structure(list(group = 3, geom = structure(list(structure(list(
structure(c(6502476.72250405, 6502454.02996413, 1962622.20532215,
1962613.17515647), dim = c(2L, 2L)), structure(c(6502476.72250405,
6502453.90463631, 1962622.20532215, 1962632.57964523), dim = c(2L,
2L)), structure(c(6502366.41465381, 6502371.44253089, 6502473.30682847,
6502476.72250405, 1962490.68032272, 1962495.74363281, 1962498.12158081,
1962622.20532215), dim = c(4L, 2L)), structure(c(6502228.73645148,
6502211.24928172, 1962628.79028273, 1962643.49694623), dim = c(2L,
2L)), structure(c(6502213.34114106, 6502223.08324756, 1962491.00643755,
1962467.08522557), dim = c(2L, 2L)), structure(c(6502228.73645148,
6502250.32728755, 6502307.72317013, 6502325.46394831, 1962628.79028273,
1962625.21515864, 1962624.4385854, 1962633.67150655), dim = c(4L,
2L)), structure(c(6502671.5715238, 6502661.95999447, 1962487.84240189,
1962462.80997165), dim = c(2L, 2L)), structure(c(6502476.72250405,
6502494.79891147, 6502552.95102614, 6502581.00116688, 1962622.20532215,
1962617.1669464, 1962616.94680248, 1962626.03963207), dim = c(4L,
2L)), structure(c(6502228.73645148, 6502210.75486013, 1962628.79028273,
1962623.37789197), dim = c(2L, 2L)), structure(c(6502366.41465381,
6502376.15413564, 6502474.69757372, 6502493.52365156, 6502542.63214913,
6502548.52485389, 1962490.68032272, 1962480.89097223, 1962483.04615164,
1962483.45789623, 1962483.56846032, 1962488.53531389), dim = c(6L,
2L)), structure(c(6502366.41465381, 6502367.14956047, 6502405.43065189,
1962490.68032272, 1962469.13115323, 1962446.99602689), dim = 3:2),
structure(c(6502366.41465381, 6502306.02074572, 1962490.68032272,
1962490.85880005), dim = c(2L, 2L)), structure(c(6502306.02074572,
6502331.86321372, 1962490.85880005, 1962451.68335348), dim = c(2L,
2L)), structure(c(6502548.52485389, 6502671.5715238, 1962488.53531389,
1962487.84240189), dim = c(2L, 2L)), structure(c(6502476.72250405,
6502493.10337681, 6502550.52878688, 6502569.00578405, 1962622.20532215,
1962611.71616989, 1962610.93959664, 1962603.28344397), dim = c(4L,
2L)), structure(c(6502213.34114106, 6502180.82119296, 1962491.00643755,
1962465.7459894), dim = c(2L, 2L)), structure(c(6502228.73645148,
6502245.1176523, 6502303.41609214, 6502326.35075755, 1962628.79028273,
1962618.30080239, 1962617.95959572, 1962614.20074497), dim = c(4L,
2L)), structure(c(6502366.41465381, 6502361.39957197, 6502228.35587481,
6502228.73645148, 1962490.68032272, 1962495.69540456, 1962496.08812031,
1962628.79028273), dim = c(4L, 2L)), structure(c(6502548.52485389,
6502569.88406314, 1962488.53531389, 1962462.09999931), dim = c(2L,
2L)), structure(c(6502366.41465381, 6502376.00387347, 6502452.04538806,
1962490.68032272, 1962471.47498056, 1962457.93334098), dim = 3:2),
structure(c(6502306.02074572, 6502294.69202822, 1962490.85880005,
1962453.50651257), dim = c(2L, 2L)), structure(c(6502306.02074572,
6502213.34114106, 1962490.85880005, 1962491.00643755), dim = c(2L,
2L)), structure(c(6502671.5715238, 6502693.53867146, 1962487.84240189,
1962463.86377531), dim = c(2L, 2L)), structure(c(6502548.52485389,
6502538.80833788, 1962488.53531389, 1962464.75189689), dim = c(2L,
2L)), structure(c(6502693.53867146, 6502693.05278005, 1962463.86377531,
1962448.23880656), dim = c(2L, 2L)), structure(c(6502507.63714039,
6502497.52331547, 6502366.41465381, 1962494.67375307, 1962493.54777107,
1962490.68032272), dim = 3:2), structure(c(6502507.63714039,
6502517.90418021, 6502555.63835672, 6502736.0713948, 6502735.87224822,
6502735.84239264, 1962494.67375307, 1962493.70754765, 1962493.49527773,
1962492.47887556, 1962559.16378155, 1962569.20477198), dim = c(6L,
2L))), class = c("XY", "MULTILINESTRING", "sfg"))), class = c("sfc_MULTILINESTRING",
"sfc"), precision = 0, bbox = structure(c(xmin = 6502180.82119296,
ymin = 1962446.99602689, xmax = 6502736.0713948, ymax = 1962643.49694623
), class = "bbox"), crs = structure(list(input = "PROJCS[\"NAD_1983_StatePlane_California_III_FIPS_0403_Feet\",GEOGCS[\"GCS_North_American_1983\",DATUM[\"D_North_American_1983\",SPHEROID[\"GRS_1980\",6378137.0,298.257222101]],PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]],PROJECTION[\"Lambert_Conformal_Conic\"],PARAMETER[\"False_Easting\",6561666.666666666],PARAMETER[\"False_Northing\",1640416.666666667],PARAMETER[\"Central_Meridian\",-120.5],PARAMETER[\"Standard_Parallel_1\",37.06666666666667],PARAMETER[\"Standard_Parallel_2\",38.43333333333333],PARAMETER[\"Latitude_Of_Origin\",36.5],UNIT[\"Foot_US\",0.3048006096012192]]",
wkt = "PROJCRS[\"NAD83 / California zone 3 (ftUS)\",\n BASEGEOGCRS[\"NAD83\",\n DATUM[\"North American Datum 1983\",\n ELLIPSOID[\"GRS 1980\",6378137,298.257222101,\n LENGTHUNIT[\"metre\",1]],\n ID[\"EPSG\",6269]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"Degree\",0.0174532925199433]]],\n CONVERSION[\"unnamed\",\n METHOD[\"Lambert Conic Conformal (2SP)\",\n ID[\"EPSG\",9802]],\n PARAMETER[\"Latitude of false origin\",36.5,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8821]],\n PARAMETER[\"Longitude of false origin\",-120.5,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8822]],\n PARAMETER[\"Latitude of 1st standard parallel\",37.0666666666667,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8823]],\n PARAMETER[\"Latitude of 2nd standard parallel\",38.4333333333333,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8824]],\n PARAMETER[\"Easting at false origin\",6561666.66666667,\n LENGTHUNIT[\"US survey foot\",0.304800609601219],\n ID[\"EPSG\",8826]],\n PARAMETER[\"Northing at false origin\",1640416.66666667,\n LENGTHUNIT[\"US survey foot\",0.304800609601219],\n ID[\"EPSG\",8827]]],\n CS[Cartesian,2],\n AXIS[\"(E)\",east,\n ORDER[1],\n LENGTHUNIT[\"US survey foot\",0.304800609601219,\n ID[\"EPSG\",9003]]],\n AXIS[\"(N)\",north,\n ORDER[2],\n LENGTHUNIT[\"US survey foot\",0.304800609601219,\n ID[\"EPSG\",9003]]]]"), class = "crs"), n_empty = 0L)), row.names = c(NA,
-1L), sf_column = "geom", agr = structure(c(group = NA_integer_), levels = c("constant",
"aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))
split_point<-structure(list(PhasingCode = 4L, geom = structure(list(structure(c(6502366.41465381,
1962490.68032272), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 6502366.41465381,
ymin = 1962490.68032272, xmax = 6502366.41465381, ymax = 1962490.68032272
), class = "bbox"), crs = structure(list(input = "PROJCS[\"NAD_1983_StatePlane_California_III_FIPS_0403_Feet\",GEOGCS[\"GCS_North_American_1983\",DATUM[\"D_North_American_1983\",SPHEROID[\"GRS_1980\",6378137.0,298.257222101]],PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]],PROJECTION[\"Lambert_Conformal_Conic\"],PARAMETER[\"False_Easting\",6561666.666666666],PARAMETER[\"False_Northing\",1640416.666666667],PARAMETER[\"Central_Meridian\",-120.5],PARAMETER[\"Standard_Parallel_1\",37.06666666666667],PARAMETER[\"Standard_Parallel_2\",38.43333333333333],PARAMETER[\"Latitude_Of_Origin\",36.5],UNIT[\"Foot_US\",0.3048006096012192]]",
wkt = "PROJCRS[\"NAD83 / California zone 3 (ftUS)\",\n BASEGEOGCRS[\"NAD83\",\n DATUM[\"North American Datum 1983\",\n ELLIPSOID[\"GRS 1980\",6378137,298.257222101,\n LENGTHUNIT[\"metre\",1]],\n ID[\"EPSG\",6269]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"Degree\",0.0174532925199433]]],\n CONVERSION[\"unnamed\",\n METHOD[\"Lambert Conic Conformal (2SP)\",\n ID[\"EPSG\",9802]],\n PARAMETER[\"Latitude of false origin\",36.5,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8821]],\n PARAMETER[\"Longitude of false origin\",-120.5,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8822]],\n PARAMETER[\"Latitude of 1st standard parallel\",37.0666666666667,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8823]],\n PARAMETER[\"Latitude of 2nd standard parallel\",38.4333333333333,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8824]],\n PARAMETER[\"Easting at false origin\",6561666.66666667,\n LENGTHUNIT[\"US survey foot\",0.304800609601219],\n ID[\"EPSG\",8826]],\n PARAMETER[\"Northing at false origin\",1640416.66666667,\n LENGTHUNIT[\"US survey foot\",0.304800609601219],\n ID[\"EPSG\",8827]]],\n CS[Cartesian,2],\n AXIS[\"(E)\",east,\n ORDER[1],\n LENGTHUNIT[\"US survey foot\",0.304800609601219,\n ID[\"EPSG\",9003]]],\n AXIS[\"(N)\",north,\n ORDER[2],\n LENGTHUNIT[\"US survey foot\",0.304800609601219,\n ID[\"EPSG\",9003]]]]"), class = "crs"), n_empty = 0L)), row.names = 23L, class = c("sf",
"data.frame"), sf_column = "geom", agr = structure(c(PhasingCode = NA_integer_), levels = c("constant",
"aggregate", "identity"), class = "factor"))
You can see a plot of the linework below. The central point should split the MULTILINESTRING
into several smaller MULTILINSTRING
s. Lastly, I should note my real-world dataset consist of several MULTILINESTRING
's in one sf object. Ideally I would want to split each MULTILINESTRING
into smaller groups. Each smaller group should have an ID unique from each other. This could probably be achieved at the end step and assigned based on the row number of the geometry column.
ggplot() + geom_sf(data = testArea, color = "red") + geom_sf(data = split_point, color = "blue")
Upvotes: 0
Views: 414
Reputation: 2296
Another approach to grouping, geos::geos_hilbert_code
is dutifully copied from Dewey Dunnington Blog,
testArea_linestring <- st_cast(testArea, 'LINESTRING')
testArea_lstring_geos <- as_geos_geometry(testArea_linestring)
data.frame(
hilbert_code = geos_hilbert_code(testArea_lstring_geos, level = 2),
geometry = testArea_lstring_geos
) |>
st_as_sf() |>
ggplot(aes(col = factor(hilbert_code))) +
geom_sf() +
theme_bw() +
theme(legend.position = 'bottom')
As I was wondering what one would do if one didn't have split_point
that seems determined by additional data not provided.
Upvotes: 1
Reputation: 1932
I'm posting this answer because I know I'm close to the character limit of my question and this does provide me with what I want although clunky. If you think you can speed up this code please do and I will accept the best answer.
library(dplyr)
library(data.table)
library(sf)
library(ggplot2)
library(igraph)
library(geos)
group_lines_geo<-testArea |>
sf::st_cast(to = "LINESTRING") |>
sf::st_filter(split_point) |>
data.table() |>
mutate(geo = as_geos_geometry((geom)))
group_lines_sf <- group_lines_geo
st_geometry(group_lines_sf) <- group_lines_geo$geom
testArea_geo<-testArea |>
sf::st_cast(to = "LINESTRING") |>
data.table() |>
mutate(geo = as_geos_geometry((geom)))
empty_sf <- st_sf(section = integer(), geometry = st_sfc())
crs <- st_crs(testArea)
st_crs(empty_sf) <- crs
for(i in 1:length(group_lines$group)){
group_lines[i,]
line_subset<-testArea_geo[!testArea_geo$geo %in% group_lines_geo[-i,]$geo,]
com = components(graph.adjlist(geos_touches_matrix(line_subset$geo,geos_strtree(line_subset$geo))))
line_subset$group = com$membership
line_subset_sf <- line_subset
st_geometry(line_subset_sf) <- line_subset$geom
line_subset_groups_sf = line_subset_sf |> group_by(group) |>
summarise()
line_subset_groups_sf$group=i
empty_sf <- rbind(empty_sf, line_subset_groups_sf[rowSums(line_subset_groups_sf |> st_intersects(group_lines_sf,sparse = FALSE))>2,])
}
ggplot() + geom_sf(data = empty_sf, aes(color = group))
Upvotes: 1
Reputation: 3604
Hmm.. I'm afraid there is something weird with either data, either approach:
Let's have a look on your data plotted:
library(ggplot2)
ggplot() +
geom_sf(data = testArea, color = "red") +
geom_sf(data = split_point, color = "blue")
Let's take only those, which intersects/touches the split_point
testArea |>
sf::st_cast(to = "LINESTRING") |>
sf::st_intersects(split_point, sparse = FALSE)
#> Warning in st_cast.sf(testArea, to = "LINESTRING"): repeating attributes for
#> all sub-geometries for which they may not be constant
#> [,1]
#> [1,] FALSE
#> [2,] FALSE
#> [3,] TRUE
#> [4,] FALSE
#> [5,] FALSE
#> [6,] FALSE
#> [7,] FALSE
#> [8,] FALSE
#> [9,] FALSE
#> [10,] TRUE
#> [11,] TRUE
#> [12,] TRUE
#> [13,] FALSE
#> [14,] FALSE
#> [15,] FALSE
#> [16,] FALSE
#> [17,] FALSE
#> [18,] TRUE
#> [19,] FALSE
#> [20,] TRUE
#> [21,] FALSE
#> [22,] FALSE
#> [23,] FALSE
#> [24,] FALSE
#> [25,] FALSE
#> [26,] TRUE
#> [27,] FALSE
So, only seven of them intersects with it.
t <- testArea |>
sf::st_cast(to = "LINESTRING") |>
sf::st_filter(split_point)
t
#> Simple feature collection with 7 features and 1 field
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: 6502228 ymin: 1962447 xmax: 6502549 ymax: 1962629
#> Projected CRS: NAD83 / California zone 3 (ftUS)
#> # A tibble: 7 × 2
#> group geom
#> * <dbl> <LINESTRING [US_survey_foot]>
#> 1 3 (6502366 1962491, 6502371 1962496, 6502473 1962498, 6502477 1962622)
#> 2 3 (6502366 1962491, 6502376 1962481, 6502475 1962483, 6502494 1962483, 65…
#> 3 3 (6502366 1962491, 6502367 1962469, 6502405 1962447)
#> 4 3 (6502366 1962491, 6502306 1962491)
#> 5 3 (6502366 1962491, 6502361 1962496, 6502228 1962496, 6502229 1962629)
#> 6 3 (6502366 1962491, 6502376 1962471, 6502452 1962458)
#> 7 3 (6502508 1962495, 6502498 1962494, 6502366 1962491)
But, to split the line(string) by point we have to be sure it's not line(string) start neither end point. Let's have a look:
t <- t |>
subset(lwgeom::st_startpoint(t) != sf::st_as_sfc(split_point))
#> Linking to GEOS 3.11.1, GDAL 3.6.2, PROJ 9.1.1; sf_use_s2() is TRUE
t
#> Simple feature collection with 1 feature and 1 field
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: 6502366 ymin: 1962491 xmax: 6502508 ymax: 1962495
#> Projected CRS: NAD83 / California zone 3 (ftUS)
#> # A tibble: 1 × 2
#> group geom
#> <dbl> <LINESTRING [US_survey_foot]>
#> 1 3 (6502508 1962495, 6502498 1962494, 6502366 1962491)
Only one line left. Let's check for endpoints:
t |>
subset(lwgeom::st_endpoint(t) != sf::st_as_sfc(split_point))
#> Simple feature collection with 0 features and 1 field
#> Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
#> Projected CRS: NAD83 / California zone 3 (ftUS)
#> # A tibble: 0 × 2
#> # ℹ 2 variables: group <dbl>, geom <GEOMETRY [US_survey_foot]>
Sooo.. there is no one line which can be spliced.
Maybe you should have a look on lines which starts/ends on your points? And to have groups of them (multilinestrings) to check which intersects with those strings but not intersects with split_point? That's would be my route.
Created on 2024-04-12 with reprex v2.1.0
Upvotes: 2