Reputation: 5691
say I have two sets of shape files that cover the same region and often, but not always share borders, e.g. US counties and PUMAs. I'd like to define a new scale of polygon that uses both PUMAs and counties as atomic building blocks, i.e. neither can ever be split, but I'd still like as many units as possible. Here is a toy example:
library(sp)
# make fake data
# 1) counties:
Cty <- SpatialPolygons(list(
Polygons(list(Polygon(cbind(x=c(0,2,2,1,0,0),y=c(0,0,2,2,1,0)), hole=FALSE)),"county1"),
Polygons(list(Polygon(cbind(x=c(2,4,4,3,3,2,2),y=c(0,0,2,2,1,1,0)),hole=FALSE)),"county2"),
Polygons(list(Polygon(cbind(x=c(4,5,5,4,4),y=c(0,0,3,2,0)),hole=FALSE)),"county3"),
Polygons(list(Polygon(cbind(x=c(0,1,2,2,0,0),y=c(1,2,2,3,3,1)),hole=FALSE)),"county4"),
Polygons(list(Polygon(cbind(x=c(2,3,3,4,4,3,3,2,2),y=c(1,1,2,2,3,3,4,4,1)),hole=FALSE)),"county5"),
Polygons(list(Polygon(cbind(x=c(0,2,2,1,0,0),y=c(3,3,4,5,5,3)),hole=FALSE)),"county6"),
Polygons(list(Polygon(cbind(x=c(1,2,3,4,1),y=c(5,4,4,5,5)),hole=FALSE)),"county7"),
Polygons(list(Polygon(cbind(x=c(3,4,4,5,5,4,3,3),y=c(3,3,2,3,5,5,4,3)),hole=FALSE)),"county8")
))
counties <- SpatialPolygonsDataFrame(Cty, data = data.frame(ID=paste0("county",1:8),
row.names=paste0("county",1:8),
stringsAsFactors=FALSE)
)
# 2) PUMAs:
Pum <- SpatialPolygons(list(
Polygons(list(Polygon(cbind(x=c(0,4,4,3,3,2,2,1,0,0),y=c(0,0,2,2,1,1,2,2,1,0)), hole=FALSE)),"puma1"),
Polygons(list(Polygon(cbind(x=c(4,5,5,4,3,3,4,4),y=c(0,0,5,5,4,3,3,0)),hole=FALSE)),"puma2"),
Polygons(list(Polygon(cbind(x=c(0,1,2,2,3,3,2,0,0),y=c(1,2,2,1,1,2,3,3,1)),hole=FALSE)),"puma3"),
Polygons(list(Polygon(cbind(x=c(2,3,4,4,3,3,2,2),y=c(3,2,2,3,3,4,4,3)),hole=FALSE)),"puma4"),
Polygons(list(Polygon(cbind(x=c(0,1,1,3,4,0,0),y=c(3,3,4,4,5,5,3)),hole=FALSE)),"puma5"),
Polygons(list(Polygon(cbind(x=c(1,2,2,1,1),y=c(3,3,4,4,3)),hole=FALSE)),"puma6")
))
Pumas <- SpatialPolygonsDataFrame(Pum, data = data.frame(ID=paste0("puma",1:6),
row.names=paste0("puma",1:6),
stringsAsFactors=FALSE)
)
# desired result:
Cclust <- SpatialPolygons(list(
Polygons(list(Polygon(cbind(x=c(0,4,4,3,3,2,2,1,0,0),y=c(0,0,2,2,1,1,2,2,1,0)), hole=FALSE)),"ctyclust1"),
Polygons(list(Polygon(cbind(x=c(4,5,5,4,3,3,4,4),y=c(0,0,5,5,4,3,3,0)),hole=FALSE)),"ctyclust2"),
Polygons(list(Polygon(cbind(x=c(0,1,2,2,3,3,4,4,3,3,2,2,0,0),y=c(1,2,2,1,1,2,2,3,3,4,4,3,3,1)),hole=FALSE)),"ctyclust3"),
Polygons(list(Polygon(cbind(x=c(0,2,2,3,4,0,0),y=c(3,3,4,4,5,5,3)),hole=FALSE)),"ctyclust4")
))
CtyClusters <- SpatialPolygonsDataFrame(Cclust, data = data.frame(ID = paste0("ctyclust", 1:4),
row.names = paste0("ctyclust", 1:4),
stringsAsFactors=FALSE)
)
# take a look
par(mfrow = c(1, 2))
plot(counties, border = gray(.3), lwd = 4)
plot(Pumas, add = TRUE, border = "#EEBB00", lty = 2, lwd = 2)
legend(-.5, -.3, lty = c(1, 2), lwd = c(4, 2), col = c(gray(.3), "#EEBB00"),
legend = c("county line", "puma line"), xpd = TRUE, bty = "n")
text(coordinates(counties), counties@data$ID,col = gray(.3))
text(coordinates(Pumas), Pumas@data$ID, col = "#EEBB00",cex=1.5)
title("building blocks")
#desired result:
plot(CtyClusters)
title("desired result")
text(-.5, -.5, "maximum units possible,\nwithout breaking either PUMAs or counties",
xpd = TRUE, pos = 4)
I've naively tried many of the g* functions in the rgeos package to achieve this and have not made headway. Does anyone know of a nice function or awesome trick for this task? Thank you!
[I'm also open to suggestions on a better title]
Upvotes: 7
Views: 1357
Reputation: 162321
Here's a relatively succinct solution which:
Uses rgeos::gRelate()
to identify Pumas that overlap but don't completely encompass/cover each county.To understand what it does, run example(gRelate)
and see this Wikipedia page. (h.t. to Tim Riffe)
Uses RBGL::connectedComp()
to identify groups of Pumas that should thus be merged. (For pointers on installing the RBGL package, see my answer to this SO question.)
Uses rgeos::gUnionCascaded()
to merge the indicated Pumas.
library(rgeos)
library(RBGL)
## Identify groups of Pumas that should be merged
x <- gRelate(counties, Pumas, byid=TRUE)
x <- matrix(grepl("2.2......", x), ncol=ncol(x), dimnames=dimnames(x))
k <- x %*% t(x)
l <- connectedComp(as(k, "graphNEL"))
## Extend gUnionCascaded so that each SpatialPolygon gets its own ID.
gMerge <- function(ii) {
x <- gUnionCascaded(Pumas[ii,])
spChFIDs(x, paste(ii, collapse="_"))
}
## Merge Pumas as needed
res <- do.call(rbind, sapply(l, gMerge))
plot(res)
Upvotes: 3
Reputation: 29477
I think you could do this with a smart set of tests for containment. This gets two of your parts, the simple paired case where puma1
contains county1
and county2
, and puma2
contains county8
and county3
.
library(rgeos)
## pumas by counties
pbyc <- gContains(Pumas, counties, byid = TRUE)
## row / col pairs of where contains test is TRUE for Pumas
pbyc.pairs <- cbind(row(pbyc)[pbyc], col(pbyc)[pbyc])
par(mfrow = c(nrow(pbyc.pairs), 1))
for (i in 1:nrow(pbyc.pairs)) {
plot(counties, col = "white")
plot(gUnion(counties[pbyc.pairs[i,1], ], Pumas[pbyc.pairs[i,2], ]), col = "red", add = TRUE)
}
The plotting there is dumbly redundant, but I think it shows a start. You need to find which contains tests accumulate the most smaller parts, and then union those. Sorry I haven't put the effort in to finish but I think this would work.
Upvotes: 3
Reputation: 5691
After much trial and error, I've come up with the following inelegant solution, rather in keeping with the tip by @mdsummer, but adding more checks, removing redundant merged polygons, and checking. Yikes. If someone can take what I've done and make it cleaner, then I'll accept that answer rather this, which I'd like to avoid if at all possible:
library(rgeos)
pbyc <- gCovers(Pumas, counties, byid = TRUE) |
gContains(Pumas, counties, byid = TRUE) |
gOverlaps(Pumas, counties, byid = TRUE) |
t(gCovers(counties, Pumas, byid = TRUE) |
gContains(counties, Pumas, byid = TRUE) |
gOverlaps(counties, Pumas, byid = TRUE))
## row / col pairs of where test is TRUE for Pumas or counties
pbyc.pairs <- cbind(row(pbyc)[pbyc], col(pbyc)[pbyc])
Potentials <- apply(pbyc.pairs, 1, function(x,counties,Pumas){
gUnion(counties[x[1], ], Pumas[x[2], ])
}, counties = counties, Pumas= Pumas)
for (i in 1:length(Potentials)){
Potentials[[i]]@polygons[[1]]@ID <- paste0("p",i)
}
Potentials <- do.call("rbind",Potentials)
# remove redundant polygons:
Redundants <- gEquals(Potentials, byid = TRUE)
Redundants <- row(Redundants)[Redundants & lower.tri(Redundants)]
Potentials <- Potentials[-c(Redundants),]
# for each Potential summary polygon, see which counties and Pumas are contained:
keep.i <- vector(length=length(Potentials))
for (i in 1:length(Potentials)){
ctyblocki <- gUnionCascaded(counties[c(gCovers(Potentials[i, ], counties, byid = TRUE)), ])
Pumablocki <- gUnionCascaded(Pumas[c(gCovers(Potentials[i, ], Pumas, byid = TRUE)), ])
keep.i[i] <- gEquals(ctyblocki, Potentials[i, ]) & gEquals(Pumablocki, Potentials[i, ])
}
# what do we have left?
NewUnits <- Potentials[keep.i, ]
plot(NewUnits)
Upvotes: 1