Reputation: 3502
There are two data frames that need to be joined according to the nearest features. As you can see below, df
has UniqueIDs
that was created based on well_id
from df1
.
As well_id
is missing in df
and the two dataframes cannot be merged based on this column, I'm trying to merge them based on the nearest features.
> group_WP030793Ta <- df %>%
+ dplyr::filter(group_id == "WP030793Ta")
> group_WP030793Ta
# A tibble: 4 x 4
UniqueIDs group_id x y
<chr> <chr> <dbl> <dbl>
1 GW161_WP030793Ta WP030793Ta 1894411. 5577232.
2 GW162_WP030793Ta WP030793Ta 1894914. 5577675.
3 GW163_WP030793Ta WP030793Ta 1895271. 5576725.
4 GW164_WP030793Ta WP030793Ta 1895573. 5577578.
> group1_WP030793Ta <- df1 %>%
+ dplyr::filter(group_id1 == "WP030793Ta")
> group1_WP030793Ta
group_id1 x1 y1 well_id
1 WP030793Ta 1894914 5577675 Well 1880
2 WP030793Ta 1895573 5577578 Well 1452
3 WP030793Ta 1894411 5577232 Well 2043
4 WP030793Ta 1895271 5576725 Well 1881
I did it as below.
df_sf1 <- df1 %>%
st_as_sf(coords = c('x1', 'y1'), remove = F) %>%
st_set_crs(2193)
joined_sf <- df_sf %>%
cbind(
df_sf1[st_nearest_feature(df_sf, df_sf1),])
As you can see, group_id
and group_id1
are not the same.
check_WP030793Ta <- joined_sf %>%
dplyr::filter(group_id == "WP030793Ta")
UniqueIDs group_id x y group_id1 x1 y1 well_id geometry geometry.1
1 GW161_WP030793Ta WP030793Ta 1894411 5577232 WP040177T 1894411 5577232 Well 2043 POINT (1894411 5577232) POINT (1894411 5577232)
2 GW162_WP030793Ta WP030793Ta 1894914 5577675 WP030793Ta 1894914 5577675 Well 1880 POINT (1894914 5577675) POINT (1894914 5577675)
3 GW163_WP030793Ta WP030793Ta 1895271 5576725 WP040177T 1895271 5576725 Well 1881 POINT (1895271 5576725) POINT (1895271 5576725)
4 GW164_WP030793Ta WP030793Ta 1895573 5577578 WP140476Tb 1895573 5577578 Well 1452 POINT (1895573 5577578) POINT (1895573 5577578)
How can I join based on nearest features within each group?
Update
Please note that, the well_id
for certain group_ids
is repeated.
So for example if a user has a permit group_id
that is called WPXXXTa (from 2000 to 2005), when this permit group_id
gets updated to WPXXXTb (from 2005 to 2010), it will have in general the same well_ids
.
DATA
df
> dput(df)
structure(list(UniqueIDs = c("GW427_WP980128T", "GW428_WP980128T",
"GW429_WP980128T", "GW430_WP980128T", "GW52_WP030680T", "GW53_WP030680T",
"GW54_WP030680T", "GW55_WP030680T", "GW56_WP030680Ta", "GW57_WP030680Ta",
"GW58_WP030680Ta", "GW59_WP030680Ta", "GW92_WP030710T", "GW93_WP030710T",
"GW94_WP030710T", "GW95_WP030710T", "GW96_WP030710Ta", "GW97_WP030710Ta",
"GW98_WP030710Ta", "GW99_WP030710Ta", "GW157_WP030793T", "GW158_WP030793T",
"GW159_WP030793T", "GW160_WP030793T", "GW161_WP030793Ta", "GW162_WP030793Ta",
"GW163_WP030793Ta", "GW164_WP030793Ta", "GW248_WP080553Ta", "GW249_WP080553Ta",
"GW250_WP080553Ta", "GW251_WP080553Ta", "GW252_WP080553Tb", "GW253_WP080553Tb",
"GW254_WP080553Tb", "GW255_WP080553Tb", "GW256_WP080553Tc", "GW257_WP080553Tc",
"GW258_WP080553Tc", "GW259_WP080553Tc", "GW265_WP100090T", "GW266_WP100090T",
"GW267_WP100090T", "GW268_WP100090T", "GW499_WP100090Ta", "GW500_WP100090Ta",
"GW501_WP100090Ta", "GW502_WP100090Ta", "GW503_WP100090Tb", "GW504_WP100090Tb",
"GW505_WP100090Tb", "GW506_WP100090Tb", "GW555_WP120385Tb", "GW556_WP120385Tb",
"GW557_WP120385Tb", "GW558_WP120385Tb", "GW314_WP140476T", "GW315_WP140476T",
"GW316_WP140476T", "GW317_WP140476T", "GW318_WP140476T", "GW319_WP140476T",
"GW320_WP140476Ta", "GW321_WP140476Ta", "GW322_WP140476Ta", "GW323_WP140476Ta",
"GW324_WP140476Ta", "GW325_WP140476Ta", "GW561_WP140476Tb", "GW562_WP140476Tb",
"GW563_WP140476Tb", "GW564_WP140476Tb", "GW565_WP140476Tb", "GW566_WP140476Tb",
"GW340_WP140564T", "GW341_WP140564T", "GW342_WP140564T", "GW343_WP140564T",
"GW344_WP140564T", "GW345_WP140564T", "GW532_WP140564Ta", "GW533_WP140564Ta",
"GW534_WP140564Ta", "GW535_WP140564Ta", "GW567_WP140564Ta", "GW568_WP140564Ta",
"GW346_WP140575T", "GW347_WP140575T", "GW348_WP140575T", "GW349_WP140575T",
"GW350_WP140575T", "GW544_WP140575Ta", "GW545_WP140575Ta", "GW546_WP140575Ta",
"GW547_WP140575Ta", "GW548_WP140575Ta", "GW549_WP140575Ta"),
group_id = c("WP980128T", "WP980128T", "WP980128T", "WP980128T",
"WP030680T", "WP030680T", "WP030680T", "WP030680T", "WP030680Ta",
"WP030680Ta", "WP030680Ta", "WP030680Ta", "WP030710T", "WP030710T",
"WP030710T", "WP030710T", "WP030710Ta", "WP030710Ta", "WP030710Ta",
"WP030710Ta", "WP030793T", "WP030793T", "WP030793T", "WP030793T",
"WP030793Ta", "WP030793Ta", "WP030793Ta", "WP030793Ta", "WP080553Ta",
"WP080553Ta", "WP080553Ta", "WP080553Ta", "WP080553Tb", "WP080553Tb",
"WP080553Tb", "WP080553Tb", "WP080553Tc", "WP080553Tc", "WP080553Tc",
"WP080553Tc", "WP100090T", "WP100090T", "WP100090T", "WP100090T",
"WP100090Ta", "WP100090Ta", "WP100090Ta", "WP100090Ta", "WP100090Tb",
"WP100090Tb", "WP100090Tb", "WP100090Tb", "WP120385Tb", "WP120385Tb",
"WP120385Tb", "WP120385Tb", "WP140476T", "WP140476T", "WP140476T",
"WP140476T", "WP140476T", "WP140476T", "WP140476Ta", "WP140476Ta",
"WP140476Ta", "WP140476Ta", "WP140476Ta", "WP140476Ta", "WP140476Tb",
"WP140476Tb", "WP140476Tb", "WP140476Tb", "WP140476Tb", "WP140476Tb",
"WP140564T", "WP140564T", "WP140564T", "WP140564T", "WP140564T",
"WP140564T", "WP140564Ta", "WP140564Ta", "WP140564Ta", "WP140564Ta",
"WP140564Ta", "WP140564Ta", "WP140575T", "WP140575T", "WP140575T",
"WP140575T", "WP140575T", "WP140575Ta", "WP140575Ta", "WP140575Ta",
"WP140575Ta", "WP140575Ta", "WP140575Ta"), x = c(1895418.928,
1895977.206, 1896640.698, 1895417.928, 1895417.928, 1895418.928,
1895977.206, 1896640.698, 1895417.928, 1895418.928, 1895977.206,
1896640.698, 1897977.552, 1898423.744, 1898465.965, 1899160.578,
1897977.552, 1898423.744, 1898465.965, 1899160.578, 1895573.351,
1894410.616, 1894913.875, 1895271.31, 1894410.616, 1894913.875,
1895271.31, 1895573.351, 1896495.091, 1898542.668, 1898731.391,
1898948.03, 1896495.091, 1898542.668, 1898731.391, 1898948.03,
1896495.091, 1898542.668, 1898731.391, 1898948.03, 1889390.432,
1889592.342, 1890299.744, 1891064.34, 1891064.34, 1889390.432,
1889592.342, 1890299.744, 1890299.744, 1891064.34, 1889390.432,
1889592.342, 1894083.58, 1894092.36, 1896352.598, 1894878,
1893616.553, 1894022.731, 1894410.616, 1894913.875, 1895271.31,
1895573.351, 1893616.553, 1894022.731, 1894410.616, 1894913.875,
1895271.31, 1895573.351, 1894913.875, 1893616.553, 1895271.31,
1894410.616, 1894022.731, 1895573.351, 1895417.928, 1895977.206,
1896640.698, 1896641.699, 1897384.378, 1899529.078, 1896640.698,
1895977.206, 1896641.699, 1897384.378, 1895417.928, 1899529.078,
1888029.348, 1889592.342, 1890299.744, 1891064.34, 1891768.728,
1890299.744, 1891768.728, 1886805.544, 1888029.348, 1891064.34,
1889592.342), y = c(5583799.952, 5584310.447, 5584219.537,
5583795.948, 5583795.948, 5583799.952, 5584310.447, 5584219.537,
5583795.948, 5583799.952, 5584310.447, 5584219.537, 5580035.831,
5580655.371, 5579716.704, 5579134.428, 5580035.831, 5580655.371,
5579716.704, 5579134.428, 5577577.59, 5577232.113, 5577674.523,
5576724.933, 5577232.113, 5577674.523, 5576724.933, 5577577.59,
5586371.051, 5585936.231, 5587672.549, 5585643.113, 5586371.051,
5585936.231, 5587672.549, 5585643.113, 5586371.051, 5585936.231,
5587672.549, 5585643.113, 5570931.906, 5572174.76, 5572513.115,
5572010.921, 5572010.921, 5570931.906, 5572174.76, 5572513.115,
5572513.115, 5572010.921, 5570931.906, 5572174.76, 5576273.384,
5577416.176, 5579074.805, 5578463, 5579767.708, 5580255.137,
5577232.113, 5577674.523, 5576724.933, 5577577.59, 5579767.708,
5580255.137, 5577232.113, 5577674.523, 5576724.933, 5577577.59,
5577674.523, 5579767.708, 5576724.933, 5577232.113, 5580255.137,
5577577.59, 5583795.948, 5584310.447, 5584219.537, 5584216.535,
5583557.233, 5582842.198, 5584219.537, 5584310.447, 5584216.535,
5583557.233, 5583795.948, 5582842.198, 5572055.392, 5572174.76,
5572513.115, 5572010.921, 5572459.348, 5572513.115, 5572459.348,
5572145.22, 5572055.392, 5572010.921, 5572174.76)), row.names = c(NA,
-97L), class = c("tbl_df", "tbl", "data.frame"))
df1
> dput(df1)
structure(list(group_id1 = c("WP140564Ta", "WP030793Ta", "WP040177T",
"WP140476Tb", "WP040178T", "WP040178T", "WP030793Ta", "WP040177T",
"WP030793Ta", "WP030609Ta", "WP140476Tb", "WP040178T", "WP030793Ta",
"WP040177T", "WP140476Tb", "WP040177T", "WP140476Tb", "WP040178T",
"WP030710Ta", "WP140575Ta", "WP130260T", "WP140564Ta", "WP120385Tb",
"WP080553Tc", "WP120320T", "WP030609Ta", "WP120320T", "WP140476Tb",
"WP030609Ta", "WP140575Ta", "WP140575Ta", "WP140564Ta", "WP140564Ta",
"WP030775Tb", "WP981038Tb", "WP180240Ta", "WP140575Ta", "WP080553Tc",
"WP140564Ta", "WP030775Tb", "WP180240Ta", "WP981038Tb", "WP120385Tb",
"WP030775Tb", "WP140564Ta", "WP981038Tb", "WP030609Ta", "WP981038Tb",
"WP981038Tb", "WP180240Ta", "WP120293Ta", "WP130260T", "WP120320T",
"WP130260T", "WP030683Ta", "WP120320T", "WP120293Ta", "WP130260T",
"WP180240Ta", "WP120320T", "WP030775Tb", "WP030775Tb", "WP180240Ta",
"WP140575Ta", "WP140476Tb", "WP080553Tc", "WP030710Ta", "WP030710Ta",
"WP030710Ta", "WP120320T", "WP140575Ta", "WP120385Tb", "WP030775Tb",
"WP080553Tc", "WP981038Tb", "WP030775Tb", "WP120385Tb", "WP120293Ta",
"WP120293Ta", "WP030683Ta", "WP030683Ta", "WP030683Ta"), x1 = c(1896640.698,
1894913.875, 1894913.875, 1894913.875, 1898423.744, 1897977.552,
1895573.351, 1895573.351, 1894410.616, 1884000.938, 1893616.553,
1898465.965, 1895271.31, 1895271.31, 1895271.31, 1894410.616,
1894410.616, 1899160.578, 1898465.965, 1890299.744, 1906890.973,
1895417.928, 1894083.58, 1898731.391, 1884030.965, 1884030.965,
1884000.938, 1894022.731, 1883999.938, 1891768.728, 1886805.544,
1895977.206, 1896641.699, 1899959.097, 1889368, 1889624.55, 1888029.348,
1898948.03, 1899529.078, 1899915.06, 1889388.418, 1889624.55,
1894092.36, 1899927.07, 1897384.378, 1889388.418, 1883986.921,
1888503.953, 1889634.556, 1889264.351, 1896007.954, 1898380.5,
1884027.955, 1899492.203, 1900158.351, 1883720.711, 1896611.299,
1899303.201, 1889895, 1883986.921, 1899908.862, 1899916.15, 1889357,
1891064.34, 1895573.351, 1896495.091, 1899160.578, 1898423.744,
1897977.552, 1883999.938, 1889592.342, 1896352.598, 1899938.084,
1898542.668, 1889264.351, 1899924.074, 1894878, 1893939.51, 1896548.228,
1899625.914, 1899871.286, 1899965.508), y1 = c(5584219.537, 5577674.523,
5577674.523, 5577674.523, 5580655.371, 5580035.831, 5577577.59,
5577577.59, 5577232.113, 5576264.299, 5579767.708, 5579716.704,
5576724.933, 5576724.933, 5576724.933, 5577232.113, 5577232.113,
5579134.428, 5579716.704, 5572513.115, 5568457.895, 5583795.948,
5576273.384, 5587672.549, 5576228.283, 5576228.283, 5576264.299,
5580255.137, 5576264.299, 5572459.348, 5572145.22, 5584310.447,
5584216.535, 5567653.364, 5564346, 5564890.01, 5572055.392, 5585643.113,
5582842.198, 5567693.386, 5564756.889, 5564890.01, 5577416.176,
5567683.38, 5583557.233, 5564756.889, 5576304.322, 5564069.317,
5564890.012, 5564669.815, 5570344.641, 5570933.436, 5576266.306,
5571415.958, 5573380.472, 5576500.39, 5570774.037, 5570587.34,
5564942, 5576304.322, 5567687.778, 5567676.036, 5564340, 5572010.921,
5577577.59, 5586371.051, 5579134.428, 5580655.371, 5580035.831,
5576264.299, 5572174.76, 5579074.805, 5567635.348, 5585936.231,
5564669.815, 5567641.35, 5578463, 5570575.456, 5570945.145, 5573723.626,
5572550.829, 5571596.163), well_id = c("Well 1518", "Well 1880",
"Well 1880", "Well 1880", "Well 2246", "Well 2277", "Well 1452",
"Well 1452", "Well 2043", "Well 2747", "Well 2219", "Well 2278",
"Well 1881", "Well 1881", "Well 1881", "Well 2043", "Well 2043",
"Well 2242", "Well 2278", "Well 3774", "Well 2830", "Well 4122",
"Well 4830", "Well 4295", "Well 4411", "Well 4411", "Well 2747",
"Well 2933", "Well 3092", "Well 4882", "Well 3434", "Well 3870",
"Well 4593", "Well 5617", "Well 6715", "Well 6720", "Well 4489",
"Well 4672", "Well 5167", "Well 15108", "Well 15458", "Well 6720",
"Well 4994", "Well 15109", "Well 5497", "Well 15458", "SW 214",
"Well 6716", "Well 6721", "Well 6723", "SW 754", "SW 854", "SW 223",
"SW 928", "SW 962", "Well 16053", "SW 787", "SW 921", "Well 16813",
"SW 213", "Well 16892", "Well 16893", "Well 16930", "Well 1402",
"Well 1452", "Well 2160", "Well 2242", "Well 2246", "Well 2277",
"Well 3092", "Well 3722", "Well 4764", "Well 5676", "Well 5723",
"Well 6723", "Well 15107", "Well 16817", "SW 661", "SW 782",
"SW 941", "SW 950", "SW 954")), class = "data.frame", row.names = c(NA,
-82L))
Upvotes: 1
Views: 783
Reputation: 5499
I think you're looking for a spatial join, rather than a cbind
/ left_join
.
library(tidyverse)
library(sf)
# Make df & df1 sf objects, and keep the coordinates as columns just in case.
df <- df %>% st_as_sf(coords = c("x", "y"), remove = FALSE) %>%
st_set_crs(2193)
df1 <- df1 %>% st_as_sf(coords = c("x1", "y1"), remove = FALSE) %>%
st_set_crs(2193)
# Join df with df1, based on the nearest feature:
df_near <- st_join(df, df1, join = st_nearest_feature)
df_near
Simple feature collection with 97 features and 8 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 1886806 ymin: 5570932 xmax: 1899529 ymax: 5587673
Projected CRS: NZGD2000 / New Zealand Transverse Mercator 2000
# A tibble: 97 × 9
UniqueIDs group_id x y geometry group_id1 x1 y1 well_id
* <chr> <chr> <dbl> <dbl> <POINT [m]> <chr> <dbl> <dbl> <chr>
1 GW427_WP980128T WP980128T 1895419. 5583800. (1895419 5583800) WP140564Ta 1895418. 5583796. Well 4122
2 GW428_WP980128T WP980128T 1895977. 5584310. (1895977 5584310) WP140564Ta 1895977. 5584310. Well 3870
3 GW429_WP980128T WP980128T 1896641. 5584220. (1896641 5584220) WP140564Ta 1896641. 5584220. Well 1518
Line 51 from df
seems to be the only one that isn't directly on top of any points from df1
. Here's what it looks like after the join:
df_near[51,]
Simple feature collection with 1 feature and 8 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 1889390 ymin: 5570932 xmax: 1889390 ymax: 5570932
Projected CRS: NZGD2000 / New Zealand Transverse Mercator 2000
# A tibble: 1 × 9
UniqueIDs group_id x y geometry group_id1 x1 y1 well_id
<chr> <chr> <dbl> <dbl> <POINT [m]> <chr> <dbl> <dbl> <chr>
1 GW505_WP100090Tb WP100090Tb 1889390. 5570932. (1889390 5570932) WP140575Ta 1889592. 5572175. Well 3722
And a plot showing the connection between nearest points. There's only one since all of df except one is on top of df1.
df
in red, df1
in black, the connection in green.
library(nngeo)
ggplot() +
geom_sf(data = df, color = 'red', size = 3) +
geom_sf(data = df1, color = 'black', alpha = .6) +
theme_void() +
geom_sf(data = st_connect(df, df1), color = 'green')
Upvotes: 2