Reputation: 41
I have the data of some pollution monitoring stations. I am trying to create a new tibble, in such a way that if the data meets certain conditions it will be merged to the left side of the column elevation
. The conditions to be fulfilled are that the water_bodies (water_body
) are equal and that the elevation (elevation
) is less than that of the observation to which it is going to be merged.
Trying to explain this in words sounds really silly, since english is not my first language, but here is an example of what I am looking for, which I think clears everything up.
This is the data:
tribble(
~id_emca, ~water_body, ~id_estmun, ~dbo, ~elevation,
"DLAGU20", "RIO CHICALOTE_16", 1005, 364.92, 1865,
"DLAGU20", "RIO CHICALOTE_17", 1005, 485.91, 1865,
"DLAGU20", "RIO CHICALOTE_18", 1005, 270.32, 1865,
"DLBAJ155", "RIO MULEGE_12", 3003, 20.3, 215,
"DLBAJ155", "RIO MULEGE_13", 3003, 14.005, 215,
"DLBAJ157", "RIO CHICALOTE_17", 3007, 315.34, 1677,
"DLBAJ157", "RIO CHICALOTE_18", 3007, 430.21, 1677,
"DLBAJ159", "RIO MULEGE_13", 3010, 17.89, 477,
"DLBAJ159", "RIO MULEGE_14", 3010, 35.98, 477,
"DLCAM80", "RIO CHICALOTE_18", 4012, 157.66, 1003,
"DLCAM85", "RIO CHICALOTE_18", 4009, 323.02, 573,
"DLCAM122", "RIO MULEGE_12", 4001, 37.81, 300
)
id_emca water_body id_estmun dbo elevation
<chr> <chr> <dbl> <dbl> <dbl>
1 DLAGU20 RIO CHICALOTE_16 1005 365. 1865
2 DLAGU20 RIO CHICALOTE_17 1005 486. 1865
3 DLAGU20 RIO CHICALOTE_18 1005 270. 1865
4 DLBAJ155 RIO MULEGE_12 3003 20.3 215
5 DLBAJ155 RIO MULEGE_13 3003 14.0 215
6 DLBAJ157 RIO CHICALOTE_17 3007 315. 1677
7 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677
8 DLBAJ159 RIO MULEGE_13 3010 17.9 477
9 DLBAJ159 RIO MULEGE_14 3010 36.0 477
10 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
11 DLCAM85 RIO CHICALOTE_18 4009 323. 573
12 DLCAM122 RIO MULEGE_12 4001 37.8 300
The new tibble should look like this:
Upvotes: 0
Views: 86
Reputation: 41220
You could make a non-equi join with data.table
:
df <- tibble::tribble(
~id_emca, ~water_body, ~id_estmun, ~dbo, ~elevation,
"DLAGU20", "RIO CHICALOTE_16", 1005, 364.92, 1865,
"DLAGU20", "RIO CHICALOTE_17", 1005, 485.91, 1865,
"DLAGU20", "RIO CHICALOTE_18", 1005, 270.32, 1865,
"DLBAJ155", "RIO MULEGE_12", 3003, 20.3, 215,
"DLBAJ155", "RIO MULEGE_13", 3003, 14.005, 215,
"DLBAJ157", "RIO CHICALOTE_17", 3007, 315.34, 1677,
"DLBAJ157", "RIO CHICALOTE_18", 3007, 430.21, 1677,
"DLBAJ159", "RIO MULEGE_13", 3010, 17.89, 477,
"DLBAJ159", "RIO MULEGE_14", 3010, 35.98, 477,
"DLCAM80", "RIO CHICALOTE_18", 4012, 157.66, 1003,
"DLCAM85", "RIO CHICALOTE_18", 4009, 323.02, 573,
"DLCAM122", "RIO MULEGE_12", 4001, 37.81, 300
)
library(data.table)
setDT(df)
df[,elevation_join := elevation]
result <- df[df, on = .(water_body = water_body, elevation_join < elevation_join)]
result[,.(id_emca = i.id_emca,
water_body,
id_estmun = i.id_estmun,
dbo = i.dbo,
elevation = i.elevation,
id_emca_b = id_emca,
id_estmun_b = id_estmun,
dbo_b = dbo,
elevation_b = elevation)]
id_emca water_body id_estmun dbo elevation id_emca_b water_body_b id_estmun_b dbo_b elevation_b
1: DLAGU20 RIO CHICALOTE_16 1005 364.920 1865 <NA> <NA> NA NA NA
2: DLAGU20 RIO CHICALOTE_17 1005 485.910 1865 DLBAJ157 RIO CHICALOTE_17 3007 315.340 1677
3: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677
4: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLCAM80 RIO CHICALOTE_18 4012 157.660 1003
5: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
6: DLBAJ155 RIO MULEGE_12 3003 20.300 215 <NA> <NA> NA NA NA
7: DLBAJ155 RIO MULEGE_13 3003 14.005 215 <NA> <NA> NA NA NA
8: DLBAJ157 RIO CHICALOTE_17 3007 315.340 1677 <NA> <NA> NA NA NA
9: DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677 DLCAM80 RIO CHICALOTE_18 4012 157.660 1003
10: DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
11: DLBAJ159 RIO MULEGE_13 3010 17.890 477 DLBAJ155 RIO MULEGE_13 3003 14.005 215
12: DLBAJ159 RIO MULEGE_14 3010 35.980 477 <NA> <NA> NA NA NA
13: DLCAM80 RIO CHICALOTE_18 4012 157.660 1003 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
14: DLCAM85 RIO CHICALOTE_18 4009 323.020 573 <NA> <NA> NA NA NA
15: DLCAM122 RIO MULEGE_12 4001 37.810 300 DLBAJ155 RIO MULEGE_12 3003 20.300 215
Upvotes: 0
Reputation: 1137
library(tidyverse)
df <- tribble(
~id_emca, ~water_body, ~id_estmun, ~dbo, ~elevation,
"DLAGU20", "RIO CHICALOTE_16", 1005, 364.92, 1865,
"DLAGU20", "RIO CHICALOTE_17", 1005, 485.91, 1865,
"DLAGU20", "RIO CHICALOTE_18", 1005, 270.32, 1865,
"DLBAJ155", "RIO MULEGE_12", 3003, 20.3, 215,
"DLBAJ155", "RIO MULEGE_13", 3003, 14.005, 215,
"DLBAJ157", "RIO CHICALOTE_17", 3007, 315.34, 1677,
"DLBAJ157", "RIO CHICALOTE_18", 3007, 430.21, 1677,
"DLBAJ159", "RIO MULEGE_13", 3010, 17.89, 477,
"DLBAJ159", "RIO MULEGE_14", 3010, 35.98, 477,
"DLCAM80", "RIO CHICALOTE_18", 4012, 157.66, 1003,
"DLCAM85", "RIO CHICALOTE_18", 4009, 323.02, 573,
"DLCAM122", "RIO MULEGE_12", 4001, 37.81, 300
)
temp <- df %>%
group_by(water_body) %>%
filter(n() > 1)
df %>%
arrange(water_body) %>%
left_join(temp,
by = "water_body",
keep = TRUE) %>%
filter(elevation.x > elevation.y | is.na(elevation.y))
Output:
# A tibble: 11 x 10
id_emca.x water_body.x id_estmun.x dbo.x elevation.x id_emca.y water_body.y id_estmun.y dbo.y elevation.y
<chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 DLAGU20 RIO CHICALOTE_16 1005 365. 1865 NA NA NA NA NA
2 DLAGU20 RIO CHICALOTE_17 1005 486. 1865 DLBAJ157 RIO CHICALOTE_17 3007 315. 1677
3 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677
4 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
5 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLCAM85 RIO CHICALOTE_18 4009 323. 573
6 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
7 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677 DLCAM85 RIO CHICALOTE_18 4009 323. 573
8 DLCAM80 RIO CHICALOTE_18 4012 158. 1003 DLCAM85 RIO CHICALOTE_18 4009 323. 573
9 DLCAM122 RIO MULEGE_12 4001 37.8 300 DLBAJ155 RIO MULEGE_12 3003 20.3 215
10 DLBAJ159 RIO MULEGE_13 3010 17.9 477 DLBAJ155 RIO MULEGE_13 3003 14.0 215
11 DLBAJ159 RIO MULEGE_14 3010 36.0 477 NA NA NA NA NA
Upvotes: 2