Quinoba
Quinoba

Reputation: 41

Merge tibble with data from the same tibble

I have the data of some pollution monitoring stations. I am trying to create a new tibble, in such a way that if the data meets certain conditions it will be merged to the left side of the column elevation. The conditions to be fulfilled are that the water_bodies (water_body) are equal and that the elevation (elevation) is less than that of the observation to which it is going to be merged.

Trying to explain this in words sounds really silly, since english is not my first language, but here is an example of what I am looking for, which I think clears everything up.

This is the data:

tribble(
  ~id_emca,       ~water_body,      ~id_estmun,   ~dbo,    ~elevation,
  "DLAGU20",   "RIO CHICALOTE_16",     1005,      364.92,    1865,
  "DLAGU20",   "RIO CHICALOTE_17",     1005,      485.91,    1865,
  "DLAGU20",   "RIO CHICALOTE_18",     1005,      270.32,    1865,
  "DLBAJ155",  "RIO MULEGE_12",        3003,      20.3,      215,
  "DLBAJ155",  "RIO MULEGE_13",        3003,      14.005,    215,
  "DLBAJ157",  "RIO CHICALOTE_17",     3007,      315.34,    1677,
  "DLBAJ157",  "RIO CHICALOTE_18",     3007,      430.21,    1677,
  "DLBAJ159",  "RIO MULEGE_13",        3010,      17.89,     477,
  "DLBAJ159",  "RIO MULEGE_14",        3010,      35.98,     477,
  "DLCAM80",   "RIO CHICALOTE_18",     4012,      157.66,    1003,
  "DLCAM85",   "RIO CHICALOTE_18",     4009,      323.02,    573,
  "DLCAM122",  "RIO MULEGE_12",        4001,      37.81,     300
)


id_emca  water_body       id_estmun   dbo elevation
   <chr>    <chr>                <dbl> <dbl>     <dbl>
 1 DLAGU20  RIO CHICALOTE_16      1005 365.       1865
 2 DLAGU20  RIO CHICALOTE_17      1005 486.       1865
 3 DLAGU20  RIO CHICALOTE_18      1005 270.       1865
 4 DLBAJ155 RIO MULEGE_12         3003  20.3       215
 5 DLBAJ155 RIO MULEGE_13         3003  14.0       215
 6 DLBAJ157 RIO CHICALOTE_17      3007 315.       1677
 7 DLBAJ157 RIO CHICALOTE_18      3007 430.       1677
 8 DLBAJ159 RIO MULEGE_13         3010  17.9       477
 9 DLBAJ159 RIO MULEGE_14         3010  36.0       477
10 DLCAM80  RIO CHICALOTE_18      4012 158.       1003
11 DLCAM85  RIO CHICALOTE_18      4009 323.        573
12 DLCAM122 RIO MULEGE_12         4001  37.8       300

The new tibble should look like this:

enter image description here

Upvotes: 0

Views: 86

Answers (2)

Waldi
Waldi

Reputation: 41220

You could make a non-equi join with data.table:

df <- tibble::tribble(
  ~id_emca,       ~water_body,      ~id_estmun,   ~dbo,    ~elevation,
  "DLAGU20",   "RIO CHICALOTE_16",     1005,      364.92,    1865,
  "DLAGU20",   "RIO CHICALOTE_17",     1005,      485.91,    1865,
  "DLAGU20",   "RIO CHICALOTE_18",     1005,      270.32,    1865,
  "DLBAJ155",  "RIO MULEGE_12",        3003,      20.3,      215,
  "DLBAJ155",  "RIO MULEGE_13",        3003,      14.005,    215,
  "DLBAJ157",  "RIO CHICALOTE_17",     3007,      315.34,    1677,
  "DLBAJ157",  "RIO CHICALOTE_18",     3007,      430.21,    1677,
  "DLBAJ159",  "RIO MULEGE_13",        3010,      17.89,     477,
  "DLBAJ159",  "RIO MULEGE_14",        3010,      35.98,     477,
  "DLCAM80",   "RIO CHICALOTE_18",     4012,      157.66,    1003,
  "DLCAM85",   "RIO CHICALOTE_18",     4009,      323.02,    573,
  "DLCAM122",  "RIO MULEGE_12",        4001,      37.81,     300
)

library(data.table)
setDT(df)

df[,elevation_join := elevation]

result <- df[df, on = .(water_body = water_body, elevation_join < elevation_join)]
result[,.(id_emca = i.id_emca,
          water_body,
          id_estmun = i.id_estmun,
          dbo = i.dbo,
          elevation = i.elevation,
          id_emca_b = id_emca,
          id_estmun_b = id_estmun,
          dbo_b = dbo,
          elevation_b = elevation)]

     id_emca       water_body id_estmun     dbo elevation id_emca_b     water_body_b id_estmun_b   dbo_b elevation_b
 1:  DLAGU20 RIO CHICALOTE_16      1005 364.920      1865      <NA>             <NA>          NA      NA          NA
 2:  DLAGU20 RIO CHICALOTE_17      1005 485.910      1865  DLBAJ157 RIO CHICALOTE_17        3007 315.340        1677
 3:  DLAGU20 RIO CHICALOTE_18      1005 270.320      1865  DLBAJ157 RIO CHICALOTE_18        3007 430.210        1677
 4:  DLAGU20 RIO CHICALOTE_18      1005 270.320      1865   DLCAM80 RIO CHICALOTE_18        4012 157.660        1003
 5:  DLAGU20 RIO CHICALOTE_18      1005 270.320      1865   DLCAM85 RIO CHICALOTE_18        4009 323.020         573
 6: DLBAJ155    RIO MULEGE_12      3003  20.300       215      <NA>             <NA>          NA      NA          NA
 7: DLBAJ155    RIO MULEGE_13      3003  14.005       215      <NA>             <NA>          NA      NA          NA
 8: DLBAJ157 RIO CHICALOTE_17      3007 315.340      1677      <NA>             <NA>          NA      NA          NA
 9: DLBAJ157 RIO CHICALOTE_18      3007 430.210      1677   DLCAM80 RIO CHICALOTE_18        4012 157.660        1003
10: DLBAJ157 RIO CHICALOTE_18      3007 430.210      1677   DLCAM85 RIO CHICALOTE_18        4009 323.020         573
11: DLBAJ159    RIO MULEGE_13      3010  17.890       477  DLBAJ155    RIO MULEGE_13        3003  14.005         215
12: DLBAJ159    RIO MULEGE_14      3010  35.980       477      <NA>             <NA>          NA      NA          NA
13:  DLCAM80 RIO CHICALOTE_18      4012 157.660      1003   DLCAM85 RIO CHICALOTE_18        4009 323.020         573
14:  DLCAM85 RIO CHICALOTE_18      4009 323.020       573      <NA>             <NA>          NA      NA          NA
15: DLCAM122    RIO MULEGE_12      4001  37.810       300  DLBAJ155    RIO MULEGE_12        3003  20.300         215

Upvotes: 0

Desmond
Desmond

Reputation: 1137

library(tidyverse)

df <- tribble(
  ~id_emca,       ~water_body,      ~id_estmun,   ~dbo,    ~elevation,
  "DLAGU20",   "RIO CHICALOTE_16",     1005,      364.92,    1865,
  "DLAGU20",   "RIO CHICALOTE_17",     1005,      485.91,    1865,
  "DLAGU20",   "RIO CHICALOTE_18",     1005,      270.32,    1865,
  "DLBAJ155",  "RIO MULEGE_12",        3003,      20.3,      215,
  "DLBAJ155",  "RIO MULEGE_13",        3003,      14.005,    215,
  "DLBAJ157",  "RIO CHICALOTE_17",     3007,      315.34,    1677,
  "DLBAJ157",  "RIO CHICALOTE_18",     3007,      430.21,    1677,
  "DLBAJ159",  "RIO MULEGE_13",        3010,      17.89,     477,
  "DLBAJ159",  "RIO MULEGE_14",        3010,      35.98,     477,
  "DLCAM80",   "RIO CHICALOTE_18",     4012,      157.66,    1003,
  "DLCAM85",   "RIO CHICALOTE_18",     4009,      323.02,    573,
  "DLCAM122",  "RIO MULEGE_12",        4001,      37.81,     300
)

temp <- df %>% 
  group_by(water_body) %>% 
  filter(n() > 1)

df %>% 
  arrange(water_body) %>% 
  left_join(temp, 
            by = "water_body",
            keep = TRUE) %>% 
  filter(elevation.x > elevation.y | is.na(elevation.y))

Output:

# A tibble: 11 x 10
   id_emca.x water_body.x     id_estmun.x dbo.x elevation.x id_emca.y water_body.y     id_estmun.y dbo.y elevation.y
   <chr>     <chr>                  <dbl> <dbl>       <dbl> <chr>     <chr>                  <dbl> <dbl>       <dbl>
 1 DLAGU20   RIO CHICALOTE_16        1005 365.         1865 NA        NA                        NA  NA            NA
 2 DLAGU20   RIO CHICALOTE_17        1005 486.         1865 DLBAJ157  RIO CHICALOTE_17        3007 315.         1677
 3 DLAGU20   RIO CHICALOTE_18        1005 270.         1865 DLBAJ157  RIO CHICALOTE_18        3007 430.         1677
 4 DLAGU20   RIO CHICALOTE_18        1005 270.         1865 DLCAM80   RIO CHICALOTE_18        4012 158.         1003
 5 DLAGU20   RIO CHICALOTE_18        1005 270.         1865 DLCAM85   RIO CHICALOTE_18        4009 323.          573
 6 DLBAJ157  RIO CHICALOTE_18        3007 430.         1677 DLCAM80   RIO CHICALOTE_18        4012 158.         1003
 7 DLBAJ157  RIO CHICALOTE_18        3007 430.         1677 DLCAM85   RIO CHICALOTE_18        4009 323.          573
 8 DLCAM80   RIO CHICALOTE_18        4012 158.         1003 DLCAM85   RIO CHICALOTE_18        4009 323.          573
 9 DLCAM122  RIO MULEGE_12           4001  37.8         300 DLBAJ155  RIO MULEGE_12           3003  20.3         215
10 DLBAJ159  RIO MULEGE_13           3010  17.9         477 DLBAJ155  RIO MULEGE_13           3003  14.0         215
11 DLBAJ159  RIO MULEGE_14           3010  36.0         477 NA        NA                        NA  NA            NA

Upvotes: 2

Related Questions