Ash S
Ash S

Reputation: 119

R left join returning null values

I'd like to merge/full join two data frames on mrn=mrn_G and date difference <= 30 . When I try my code below, I get null/NA values when merging them. Any idea on why this is and how I can properly merge/join them? I have attached the data structures used.

# DATA:
acutedm11 <- structure(list(V1 = 1:29, mdm_link_id = c("sfdiuwe9889w4thehugsdgdf1", 
                                                  "sfdiuwe9889w4thehugsdgdf2", "sfdiuwe9889w4thehugsdgdf3", "sfdiuwe9889w4thehugsdgdf4", 
                                                  "sfdiuwe9889w4thehugsdgdf5", "sfdiuwe9889w4thehugsdgdf6", "sfdiuwe9889w4thehugsdgdf7", 
                                                  "sfdiuwe9889w4thehugsdgdf8", "sfdiuwe9889w4thehugsdgdf9", "sfdiuwe9889w4thehugsdgdf10", 
                                                  "sfdiuwe9889w4thehugsdgdf11", "sfdiuwe9889w4thehugsdgdf12", "sfdiuwe9889w4thehugsdgdf13", 
                                                  "sfdiuwe9889w4thehugsdgdf14", "sfdiuwe9889w4thehugsdgdf15", "sfdiuwe9889w4thehugsdgdf16", 
                                                  "sfdiuwe9889w4thehugsdgdf17", "sfdiuwe9889w4thehugsdgdf18", "sfdiuwe9889w4thehugsdgdf19", 
                                                  "sfdiuwe9889w4thehugsdgdf20", "sfdiuwe9889w4thehugsdgdf21", "sfdiuwe9889w4thehugsdgdf22", 
                                                  "sfdiuwe9889w4thehugsdgdf23", "sfdiuwe9889w4thehugsdgdf24", "sfdiuwe9889w4thehugsdgdf25", 
                                                  "sfdiuwe9889w4thehugsdgdf26", "sfdiuwe9889w4thehugsdgdf27", "sfdiuwe9889w4thehugsdgdf28", 
                                                  "sfdiuwe9889w4thehugsdgdf29"), time0 = c("1/2/2016 22:10", "1/2/2021 0:00", 
                                                                                           "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", "5/8/2021 3:39", 
                                                                                           "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10", "1/2/2021 0:00", 
                                                                                           "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", "5/8/2021 3:39", 
                                                                                           "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10", "1/2/2021 0:00", 
                                                                                           "1/1/2021 1:31", "1/5/2021 3:34", "1/2/2016 22:10", "1/2/2021 0:00", 
                                                                                           "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", "5/8/2021 3:39", 
                                                                                           "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10"), hr_min_preadmit_vital = c(NA, 
                                                                                                                                                                              NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                                                                                                                                                              NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), rr_min_preadmit_vital = c(NA, 
                                                                                                                                                                                                                                                         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                                                                                                                                                                                                                                         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), sbp_min_preadmit_vital = c(NA, 
                                                                                                                                                                                                                                                                                                                                     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                                                                                                                                                                                                                                                                                                                     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `time 00` = c("1/2/2016 22:10", 
                                                                                                                                                                                                                                                                                                                                                                                                    "1/2/2021 0:00", "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", 
                                                                                                                                                                                                                                                                                                                                                                                                    "5/8/2021 3:39", "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10", 
                                                                                                                                                                                                                                                                                                                                                                                                    "1/2/2021 0:00", "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", 
                                                                                                                                                                                                                                                                                                                                                                                                    "5/8/2021 3:39", "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10", 
                                                                                                                                                                                                                                                                                                                                                                                                    "1/2/2021 0:00", "1/1/2021 1:31", "1/5/2021 3:34", "1/2/2016 22:10", 
                                                                                                                                                                                                                                                                                                                                                                                                    "1/2/2021 0:00", "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", 
                                                                                                                                                                                                                                                                                                                                                                                                    "5/8/2021 3:39", "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10"
                                                                                                                                                                                                                                                                                                                                     ), mrn = 100:128, Date = structure(c(1451692800, 1609545600, 
                                                                                                                                                                                                                                                                                                                                                                          1609459200, 1609804800, 1620518400, 1620432000, 1639267200, 1639180800, 
                                                                                                                                                                                                                                                                                                                                                                          1451692800, 1609545600, 1609459200, 1609804800, 1620518400, 1620432000, 
                                                                                                                                                                                                                                                                                                                                                                          1639267200, 1639180800, 1451692800, 1609545600, 1609459200, 1609804800, 
                                                                                                                                                                                                                                                                                                                                                                          1451692800, 1609545600, 1609459200, 1609804800, 1620518400, 1620432000, 
                                                                                                                                                                                                                                                                                                                                                                          1639267200, 1639180800, 1451692800), class = c("POSIXct", "POSIXt"
                                                                                                                                                                                                                                                                                                                                                                          ), tzone = "UTC"), Date_m30 = structure(c(1449100800, 1606953600, 
                                                                                                                                                                                                                                                                                                                                                                                                                    1606867200, 1607212800, 1617926400, 1617840000, 1636675200, 1636588800, 
                                                                                                                                                                                                                                                                                                                                                                                                                    1449100800, 1606953600, 1606867200, 1607212800, 1617926400, 1617840000, 
                                                                                                                                                                                                                                                                                                                                                                                                                    1636675200, 1636588800, 1449100800, 1606953600, 1606867200, 1607212800, 
                                                                                                                                                                                                                                                                                                                                                                                                                    1449100800, 1606953600, 1606867200, 1607212800, 1617926400, 1617840000, 
                                                                                                                                                                                                                                                                                                                                                                                                                    1636675200, 1636588800, 1449100800), class = c("POSIXct", "POSIXt"
                                                                                                                                                                                                                                                                                                                                                                                                                    ), tzone = "UTC"), Date_p30 = structure(c(1454284800, 1612137600, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                              1612051200, 1612396800, 1623110400, 1623024000, 1641859200, 1641772800, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                              1454284800, 1612137600, 1612051200, 1612396800, 1623110400, 1623024000, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                              1641859200, 1641772800, 1454284800, 1612137600, 1612051200, 1612396800, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                              1454284800, 1612137600, 1612051200, 1612396800, 1623110400, 1623024000, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                              1641859200, 1641772800, 1454284800), class = c("POSIXct", "POSIXt"
                                                                                                                                                                                                                                                                                                                                                                                                                                                              ), tzone = "UTC")), row.names = c(NA, -29L), class = c("tbl_df", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        "tbl", "data.frame"))
      
    gwlfullflattened22 <- structure(list(V1_G = 1:20, mrn_G = 100:119, Full.name_G = c("Aintzane Eilert", 
                                                                     "Dervila Muriel", "Hermes Ingólfr", "Yordana Hadley", "Talaat Archembald", 
                                                                     "Erato Hozan", "Abram Eli", "Drahoslava Gottfrid", "Itxaro Csenge", 
                                                                     "Isokrates Linas", "Yejide Calixto", "Bohuslav Fedlimid", "Siva Jerneja", 
                                                                     "Mae Albie", "Rodolfo Slavomír", "Neptune Mahesh", "Madhavi Luka", 
                                                                     "Lexia Lành", "Marnie Urien", "Hovsep Tase"), date_of_birth_G = c("1/1/1990", 
                                                                                                                                       "1/1/1991", "1/1/1992", "1/1/1993", "1/1/1994", "1/1/1995", "1/1/1996", 
                                                                                                                                       "1/1/1997", "1/1/1998", "1/1/1999", "1/1/2000", "1/1/2001", "1/1/2002", 
                                                                                                                                       "1/1/2003", "1/1/2004", "1/1/2005", "1/1/2006", "1/1/2007", "1/1/2008", 
                                                                                                                                       "1/1/2009"), EncounterDate_G = structure(c(16805, 18631, 18648, 
                                                                                                                                                                                  18772, 18766, 18764, 18963, 18962, 16805, 18631, 18648, 18772, 
                                                                                                                                                                                  18766, 18764, 18963, 18962, 16805, 18631, 18648, 18772), class = "Date")), row.names = c(NA, 
                                                                                                                                                                                                                                                                           -20L), class = c("tbl_df", 
                                                                                                                                                                                                                                                                                                                                               "tbl", "data.frame"))

CODE:

library(sqldf)

acutedm3 <- sqldf::sqldf("
    select acutedm11.*, gwlfullflattened2.*
    from acutedm11
       left join gwlfullflattened2 on acutedm11.mrn = gwlfullflattened2.mrn_G
        and gwlfullflattened2.EncounterDate_G between acutedm11.Date_m30 and acutedm11.Date_p30") %>%
  select(-Date_m30, -Date_p30)

Upvotes: 0

Views: 396

Answers (1)

r2evans
r2evans

Reputation: 160447

Welcome back, Ash S! You are trying to compare (using SQL's between) Date-class with POSIXt-class objects. Unfortunately, they are significantly different numerically, so you need to convert one to the other.

(BTW, your sample data uses gwlfullflattened22 but your code uses ...ed2; I'll use the latter.)

The problem:

gwlfullflattened2$EncounterDate_G[1]
# [1] "2016-01-05"
as.numeric(gwlfullflattened2$EncounterDate_G[1])
# [1] 16805

acutedm11$Date_p30[1]
# [1] "2016-02-01 UTC"
as.numeric(acutedm11$Date_p30[1])
# [1] 1454284800

The fix:

gwlfullflattened2$EncounterDate_G <- as.POSIXct(gwlfullflattened2$EncounterDate_G)

This now works (unchanged):

sqldf::sqldf("
  select acutedm11.*, gwlfullflattened2.*
  from acutedm11
     left join gwlfullflattened2 on acutedm11.mrn = gwlfullflattened2.mrn_G
      and gwlfullflattened2.EncounterDate_G between acutedm11.Date_m30 and acutedm11.Date_p30") %>%
  select(-Date_m30, -Date_p30)
#    V1                mdm_link_id           time0 hr_min_preadmit_vital rr_min_preadmit_vital sbp_min_preadmit_vital         time 00 mrn                Date V1_G mrn_G         Full.name_G date_of_birth_G     EncounterDate_G
# 1   1  sfdiuwe9889w4thehugsdgdf1  1/2/2016 22:10                    NA                    NA                     NA  1/2/2016 22:10 100 2016-01-01 19:00:00    1   100     Aintzane Eilert        1/1/1990 2016-01-04 19:00:00
# 2   2  sfdiuwe9889w4thehugsdgdf2   1/2/2021 0:00                    NA                    NA                     NA   1/2/2021 0:00 101 2021-01-01 19:00:00    2   101      Dervila Muriel        1/1/1991 2021-01-03 19:00:00
# 3   3  sfdiuwe9889w4thehugsdgdf3   1/1/2021 1:31                    NA                    NA                     NA   1/1/2021 1:31 102 2020-12-31 19:00:00    3   102      Hermes Ingólfr        1/1/1992 2021-01-20 19:00:00
# 4   4  sfdiuwe9889w4thehugsdgdf4   1/5/2021 3:34                    NA                    NA                     NA   1/5/2021 3:34 103 2021-01-04 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 5   5  sfdiuwe9889w4thehugsdgdf5   5/9/2021 5:33                    NA                    NA                     NA   5/9/2021 5:33 104 2021-05-08 20:00:00    5   104   Talaat Archembald        1/1/1994 2021-05-18 20:00:00
# 6   6  sfdiuwe9889w4thehugsdgdf6   5/8/2021 3:39                    NA                    NA                     NA   5/8/2021 3:39 105 2021-05-07 20:00:00    6   105         Erato Hozan        1/1/1995 2021-05-16 20:00:00
# 7   7  sfdiuwe9889w4thehugsdgdf7 12/12/2021 2:30                    NA                    NA                     NA 12/12/2021 2:30 106 2021-12-11 19:00:00    7   106           Abram Eli        1/1/1996 2021-12-01 19:00:00
# 8   8  sfdiuwe9889w4thehugsdgdf8 12/11/2021 9:21                    NA                    NA                     NA 12/11/2021 9:21 107 2021-12-10 19:00:00    8   107 Drahoslava Gottfrid        1/1/1997 2021-11-30 19:00:00
# 9   9  sfdiuwe9889w4thehugsdgdf9  1/2/2016 22:10                    NA                    NA                     NA  1/2/2016 22:10 108 2016-01-01 19:00:00    9   108       Itxaro Csenge        1/1/1998 2016-01-04 19:00:00
# 10 10 sfdiuwe9889w4thehugsdgdf10   1/2/2021 0:00                    NA                    NA                     NA   1/2/2021 0:00 109 2021-01-01 19:00:00   10   109     Isokrates Linas        1/1/1999 2021-01-03 19:00:00
# 11 11 sfdiuwe9889w4thehugsdgdf11   1/1/2021 1:31                    NA                    NA                     NA   1/1/2021 1:31 110 2020-12-31 19:00:00   11   110      Yejide Calixto        1/1/2000 2021-01-20 19:00:00
# 12 12 sfdiuwe9889w4thehugsdgdf12   1/5/2021 3:34                    NA                    NA                     NA   1/5/2021 3:34 111 2021-01-04 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 13 13 sfdiuwe9889w4thehugsdgdf13   5/9/2021 5:33                    NA                    NA                     NA   5/9/2021 5:33 112 2021-05-08 20:00:00   13   112        Siva Jerneja        1/1/2002 2021-05-18 20:00:00
# 14 14 sfdiuwe9889w4thehugsdgdf14   5/8/2021 3:39                    NA                    NA                     NA   5/8/2021 3:39 113 2021-05-07 20:00:00   14   113           Mae Albie        1/1/2003 2021-05-16 20:00:00
# 15 15 sfdiuwe9889w4thehugsdgdf15 12/12/2021 2:30                    NA                    NA                     NA 12/12/2021 2:30 114 2021-12-11 19:00:00   15   114    Rodolfo Slavomír        1/1/2004 2021-12-01 19:00:00
# 16 16 sfdiuwe9889w4thehugsdgdf16 12/11/2021 9:21                    NA                    NA                     NA 12/11/2021 9:21 115 2021-12-10 19:00:00   16   115      Neptune Mahesh        1/1/2005 2021-11-30 19:00:00
# 17 17 sfdiuwe9889w4thehugsdgdf17  1/2/2016 22:10                    NA                    NA                     NA  1/2/2016 22:10 116 2016-01-01 19:00:00   17   116        Madhavi Luka        1/1/2006 2016-01-04 19:00:00
# 18 18 sfdiuwe9889w4thehugsdgdf18   1/2/2021 0:00                    NA                    NA                     NA   1/2/2021 0:00 117 2021-01-01 19:00:00   18   117          Lexia Lành        1/1/2007 2021-01-03 19:00:00
# 19 19 sfdiuwe9889w4thehugsdgdf19   1/1/2021 1:31                    NA                    NA                     NA   1/1/2021 1:31 118 2020-12-31 19:00:00   19   118        Marnie Urien        1/1/2008 2021-01-20 19:00:00
# 20 20 sfdiuwe9889w4thehugsdgdf20   1/5/2021 3:34                    NA                    NA                     NA   1/5/2021 3:34 119 2021-01-04 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 21 21 sfdiuwe9889w4thehugsdgdf21  1/2/2016 22:10                    NA                    NA                     NA  1/2/2016 22:10 120 2016-01-01 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 22 22 sfdiuwe9889w4thehugsdgdf22   1/2/2021 0:00                    NA                    NA                     NA   1/2/2021 0:00 121 2021-01-01 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 23 23 sfdiuwe9889w4thehugsdgdf23   1/1/2021 1:31                    NA                    NA                     NA   1/1/2021 1:31 122 2020-12-31 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 24 24 sfdiuwe9889w4thehugsdgdf24   1/5/2021 3:34                    NA                    NA                     NA   1/5/2021 3:34 123 2021-01-04 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 25 25 sfdiuwe9889w4thehugsdgdf25   5/9/2021 5:33                    NA                    NA                     NA   5/9/2021 5:33 124 2021-05-08 20:00:00   NA    NA                <NA>            <NA>                <NA>
# 26 26 sfdiuwe9889w4thehugsdgdf26   5/8/2021 3:39                    NA                    NA                     NA   5/8/2021 3:39 125 2021-05-07 20:00:00   NA    NA                <NA>            <NA>                <NA>
# 27 27 sfdiuwe9889w4thehugsdgdf27 12/12/2021 2:30                    NA                    NA                     NA 12/12/2021 2:30 126 2021-12-11 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 28 28 sfdiuwe9889w4thehugsdgdf28 12/11/2021 9:21                    NA                    NA                     NA 12/11/2021 9:21 127 2021-12-10 19:00:00   NA    NA                <NA>            <NA>                <NA>
# 29 29 sfdiuwe9889w4thehugsdgdf29  1/2/2016 22:10                    NA                    NA                     NA  1/2/2016 22:10 128 2016-01-01 19:00:00   NA    NA                <NA>            <NA>                <NA>

Upvotes: 1

Related Questions