Reputation: 119
I'd like to merge/full join two data frames on mrn=mrn_G and date difference <= 30 . When I try my code below, I get null/NA values when merging them. Any idea on why this is and how I can properly merge/join them? I have attached the data structures used.
# DATA:
acutedm11 <- structure(list(V1 = 1:29, mdm_link_id = c("sfdiuwe9889w4thehugsdgdf1",
"sfdiuwe9889w4thehugsdgdf2", "sfdiuwe9889w4thehugsdgdf3", "sfdiuwe9889w4thehugsdgdf4",
"sfdiuwe9889w4thehugsdgdf5", "sfdiuwe9889w4thehugsdgdf6", "sfdiuwe9889w4thehugsdgdf7",
"sfdiuwe9889w4thehugsdgdf8", "sfdiuwe9889w4thehugsdgdf9", "sfdiuwe9889w4thehugsdgdf10",
"sfdiuwe9889w4thehugsdgdf11", "sfdiuwe9889w4thehugsdgdf12", "sfdiuwe9889w4thehugsdgdf13",
"sfdiuwe9889w4thehugsdgdf14", "sfdiuwe9889w4thehugsdgdf15", "sfdiuwe9889w4thehugsdgdf16",
"sfdiuwe9889w4thehugsdgdf17", "sfdiuwe9889w4thehugsdgdf18", "sfdiuwe9889w4thehugsdgdf19",
"sfdiuwe9889w4thehugsdgdf20", "sfdiuwe9889w4thehugsdgdf21", "sfdiuwe9889w4thehugsdgdf22",
"sfdiuwe9889w4thehugsdgdf23", "sfdiuwe9889w4thehugsdgdf24", "sfdiuwe9889w4thehugsdgdf25",
"sfdiuwe9889w4thehugsdgdf26", "sfdiuwe9889w4thehugsdgdf27", "sfdiuwe9889w4thehugsdgdf28",
"sfdiuwe9889w4thehugsdgdf29"), time0 = c("1/2/2016 22:10", "1/2/2021 0:00",
"1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", "5/8/2021 3:39",
"12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10", "1/2/2021 0:00",
"1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", "5/8/2021 3:39",
"12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10", "1/2/2021 0:00",
"1/1/2021 1:31", "1/5/2021 3:34", "1/2/2016 22:10", "1/2/2021 0:00",
"1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33", "5/8/2021 3:39",
"12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10"), hr_min_preadmit_vital = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), rr_min_preadmit_vital = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), sbp_min_preadmit_vital = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `time 00` = c("1/2/2016 22:10",
"1/2/2021 0:00", "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33",
"5/8/2021 3:39", "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10",
"1/2/2021 0:00", "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33",
"5/8/2021 3:39", "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10",
"1/2/2021 0:00", "1/1/2021 1:31", "1/5/2021 3:34", "1/2/2016 22:10",
"1/2/2021 0:00", "1/1/2021 1:31", "1/5/2021 3:34", "5/9/2021 5:33",
"5/8/2021 3:39", "12/12/2021 2:30", "12/11/2021 9:21", "1/2/2016 22:10"
), mrn = 100:128, Date = structure(c(1451692800, 1609545600,
1609459200, 1609804800, 1620518400, 1620432000, 1639267200, 1639180800,
1451692800, 1609545600, 1609459200, 1609804800, 1620518400, 1620432000,
1639267200, 1639180800, 1451692800, 1609545600, 1609459200, 1609804800,
1451692800, 1609545600, 1609459200, 1609804800, 1620518400, 1620432000,
1639267200, 1639180800, 1451692800), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Date_m30 = structure(c(1449100800, 1606953600,
1606867200, 1607212800, 1617926400, 1617840000, 1636675200, 1636588800,
1449100800, 1606953600, 1606867200, 1607212800, 1617926400, 1617840000,
1636675200, 1636588800, 1449100800, 1606953600, 1606867200, 1607212800,
1449100800, 1606953600, 1606867200, 1607212800, 1617926400, 1617840000,
1636675200, 1636588800, 1449100800), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Date_p30 = structure(c(1454284800, 1612137600,
1612051200, 1612396800, 1623110400, 1623024000, 1641859200, 1641772800,
1454284800, 1612137600, 1612051200, 1612396800, 1623110400, 1623024000,
1641859200, 1641772800, 1454284800, 1612137600, 1612051200, 1612396800,
1454284800, 1612137600, 1612051200, 1612396800, 1623110400, 1623024000,
1641859200, 1641772800, 1454284800), class = c("POSIXct", "POSIXt"
), tzone = "UTC")), row.names = c(NA, -29L), class = c("tbl_df",
"tbl", "data.frame"))
gwlfullflattened22 <- structure(list(V1_G = 1:20, mrn_G = 100:119, Full.name_G = c("Aintzane Eilert",
"Dervila Muriel", "Hermes Ingólfr", "Yordana Hadley", "Talaat Archembald",
"Erato Hozan", "Abram Eli", "Drahoslava Gottfrid", "Itxaro Csenge",
"Isokrates Linas", "Yejide Calixto", "Bohuslav Fedlimid", "Siva Jerneja",
"Mae Albie", "Rodolfo Slavomír", "Neptune Mahesh", "Madhavi Luka",
"Lexia Lành", "Marnie Urien", "Hovsep Tase"), date_of_birth_G = c("1/1/1990",
"1/1/1991", "1/1/1992", "1/1/1993", "1/1/1994", "1/1/1995", "1/1/1996",
"1/1/1997", "1/1/1998", "1/1/1999", "1/1/2000", "1/1/2001", "1/1/2002",
"1/1/2003", "1/1/2004", "1/1/2005", "1/1/2006", "1/1/2007", "1/1/2008",
"1/1/2009"), EncounterDate_G = structure(c(16805, 18631, 18648,
18772, 18766, 18764, 18963, 18962, 16805, 18631, 18648, 18772,
18766, 18764, 18963, 18962, 16805, 18631, 18648, 18772), class = "Date")), row.names = c(NA,
-20L), class = c("tbl_df",
"tbl", "data.frame"))
CODE:
library(sqldf)
acutedm3 <- sqldf::sqldf("
select acutedm11.*, gwlfullflattened2.*
from acutedm11
left join gwlfullflattened2 on acutedm11.mrn = gwlfullflattened2.mrn_G
and gwlfullflattened2.EncounterDate_G between acutedm11.Date_m30 and acutedm11.Date_p30") %>%
select(-Date_m30, -Date_p30)
Upvotes: 0
Views: 396
Reputation: 160447
Welcome back, Ash S! You are trying to compare (using SQL's between
) Date
-class with POSIXt
-class objects. Unfortunately, they are significantly different numerically, so you need to convert one to the other.
(BTW, your sample data uses gwlfullflattened22
but your code uses ...ed2
; I'll use the latter.)
The problem:
gwlfullflattened2$EncounterDate_G[1]
# [1] "2016-01-05"
as.numeric(gwlfullflattened2$EncounterDate_G[1])
# [1] 16805
acutedm11$Date_p30[1]
# [1] "2016-02-01 UTC"
as.numeric(acutedm11$Date_p30[1])
# [1] 1454284800
The fix:
gwlfullflattened2$EncounterDate_G <- as.POSIXct(gwlfullflattened2$EncounterDate_G)
This now works (unchanged):
sqldf::sqldf("
select acutedm11.*, gwlfullflattened2.*
from acutedm11
left join gwlfullflattened2 on acutedm11.mrn = gwlfullflattened2.mrn_G
and gwlfullflattened2.EncounterDate_G between acutedm11.Date_m30 and acutedm11.Date_p30") %>%
select(-Date_m30, -Date_p30)
# V1 mdm_link_id time0 hr_min_preadmit_vital rr_min_preadmit_vital sbp_min_preadmit_vital time 00 mrn Date V1_G mrn_G Full.name_G date_of_birth_G EncounterDate_G
# 1 1 sfdiuwe9889w4thehugsdgdf1 1/2/2016 22:10 NA NA NA 1/2/2016 22:10 100 2016-01-01 19:00:00 1 100 Aintzane Eilert 1/1/1990 2016-01-04 19:00:00
# 2 2 sfdiuwe9889w4thehugsdgdf2 1/2/2021 0:00 NA NA NA 1/2/2021 0:00 101 2021-01-01 19:00:00 2 101 Dervila Muriel 1/1/1991 2021-01-03 19:00:00
# 3 3 sfdiuwe9889w4thehugsdgdf3 1/1/2021 1:31 NA NA NA 1/1/2021 1:31 102 2020-12-31 19:00:00 3 102 Hermes Ingólfr 1/1/1992 2021-01-20 19:00:00
# 4 4 sfdiuwe9889w4thehugsdgdf4 1/5/2021 3:34 NA NA NA 1/5/2021 3:34 103 2021-01-04 19:00:00 NA NA <NA> <NA> <NA>
# 5 5 sfdiuwe9889w4thehugsdgdf5 5/9/2021 5:33 NA NA NA 5/9/2021 5:33 104 2021-05-08 20:00:00 5 104 Talaat Archembald 1/1/1994 2021-05-18 20:00:00
# 6 6 sfdiuwe9889w4thehugsdgdf6 5/8/2021 3:39 NA NA NA 5/8/2021 3:39 105 2021-05-07 20:00:00 6 105 Erato Hozan 1/1/1995 2021-05-16 20:00:00
# 7 7 sfdiuwe9889w4thehugsdgdf7 12/12/2021 2:30 NA NA NA 12/12/2021 2:30 106 2021-12-11 19:00:00 7 106 Abram Eli 1/1/1996 2021-12-01 19:00:00
# 8 8 sfdiuwe9889w4thehugsdgdf8 12/11/2021 9:21 NA NA NA 12/11/2021 9:21 107 2021-12-10 19:00:00 8 107 Drahoslava Gottfrid 1/1/1997 2021-11-30 19:00:00
# 9 9 sfdiuwe9889w4thehugsdgdf9 1/2/2016 22:10 NA NA NA 1/2/2016 22:10 108 2016-01-01 19:00:00 9 108 Itxaro Csenge 1/1/1998 2016-01-04 19:00:00
# 10 10 sfdiuwe9889w4thehugsdgdf10 1/2/2021 0:00 NA NA NA 1/2/2021 0:00 109 2021-01-01 19:00:00 10 109 Isokrates Linas 1/1/1999 2021-01-03 19:00:00
# 11 11 sfdiuwe9889w4thehugsdgdf11 1/1/2021 1:31 NA NA NA 1/1/2021 1:31 110 2020-12-31 19:00:00 11 110 Yejide Calixto 1/1/2000 2021-01-20 19:00:00
# 12 12 sfdiuwe9889w4thehugsdgdf12 1/5/2021 3:34 NA NA NA 1/5/2021 3:34 111 2021-01-04 19:00:00 NA NA <NA> <NA> <NA>
# 13 13 sfdiuwe9889w4thehugsdgdf13 5/9/2021 5:33 NA NA NA 5/9/2021 5:33 112 2021-05-08 20:00:00 13 112 Siva Jerneja 1/1/2002 2021-05-18 20:00:00
# 14 14 sfdiuwe9889w4thehugsdgdf14 5/8/2021 3:39 NA NA NA 5/8/2021 3:39 113 2021-05-07 20:00:00 14 113 Mae Albie 1/1/2003 2021-05-16 20:00:00
# 15 15 sfdiuwe9889w4thehugsdgdf15 12/12/2021 2:30 NA NA NA 12/12/2021 2:30 114 2021-12-11 19:00:00 15 114 Rodolfo Slavomír 1/1/2004 2021-12-01 19:00:00
# 16 16 sfdiuwe9889w4thehugsdgdf16 12/11/2021 9:21 NA NA NA 12/11/2021 9:21 115 2021-12-10 19:00:00 16 115 Neptune Mahesh 1/1/2005 2021-11-30 19:00:00
# 17 17 sfdiuwe9889w4thehugsdgdf17 1/2/2016 22:10 NA NA NA 1/2/2016 22:10 116 2016-01-01 19:00:00 17 116 Madhavi Luka 1/1/2006 2016-01-04 19:00:00
# 18 18 sfdiuwe9889w4thehugsdgdf18 1/2/2021 0:00 NA NA NA 1/2/2021 0:00 117 2021-01-01 19:00:00 18 117 Lexia Lành 1/1/2007 2021-01-03 19:00:00
# 19 19 sfdiuwe9889w4thehugsdgdf19 1/1/2021 1:31 NA NA NA 1/1/2021 1:31 118 2020-12-31 19:00:00 19 118 Marnie Urien 1/1/2008 2021-01-20 19:00:00
# 20 20 sfdiuwe9889w4thehugsdgdf20 1/5/2021 3:34 NA NA NA 1/5/2021 3:34 119 2021-01-04 19:00:00 NA NA <NA> <NA> <NA>
# 21 21 sfdiuwe9889w4thehugsdgdf21 1/2/2016 22:10 NA NA NA 1/2/2016 22:10 120 2016-01-01 19:00:00 NA NA <NA> <NA> <NA>
# 22 22 sfdiuwe9889w4thehugsdgdf22 1/2/2021 0:00 NA NA NA 1/2/2021 0:00 121 2021-01-01 19:00:00 NA NA <NA> <NA> <NA>
# 23 23 sfdiuwe9889w4thehugsdgdf23 1/1/2021 1:31 NA NA NA 1/1/2021 1:31 122 2020-12-31 19:00:00 NA NA <NA> <NA> <NA>
# 24 24 sfdiuwe9889w4thehugsdgdf24 1/5/2021 3:34 NA NA NA 1/5/2021 3:34 123 2021-01-04 19:00:00 NA NA <NA> <NA> <NA>
# 25 25 sfdiuwe9889w4thehugsdgdf25 5/9/2021 5:33 NA NA NA 5/9/2021 5:33 124 2021-05-08 20:00:00 NA NA <NA> <NA> <NA>
# 26 26 sfdiuwe9889w4thehugsdgdf26 5/8/2021 3:39 NA NA NA 5/8/2021 3:39 125 2021-05-07 20:00:00 NA NA <NA> <NA> <NA>
# 27 27 sfdiuwe9889w4thehugsdgdf27 12/12/2021 2:30 NA NA NA 12/12/2021 2:30 126 2021-12-11 19:00:00 NA NA <NA> <NA> <NA>
# 28 28 sfdiuwe9889w4thehugsdgdf28 12/11/2021 9:21 NA NA NA 12/11/2021 9:21 127 2021-12-10 19:00:00 NA NA <NA> <NA> <NA>
# 29 29 sfdiuwe9889w4thehugsdgdf29 1/2/2016 22:10 NA NA NA 1/2/2016 22:10 128 2016-01-01 19:00:00 NA NA <NA> <NA> <NA>
Upvotes: 1