Reputation: 27
I have two different data frames with similar information. One (df2) has a better list of UNIQFIREID and the second (df1) is the data frame that I need to use because it contains the shapefiles I'm working with. I would like to be able to copy and paste the UNIQFIREIDs from df2 into df1 if df1's UNIQFIREID is NA and if multiple columns between the two data frames match, in this case FIRENAME, DISCOVERDATETIME, and TOTALACRES. Then ignores the ones that do not have NA or non-matches. I've put small sample data frames below to work with.
What I've tried so far, like using merge, match, join(s), and ifelse methods just created a bunch of convoluted messes because I'm not sure what I'm doing. I've found a few similar questions on Stack Overflow but they were much more simplistic and I could not find a way to combine methods. Any suggestions would be greatly appreciated.
df1 <- data.frame(FIRENAME = c("Gold", "Tree", "Tank", "Green_1"),
UNIQFIREID = c("1985-AZASF-000285", NA, "1985-AZASF-000287", "1985-AZASF-000288"),
DISCOVERYDATETIME = c("1985-03-28", "1985-03-29", "1985-03-30", "1985-03-31"),
TOTALACRES = c(60, 70, 80, 90))
df1$DISCOVERYDATETIME <- as.POSIXct(df1$DISCOVERYDATETIME)
df2 <- data.frame(FIRENAME = c("Gold", "Tree", "Tank", "Green_1"),
UNIQFIREID = c("1985-AZASF-000285", "1985-AZASF-000286", "1985-AZASF-000287", "1985-AZASF-000288"),
DISCOVERYDATETIME = c("1985-03-28", "1985-03-29", "1985-03-30", "1985-03-31"),
TOTALACRES = c(60, 70, 80, 90))
df2$DISCOVERYDATETIME <- as.POSIXct(df2$DISCOVERYDATETIME)
This is a bunch of junk that I was working with trying to make it work. I wouldn't suggest running any of it but it's more of an example to see what a mess I've made.
SW_Fire_Perimeters_1985test$UNIQFIREID[is.na(SW_Fire_Perimeters_1985test$UNIQFIREID)] <-
SW_Fire_Occurrences_1985[match(paste(SW_Fire_Perimeters_1985test$DISCOVERYDATETIME,
SW_Fire_Perimeters_1985test$FIRENAME,
SW_Fire_Perimeters_1985test$TOTALACRES),
paste(SW_Fire_Occurrences_1985$DISCOVERYDATETIME,
SW_Fire_Occurrences_1985$FIRENAME,
SW_Fire_Occurrences_1985$TOTALACRES)),"UNIQFIREID"]
ifelse(is.na(SW_Fire_Perimeters_1985test$UNIQFIREID),
SW_Fire_Occurrences_1985[match(paste(SW_Fire_Perimeters_1985test$DISCOVERYDATETIME,
SW_Fire_Perimeters_1985test$FIRENAME,
SW_Fire_Perimeters_1985test$TOTALACRES),
paste(SW_Fire_Occurrences_1985$DISCOVERYDATETIME, SW_Fire_Occurrences_1985$FIRENAME,
SW_Fire_Occurrences_1985$TOTALACRES)),"UNIQFIREID"])
SW_Fire_Perimeters_1985test$UNIQFIREID2 <-
SW_Fire_Occurrences_1985[match(paste(SW_Fire_Perimeters_1985test$DISCOVERYDATETIME,
SW_Fire_Perimeters_1985test$FIRENAME,
SW_Fire_Perimeters_1985test$TOTALACRES),
paste(SW_Fire_Occurrences_1985$DISCOVERYDATETIME, SW_Fire_Occurrences_1985$FIRENAME,
SW_Fire_Occurrences_1985$TOTALACRES)),"UNIQFIREID"]
# Merges two dataframes into fire perimeters dataframe based on "DISCOVERYDATETIME", "FIRENAME", "TOTALACRES"
# https://docs.tibco.com/pub/enterprise-runtime-for-R/4.0.0/doc/html/Language_Reference/base/merge.html
SW_Fire_Merge_1985 <- merge(SW_Fire_Perimeters_1985, SW_Fire_Occurrences_1985, on = c( "DISCOVERYDATETIME", "FIRENAME", "TOTALACRES"), nomatch = 0L)
SW_Fire_join_1985 <- full_join(SW_Fire_Perimeters_1985,SW_Fire_Occurrences_1985,
copy = TRUE,
# by.x = c("DISCOVERYDATETIME", "FIRENAME", "TOTALACRES"),
# by.y = c("DISCOVERYDATETIME", "FIRENAME", "TOTALACRES"),
# all.x = TRUE),
# by.y = c("UNIQFIREID"))
if(is.na(SW_Fire_Merge_1985$UNIQFIREID.x, paste(SW_Fire_Merge_1985$UNIQFIREID.y)))
If you would like to see the full dataset (14 Mb zipped) and where I'm at you can use the following code. Just replace "Directory..." with where you would like to download that data and open files. It selects down to 1985 for a smaller set to work with
# Insert path to Geospatial data needed, and desired download location
FireH <- download.file("http://www.fs.fed.us/r3/gis/gisdata/Fire_History.zip", "Directory.../Fire_History.zip")
# Insert File path of downloaded zip file, overwrite is currently enabled for coding purposes, for exdir insert desired file path for geodatabase.
FireH2 <- unzip("Directory.../Fire_History.zip", overwrite = TRUE, exdir = "Directory...")
# Assigning Geodatabase a name
FireHGDB <- "Direcrory.../Fire_History.gdb"
# Brings Fire perimeters and occurrences out of GDB
SW_Fire_Perimeters <- st_read(FireHGDB, "FirePerimeter") #require_geomType="wkbPolygon")
SW_Fire_Occurrences <- st_read(FireHGDB, "FireOccurrence") #require_geomType="wkbPolygon")
# Removes invalid naming characters
# https://www.journaldev.com/43690/sub-and-gsub-function-r#the-gsub-function-in-r
SW_Fire_Perimeters$FIRENAME <- gsub(" ", "_", SW_Fire_Perimeters$FIRENAME)
SW_Fire_Occurrences$FIRENAME <- gsub(" ", "_", SW_Fire_Occurrences$FIRENAME)
SW_Fire_Perimeters$FIRENAME <- gsub("#", "_", SW_Fire_Perimeters$FIRENAME)
SW_Fire_Occurrences$FIRENAME <- gsub("#", "_", SW_Fire_Occurrences$FIRENAME)
SW_Fire_Perimeters$FIRENAME <- gsub("\\.", "", SW_Fire_Perimeters$FIRENAME)
SW_Fire_Occurrences$FIRENAME <- gsub("\\.", "", SW_Fire_Occurrences$FIRENAME)
# Removes NAs from fire occurrences UNIQFIREID column
SW_Fire_Occurrences <- SW_Fire_Occurrences[!is.na(SW_Fire_Occurrences$UNIQFIREID),]
# Removes incomplete UNIQFIREIDs for fire occurrences
SW_Fire_Occurrences <- subset(SW_Fire_Occurrences, nchar(as.character(UNIQFIREID)) == 17)
# Removes geometries from fire occurrences so they can be merged to perimeters (Error with two sf objects when merged)
SW_Fire_Occurrences <- st_drop_geometry(SW_Fire_Occurrences)
# Filters tables to only contain FIREYEARs 1985 - 2019
SW_Fire_Perimeters_1985_2019 <- filter(SW_Fire_Perimeters, FIREYEAR >= 1985, FIREYEAR <= 2019)
SW_Fire_Occurrences_1985_2019 <- filter(SW_Fire_Occurrences, FIREYEAR >= 1985, FIREYEAR <= 2019)
# Make a new row (UniqLength) with the string length of UNIQFIREID (it should be 17 characters long)
SW_Fire_Perimeters_1985_2019$UniqLength <- str_count(SW_Fire_Perimeters_1985_2019$UNIQFIREID)
# Set NAs is UniqLength to 0
# https://stackoverflow.com/questions/7279089/replace-all-na-with-false-in-selected-columns-in-r
SW_Fire_Perimeters_1985_2019[c("UniqLength")][is.na(SW_Fire_Perimeters_1985_2019[c("UniqLength")])] <- FALSE
# Replace any UNIQFIREIDs with NA when UNIQFIREID (UniqLength) not equal to 17
# https://stackoverflow.com/questions/56681308/converting-values-to-na-with-conditions-in-r
SW_Fire_Perimeters_1985_2019[SW_Fire_Perimeters_1985_2019$UniqLength !=17,c("UNIQFIREID")] <- NA
# Filter to FIREYEAR 1985 only
SW_Fire_Perimeters_1985 <- filter(SW_Fire_Perimeters_1985_2019, FIREYEAR == 1985)
SW_Fire_Occurrences_1985 <- filter(SW_Fire_Occurrences_1985_2019, FIREYEAR == 1985)
Upvotes: 1
Views: 1059
Reputation: 667
If I'm understanding correctly, you can...
by=
being all columns but "UNIQFIREID"
df1$UNIQFIREID
in <RESULT>$UNIQFIREID.x
df2$UNIQFIREID
in <RESULT>$UNIQFIREID.y
"UNIQFIREID"
column using ifelse()
(or its relatives) to pull values from <RESULT>$UNIQFIREID.x
and <RESULT>$UNIQFIREID.y
as desired<RESULT>$UNIQFIREID
is.na()
.Your data:
df1 <- data.frame(FIRENAME = c("Gold", "Tree", "Tank", "Green_1"),
UNIQFIREID = c("1985-AZASF-000285", NA, "1985-AZASF-000287", "1985-AZASF-000288"),
DISCOVERYDATETIME = c("1985-03-28", "1985-03-29", "1985-03-30", "1985-03-31"),
TOTALACRES = c(60, 70, 80, 90))
df1$DISCOVERYDATETIME <- as.POSIXct(df1$DISCOVERYDATETIME)
df2 <- data.frame(FIRENAME = c("Gold", "Tree", "Tank", "Green_1"),
UNIQFIREID = c("1985-AZASF-000285", "1985-AZASF-000286", "1985-AZASF-000287", "1985-AZASF-000288"),
DISCOVERYDATETIME = c("1985-03-28", "1985-03-29", "1985-03-30", "1985-03-31"),
TOTALACRES = c(60, 70, 80, 90))
df2$DISCOVERYDATETIME <- as.POSIXct(df2$DISCOVERYDATETIME)
Using {base}
:
combo_base <- merge(df1, df2, all = TRUE,
by = c("FIRENAME", "DISCOVERYDATETIME", "TOTALACRES"))
combo_base$UNIQFIREID <- ifelse(is.na(combo_base$UNIQFIREID.x),
combo_base$UNIQFIREID.y, combo_base$UNIQFIREID.x)
combo_base <- combo_base[!is.na(combo_base$UNIQFIREID),
!names(combo_base) %in% c("UNIQFIREID.x", "UNIQFIREID.y"),
drop = FALSE]
combo_base
#> FIRENAME DISCOVERYDATETIME TOTALACRES UNIQFIREID
#> 1 Gold 1985-03-28 60 1985-AZASF-000285
#> 2 Green_1 1985-03-31 90 1985-AZASF-000288
#> 3 Tank 1985-03-30 80 1985-AZASF-000287
#> 4 Tree 1985-03-29 70 1985-AZASF-000286
Using {data.table}
:
library(data.table)
combo_datatable <- merge(
as.data.table(df1), df2,
by = c("FIRENAME", "DISCOVERYDATETIME", "TOTALACRES"),
all = TRUE
)[, UNIQFIREID := fifelse(is.na(UNIQFIREID.x), UNIQFIREID.y, UNIQFIREID.x)
][!is.na(UNIQFIREID), !c("UNIQFIREID.x", "UNIQFIREID.y")
]
combo_datatable
#> FIRENAME DISCOVERYDATETIME TOTALACRES UNIQFIREID
#> 1: Gold 1985-03-28 60 1985-AZASF-000285
#> 2: Green_1 1985-03-31 90 1985-AZASF-000288
#> 3: Tank 1985-03-30 80 1985-AZASF-000287
#> 4: Tree 1985-03-29 70 1985-AZASF-000286
Using {dplyr}
:
library(dplyr, warn.conflicts = FALSE)
combo_dplyr <- df1 %>%
full_join(df2, by = c("FIRENAME", "DISCOVERYDATETIME", "TOTALACRES")) %>%
mutate(UNIQFIREID = if_else(is.na(UNIQFIREID.x), UNIQFIREID.y, UNIQFIREID.x)) %>%
select(-UNIQFIREID.x, -UNIQFIREID.y) %>%
filter(!is.na(UNIQFIREID))
combo_dplyr
#> FIRENAME DISCOVERYDATETIME TOTALACRES UNIQFIREID
#> 1 Gold 1985-03-28 60 1985-AZASF-000285
#> 2 Tree 1985-03-29 70 1985-AZASF-000286
#> 3 Tank 1985-03-30 80 1985-AZASF-000287
#> 4 Green_1 1985-03-31 90 1985-AZASF-000288
Sanity check:
identical(combo_base, as.data.frame(combo_datatable))
#> [1] TRUE
identical(combo_base, combo_dplyr %>% arrange(FIRENAME))
#> [1] TRUE
Upvotes: 3