Reputation: 1299
I'll apologize in advance that the example below isn't "minimal" but I haven't been able to reproduce this behavior except in the particular instance of my full data set.
I asked this question before here and thought I had found the answer, but the behavior has returned and is vexing me. Basically I have a script that reads daily COVID-19 case numbers and produces maps where the counties are color-coded by the percent of the population infected. The script produces five maps, a national one and one for each of the four official census regions: northeast, midwest, south and west. To cut down on size, the below is just the national and widwest maps.
My original script actually produces animated gifs showing the spread of the disease, but they take a long time to render. The below version just gives a single plot of the most recent data and should run pretty quickly.
I've used a dput in the below script to avoid you having to read a file and geocode locations (I commented out the code) but there is still a large csv file of county populations that has to be read in. I have posted it at pastebin here.
library(urbnmapr) # For map
library(ggplot2) # For map
library(dplyr) # For summarizing
library(tidyr) # For reshaping
library(stringr) # For padding leading zeros
library(ggrepel)
library(ggmap)
library(usmap)
library(gganimate)
library(magrittr)
library(gifski)
library(ggnewscale)
#if using Microsoft R, update checkpoint to get latest packages
#checkpoint("2020-03-01")
#start the clock
ptm <- proc.time()
set.seed(42)
#first run setup tasks
#these can be commented out once the data frames are in place
###################begin first run only################################
#register_google(key = "your google map key here")
#AFMCbases<-read.csv("C:/Users/jerem/Desktop/Work/covid_maps/AFMCbases.csv", header=TRUE, stringsAsFactors = FALSE)
#geocode the place names
# for(i in 1:nrow(AFMCbases)){
# result <- geocode(AFMCbases$Base[i])
# AFMCbases$longitude[i] <- as.numeric(result[1])
# AFMCbases$latitude[i] <- as.numeric(result[2])
# }
#transform the lat/lons to appropriate map projection
# locations<-AFMCbases[,2:3]
# new_locations <- usmap_transform(locations)
# AFMCbases <- cbind(AFMCbases,new_locations[,3:4])
AFMCbases <- structure(list(Base = c("Gunter AFB", "Davis Monthan AFB", "Edwards AFB",
"Robins AFB", "Scott AFB", "Hanscom AFB", "Offutt AFB", "Holloman AFB",
"Kirtland AFB", "Rome, NY", "Wright-Patterson AFB", "Tinker AFB",
"Arnold AFB", "Joint Base San Antonio", "Hill AFB", "Arlington, VA",
"Eglin AFB"), longitude = c(-86.244558, -110.8592578, -117.8912085,
-83.591719, -89.8550095, -71.2743123, -95.9145568, -106.099291,
-106.5338817, -75.4557303, -84.0537448, -97.4158295, -86.0303306,
-98.4523675, -111.9826984, -77.1067698, -86.5533382), latitude = c(32.4083744,
32.1675525, 34.9240314, 32.6400014, 38.5415911, 42.4579955, 41.1242718,
32.8440404, 35.0539718, 43.2128473, 39.8137298, 35.4277, 35.3828616,
29.4512786, 41.10968, 38.8799697, 30.4635583), personnel = c(820L,
605L, 5317L, 14088L, 613L, 2906L, 177L, 699L, 1264L, 822L, 15299L,
16032L, 389L, 3443L, 13679L, 1157L, 8143L), longitude.1 = c(1292311.33608434,
-1025218.68277084, -1622487.54697885, 1533762.39465597, 881032.996527566,
2296805.44531269, 342224.203588191, -572424.401062999, -596268.294707156,
1951897.82199569, 1352969.1130143, 234917.935027853, 1263808.11814915,
151230.865464104, -1000093.31185121, 1953459.66491185, 1292835.72883446
), latitude.1 = c(-1293180.11438144, -1358896.37536667, -946347.80198453,
-1223833.19307048, -664025.051658055, 128586.352781279, -422393.887189579,
-1328730.76688869, -1081540.1543388, 99159.9145180969, -445535.143260001,
-1059563.46211616, -963250.657602903, -1722291.94024992, -359543.815036425,
-408019.910644083, -1511165.09243038)), class = "data.frame", row.names = c(NA,
-17L))
#define census regions
west_region <-c("WA", "OR","CA","NV","ID", "MT", "WY", "UT","CO", "AZ", "NM")
NE_region <- c("ME","NH","VT","MA", "CT", "RI", "NY", "PA", "NJ")
midwest_region <- c("ND", "SD", "NE", "KS", "MN", "IA", "MO", "WI", "IL","MI", "IN","OH")
south_region <- c("TX", "OK", "AR", "LA", "MS", "TN", "KY", "AL", "GA","FL","SC","NC","VA","WV","DC","MD","DE")
west_region_bases <- c("Davis Monthan AFB", "Edwards AFB","Holloman AFB","Kirtland AFB","Hill AFB")
south_region_bases <- c("Robins AFB","Tinker AFB", "Arnold AFB", "Joint Base San Antonio", "Arlington, VA", "Eglin AFB")
mw_region_bases <- c("Scott AFB", "Offutt AFB", "Wright-Patterson AFB")
ne_region_bases <-c("Hanscom AFB", "Rome, NY")
# Get COVID cases, available from:
url <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
COV <- read.csv(url, stringsAsFactors = FALSE)
#sometimes there are encoding issues with the first column name
names(COV)[1] <- "countyFIPS"
Covid <- pivot_longer(COV, cols=starts_with("X"),
values_to="cases",
names_to=c("X","date_infected"),
names_sep="X") %>%
mutate(infected = as.Date(date_infected, format="%m.%d.%y"),
countyFIPS = str_pad(as.character(countyFIPS), 5, pad="0"))
# Obtain map data for counties (to link with covid data) and states (for showing borders)
states_sf <- get_urbn_map(map = "states", sf = TRUE)
counties_sf <- get_urbn_map(map = "counties", sf = TRUE)
# Merge county map with total cases of cov
#this is the line to use for making animations
#pop_counties_cov <- inner_join(counties_sf, Covid, by=c("county_fips"="countyFIPS")) %>%
#to make last frame only
pop_counties_cov <- inner_join(counties_sf, group_by(Covid, countyFIPS) %>%
summarise(cases=max(cases)), by=c("county_fips"="countyFIPS"))
#read the county population data
counties_pop <- read.csv("C:/Users/jerem/Desktop/Work/covid_maps/countyPopulations.csv", header=TRUE, stringsAsFactors = FALSE)
#pad the single digit state FIPS states
counties_pop <- counties_pop %>% mutate(CountyFIPS=str_pad(as.character(CountyFIPS),5,pad="0"))
#merge the population and covid data by FIPS
pop_counties_cov$population <- counties_pop$Population[match(pop_counties_cov$county_fips,counties_pop$CountyFIPS)]
#calculate the infection rate
pop_counties_cov <- pop_counties_cov %>% mutate(infRate = (cases/population)*100)
#counties with 0 infections don't appear in the usafacts data, so didn't get a population
#set them to 0
pop_counties_cov$population[is.na(pop_counties_cov$population)] <- 0
pop_counties_cov$infRate[is.na(pop_counties_cov$infRate)] <- 0
plotDate="April20"
basepath = "C:/your/output/file/path"
naColor = "white"
lowColor = "green"
midColor = "maroon"
highColor = "red"
baseFill = "dodgerblue4"
baseColor = "firebrick"
baseShape = 23
scaleLow = "magenta"
scaleHigh = "blue"
###################end first run only################################
###################National Map################################
p <- pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, round(max(pop_counties_cov$infRate),1)),
na.value = naColor) +
new_scale_fill() +
geom_point(data=AFMCbases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape= baseShape,
color = "black",
size = 3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low = scaleLow, high = scaleHigh,
breaks = c(1, max(AFMCbases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
# a <- p + transition_time(infected) +
# labs(title='Confirmed COVID-19 Cases: {frame_time}',
# subtitle='HQ AFMC/A9A \nData: usafacts.org')
#
# animate(a,
# device="png",
# renderer=file_renderer(paste(basepath,plotDate,"/national",sep=""),
# prefix="gganim_plot",
# overwrite=TRUE)
# )
#
# #make the national animated gif
# png_files <- list.files(paste(basepath,plotDate,"/national",sep=""), pattern = ".*png$", full.names = TRUE)
# st = format(Sys.time(), "%Y-%m-%d")
# gifName <- paste(basepath,plotDate,"/national/COVID-19-Cases-byCounty_",st,".gif",sep="")
# gifski(png_files, gif_file = gifName, width = 1000, height = 750, delay = 0.25, loop=FALSE)
#save the image
st = format(Sys.time(), "%Y-%m-%d")
SaveFilename = paste(basepath,plotDate,"/national/COVID-19-Cases-byCounty_",st,".png",sep="")
if(!dir.exists(paste(basepath,plotDate,"/national",sep=""))) dir.create(paste(basepath,plotDate,"/national",sep=""))
ggsave(filename=SaveFilename, plot = p, dpi = 300)
###################End National Map################################
###################Midwest Map################################
#filter out states
#neCovid <- Covid %>% filter(State %in% NE_region )
mw_pop_counties_cov <- pop_counties_cov %>% filter(state_abbv %in% midwest_region)
mw_states_sf <- states_sf %>% filter(state_abbv %in% midwest_region)
mw_counties_sf <- counties_sf %>% filter(state_abbv %in% midwest_region)
#filter out bases
mwBases <- AFMCbases %>% filter(Base %in% mw_region_bases)
p <- mw_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = mw_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, round(max(mw_pop_counties_cov$infRate),1)),
na.value = naColor) +
new_scale_fill() +
geom_point(data=mwBases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape = baseShape,
color = "black",
size=3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low=scaleLow, high = scaleHigh,
breaks = c(1, max(mwBases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
# a <- p + transition_time(infected) +
# labs(title='Confirmed COVID-19 Cases: {frame_time}',
# subtitle='HQ AFMC/A9A \nData: usafacts.org')
#
# animate(a,
# device="png",
# renderer=file_renderer(paste(basepath,plotDate,"/midwest",sep=""),
# prefix="gganim_plot",
# overwrite=TRUE)
# )
#
# #make the midwest animated gif
# png_files <- list.files(paste(basepath,plotDate,"/midwest",sep=""), pattern = ".*png$", full.names = TRUE)
# st = format(Sys.time(), "%Y-%m-%d")
# gifName <- paste(basepath,plotDate,"/midwest/MW_COVID-19-Cases-byCounty_",st,".gif",sep="")
# gifski(png_files, gif_file = gifName, width = 1000, height = 750, delay = 0.25, loop=FALSE)
st = format(Sys.time(), "%Y-%m-%d")
SaveFilename = paste(basepath,plotDate,"/midwest/MW_COVID-19-Cases-byCounty_",st,".png",sep="")
if(!dir.exists(paste(basepath,plotDate,"/midwest",sep=""))) dir.create(paste(basepath,plotDate,"/midwest",sep=""))
ggsave(filename=SaveFilename, plot = p, dpi = 300)
###################End Midwest Map################################
This is the national map I got this morning when I ran the code
Note that there is a scale for the number of personnel at the bases (the colored diamonds) but there is no scale for the shading of the counties.
Here is the midwest map. You can see from the code that it is the same ggplot just with a dataset that is filtered down to the counties in the midwest region.
Now the scale is there. As mentioned in my previous question I thought that the answer had been something to do with the width of the image being insufficient to accommodate the scale. When I added a newline in the legend text to shorten it that appeared to do the trick. But now the legend is disappearing again, andmaking the output image wider has no effect. Plus, just by eyeball it would appear there is plenty of room in the national plot to accommodate the scale.
Another bizarre aspect is the behavior associated with rounding the breaks. Below is a west map where I applied no rounding to the breaks
scale_fill_gradient(name = "% Pop \nInfected",trans = "log", low=lowColor, high=highColor,
breaks=c(0, max(west_pop_counties_cov$infRate)),
na.value = naColor)
So the scale is back but it goes to 6 decimal places. If I try to round it to 2
scale_fill_gradient(name = "% Pop \nInfected",trans = "log", low=lowColor, high=highColor,
breaks=c(0, round(max(west_pop_counties_cov$infRate),2)),
na.value = naColor)
I get this map
which surely indicates the horizontal space isn't the issue...if it can accommodate 6 decimal places then surely there's room for 2?
I've spent as much time trying to figure out this inconsistent scale behavior as I spent writing the original script. I need these things to be consistent so that I can provide them as work products on a regular interval.
Upvotes: 1
Views: 618
Reputation: 8506
You can add manual labels and add some space to prevent the key to overlap with the title:
pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop\nInfected ", trans = "log2", low=lowColor, high=highColor,
breaks=c(min(pop_counties_cov$infRate[pop_counties_cov$infRate!=0]), max(pop_counties_cov$infRate)),
labels = round(c(min(pop_counties_cov$infRate[pop_counties_cov$infRate!=0]),
max(pop_counties_cov$infRate)), 1),
na.value = naColor) +
new_scale_fill() +
geom_point(data=AFMCbases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape= baseShape,
color = "black",
size = 3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low = scaleLow, high = scaleHigh,
breaks = c(1, max(AFMCbases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
Upvotes: 2
Reputation: 16178
Your issue is due to the presence of 0 values in your variable infRate
which messed up with the log
transformation in your scale_fill_gradient
as observed by this Warning message:
Warning message: Transformation introduced infinite values in discrete y-axis
Here, you can find a way to circuwent that by setting limits
and breaks
argument using non-0 minimal value:
> summary(pop_counties_cov$infRate)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00000 0.01543 0.03993 0.09178 0.09043 2.87425
> summary(pop_counties_cov$infRate[pop_counties_cov$infRate != 0])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.001537 0.023724 0.048169 0.105326 0.102350 2.874253
Setting these new limits (and removing rows with 0 values) give you this:
pop_counties_cov %>%
filter(infRate != 0) %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0.001,2.9),
na.value = naColor, limits = c(0.001,2.9)) +
new_scale_fill() +
geom_point(data=AFMCbases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape= baseShape,
color = "black",
size = 3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low = scaleLow, high = scaleHigh,
breaks = c(1, max(AFMCbases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
Does it answer your question ?
Upvotes: 1