Reputation: 1299
I have the following R script which produces a static chart
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
library(gtable)
library(cowplot)
library(ggrepel)
library(tidyquant)
library(gganimate)
library(gifski)
################################## BASIC SETUP TASKS ###############################
plotDate="May19"
basepath = "C:/Users/your output path here/"
#Countries with testing data reported
#going to leave out Spain because their data is so sporadic
testCountries <- c("ARG", "AUS", "AUT","BEL","BGD","BHR","BGR","BOL","CAN","CHE",
"CHL","COL","CRI","CUB","CZE","DEU","DNK","ECU","EST",
"ETH","FIN","FRA","GBR","GHA","GRC","HKG","HRV","HUN","IDN",
"IND","IRL","IRN","ISL","ISR","ITA","JPN","KAZ","KEN","KOR",
"LTU","LUX","LVA","MAR","MEX","MMR","MYS","NGA","NLD","NOR",
"NPL","NZL","PAK","PAN","PER","PHL","POL","PRT","PRY","ROU",
"RUS","RWA","SEN","SGP","SLV","SRB","SVK","SVN","SWE","THA",
"TUN","TUR","TWN","UGA","URY","USA","VNM","ZAF")
# Get population data
url <- "https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv"
pops <- read.csv(url, stringsAsFactors = FALSE, header = TRUE)
pops <- pops %>% filter(Time==2020) %>% select(Location, PopTotal) %>% distinct()
pops$PopTotal <- pops$PopTotal * 1000
names(pops)[1] <- "location"
pops$location <- pops$location %>% str_replace_all("United States of America", "United States")
pops$location <- pops$location %>% str_replace_all("Republic of Korea", "South Korea")
# Get COVID cases, available from:
url <- "https://covid.ourworldindata.org/data/owid-covid-data.csv"
COVtests <- read.csv(url, stringsAsFactors = FALSE, header = TRUE)
COVtests <- COVtests %>% filter(iso_code %in% testCountries) %>%
mutate(Date=as.Date(as.character(date), format="%Y-%m-%d"))
# merge in the population data
COVtests <- COVtests %>% left_join(pops,by='location')
#calculate test rates and test positivity rate
COVtests <- COVtests %>% mutate(PosRate = (total_cases/total_tests) * 100,
TestRate = (total_tests/PopTotal) * 100)
COVtests <- COVtests %>% group_by(location) %>% filter(total_cases > 0) %>%
mutate(maxTestRate = max(TestRate, na.rm=TRUE)) %>% ungroup()
#keeping temporary data frames while testing
plotCOV <- COVtests %>% filter(PopTotal > 20000000, maxTestRate > 0.75) %>%
select(TestRate, PosRate, location, Date)
#France has a 38% positive rate on day 1 that drops precipitously...removing to clean up
plotCOV[which(plotCOV$location=="France" & plotCOV$Date=="2020-02-24"),2] <- NA
plotCOV2 <- plotCOV[complete.cases(plotCOV),]
#endpoint layer
Endpoints <- plotCOV2 %>%
group_by(location) %>%
filter(TestRate == max(TestRate, na.rm=TRUE)) %>%
ungroup()
#if there's a stall you get doubles
Endpoints <- Endpoints %>% group_by(location) %>% filter(Date==max(Date)) %>% ungroup()
g <- plotCOV2 %>%
ggplot(aes(x=TestRate, y=PosRate)) +
geom_line(aes(color=location), show.legend = FALSE) +
labs(x = "Cumulative % of Population Tested",
y = "% of Tests Returning Positive",
title = "Test Positivity Rate (pop > 2,000,000 & > 0.75% pop tested)",
subtitle = paste("Data as of", format(max(plotCOV2$Date), "%A, %B %e, %Y")),
caption = "Data: Our World in Data (https://covid.ourworldindata.org)\n
UN Population Data (https://population.un.org)") +
theme(plot.title = element_text(size = rel(1), face = "bold"),
plot.subtitle = element_text(size = rel(0.7)),
plot.caption = element_text(size = rel(1), lineheight = 0.5))
#add the endpoints and labels - kept out of g so I can animate it later
h <- g + geom_label_repel(data=Endpoints, aes(label=location), show.legend = FALSE) +
geom_point(data = Endpoints,size = 1.5,shape = 21,
aes(color = location,fill = location), show.legend = FALSE)
print(h)
This runs fine and produces the following
What I'd like to do now is to animate the drawing of the lines to convey the temporal information about the data. So I add
#I left the endpoint labels out of g - now add geom_text labels
g <- g + geom_text(aes(label = location))
a <- g + transition_reveal(Date)
animate(a,
renderer= gifski_renderer(file = paste(basepath,plotDate,"/animated/positivity.gif",sep=""), loop = FALSE, width = 2000, height = 1500))
This produces the below gif, which is close but the label behavior is a mess and I don't understand it
I don't know why there are duplicates marching out ahead of the lines, nor why there is only one label left at the end of the animation. Yesterday it was the United States, today it's South Africa.
Other minor issues, I don't know why the gifski output is so "narrow"...I've tried various width and height parameters to try to make it look more like the static plot and it always comes out the same. Also, I'd like to know how to reference the dates so that I can have the date associated with each transition in the title. I thought I could do that with labs(title='Test Positivity Rate: {frame_time}')
but that throws an error about frame_time. Main issue is getting the labels to behave properly though.
Upvotes: 4
Views: 833
Reputation: 13843
Little "life tip" for using ggplot2
: if things look like they're not connecting properly, you may need to use the group=
aesthetic. That's what's going on here. ggplot2
did not know what basis to use for connecting the labels, so likely it was trying to connect them all following a different grouping than the one intended. In this case, the fix is to let ggplot2
know that you want the points to maintain a grouping based on location
. As such, add the group=
aesthetic here:
g <- g + geom_text(aes(label=location, group=location))
And it fixes your issue:
Upvotes: 5