Reputation: 1299
I have an R script that downloads COVID testing data and manipulates it to produce weekly totals, for selected states, of the percent of the population tested and the rate of positive tests. For reproducibility, the following script uses a large dput() in place of the download and manipulation steps:
# Reproducible stand-in (dput() output) for the downloaded and aggregated data.
# stateWeekly is a 63-row data.frame: 9 state labels x 7 weekly timestamps
# (POSIXct, tzone "UTC", 604800 s apart, 2020-04-05 through 2020-05-17).
# Columns:
#   Date                     - weekly timestamp; mapped to the x axis below
#   positiveIncrease         - integer count of new positive tests
#   totalTestResultsIncrease - integer count of new test results
#   posRate                  - matches 100 * positiveIncrease /
#                              totalTestResultsIncrease (e.g. 306 / 5822 -> 5.2559)
#   dailyTest                - plotted below (times 100) as "Percent of Pop Tested"
#   state                    - facet label: state abbreviation plus a percentage
stateWeekly <- structure(list(Date = structure(c(1586044800, 1586649600, 1587254400,
1587859200, 1588464000, 1589068800, 1589673600, 1586044800, 1586649600,
1587254400, 1587859200, 1588464000, 1589068800, 1589673600, 1586044800,
1586649600, 1587254400, 1587859200, 1588464000, 1589068800, 1589673600,
1586044800, 1586649600, 1587254400, 1587859200, 1588464000, 1589068800,
1589673600, 1586044800, 1586649600, 1587254400, 1587859200, 1588464000,
1589068800, 1589673600, 1586044800, 1586649600, 1587254400, 1587859200,
1588464000, 1589068800, 1589673600, 1586044800, 1586649600, 1587254400,
1587859200, 1588464000, 1589068800, 1589673600, 1586044800, 1586649600,
1587254400, 1587859200, 1588464000, 1589068800, 1589673600, 1586044800,
1586649600, 1587254400, 1587859200, 1588464000, 1589068800, 1589673600
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), positiveIncrease = c(306L,
631L, 624L, 862L, 1072L, 1046L, 1079L, 2096L, 1675L, 1762L, 2597L,
3510L, 1808L, 2403L, 7730L, 8356L, 8539L, 11831L, 11452L, 13064L,
12159L, 3996L, 5805L, 5849L, 5100L, 5201L, 4839L, 4260L, 823L,
718L, 629L, 654L, 719L, 617L, 721L, 886L, 698L, 766L, 1054L,
1052L, 1076L, 987L, 2390L, 2561L, 4998L, 4361L, 3951L, 4167L,
3842L, 7905L, 7204L, 6641L, 5532L, 4550L, 4518L, 4992L, 7546L,
12979L, 14439L, 15118L, 13149L, 9706L, 8217L), totalTestResultsIncrease = c(5822L,
11864L, 7940L, 17103L, 21209L, 22109L, 31531L, 24726L, 25377L,
26421L, 51123L, 56386L, 57262L, 63411L, 90276L, 73795L, 90572L,
245184L, 189667L, 239913L, 279579L, 15268L, 26621L, 25480L, 43290L,
51873L, 68451L, 107628L, 1019L, 20107L, 12805L, 17579L, 10632L,
32192L, 27504L, 16899L, 13342L, 19321L, 32147L, 26400L, 24408L,
24242L, 23091L, 19487L, 24056L, 29087L, 33780L, 54696L, 59600L,
70088L, 69349L, 73326L, 88534L, 83576L, 110759L, 112844L, 32877L,
44877L, 52271L, 66985L, 78546L, 73743L, 72437L), posRate = c(5.25592579869461,
5.3186109238031, 7.85894206549118, 5.04005145296147, 5.05445801310764,
4.73110497987245, 3.42202911420507, 8.47690689961983, 6.60046498798124,
6.66893758752507, 5.07990532636974, 6.22494945553861, 3.15741678600119,
3.78956332497516, 8.56263015640923, 11.3232603834948, 9.42785849931546,
4.82535565126599, 6.03795072416393, 5.44530725721407, 4.34903909091885,
26.1723866911187, 21.8060929341497, 22.9552590266876, 11.7810117810118,
10.0264106567964, 7.0692904413376, 3.95807782361467, 80.765456329735,
3.5708957079624, 4.91214369386958, 3.72034814267023, 6.76260346124906,
1.91662524850895, 2.62143688190809, 5.24291378188058, 5.23159946035077,
3.96459810568811, 3.27868852459016, 3.98484848484848, 4.40839069157653,
4.07144625030938, 10.3503529513663, 13.1420947298199, 20.7765214499501,
14.9929521779489, 11.696269982238, 7.61847301448003, 6.44630872483221,
11.2786782330784, 10.3880373184905, 9.05681477238633, 6.24844692434545,
5.44414664496985, 4.07912675267924, 4.42380631668498, 22.9522158347781,
28.9212737036789, 27.6233475540931, 22.5692319175935, 16.740508746467,
13.1619272337713, 11.3436503444372), dailyTest = c(0.138828678924223,
0.282903374571794, 0.189333512651723, 0.407830109179146, 0.505739857661259,
0.527200835165862, 0.751873424108499, 0.181032142393795, 0.185798458202998,
0.193442135168909, 0.374298560850844, 0.412831771455816, 0.419245431438707,
0.464265517323178, 0.114238067546845, 0.0933824958418563, 0.114612635183801,
0.310263484795578, 0.240010540535773, 0.303593396909103, 0.353787991123658,
0.0719006862588031, 0.125364695369112, 0.11999145178637, 0.203863027779905,
0.244282440287064, 0.32235223179862, 0.506846152781141, 0.0128760104635591,
0.254070600972309, 0.161803055923331, 0.222126975406188, 0.134345184738529,
0.406775788854657, 0.347538559165584, 0.263556166362753, 0.208081328576357,
0.30132958697525, 0.501363399021447, 0.411733403868672, 0.380666247031309,
0.378077317294862, 0.0987715050773798, 0.0833554336946386, 0.102899282237298,
0.124419330829576, 0.144493588043562, 0.233961553926307, 0.254938361379405,
0.163164303576303, 0.161443917485348, 0.170702341685253, 0.206106444082074,
0.194564259726246, 0.257846066370959, 0.262699929699297, 0.238498263983345,
0.325549368640101, 0.379187357626105, 0.48592652045273, 0.569793005530792,
0.534950800891926, 0.525476739001782), state = c("NM 3.28%",
"NM 3.28%", "NM 3.28%", "NM 3.28%", "NM 3.28%", "NM 3.28%", "NM 3.28%",
"TN 2.53%", "TN 2.53%", "TN 2.53%", "TN 2.53%", "TN 2.53%", "TN 2.53%",
"TN 2.53%", "CA 1.69%", "CA 1.69%", "CA 1.69%", "CA 1.69%", "CA 1.69%",
"CA 1.69%", "CA 1.69%", "GA 1.78%", "GA 1.78%", "GA 1.78%", "GA 1.78%",
"GA 1.78%", "GA 1.78%", "GA 1.78%", "OK 1.76%", "OK 1.76%", "OK 1.76%",
"OK 1.76%", "OK 1.76%", "OK 1.76%", "OK 1.76%", "UT 2.71%", "UT 2.71%",
"UT 2.71%", "UT 2.71%", "UT 2.71%", "UT 2.71%", "UT 2.71%", "OH 1.19%",
"OH 1.19%", "OH 1.19%", "OH 1.19%", "OH 1.19%", "OH 1.19%", "OH 1.19%",
"FL 1.67%", "FL 1.67%", "FL 1.67%", "FL 1.67%", "FL 1.67%", "FL 1.67%",
"FL 1.67%", "MA 3.46%", "MA 3.46%", "MA 3.46%", "MA 3.46%", "MA 3.46%",
"MA 3.46%", "MA 3.46%")), row.names = c(NA, -63L), class = "data.frame")
# Weekly COVID-19 testing chart, one facet per state label.
# Columns (dark blue outline, right axis) show the percent of the population
# tested; the red line (left axis) shows the test positivity rate.
library(ggplot2) # attached explicitly so the snippet runs in a fresh session

# ggplot() is called directly on the data frame, so no pipe package is needed.
# Bar heights are multiplied by 100 so they share the left axis with posRate;
# sec_axis(~ . / 100) undoes that scaling for the right-hand axis labels.
g <- ggplot(stateWeekly, aes(x = Date)) +
  geom_col(
    aes(y = 100 * dailyTest),
    size = 0.75, color = "darkblue", fill = "white"
  ) +
  geom_line(aes(y = posRate), size = 0.75, color = "red") +
  scale_y_continuous(
    name = "Test Positivity Rate",
    sec.axis = sec_axis(~ . / 100, name = "Percent of Pop Tested")
  ) +
  labs(
    x = "Week Ending",
    title = "COVID-19 Testing",
    # %Y prints the four-digit year; the previous %y printed only "20".
    subtitle = paste(
      "Data as of",
      format(max(stateWeekly$Date), "%A, %B %e, %Y")
    ),
    caption = "HQ AFMC/A9A \n Data: The COVID Tracking Project (https://covidtracking.com)"
  ) +
  theme(
    plot.title = element_text(size = rel(1), face = "bold"),
    plot.subtitle = element_text(size = rel(0.7)),
    plot.caption = element_text(size = rel(1)),
    # Axis text colours echo the layer each axis belongs to.
    axis.text.y = element_text(color = "red"),
    axis.title.y = element_text(color = "red"),
    axis.text.y.right = element_text(color = "blue"),
    axis.title.y.right = element_text(color = "blue")
  ) +
  # Zooms the visible y range to 0-75 without dropping rows, so any value
  # above 75 simply runs off the top of its panel.
  coord_cartesian(ylim = c(0, 75)) +
  facet_wrap(~ state)
print(g)
which produces this chart
Hopefully it's clear that the line plot is the test positivity rate and the columns are the percent of the population tested.
How can I align the dates on the X-axis with the centers of the corresponding columns? You'll notice that right now they seem somewhat irregular.
Upvotes: 0
Views: 58
Reputation: 951
What a nice plot! Here is a solution.
1) Use as.Date()
to tell R that this column contains dates.
2) Use the scale_x_date()
function to fine-tune the x axis.
3) Adjust the x axis labels with axis.text.x = element_text(angle = 45, hjust = 1)
to avoid overlapping.
Code:
# Same chart as in the question, but with the x axis treated as a Date scale
# whose tick marks sit exactly on the week-ending dates, i.e. under the centre
# of each column. Assumes ggplot2 is attached and stateWeekly is defined as in
# the question; ggplot() is called directly, so no pipe package is needed.
ggplot(stateWeekly, aes(x = as.Date(Date))) +
  geom_col(
    aes(y = 100 * dailyTest),
    size = 0.75, color = "darkblue", fill = "white"
  ) +
  geom_line(aes(y = posRate), size = 0.75, color = "red") +
  scale_y_continuous(
    name = "Test Positivity Rate",
    sec.axis = sec_axis(~ . / 100, name = "Percent of Pop Tested")
  ) +
  # Breaks are taken from the data rather than date_breaks = "1 week":
  # width-based weekly breaks snap to calendar week starts, which need not
  # coincide with the week-ending dates in the data, leaving the labels
  # offset from the bars.
  scale_x_date(breaks = unique(as.Date(stateWeekly$Date))) +
  labs(
    x = "Week Ending",
    title = "COVID-19 Testing",
    # %Y prints the four-digit year; the previous %y printed only "20".
    subtitle = paste(
      "Data as of",
      format(max(stateWeekly$Date), "%A, %B %e, %Y")
    ),
    caption = "HQ AFMC/A9A \n Data: The COVID Tracking Project (https://covidtracking.com)"
  ) +
  theme(
    plot.title = element_text(size = rel(1), face = "bold"),
    plot.subtitle = element_text(size = rel(0.7)),
    plot.caption = element_text(size = rel(1)),
    axis.text.y = element_text(color = "red"),
    axis.title.y = element_text(color = "red"),
    axis.text.y.right = element_text(color = "blue"),
    axis.title.y.right = element_text(color = "blue"),
    # Rotated, right-justified labels keep the weekly dates from overlapping.
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # Zooms the visible y range to 0-75 without dropping rows.
  coord_cartesian(ylim = c(0, 75)) +
  facet_wrap(~ state)
Upvotes: 1