Reputation: 21212
I'm finding this surprisingly tricky. I would like to create a trended bar chart showing counts by month on a datetime field.
Some data (`dput()` output of the data frame referred to as `df` below):
structure(list(id = c("p036502", "p039565", "p233823", "p185307",
"p013780", "p063374", "p103285", "p181781", "p114989", "p191410",
"p030093", "p226941", "p225747", "p173555", "p055350", "p060293",
"p199435", "p074849", "p232007", "p037127", "p230221", "p116615",
"p106275", "p070918", "p144291", "p116102", "p070029", "p107356",
"p031939", "p044085", "p081434", "p010969", "p156550", "p048313",
"p139731", "p048657", "p234430", "p000139", "p240738", "p047586",
"p006436", "p211511", "p220000", "p183686", "p045806", "p246581",
"p228935", "p176012", "p229479", "p040831", "p091593", "p151958",
"p123997", "p017606", "p009578", "p245173", "p130035", "p207223",
"p069404", "p186381", "p056807", "p131333", "p164867", "p250930",
"p051288", "p183614", "p141508", "p240150", "p253358", "p055462",
"p100203", "p216335", "p215118", "p123326", "p216043", "p190639",
"p035608", "p194533", "p005330", "p217173", "p243195", "p204162",
"p088899", "p218286", "p024688", "p148544", "p061723", "p062414",
"p150704", "p064902", "p160114", "p054727", "p233164", "p164497",
"p164688", "p226536", "p037347", "p052732", "p254241", "p041330"
), project_submitted_datetime = structure(c(1479480359, 1493222248,
1483311464, 1471016531, 1470474551, 1478340111, 1472603443, 1470230761,
1473806157, 1474742339, 1489079773, 1473103712, 1481203503, 1479248019,
1486043216, 1479492186, 1473029329, 1485382089, 1461751084, 1463859966,
1471643202, 1475223157, 1468792461, 1466087276, 1477175132, 1472419257,
1484329230, 1476213378, 1477941424, 1477399077, 1481059184, 1483645109,
1490904308, 1472674295, 1484011440, 1485462104, 1472038851, 1470601996,
1487232805, 1480968888, 1484044249, 1482944283, 1488401525, 1474729990,
1470225344, 1475788577, 1482791082, 1464016378, 1488307956, 1476242919,
1485345902, 1487935618, 1469702976, 1488271812, 1488463835, 1481566737,
1491245287, 1484054316, 1485724308, 1474716986, 1479078128, 1487459508,
1463426662, 1487970888, 1470611130, 1485939126, 1462535198, 1477697562,
1490993867, 1492697645, 1472712448, 1482328987, 1490272369, 1464291065,
1492533451, 1469229589, 1481555129, 1470575655, 1472198934, 1476538940,
1472054115, 1487785453, 1481195539, 1485205528, 1465419012, 1488271923,
1471527805, 1463708265, 1466429074, 1469121081, 1474506890, 1471146637,
1486636925, 1485953226, 1480664635, 1464224006, 1474481164, 1477663071,
1464264088, 1484220504), class = c("POSIXct", "POSIXt"), tzone = "UTC")), .Names = c("id",
"project_submitted_datetime"), row.names = c(NA, 100L), class = "data.frame")
Tried:
library(tidyverse)
# First attempt: plot the datetime column against a count called `n`.
# NOTE(review): no data frame is passed to ggplot() here, so the aes() call is
# the first argument, and the sample data has no column named `n` -- presumably
# this is what triggers the error quoted below; confirm.
ggplot(aes(x = project_submitted_datetime, y = n)) +
geom_bar(stat = "identity")
Error: Column `y` must be a 1d atomic vector or a list
I also tried `y = Count` and `y = count`, and got similar errors.
I'm aware I could manipulate the data frame using dplyr:
# Second attempt: derive a month label per row, count rows per label, and plot
# one bar per label.
df %>%
# format() turns each datetime into a text label built from "%b-%Y"
# (abbreviated month name, then four-digit year).
mutate(month = format(project_submitted_datetime, "%b-%Y")) %>%
group_by(month) %>%
# Count = number of rows that share the same month label.
summarise(Count = n()) %>%
# NOTE(review): `month` is plain text at this point, so the x axis is
# presumably ordered by label rather than by date.
ggplot(aes(x = month, y = Count)) +
geom_bar(stat = "identity") +
# Rotate the x-axis labels so the month labels do not overlap.
theme(axis.text.x=element_text(angle = -90, hjust = 0))
But then the dates are not in order on the chart.
What options are there to achieve this? I'm seeking a trended bar chart with months along the horizontal axis, where each bar is the count of observations in that month.
Upvotes: 3
Views: 19752
Reputation: 915
The `zoo` package has the `yearmon` class, which converts a date into month-year format. You can use that for your case.
library(tidyverse)
library(zoo)
# Bucket every submission into its year-month (zoo::as.yearmon), tally the rows
# per bucket, and draw one bar per bucket with the x labels rotated.
df %>%
  count(
    yrmn = as.factor(as.yearmon(project_submitted_datetime)),
    name = "cnt"
  ) %>%
  # The parentheses around yrmn are kept so the default axis title is unchanged.
  ggplot(mapping = aes(x = (yrmn), y = cnt)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = -90, hjust = 0))
Without using `zoo`, this is how I was able to get it:
# Count submissions per calendar month without zoo and plot them as bars in
# chronological order.
#
# month() and year() come from lubridate. They are namespace-qualified because
# library(tidyverse) only attaches lubridate from tidyverse 2.0.0 onwards, so
# the bare calls can fail with "could not find function".
df1 <- df %>%
  mutate(
    mn = lubridate::month(project_submitted_datetime, label = TRUE),
    yr = lubridate::year(project_submitted_datetime)
  ) %>%
  group_by(yr, mn) %>%
  summarise(cnt = n()) %>%
  # summarise() peels off only the last grouping level; drop the remaining one
  # so later steps do not run per year by accident.
  ungroup() %>%
  # Make the chronological row order explicit: as_factor() creates its levels
  # in order of first appearance, and that order drives the x axis.
  arrange(yr, mn) %>%
  mutate(yr_mn = as_factor(paste(mn, yr)))

df1 %>%
  ggplot(aes(x = yr_mn, y = cnt)) +
  geom_col() +
  # Rotate the month labels so they stay readable.
  theme(axis.text.x = element_text(angle = -90, hjust = 0))
Upvotes: 8
Reputation: 28339
Solution without changing the original data.frame (only formatting the date column within the ggplot2 `aes()` and applying `geom_bar()` with `stat = "count"`):
library(ggplot2)
# One bar per "YYYY-MM" bucket, with bar height equal to the number of rows in
# that bucket. The month key is computed inside aes(), so df is not modified.
ggplot(
  data = df,
  mapping = aes(x = format(project_submitted_datetime, "%Y-%m"))
) +
  geom_bar() + # geom_bar() counts rows per x value by default
  xlab("Month")
As an alternative you can use regex: sub("(\\d{4}-\\d{2}).*", "\\1", project_submitted_datetime)
Upvotes: 10