Reputation: 521
I want to plot a timeseries with 2 different dataframes. The problem is that there are big time-gaps in the data. I would like to have the gaps removed and to get 4 small plots instead of 1 big one with huge gaps. I know that facet_wrap could do the job, but I dont get it working in my Plot. I cant just cut out the gaps manually, I need something automatic.
library(ggplot2)
theme_set(theme_gray())
p1 = ggplot() +
geom_bar(data = Test1, aes(x = Test1$Date, y = Test1$G), stat="identity", color = "red") +
geom_bar(data = Test2, aes(x = Test2$Date, y = Test2$G), stat="identity", color = "grey") +
scale_x_date(date_labels ="%m/%y",date_breaks = "1 month")
p1
Test1
:
structure(list(Date = structure(c(17186, 17187, 17188, 17189,
17436, 17437, 17438, 17453, 17454, 17455, 17456, 17457, 17458,
17541, 17542, 17543, 17569, 17570, 17571, 17572), class = "Date"),
G = c(1L, 8L, 2L, 13L, 3L, 8L, 4L, 5L, 3L, 4L, 9L, 7L, 11L,
7L, 6L, 7L, 4L, 7L, 5L, 4L)), row.names = c("20", "21", "22",
"23", "270", "271", "272", "287", "288", "289", "290", "291",
"292", "375", "376", "377", "403", "404", "405", "406"), class = "data.frame", .Names = c("Date",
"G"))
Test2
:
structure(list(Date = structure(c(17167, 17168, 17169, 17170,
17171, 17172, 17173, 17174, 17175, 17176, 17177, 17178, 17179,
17180, 17181, 17182, 17183, 17184, 17185, 17190, 17191, 17192,
17193, 17194, 17195, 17196, 17197, 17198, 17199, 17200, 17201,
17202, 17203, 17204, 17205, 17206, 17207, 17208, 17209, 17210,
17211, 17212, 17213, 17214, 17215, 17216, 17217, 17218, 17219,
17220, 17221, 17222, 17223, 17224, 17225, 17410, 17411, 17412,
17413, 17414, 17415, 17416, 17417, 17418, 17419, 17420, 17421,
17422, 17423, 17424, 17425, 17426, 17427, 17428, 17429, 17430,
17431, 17432, 17433, 17434, 17435, 17439, 17440, 17441, 17442,
17443, 17444, 17445, 17446, 17447, 17448, 17449, 17450, 17451,
17452, 17459, 17460, 17461, 17462, 17463, 17464, 17465, 17466,
17467, 17468, 17469, 17470, 17532, 17533, 17534, 17535, 17536,
17537, 17538, 17539, 17540, 17544, 17545, 17546, 17547, 17548,
17549, 17550, 17551, 17552, 17553, 17554, 17555, 17556, 17557,
17558, 17559, 17560, 17561, 17562, 17563, 17564, 17565, 17566,
17567, 17568, 17573, 17574, 17575, 17576, 17577, 17578, 17579,
17580, 17581, 17582, 17583, 17584, 17585, 17586, 17587, 17588,
17589, 17590, 17775, 17776, 17777, 17778, 17779, 17780, 17781,
17782, 17783, 17784, 17785, 17786, 17787, 17788, 17789, 17790,
17791, 17792, 17793, 17794, 17795, 17796, 17797, 17798, 17799,
17800, 17801, 17802, 17803, 17804, 17805, 17806, 17807, 17808,
17809, 17810, 17811, 17812, 17813, 17814, 17815, 17816, 17817,
17818, 17819, 17820, 17821, 17822, 17823, 17824, 17825, 17826,
17827, 17828, 17829, 17830, 17831, 17832, 17833, 17834, 17835
), class = "Date"), G = c(3L, 9L, 7L, 2L, 3L, 4L, 4L, 5L, 2L,
6L, 6L, 8L, 7L, 1L, 2L, 9L, 3L, 7L, 10L, 11L, 6L, 3L, 4L, 4L,
2L, 9L, 3L, 3L, 2L, 6L, 1L, 3L, 7L, 5L, 2L, 6L, 7L, 6L, 2L, 2L,
7L, 2L, 3L, 4L, 6L, 6L, 4L, 6L, 3L, 2L, 6L, 2L, 3L, 7L, 5L, 7L,
4L, 5L, 4L, 2L, 4L, 7L, 4L, 4L, 7L, 6L, 4L, 1L, 8L, 4L, 3L, 4L,
6L, 6L, 5L, 5L, 6L, 1L, 3L, 8L, 7L, 6L, 2L, 4L, 5L, 8L, 4L, 4L,
2L, 5L, 7L, 7L, 4L, 5L, 5L, 5L, 4L, 1L, 13L, 3L, 6L, 8L, 3L,
3L, 2L, 2L, 3L, 5L, 4L, 5L, 3L, 5L, 6L, 2L, 5L, 7L, 2L, 3L, 6L,
7L, 2L, 5L, 6L, 2L, 5L, 6L, 5L, 5L, 8L, 3L, 3L, 4L, 5L, 8L, 10L,
6L, 5L, 2L, 6L, 4L, 3L, 3L, 2L, 5L, 3L, 4L, 5L, 7L, 2L, 5L, 6L,
7L, 6L, 5L, 4L, 4L, 7L, 2L, 5L, 3L, 3L, 6L, 6L, 3L, 5L, 4L, 5L,
3L, 9L, 3L, 3L, 2L, 5L, 3L, 3L, 9L, 6L, 1L, 3L, 6L, 6L, 3L, 8L,
9L, 4L, 3L, 5L, 6L, 2L, 10L, 4L, 7L, 4L, 3L, 5L, 3L, 4L, 4L,
6L, 0L, 3L, 5L, 1L, 6L, 3L, 5L, 4L, 1L, 1L, 5L, 5L, 1L, 3L, 4L,
11L, 3L, 1L, 7L, 1L, 6L)), row.names = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "24", "25", "26", "27", "28", "29", "30",
"31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41",
"42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52",
"53", "54", "55", "56", "57", "58", "59", "244", "245", "246",
"247", "248", "249", "250", "251", "252", "253", "254", "255",
"256", "257", "258", "259", "260", "261", "262", "263", "264",
"265", "266", "267", "268", "269", "273", "274", "275", "276",
"277", "278", "279", "280", "281", "282", "283", "284", "285",
"286", "293", "294", "295", "296", "297", "298", "299", "300",
"301", "302", "303", "304", "366", "367", "368", "369", "370",
"371", "372", "373", "374", "378", "379", "380", "381", "382",
"383", "384", "385", "386", "387", "388", "389", "390", "391",
"392", "393", "394", "395", "396", "397", "398", "399", "400",
"401", "402", "407", "408", "409", "410", "411", "412", "413",
"414", "415", "416", "417", "418", "419", "420", "421", "422",
"423", "424", "609", "610", "611", "612", "613", "614", "615",
"616", "617", "618", "619", "620", "621", "622", "623", "624",
"625", "626", "627", "628", "629", "630", "631", "632", "633",
"634", "635", "636", "637", "638", "639", "640", "641", "642",
"643", "644", "645", "646", "647", "648", "649", "650", "651",
"652", "653", "654", "655", "656", "657", "658", "659", "660",
"661", "662", "663", "664", "665", "666", "667", "668", "669"
), class = "data.frame", .Names = c("Date", "G"))
Upvotes: 1
Views: 525
Reputation: 1428
Here's a solution that uses hierarchical clustering to find good breakpoints.
library(dplyr)
library(ggplot2)
df <- bind_rows(df1, df2, .id = "df_group")
df_clust <- df %>%
mutate(clust = hclust(dist(Date)) %>%
cutree(k = 4))
ggplot(data = df_clust, aes(x = Date, y = G, color = as.factor(clust))) +
geom_point()
Here's the result of the clustering:
Now we can proceed to the desired plot:
theme_set(theme_gray())
ggplot(data = df_clust, aes(x = Date, y = G, fill = df_group)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("1" = "red", "2" = "gray")) +
facet_grid(. ~ clust, scales = "free")
Created on 2020-06-07 by the reprex package (v0.3.0)
You'll probably want to play around a bit with the date axis to make it look pretty.
Upvotes: 2
Reputation: 160447
It's often better to not use $
indexing within aes
; this is especially true when you are using data=
, a more idiomatic way would be geom_bar(data = Test1, aes(Date, G), ...)
.
We can use cumsum(diff(Date) > .)
to get groups by gaps, but it isn't clear where the gaps should be. For instance,
table(diff(Test1$Date))
# 1 15 26 83 247
# 15 1 1 1 1
table(diff(Test2$Date))
# 1 4 5 7 62 185
# 211 2 2 1 1 2
You say "4 facets", but there is no gap-width that gives 4 facets in both datasets. I'll use Test2
and a gap width of 10, but this means Test1
will only fill three of the four facets.
Since we're faceting, we need to add some grouping variable to both datasets.
mindates <- by(Test2$Date, cumsum(c(TRUE, diff(Test2$Date) > 10)),
min, simplify = FALSE)
mindates <- as.Date(unlist(mindates))
mindates
# 1 2 3 4
# "2017-01-01" "2017-09-01" "2018-01-01" "2018-09-01"
Test1$grp <- findInterval(Test1$Date, c(unlist(mindates), Inf), left.open = FALSE)
Test2$grp <- findInterval(Test2$Date, c(unlist(mindates), Inf), left.open = FALSE)ggplot() +
theme_gray() +
geom_bar(data = Test1, aes(x = Date, y = G), stat="identity", color = "red") +
geom_bar(data = Test2, aes(x = Date, y = G), stat="identity", color = "grey") +
scale_x_date(date_labels ="%m/%y",date_breaks = "1 month") +
facet_wrap(~ grp, scales = "free_x")
Upvotes: 3