Mr.Spock
Mr.Spock

Reputation: 521

R Plotting time-series with time-gaps using facet_wrap

I want to plot a timeseries with 2 different dataframes. The problem is that there are big time-gaps in the data. I would like to have the gaps removed and to get 4 small plots instead of 1 big one with huge gaps. I know that facet_wrap could do the job, but I dont get it working in my Plot. I cant just cut out the gaps manually, I need something automatic.

library(ggplot2)
theme_set(theme_gray()) 
p1 = ggplot() + 
  geom_bar(data = Test1, aes(x = Test1$Date, y = Test1$G), stat="identity", color = "red") +
  geom_bar(data = Test2, aes(x = Test2$Date, y = Test2$G), stat="identity", color = "grey") +
  scale_x_date(date_labels ="%m/%y",date_breaks  = "1 month")

p1

Test1:

structure(list(Date = structure(c(17186, 17187, 17188, 17189, 
17436, 17437, 17438, 17453, 17454, 17455, 17456, 17457, 17458, 
17541, 17542, 17543, 17569, 17570, 17571, 17572), class = "Date"), 
    G = c(1L, 8L, 2L, 13L, 3L, 8L, 4L, 5L, 3L, 4L, 9L, 7L, 11L, 
    7L, 6L, 7L, 4L, 7L, 5L, 4L)), row.names = c("20", "21", "22", 
"23", "270", "271", "272", "287", "288", "289", "290", "291", 
"292", "375", "376", "377", "403", "404", "405", "406"), class = "data.frame", .Names = c("Date", 
"G"))

Test2:

structure(list(Date = structure(c(17167, 17168, 17169, 17170, 
17171, 17172, 17173, 17174, 17175, 17176, 17177, 17178, 17179, 
17180, 17181, 17182, 17183, 17184, 17185, 17190, 17191, 17192, 
17193, 17194, 17195, 17196, 17197, 17198, 17199, 17200, 17201, 
17202, 17203, 17204, 17205, 17206, 17207, 17208, 17209, 17210, 
17211, 17212, 17213, 17214, 17215, 17216, 17217, 17218, 17219, 
17220, 17221, 17222, 17223, 17224, 17225, 17410, 17411, 17412, 
17413, 17414, 17415, 17416, 17417, 17418, 17419, 17420, 17421, 
17422, 17423, 17424, 17425, 17426, 17427, 17428, 17429, 17430, 
17431, 17432, 17433, 17434, 17435, 17439, 17440, 17441, 17442, 
17443, 17444, 17445, 17446, 17447, 17448, 17449, 17450, 17451, 
17452, 17459, 17460, 17461, 17462, 17463, 17464, 17465, 17466, 
17467, 17468, 17469, 17470, 17532, 17533, 17534, 17535, 17536, 
17537, 17538, 17539, 17540, 17544, 17545, 17546, 17547, 17548, 
17549, 17550, 17551, 17552, 17553, 17554, 17555, 17556, 17557, 
17558, 17559, 17560, 17561, 17562, 17563, 17564, 17565, 17566, 
17567, 17568, 17573, 17574, 17575, 17576, 17577, 17578, 17579, 
17580, 17581, 17582, 17583, 17584, 17585, 17586, 17587, 17588, 
17589, 17590, 17775, 17776, 17777, 17778, 17779, 17780, 17781, 
17782, 17783, 17784, 17785, 17786, 17787, 17788, 17789, 17790, 
17791, 17792, 17793, 17794, 17795, 17796, 17797, 17798, 17799, 
17800, 17801, 17802, 17803, 17804, 17805, 17806, 17807, 17808, 
17809, 17810, 17811, 17812, 17813, 17814, 17815, 17816, 17817, 
17818, 17819, 17820, 17821, 17822, 17823, 17824, 17825, 17826, 
17827, 17828, 17829, 17830, 17831, 17832, 17833, 17834, 17835
), class = "Date"), G = c(3L, 9L, 7L, 2L, 3L, 4L, 4L, 5L, 2L, 
6L, 6L, 8L, 7L, 1L, 2L, 9L, 3L, 7L, 10L, 11L, 6L, 3L, 4L, 4L, 
2L, 9L, 3L, 3L, 2L, 6L, 1L, 3L, 7L, 5L, 2L, 6L, 7L, 6L, 2L, 2L, 
7L, 2L, 3L, 4L, 6L, 6L, 4L, 6L, 3L, 2L, 6L, 2L, 3L, 7L, 5L, 7L, 
4L, 5L, 4L, 2L, 4L, 7L, 4L, 4L, 7L, 6L, 4L, 1L, 8L, 4L, 3L, 4L, 
6L, 6L, 5L, 5L, 6L, 1L, 3L, 8L, 7L, 6L, 2L, 4L, 5L, 8L, 4L, 4L, 
2L, 5L, 7L, 7L, 4L, 5L, 5L, 5L, 4L, 1L, 13L, 3L, 6L, 8L, 3L, 
3L, 2L, 2L, 3L, 5L, 4L, 5L, 3L, 5L, 6L, 2L, 5L, 7L, 2L, 3L, 6L, 
7L, 2L, 5L, 6L, 2L, 5L, 6L, 5L, 5L, 8L, 3L, 3L, 4L, 5L, 8L, 10L, 
6L, 5L, 2L, 6L, 4L, 3L, 3L, 2L, 5L, 3L, 4L, 5L, 7L, 2L, 5L, 6L, 
7L, 6L, 5L, 4L, 4L, 7L, 2L, 5L, 3L, 3L, 6L, 6L, 3L, 5L, 4L, 5L, 
3L, 9L, 3L, 3L, 2L, 5L, 3L, 3L, 9L, 6L, 1L, 3L, 6L, 6L, 3L, 8L, 
9L, 4L, 3L, 5L, 6L, 2L, 10L, 4L, 7L, 4L, 3L, 5L, 3L, 4L, 4L, 
6L, 0L, 3L, 5L, 1L, 6L, 3L, 5L, 4L, 1L, 1L, 5L, 5L, 1L, 3L, 4L, 
11L, 3L, 1L, 7L, 1L, 6L)), row.names = c("1", "2", "3", "4", 
"5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "24", "25", "26", "27", "28", "29", "30", 
"31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", 
"42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", 
"53", "54", "55", "56", "57", "58", "59", "244", "245", "246", 
"247", "248", "249", "250", "251", "252", "253", "254", "255", 
"256", "257", "258", "259", "260", "261", "262", "263", "264", 
"265", "266", "267", "268", "269", "273", "274", "275", "276", 
"277", "278", "279", "280", "281", "282", "283", "284", "285", 
"286", "293", "294", "295", "296", "297", "298", "299", "300", 
"301", "302", "303", "304", "366", "367", "368", "369", "370", 
"371", "372", "373", "374", "378", "379", "380", "381", "382", 
"383", "384", "385", "386", "387", "388", "389", "390", "391", 
"392", "393", "394", "395", "396", "397", "398", "399", "400", 
"401", "402", "407", "408", "409", "410", "411", "412", "413", 
"414", "415", "416", "417", "418", "419", "420", "421", "422", 
"423", "424", "609", "610", "611", "612", "613", "614", "615", 
"616", "617", "618", "619", "620", "621", "622", "623", "624", 
"625", "626", "627", "628", "629", "630", "631", "632", "633", 
"634", "635", "636", "637", "638", "639", "640", "641", "642", 
"643", "644", "645", "646", "647", "648", "649", "650", "651", 
"652", "653", "654", "655", "656", "657", "658", "659", "660", 
"661", "662", "663", "664", "665", "666", "667", "668", "669"
), class = "data.frame", .Names = c("Date", "G"))

Upvotes: 1

Views: 525

Answers (2)

RyanFrost
RyanFrost

Reputation: 1428

Here's a solution that uses hierarchical clustering to find good breakpoints.

library(dplyr)
library(ggplot2)

df <- bind_rows(df1, df2, .id = "df_group")

df_clust <- df %>%
  mutate(clust = hclust(dist(Date)) %>%
           cutree(k = 4))

ggplot(data = df_clust, aes(x = Date, y = G, color = as.factor(clust))) +
  geom_point()

Here's the result of the clustering:

Now we can proceed to the desired plot:

theme_set(theme_gray())
ggplot(data = df_clust, aes(x = Date, y = G, fill = df_group)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("1" = "red", "2" = "gray")) +
  facet_grid(. ~ clust, scales = "free")

Created on 2020-06-07 by the reprex package (v0.3.0)

You'll probably want to play around a bit with the date axis to make it look pretty.

Upvotes: 2

r2evans
r2evans

Reputation: 160447

  1. It's often better to not use $ indexing within aes; this is especially true when you are using data=, a more idiomatic way would be geom_bar(data = Test1, aes(Date, G), ...).

  2. We can use cumsum(diff(Date) > .) to get groups by gaps, but it isn't clear where the gaps should be. For instance,

    table(diff(Test1$Date))
    #   1  15  26  83 247 
    #  15   1   1   1   1 
    table(diff(Test2$Date))
    #   1   4   5   7  62 185 
    # 211   2   2   1   1   2 
    

    You say "4 facets", but there is no gap-width that gives 4 facets in both datasets. I'll use Test2 and a gap width of 10, but this means Test1 will only fill three of the four facets.

Since we're faceting, we need to add some grouping variable to both datasets.

mindates <- by(Test2$Date, cumsum(c(TRUE, diff(Test2$Date) > 10)), 
               min, simplify = FALSE)
mindates <- as.Date(unlist(mindates))
mindates
#            1            2            3            4 
# "2017-01-01" "2017-09-01" "2018-01-01" "2018-09-01" 
Test1$grp <- findInterval(Test1$Date, c(unlist(mindates), Inf), left.open = FALSE)
Test2$grp <- findInterval(Test2$Date, c(unlist(mindates), Inf), left.open = FALSE)ggplot() +
  theme_gray() +
  geom_bar(data = Test1, aes(x = Date, y = G), stat="identity", color = "red") +
  geom_bar(data = Test2, aes(x = Date, y = G), stat="identity", color = "grey") +
  scale_x_date(date_labels ="%m/%y",date_breaks  = "1 month") +
  facet_wrap(~ grp, scales = "free_x")

ggplot2 faceted plot

Upvotes: 3

Related Questions