Timeseries and its visualisation

Question

I have a quarter of a million events like this:

                 Slot Anzahl Nutzung TimeSlotNr WochenSlots Tag
1 2011-01-01 00:00:00      2   Firma          1         242   1
2 2011-01-01 00:00:00     50  Privat          1         242   1
3 2011-01-01 00:30:00      1   Firma          2         243   1
4 2011-01-01 00:30:00     49  Privat          2         243   1
5 2011-01-01 01:00:00      1   Firma          3         244   1
6 2011-01-01 01:00:00     48  Privat          3         244   1

A slot represents 30 minutes (half an hour), and "Anzahl" is the number of events in a slot. The first slot starts at 2011-01-01 00:00:00, which is a Saturday. "WochenSlots" is the slot's position within the week, derived from TimeSlotNr %% 336 (336 half-hour slots per week). So I want to see the distribution over a week.

What I want to do now is:

show the dates on the x-scale (Monday 00:00 - Sunday 24:00)
show lines (envelopes) that show the distribution for x% of the events.

And I have no idea how to do that.

  # Scatter of event counts (Anzahl) against the weekly slot index, coloured
  # by usage type (Nutzung) and split into one stacked panel per usage type,
  # each panel with its own y-axis range.
  ggplot(PB2, aes(x = WochenSlots, y = Anzahl, colour = Nutzung)) +
    geom_point(alpha = 0.6) +
    scale_y_continuous(labels = scales::percent) +
    facet_wrap(
      ~Nutzung,
      nrow = 2,
      scales = "free_y",
      shrink = TRUE
    )

dput(PB2[1:100, ]) structure(list(Slot = structure(c(1293840000, 1293840000, 1293841800, 1293841800, 1293843600, 1293843600, 1293845400, 1293845400, 1293847200, 1293847200, 1293849000, 1293849000, 1293850800, 1293850800, 1293852600, 1293852600, 1293854400, 1293854400, 1293856200, 1293856200, 1293858000, 1293858000, 1293859800, 1293859800, 1293861600, 1293861600, 1293863400, 1293863400, 1293865200, 1293865200, 1293867000, 1293867000, 1293868800, 1293868800, 1293870600, 1293870600, 1293872400, 1293872400, 1293874200, 1293874200, 1293876000, 1293876000, 1293877800, 1293877800, 1293879600, 1293879600, 1293881400, 1293881400, 1293883200, 1293883200, 1293885000, 1293885000, 1293886800, 1293886800, 1293888600, 1293888600, 1293890400, 1293890400, 1293892200, 1293892200, 1293894000, 1293894000, 1293895800, 1293895800, 1293897600, 1293897600, 1293899400, 1293899400, 1293901200, 1293901200, 1293903000, 1293903000, 1293904800, 1293904800, 1293906600, 1293906600, 1293908400, 1293908400, 1293910200, 1293910200, 1293912000, 1293912000, 1293913800, 1293913800, 1293915600, 1293915600, 1293917400, 1293917400, 1293919200, 1293919200, 1293921000, 1293921000, 1293922800, 1293922800, 1293924600, 1293924600, 1293926400, 1293926400, 1293928200, 1293928200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Anzahl = c(2L, 50L, 1L, 49L, 1L, 48L, 1L, 43L, 1L, 43L, 1L, 30L, 1L, 27L, 0L, 22L, 0L, 19L, 0L, 20L, 0L, 18L, 0L, 17L, 0L, 17L, 0L, 17L, 0L, 17L, 0L, 18L, 0L, 19L, 2L, 19L, 2L, 19L, 2L, 20L, 2L, 21L, 2L, 21L, 2L, 20L, 2L, 18L, 2L, 22L, 2L, 24L, 3L, 25L, 1L, 28L, 1L, 30L, 1L, 33L, 1L, 32L, 1L, 28L, 2L, 24L, 2L, 25L, 2L, 25L, 2L, 22L, 2L, 20L, 1L, 15L, 2L, 14L, 1L, 13L, 1L, 11L, 1L, 12L, 1L, 11L, 1L, 9L, 1L, 8L, 1L, 7L, 1L, 5L, 1L, 4L, 1L, 3L, 0L, 3L), Nutzung = c("Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", 
"Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat"), TimeSlotNr = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L, 26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 32L, 32L, 33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L, 37L, 37L, 38L, 38L, 39L, 39L, 40L, 40L, 41L, 41L, 42L, 42L, 43L, 43L, 44L, 44L, 45L, 45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L, 50L, 50L), WochenSlots = c(242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 256, 256, 257, 257, 258, 258, 259, 259, 260, 260, 261, 261, 262, 262, 263, 263, 264, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 270, 270, 271, 271, 272, 272, 273, 273, 274, 274, 275, 275, 276, 276, 277, 277, 278, 278, 279, 279, 280, 280, 281, 281, 282, 282, 283, 283, 284, 284, 285, 285, 286, 286, 287, 287, 288, 288, 289, 289, 290, 290, 291, 291), Tag = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L)), .Names = c("Slot", "Anzahl", "Nutzung", "TimeSlotNr", "WochenSlots", "Tag"), row.names = c(NA, 100L), class = "data.frame")

eipi10 · Accepted Answer

It looks like quantile regression might be what you need. The data sample you posted only has one observation at each time point, so I've created some fake data for illustration. In the plot below, we use a flexible spline function for the regression function and we draw regression lines at the 25th and 75th percentiles of the data. Let me know if this is what you had in mind.

library(ggplot2)
library(quantreg)
library(splines)

# Simulated data for illustration: 10,000 x values drawn uniformly on
# [0, 20]; y follows a cosine in x, shifted up by 10, plus normal noise
# with mean 2 (and the default standard deviation of 1).
set.seed(2)
dat <- data.frame(x = runif(1e4, min = 0, max = 20))
dat$y <- 10 + cos(dat$x) + rnorm(nrow(dat), mean = 2)

# Raw points in faint blue, overlaid with quantile-regression curves at the
# 25th and 75th percentiles. The regression uses a natural spline basis
# with 10 degrees of freedom so the fitted curves can follow the wave.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_point(colour = "blue", size = 0.5, alpha = 0.1) +
  geom_quantile(
    quantiles = c(0.25, 0.75),
    formula = y ~ ns(x, df = 10),
    colour = "red",
    size = 1
  ) +
  theme_classic()

Timeseries and its visualisation

Answers (1)

Related Questions