Rüdiger Kladt
Rüdiger Kladt

Reputation: 119

Timeseries and its visualisation

I have a quarter of a million events like this:

                 Slot Anzahl Nutzung TimeSlotNr WochenSlots Tag
1 2011-01-01 00:00:00      2   Firma          1         242   1
2 2011-01-01 00:00:00     50  Privat          1         242   1
3 2011-01-01 00:30:00      1   Firma          2         243   1
4 2011-01-01 00:30:00     49  Privat          2         243   1
5 2011-01-01 01:00:00      1   Firma          3         244   1
6 2011-01-01 01:00:00     48  Privat          3         244   1

A slot represents half an hour (30 minutes), and "Anzahl" is the number of events in a slot. The first slot starts at 2011-01-01 00:00:00. "WochenSlots" is the TimeSlotNr %% 336 and starts on a Saturday at 00:00:00. So I want to see the distribution within a week.

You see the development of bookings since 2011 by customer type. The peaks and holes are errors.

And the same data aggregated to weeks (Monday 00:00 - Sunday 23:00)

What I want to do now is:

And I have no idea how to do that.

  # Weekly profile: number of events (Anzahl) per slot of the week
  # (WochenSlots), coloured by usage type (Nutzung). One panel per usage
  # type, stacked in two rows, each panel with its own y-axis range.
  # The y-axis labels are formatted with scales::percent.
  ggplot(PB2, aes(x = WochenSlots, y = Anzahl, colour = Nutzung)) +
    geom_point(alpha = 0.6) +
    scale_y_continuous(labels = scales::percent) +
    facet_wrap(~ Nutzung, nrow = 2, scales = "free_y", shrink = TRUE)

dput(PB2[1:100, ]) structure(list(Slot = structure(c(1293840000, 1293840000, 1293841800, 1293841800, 1293843600, 1293843600, 1293845400, 1293845400, 1293847200, 1293847200, 1293849000, 1293849000, 1293850800, 1293850800, 1293852600, 1293852600, 1293854400, 1293854400, 1293856200, 1293856200, 1293858000, 1293858000, 1293859800, 1293859800, 1293861600, 1293861600, 1293863400, 1293863400, 1293865200, 1293865200, 1293867000, 1293867000, 1293868800, 1293868800, 1293870600, 1293870600, 1293872400, 1293872400, 1293874200, 1293874200, 1293876000, 1293876000, 1293877800, 1293877800, 1293879600, 1293879600, 1293881400, 1293881400, 1293883200, 1293883200, 1293885000, 1293885000, 1293886800, 1293886800, 1293888600, 1293888600, 1293890400, 1293890400, 1293892200, 1293892200, 1293894000, 1293894000, 1293895800, 1293895800, 1293897600, 1293897600, 1293899400, 1293899400, 1293901200, 1293901200, 1293903000, 1293903000, 1293904800, 1293904800, 1293906600, 1293906600, 1293908400, 1293908400, 1293910200, 1293910200, 1293912000, 1293912000, 1293913800, 1293913800, 1293915600, 1293915600, 1293917400, 1293917400, 1293919200, 1293919200, 1293921000, 1293921000, 1293922800, 1293922800, 1293924600, 1293924600, 1293926400, 1293926400, 1293928200, 1293928200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Anzahl = c(2L, 50L, 1L, 49L, 1L, 48L, 1L, 43L, 1L, 43L, 1L, 30L, 1L, 27L, 0L, 22L, 0L, 19L, 0L, 20L, 0L, 18L, 0L, 17L, 0L, 17L, 0L, 17L, 0L, 17L, 0L, 18L, 0L, 19L, 2L, 19L, 2L, 19L, 2L, 20L, 2L, 21L, 2L, 21L, 2L, 20L, 2L, 18L, 2L, 22L, 2L, 24L, 3L, 25L, 1L, 28L, 1L, 30L, 1L, 33L, 1L, 32L, 1L, 28L, 2L, 24L, 2L, 25L, 2L, 25L, 2L, 22L, 2L, 20L, 1L, 15L, 2L, 14L, 1L, 13L, 1L, 11L, 1L, 12L, 1L, 11L, 1L, 9L, 1L, 8L, 1L, 7L, 1L, 5L, 1L, 4L, 1L, 3L, 0L, 3L), Nutzung = c("Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", 
"Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat"), TimeSlotNr = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L, 26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 32L, 32L, 33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L, 37L, 37L, 38L, 38L, 39L, 39L, 40L, 40L, 41L, 41L, 42L, 42L, 43L, 43L, 44L, 44L, 45L, 45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L, 50L, 50L), WochenSlots = c(242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 256, 256, 257, 257, 258, 258, 259, 259, 260, 260, 261, 261, 262, 262, 263, 263, 264, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 270, 270, 271, 271, 272, 272, 273, 273, 274, 274, 275, 275, 276, 276, 277, 277, 278, 278, 279, 279, 280, 280, 281, 281, 282, 282, 283, 283, 284, 284, 285, 285, 286, 286, 287, 287, 288, 288, 289, 289, 290, 290, 291, 291), Tag = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L)), .Names = c("Slot", "Anzahl", "Nutzung", "TimeSlotNr", "WochenSlots", "Tag"), row.names = c(NA, 100L), class = "data.frame")

Upvotes: 2

Views: 247

Answers (1)

eipi10
eipi10

Reputation: 93871

It looks like quantile regression might be what you need. The data sample you posted only has one observation at each time point, so I've created some fake data for illustration. In the plot below, we use a flexible spline function for the regression function and we draw regression lines at the 25th and 75th percentiles of the data. Let me know if this is what you had in mind.

library(ggplot2)
library(quantreg)
library(splines)

# Simulated illustration data: 10,000 x values drawn uniformly from [0, 20],
# and y as a cosine of x shifted up by 10 plus normal noise (mean 2, sd 1).
# The seed is fixed so the example is reproducible.
set.seed(2)
dat <- data.frame(x = runif(n = 1e4, min = 0, max = 20))
dat[["y"]] <- cos(dat[["x"]]) + 10 + rnorm(n = nrow(dat), mean = 2)

# Scatter of the raw points (faint blue) overlaid with quantile-regression
# curves at the 25th and 75th percentiles. The regression function is a
# natural spline in x with 10 degrees of freedom, so the curves can follow
# the shape of the data.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_point(colour = "blue", size = 0.5, alpha = 0.1) +
  geom_quantile(
    formula = y ~ ns(x, 10),
    quantiles = c(0.25, 0.75),
    colour = "red",
    size = 1
  ) +
  theme_classic()

enter image description here

Upvotes: 3

Related Questions