johnny
johnny

Reputation: 473

Panel data moving average in ggplot

I'm working with panel data that looks like this:

library(dplyr)
library(ggplot2)

# Example panel: ids 1-5, each with one row per year from 2009 to 2020.
# `moved` holds 0/1 values and `occupation` the occupation label of that
# id in that year; the values are laid out one id (12 years) at a time.
df <- tibble(
  id = rep(1:5, each = 12),
  year = rep(2009:2020, times = 5),
  moved = c(
    1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, # id 1
    0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, # id 2
    1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, # id 3
    0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, # id 4
    1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0  # id 5
  ),
  occupation = c(
    # id 1
    "carpenter", "carpenter", "carpenter", "carpenter",
    "carpenter", "cleaner", "cleaner", "cleaner",
    "cleaner", "cleaner", "mechanic", "mechanic",
    # id 2
    "mechanic", "mechanic", "mechanic", "carpenter",
    "carpenter", "carpenter", "carpenter", "carpenter",
    "cleaner", "mechanic", "carpenter", "cleaner",
    # id 3
    "cleaner", "carpenter", "cleaner", "mechanic",
    "mechanic", "mechanic", "carpenter", "carpenter",
    "carpenter", "carpenter", "carpenter", "cleaner",
    # id 4
    "cleaner", "cleaner", "cleaner", "cleaner",
    "mechanic", "mechanic", "mechanic", "mechanic",
    "mechanic", "carpenter", "carpenter", "carpenter",
    # id 5
    "carpenter", "carpenter", "cleaner", "mechanic",
    "carpenter", "cleaner", "cleaner", "carpenter",
    "cleaner", "mechanic", "mechanic", "mechanic"
  )
)

To visualise moves by occupation over time, I have used the following code:

# Line chart of the mean of `moved` per year, one coloured line per occupation.
# stat_summary() collapses the rows sharing a year and occupation to their mean.
ggplot(data = df, mapping = aes(x = year, y = moved, colour = occupation)) +
  stat_summary(geom = "line", fun = mean, size = 1, alpha = 0.7)

Now I would like to rework this so that the plot shows the two-year moving average of "moved". Is it possible to do this within ggplot, or do I need to transform the panel data first? What is the best solution?

Upvotes: 0

Views: 140

Answers (1)

Peter
Peter

Reputation: 12699

This is a possible solution. It's generally better to carry out the calculations in the dataframe. Here's one way of doing it. I've made some assumptions about what the final graph should look like as there are no requirements in the question, so I've opted for clarity.

library(dplyr)
library(tidyr)
library(zoo)
library(ggplot2)

#Data

# Example panel copied from the question: ids 1-5, each with one row per year
# from 2009 to 2020. `moved` holds 0/1 values and `occupation` the occupation
# label of that id in that year; values are laid out one id (12 years) at a time.
df <- tibble(
  id = rep(1:5, each = 12),
  year = rep(2009:2020, times = 5),
  moved = c(
    1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, # id 1
    0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, # id 2
    1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, # id 3
    0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, # id 4
    1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0  # id 5
  ),
  occupation = c(
    # id 1
    "carpenter", "carpenter", "carpenter", "carpenter",
    "carpenter", "cleaner", "cleaner", "cleaner",
    "cleaner", "cleaner", "mechanic", "mechanic",
    # id 2
    "mechanic", "mechanic", "mechanic", "carpenter",
    "carpenter", "carpenter", "carpenter", "carpenter",
    "cleaner", "mechanic", "carpenter", "cleaner",
    # id 3
    "cleaner", "carpenter", "cleaner", "mechanic",
    "mechanic", "mechanic", "carpenter", "carpenter",
    "carpenter", "carpenter", "carpenter", "cleaner",
    # id 4
    "cleaner", "cleaner", "cleaner", "cleaner",
    "mechanic", "mechanic", "mechanic", "mechanic",
    "mechanic", "carpenter", "carpenter", "carpenter",
    # id 5
    "carpenter", "carpenter", "cleaner", "mechanic",
    "carpenter", "cleaner", "cleaner", "carpenter",
    "cleaner", "mechanic", "mechanic", "mechanic"
  )
)


#data wrangle

# Build the plotting data: one row per occupation, year and measure.
#   m_moves - mean of `moved` over all rows of that occupation and year
#   m_avg   - trailing 2-year moving average of m_moves within an occupation
# The two measures are then stacked into `avg` (measure name) / `val` (value).
df1 <-
  df %>%
  group_by(occupation, year) %>%
  # summarise() drops the innermost grouping level (year), so the steps below
  # still run separately for each occupation.
  summarise(m_moves = mean(moved)) %>%
  # The rolling window is positional, so make the within-occupation year order
  # explicit instead of relying on the order summarise() happens to return.
  arrange(year, .by_group = TRUE) %>%
  mutate(
    m_avg = rollmean(m_moves, 2, fill = NA, align = "right"),
    # An occupation can have no rows at all in some year (in this data:
    # cleaner in 2013 and 2019). The preceding row is then not the preceding
    # calendar year, so that window is not a real 2-year average; set it to NA.
    # The first year of each occupation has no predecessor and stays NA too.
    m_avg = if_else(year - dplyr::lag(year) == 1, m_avg, NA_real_)
  ) %>%
  # Do not leave the result grouped by occupation for downstream code.
  ungroup() %>%
  pivot_longer(cols = c(m_moves, m_avg), names_to = "avg", values_to = "val")

# Plot

# One panel per occupation; the line type distinguishes the yearly mean
# (m_moves) from its 2-year moving average (m_avg).
ggplot(df1, aes(year, val)) +
  geom_line(aes(linetype = avg, colour = occupation)) +
  # Name each label by the `avg` value it belongs to, so the legend text is
  # tied to the right series rather than to the alphabetical order of levels.
  scale_linetype_discrete(
    labels = c(m_avg = "2 year moving average", m_moves = "Average moves")
  ) +
  facet_wrap(~occupation, ncol = 1) +
  # The facet strips already name the occupation, so hide the colour legend.
  # "none" is used because `colour = FALSE` is deprecated in ggplot2.
  guides(colour = "none") +
  labs(linetype = "Average measure:") +
  theme(legend.position = "bottom")
#> Warning: Removed 3 row(s) containing missing values (geom_path).

Created on 2020-05-11 by the reprex package (v0.3.0)

Upvotes: 2

Related Questions