Reputation: 1441
I'm collecting time series data from Wikipedia and want to run a change-point analysis on each time series using dplyr. However, when I do so I get an error saying the data need to be numeric, even though
`class()` reports that the views column is numeric. I hope you can help.
library(changepoint)
library(dplyr)
library(pageviews)
library(data.table)
# Wikipedia article titles whose traffic will be analysed.
articles <- c("Rugby_union", "Football")

# Fetch page views for the article(s) in `x` from the English Wikipedia,
# covering 2017-01-01 through 2017-12-31, restricted to "user" traffic on the
# "mobile-web" platform. Returns whatever `article_pageviews()` returns.
foo <- function(x) {
  article_pageviews(
    project = "en.wikipedia",
    article = x,
    start = as.Date("2017-01-01"),
    end = as.Date("2017-12-31"),
    user_type = "user",
    platform = "mobile-web"
  )
}
# Download the page views for every title in `articles` with a single call to
# foo(); the result is kept in `output`.
output<-articles %>% foo
# Attempted change-point analysis (this is the step that raises the error).
# There is no group_by() here, so do() runs once on the whole table, and `.`
# is the complete two-column data frame (article and views) rather than the
# views vector alone.
# NOTE(review): presumably the non-numeric `article` column reaching
# cpt.mean() is what triggers the "data need to be numeric" complaint - confirm.
output %>%
select(article, views) %>%
do(cpt.mean(.))
# Inspect the class of the views column on its own.
class(output$views)
Upvotes: 0
Views: 506
Reputation: 1599
library(changepoint)
library(dplyr)
library(pageviews)
# Titles of the Wikipedia articles to compare.
articles <- c("Rugby_union", "Football")

# Request page views for the given article title(s) `x` on en.wikipedia.
# The query window is the calendar year 2017, limited to "user" traffic
# on the "mobile-web" platform. The result of `article_pageviews()` is
# returned unchanged.
foo <- function(x) {
  window_start <- as.Date("2017-01-01")
  window_end <- as.Date("2017-12-31")

  article_pageviews(
    project = "en.wikipedia",
    article = x,
    start = window_start,
    end = window_end,
    user_type = "user",
    platform = "mobile-web"
  )
}
# Download the page views for all requested articles in one call.
output <- foo(articles)

# Distinct article titles present in the result, in sorted order.
article_names <- sort(unique(output$article))

# Split the result into one (article, views) table per title.
article_views <- dplyr::select(output, article, views)
output1 <- dplyr::filter(article_views, article == article_names[1])
output2 <- dplyr::filter(article_views, article == article_names[2])

# Upper bound on the number of change points handed to the binary
# segmentation search: half the length of the shorter series, plus one,
# rounded down.
shortest_series <- min(nrow(output1), nrow(output2))
q <- floor(shortest_series / 2 + 1)

# Change in mean for the first article, fitted on the numeric views vector only.
cp1 <- changepoint::cpt.mean(
  data = output1$views,
  penalty = "SIC",
  method = "BinSeg",
  Q = q
)
plot(cp1)

# Same analysis for the second article.
cp2 <- changepoint::cpt.mean(
  data = output2$views,
  penalty = "SIC",
  method = "BinSeg",
  Q = q
)
plot(cp2)
Upvotes: 1