Reputation: 409
Here is my data
## Data
datex <- c(rep("2021-01-18", 61), rep("2021-01-19", 125))
hourx <- c(0,1,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,16,10,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,11,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,15,15,15,15,16,16,16,16)
seller <- c("dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2")
product <- c("00021460","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021460","00021459","00021460","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021460","00021459","00021460","00021459","00021459","00021460","00021459","00021460","00021460","00021460","00021459","00021459","00021460","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021459","00021460","00021459","00021460","00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021460","00021459","00021459","00021460","00021459","00021459","00021459","00021460","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021459","00021460","00021460","00021460","00021459","00021459","00021460","00021459","00021459","00021460","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021459","00021460","00021460","00021460","00021460","00021459","00021459","00021460","00021459","00021459","00021460","00021460","00021459","00021459","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021459","00021459","00021460","00021460","00021460","00021460","00021459","00021459","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021460","00021459","00021460","00021459","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021460","00021459","00021459","00021460","00021459","00021459","00021459","00021460","00021460","00021460","00021460","00021460","00021460","00021460","00021460","00021460","00021460")
detail <- c("E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","notEnoughBalance","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","notEnoughBalance","E99","success","success","success","E99","success","success","E99","success","E99","success","E99","E99","success","E99","E99","success","E99","success","E99","success","E99","success","E99","success","success","E99","E99","E99","success","success","E99","success","E99","success","E99","success","success","E99","E99","E99","success","E99","success","success","E99","E99","success","E99","success","E99","success","success","E99","E99","success","success","E99","E99","success","E99","success","success","E99","success","E99","success","E99","E99","success","success","E99","E99","success","E99","success","success","E99","E99","E99","success","success","notEnoughBalance","E99","success","success","E99","success","E99","success","notEnoughBalance","E99","success","E99","E99","success","E99","success","success","E99","success","E99","E99","success","E99","success","success","E99","success","success","E99","E99","success","notEnoughBalance","E99","E99","success","E99","success","success","E99","E99","success","success","E99")
status <- c("FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","OK01","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","OK01","FI04","OK00","OK00","OK00","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","FI04","OK00","FI04","FI04","OK00","FI04","OK00","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","OK00","OK01","FI04","OK00","OK00","FI04","OK00","FI04","OK00","OK01","FI04","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK01","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK00","FI04")
channel <- c("f2","f2","f2","f3","f2","f3","f2","f3","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f2","f3","f2","f3","f3","f2","f3","f2","f2","f2","f3","f3","f2","f3","f3","f2","f2","f3","f3","f2","f3","f2","f3","f2","f3","f2","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f3","f2","f3","f2","f3","f3","f2","f3","f3","f3","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f2","f2","f3","f3","f2","f3","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f3","f2","f2","f2","f2","f3","f3","f2","f3","f3","f2","f2","f3","f3","f3","f2","f2","f3","f2","f3","f2","f3","f3","f3","f2","f2","f2","f2","f3","f3","f3","f3","f2","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f3","f2","f2","f3","f3","f2","f2","f2","f3","f2","f3","f3","f3","f2","f2","f3","f3","f2","f2","f2","f3","f3","f2","f3","f3","f3","f2","f2","f2","f2","f2","f2","f2","f2","f2","f2")
transaction <- c(1,6,2,5,1,2,1,9,6,12,5,25,14,6,22,9,10,14,15,12,22,12,12,14,9,11,3,3,4,5,1,4,3,1,2,3,3,5,7,5,5,6,9,16,8,13,10,20,15,18,10,19,15,5,13,12,10,12,26,14,5,4,5,5,10,2,20,10,2,30,4,6,6,8,15,2,3,20,2,10,1,20,1,10,2,10,10,2,1,1,20,10,3,10,1,10,3,10,10,6,5,2,10,8,10,10,12,11,10,2,10,11,10,10,14,21,10,10,13,7,10,17,10,10,18,10,7,10,4,4,10,10,7,12,10,131,10,10,13,6,9,10,20,20,16,20,20,162,20,14,10,10,9,10,11,81,10,8,10,10,8,10,10,5,10,15,10,10,3,10,10,8,8,10,10,6,5,10,8,10,10,5,1,10,10,3)
mydata <- data.frame(datex, hourx, seller, product, detail, status, channel, transaction)
mydata %>%
group_by(datex, seller, product, detail, status, channel) %>%
complete(hourx = seq(0, 23, 1)) %>%
mutate(transaction = ifelse(is.na(transaction), 5, transaction)) -> mydata2
mydata2 <- data.frame(mydata2)
My task is to find outlier using t.test from any combination in my data. for sample, i use two combinations. 1st combination:
# Looping 1
combination1 <- mydata2[(mydata2$seller == "dombsdpapp1" & mydata2$product == "00021460" & mydata2$detail == "E99" & mydata2$status == "FI04" & mydata2$channel == "f2"),]
mydata.test <- t.test(combination1$transaction)
mydata.conf <- mydata.test$conf.int[2]
mydata.index <- data.frame(indeks = 1:length(combination1$transaction),
result = ifelse(combination1$transaction > mydata.conf, 1, NA))
mydata.index <- na.omit(mydata.index)[,1]
mydata.result1 <- combination1[c(mydata.index),]
mydata.result1
#datex seller product detail status channel hourx transaction
#56 2021-01-18 dombsdpapp1 00021460 E99 FI04 f2 7 14
#59 2021-01-18 dombsdpapp1 00021460 E99 FI04 f2 10 14
#60 2021-01-18 dombsdpapp1 00021460 E99 FI04 f2 11 12
#61 2021-01-18 dombsdpapp1 00021460 E99 FI04 f2 12 12
#200 2021-01-19 dombsdpapp1 00021460 E99 FI04 f2 7 11
#203 2021-01-19 dombsdpapp1 00021460 E99 FI04 f2 10 13
#204 2021-01-19 dombsdpapp1 00021460 E99 FI04 f2 11 16
#205 2021-01-19 dombsdpapp1 00021460 E99 FI04 f2 12 81
2nd combination:
# Looping 2
combination2 <- mydata2[(mydata2$seller == "dombsdpapp2" & mydata2$product == "00021460" & mydata2$detail == "E99" & mydata2$status == "FI04" & mydata2$channel == "f2"),]
mydata.test <- t.test(combination2$transaction)
mydata.conf <- mydata.test$conf.int[2]
mydata.index <- data.frame(indeks = 1:length(combination2$transaction),
result = ifelse(combination2$transaction > mydata.conf, 1, NA))
mydata.index <- na.omit(mydata.index)[,1]
mydata.result2 <- combination2[c(mydata.index),]
mydata.result2
#datex seller product detail status channel hourx transaction
#127 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 6 9
#128 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 7 8
#129 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 8 13
#130 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 9 15
#131 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 10 18
#132 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 11 15
#134 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 13 12
#135 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 14 12
#136 2021-01-18 dombsdpapp2 00021460 E99 FI04 f2 15 14
#338 2021-01-19 dombsdpapp2 00021460 E99 FI04 f2 1 8
#344 2021-01-19 dombsdpapp2 00021460 E99 FI04 f2 7 13
#346 2021-01-19 dombsdpapp2 00021460 E99 FI04 f2 9 12
#348 2021-01-19 dombsdpapp2 00021460 E99 FI04 f2 11 9
#349 2021-01-19 dombsdpapp2 00021460 E99 FI04 f2 12 8
All looping insert into 1 table,
# All Result
mydata.all.result <- rbind(mydata.result1, mydata.result2)
How do i loop all process to get "All Result" using dplyr? Thank you.
Upvotes: 0
Views: 50
Reputation: 388982
We can write a function to remove outlier values and then apply it for each group.
library(dplyr)
remove_outliers <- function(transaction) {
if(n_distinct(transaction) == 1) return(TRUE)
mydata.test <- t.test(transaction)
mydata.conf <- mydata.test$conf.int[2]
transaction > mydata.conf
}
mydata2 %>%
group_by(datex, seller, product, detail, status, channel) %>%
filter(remove_outliers(transaction)) %>%
ungroup
Upvotes: 1