Kulis
Kulis

Reputation: 1010

Rewrite code using %>% operator

I am trying to rewrite the following code using the %>% (pipe) operator, in order to learn this approach:

library(arules) 
data(AdultUCI) #https://archive.ics.uci.edu/ml/datasets/Census+Income

# Bin capital-gain into an ordered factor with three levels. cut() uses
# right-closed intervals by default, so values <= 0 fall in the first bin,
# positive values up to the median of the positive gains in the second, and
# everything above that median in the third. ordered() then relabels the three
# bins, in order, as "None" < "Low" < "High".
# (The console continuation prompts "+" from the original paste are removed;
# with them the snippet does not parse.)
AdultUCI[["capital-gain"]] <- ordered(
  cut(
    AdultUCI[["capital-gain"]],
    c(
      -Inf, 0,
      median(AdultUCI[["capital-gain"]][AdultUCI[["capital-gain"]] > 0]),
      Inf
    )
  ),
  labels = c("None", "Low", "High")
)

Is this possible? Here is my attempt:

# NOTE: non-working attempt, kept exactly as posted. `%>%` passes its left-hand
# side as the first argument of the call on its right, so the data must come
# first and the functions after it (data %>% cut(...) %>% ordered(...)), not the
# other way round. The comma after the first line and the leading "+" console
# prompts also prevent this from parsing.
AdultUCI[["capital-gain"]] <- ordered %>% cut %>% AdultUCI[["capital-gain"]], 
                            + c(-Inf, 0, median(AdultUCI[["capital-gain"]][AdultUCI[["capital-gain"]] > 0]), 
                            + Inf),labels = c("None", "Low", "High")

Upvotes: 1

Views: 93

Answers (1)

zx8754
zx8754

Reputation: 56159

This should work:

library(dplyr)

# Reproducible data: the raw file is read with header = FALSE, so the columns
# get default names and the one we need is renamed by position.
AdultUCI <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
  header = FALSE
)
# capital-gain is the 11th attribute in the UCI attribute listing (adult.names).
# Position 13 is hours-per-week, so renaming column 13 would bin the wrong
# variable.
colnames(AdultUCI)[11] <- "capital-gain"

# Original (nested-call) version: bin into (-Inf, 0], (0, median of positive
# gains], (median, Inf] and turn the result into an ordered factor.
originalOrdered <-
  ordered(
    cut(
      AdultUCI[["capital-gain"]],
      c(
        -Inf, 0,
        median(AdultUCI[["capital-gain"]][AdultUCI[["capital-gain"]] > 0]),
        Inf
      ),
      labels = c("None", "Low", "High")
    ),
    levels = c("None", "Low", "High")
  )

# dplyr version: the data frame goes in on the left of the pipe, the column is
# renamed to x for brevity, binned inside mutate(), and pulled out as a vector.
newOrdered <-
  AdultUCI %>%
  select(x = `capital-gain`) %>%
  mutate(
    capitalGainOrdered = ordered(
      cut(
        x,
        c(-Inf, 0, median(x[x > 0]), Inf),
        labels = c("None", "Low", "High")
      ),
      levels = c("None", "Low", "High")
    )
  ) %>%
  pull(capitalGainOrdered)


# Test that both versions give the same result
identical(originalOrdered, newOrdered)
#[1] TRUE

str(newOrdered)
#Ord.factor w/ 3 levels "None"<"Low"<"High": ...
# (element codes omitted: not re-run after the column fix; rows with a capital
# gain of 0 fall in the first bin and should print as code 1, "None")

Upvotes: 1

Related Questions