user
user

Reputation: 914

Error in tapply(var, y, mean, na.rm = TRUE) : arguments must have same length

Why am I getting this error? What does tapply mean? I never call that function myself.

The error occurs on this line: naive_model <- naiveBayes(X_train, Y_train)

Error:

Error in tapply(var, y, mean, na.rm = TRUE) : 
  arguments must have same length

CODE:

  library(e1071)
  # Naive Bayes
  # Learn time: wall-clock duration of fitting the model on the training data.
  start.time <- Sys.time()
  naive_model <- naiveBayes(X_train, Y_train)
  end.time <- Sys.time()
  # Pin the unit to seconds. A plain `end.time - start.time` chooses its unit
  # automatically (secs, mins, ...), and that unit is lost once the value is
  # stored in a plain numeric vector, so entries could silently mix units.
  time.taken <- difftime(end.time, start.time, units = "secs")
  naivebayes_Learnruntime[i] <- as.numeric(time.taken)

  # Prediction time: wall-clock duration of predicting on the test features.
  start.time <- Sys.time()
  pred <- predict(naive_model, X_test)
  end.time <- Sys.time()
  time.taken <- difftime(end.time, start.time, units = "secs")
  naivebayes_Predictruntime[i] <- as.numeric(time.taken)

dataset

entire code

# Read the comma-separated data file picked interactively by the user.
balance_data = read.table(file.choose(), sep=",")
# Put the data frame's columns on the search path; the visible code below
# indexes balance_data directly and does not rely on this.
attach(balance_data)
# Columns 2-5 are taken as the features, column 1 as the label.
x <- balance_data[, c(2,3,4,5)]
y <- balance_data[,1]
# Training features: the first 500 rows of x.
X_train <-head(x,500)
# Training labels: the first 100 elements of y.
Y_train <- head(y,100)
# Test features: the last 122 rows of x.
X_test <-tail(x,122)
# Print the structure of each piece for inspection.
str(X_train)
str(X_test)
str(Y_train)


# Accumulators for per-iteration training durations, one per model.
# Each starts out empty (NULL) and is filled by index inside the loop.
decisionTree_Learnruntime <- NULL
svm_Learnruntime <- NULL
naivebayes_Learnruntime <- NULL
knn_Learnruntime <- NULL

# Accumulators for per-iteration prediction durations, one per model.
decisionTree_Predictruntime <- NULL
svm_Predictruntime <- NULL
naivebayes_Predictruntime <- NULL
knn_Predictruntime <- NULL


# Benchmark: run the fit/predict cycle 20 times for each model and store every
# wall-clock duration, in seconds, at index i of the matching *_runtime vector.
# Packages are attached once up front; re-attaching an already attached
# package on every iteration does nothing useful.
library(e1071)
library(caret)
library(rpart)

for (i in seq_len(20)) {
  # SVM
  # Learn time
  start.time <- Sys.time()
  svm_model <- svm(X_train, Y_train)
  end.time <- Sys.time()
  # Pin the unit to seconds. A plain `end.time - start.time` chooses its unit
  # automatically (secs, mins, ...), and that unit is lost once the value is
  # stored in a plain numeric vector, so entries could silently mix units.
  time.taken <- difftime(end.time, start.time, units = "secs")
  svm_Learnruntime[i] <- as.numeric(time.taken)

  # Prediction time
  start.time <- Sys.time()
  pred <- predict(svm_model, X_test)
  end.time <- Sys.time()
  time.taken <- difftime(end.time, start.time, units = "secs")
  svm_Predictruntime[i] <- as.numeric(time.taken)

  # Decision tree
  # Learn time
  start.time <- Sys.time()
  # NOTE(review): rpart() is documented with a formula/data interface
  # (e.g. rpart(label ~ ., data = df)); confirm that this positional call
  # does what is intended.
  tree_model <- rpart(X_train, Y_train)
  end.time <- Sys.time()
  time.taken <- difftime(end.time, start.time, units = "secs")
  decisionTree_Learnruntime[i] <- as.numeric(time.taken)

  # Prediction time
  start.time <- Sys.time()
  pred <- predict(tree_model, X_test)
  end.time <- Sys.time()
  time.taken <- difftime(end.time, start.time, units = "secs")
  decisionTree_Predictruntime[i] <- as.numeric(time.taken)

  # Naive Bayes
  # Learn time
  start.time <- Sys.time()
  naive_model <- naiveBayes(X_train, Y_train)
  end.time <- Sys.time()
  time.taken <- difftime(end.time, start.time, units = "secs")
  naivebayes_Learnruntime[i] <- as.numeric(time.taken)

  # Prediction time
  start.time <- Sys.time()
  pred <- predict(naive_model, X_test)
  end.time <- Sys.time()
  time.taken <- difftime(end.time, start.time, units = "secs")
  naivebayes_Predictruntime[i] <- as.numeric(time.taken)
}

# Display the recorded learn and prediction runtimes for each model.
# These are bare expressions, so they print only where R auto-prints
# top-level values (e.g. at the console).
svm_Learnruntime
svm_Predictruntime
decisionTree_Learnruntime
decisionTree_Predictruntime
naivebayes_Learnruntime
naivebayes_Predictruntime

Upvotes: 0

Views: 3665

Answers (1)

Julia Wilkerson
Julia Wilkerson

Reputation: 581

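The error indicates a difference in the lengths of your naiveBayes() inputs (X_train and Y_train), which need to be the same (i.e., a corresponding y value for every row of your x data). You never call tapply() yourself: naiveBayes() calls it internally to compute the per-class mean of each predictor, which is why it shows up in the error message. From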

# 500 rows of features ...
X_train <- head(x,500)
# ... but only 100 labels
Y_train <- head(y,100)

we can see the inputs differ (1st 500 vs 1st 100 rows of balance_data). Assigning the outcome data to Y_train from the corresponding rows of X_train will resolve this error message. For example, you could use:

# Row indices of the training split (mirrors 'head(x,500)' from the question)
trainset <- seq_len(500)

# Take labels and features from the same rows so their lengths agree;
# every remaining row becomes test data
Y_train <- balance_data[trainset, 1]
X_train <- balance_data[trainset, -1]
X_test <- balance_data[-trainset, -1]

# Fit naive Bayes on the training split, then predict the held-out rows
naive_model <- naiveBayes(X_train, Y_train)
pred <- predict(naive_model, X_test)

Upvotes: 2

Related Questions