Reputation: 91
I want to apply CFS (correlation-based feature selection) with 10-fold cross-validation to select the important features in my dataset. My original dataset contains 71 independent variables and a target variable with 2 class levels. In addition, I chose an SVM model to test the accuracy of the selected features. I got this error: "Error in eval(predvars, data, env) : numeric 'envir' arg not of length one", followed by "Error during wrapup: cannot open the connection".
library(caret)
library(e1071)
library(FSelector)
# CFS feature selection evaluated with 10-fold cross-validation and an SVM.
# Expects a data frame `data` whose binary target column is `Cardio1M`.

# Seed before any random step so the partition and the fold assignment
# are reproducible.
set.seed(10)

# Split data into train and test, stratified on the class label.
# The target is stored as a factor so that svm() fits a classifier.
class_levels <- levels(as.factor(data$Cardio1M))
trainIndex <- createDataPartition(
  as.factor(data$Cardio1M),
  p = 0.7,
  list = FALSE
)
data_train <- data[trainIndex, ]
data_test <- data[-trainIndex, ]
data_train$Cardio1M <- factor(data_train$Cardio1M, levels = class_levels)
data_test$Cardio1M <- factor(data_test$Cardio1M, levels = class_levels)

# Applying 10-fold cross-validation on the training set
# (external cross-validation).
nrFolds <- 10
# Generate an array containing the fold number for each sample (row),
# then shuffle it.
folds <- rep_len(seq_len(nrFolds), nrow(data_train))
folds <- sample(folds, nrow(data_train))

# Preallocate per-fold results: validation accuracy and selected features.
fold_accuracy <- rep(NA_real_, nrFolds)
fold_subsets <- vector("list", nrFolds)

# Actual cross-validation.
for (k in seq_len(nrFolds)) {
  # Actual split of the data. The validation fold keeps the target column
  # because it is needed to score the predictions.
  fold <- which(folds == k)
  data.tr <- data_train[-fold, , drop = FALSE]
  data.validation <- data_train[fold, , drop = FALSE]

  # Relevant and non-redundant features selected on the training part only.
  fold_subsets[[k]] <- cfs(Cardio1M ~ ., data.tr)
  if (length(fold_subsets[[k]]) == 0) {
    # Nothing selected: leave this fold's accuracy as NA and move on.
    next
  }

  # Train the SVM using only the selected features.
  svm_model <- svm(
    reformulate(fold_subsets[[k]], response = "Cardio1M"),
    data = data.tr,
    cost = 0.1,
    kernel = "radial"
  )
  # Tuning SVM hyperparameters (left disabled, as in the original):
  # tuned <- tune(svm, Cardio1M ~ ., data = data.tr,
  #               ranges = list(cost = c(0.001, 0.01, 0.1, 1, 100)))

  # Predict the validation fold: the model comes first, the data second.
  p <- predict(svm_model, newdata = data.validation)
  fold_accuracy[[k]] <- mean(p == data.validation$Cardio1M)
}

# Final selected subset: the features from the fold with the best
# validation accuracy (which.max() ignores NA entries).
best_fold <- which.max(fold_accuracy)
if (length(best_fold) == 0) {
  stop("CFS selected no features in any fold.", call. = FALSE)
}
finalSubset <- fold_subsets[[best_fold]]

# Refit on the whole training set with the final subset, then evaluate
# once on the held-out test set.
final_model <- svm(
  reformulate(finalSubset, response = "Cardio1M"),
  data = data_train,
  cost = 0.1,
  kernel = "radial"
)
# Drop the target by name instead of by a hard-coded column position.
data_t <- data_test[, setdiff(names(data_test), "Cardio1M"), drop = FALSE]
prediction <- predict(final_model, newdata = data_t[, finalSubset, drop = FALSE])
# Confusion table of predicted versus actual class on the test set.
accuracy <- table(prediction, data_test$Cardio1M)
Upvotes: 0
Views: 473
Reputation: 91
With a filter method we don't need to use cross-validation, because the method is independent of the classifier and does not introduce any variability.
library(caret)
library(e1071)
library(FSelector)
# CFS feature selection on the training set followed by an SVM evaluated
# on a held-out test set.
# Expects a data frame `data` whose binary target column is `Cardio1M`.

# Seed before the random partition so the split is reproducible.
set.seed(10)

# Split data into train and test, stratified on the class label.
# The target is stored as a factor so that svm() fits a classifier.
class_levels <- levels(as.factor(data$Cardio1M))
trainIndex <- createDataPartition(
  as.factor(data$Cardio1M),
  p = 0.7,
  list = FALSE
)
data_train <- data[trainIndex, ]
data_test <- data[-trainIndex, ]
data_train$Cardio1M <- factor(data_train$Cardio1M, levels = class_levels)
data_test$Cardio1M <- factor(data_test$Cardio1M, levels = class_levels)

# Relevant and non-redundant features selected on the training data.
# cfs() returns a character vector of column names; keep it as a vector,
# because a list is not a valid column subscript for a data frame.
subset <- cfs(Cardio1M ~ ., data_train)
if (length(subset) == 0) {
  stop("CFS selected no features.", call. = FALSE)
}
# Final selected subset.
finalSubset <- subset

# Then fit the model with the new subset. The target column is kept
# alongside the selected features so the formula below can find it.
train <- data_train[, c(subset, "Cardio1M"), drop = FALSE]

# Using selected features to train the SVM.
svm_model <- svm(Cardio1M ~ ., data = train, cost = 0.1, kernel = "radial")
# Tuning SVM hyperparameters (left disabled, as in the original):
# tuned <- tune(svm, Cardio1M ~ ., data = train,
#               ranges = list(cost = c(0.001, 0.01, 0.1, 1, 100)))

# Predict the test set: the model comes first, the new data second.
p <- predict(svm_model, newdata = data_test[, subset, drop = FALSE])

# Accuracy of the model; the target is referenced by name rather than by
# a hard-coded column position.
accuracy <- mean(p == data_test$Cardio1M)
Upvotes: 1