Reputation: 2220
I am trying to find the best parameters for a DNN model using RandomizedSearchCV, but each time I run the code I get a different set of best parameters.
I tried adding random_state=42 to GridSearchCV, but it is not an accepted parameter there.
The steps of my experiment:
I set the random seed for my experiment:
import os
import random
import numpy as np
import torch

seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Enforce deterministic behavior in PyTorch
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
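For reference, recent PyTorch versions also offer a global determinism switch; the sketch below goes beyond my snippet above and assumes PyTorch 1.8+ (and CUDA 10.2+ for the cuBLAS workspace variable):

import os
import torch

# Ask PyTorch to use only deterministic kernels (raises an error where none exist)
torch.use_deterministic_algorithms(True)
# Required for deterministic cuBLAS kernels on CUDA 10.2+
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'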
I split the dataset into training and test sets:
from sklearn.model_selection import train_test_split

data_indices = list(range(2400))
train_indices, test_indices = train_test_split(
    data_indices, random_state=42, test_size=0.20, shuffle=True, stratify=y)
X_train, y_train = X[train_indices], y[train_indices]
X_test, y_test = X[test_indices], y[test_indices]
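To rule out the split itself as a source of randomness, a quick check (a sketch reusing the train_indices variable above) is to print a digest of the indices and compare it across runs:

import hashlib

# The same digest on every run means the train/test split is reproducible
digest = hashlib.md5(str(sorted(train_indices)).encode()).hexdigest()
print("train split digest:", digest)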
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # assumed device setup, not shown in my original snippet
# Define the model
class MlpNet(nn.Module):
    def __init__(self, layer_sizes=[], activation=nn.ReLU):
        super(MlpNet, self).__init__()
        layers = []
        self.act = activation
        for l_id in range(len(layer_sizes) - 1):
            layers.append(nn.Linear(layer_sizes[l_id], layer_sizes[l_id + 1]))
            if l_id < len(layer_sizes) - 2:  # No activation or batchnorm for the last layer
                layers.append(nn.BatchNorm1d(num_features=layer_sizes[l_id + 1], affine=False))
                layers.append(self.act())
        self.layers = nn.Sequential(*layers)
        self.prob = nn.Sigmoid()
        self.float()

    def forward(self, x):
        x = self.layers(x)
        return self.prob(x)
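As a quick sanity check of the module on its own (not part of the search; the 108 input features match my grid below):

# Instantiate one of the candidate architectures and run a dummy batch through it
net = MlpNet(layer_sizes=[108, 128, 64, 1])
out = net(torch.randn(4, 108))
print(out.shape)  # torch.Size([4, 1]), one sigmoid probability per sample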
# Create the model
model = NeuralNetClassifier(
    module=MlpNet,
    criterion=nn.BCELoss,
    optimizer=optim.Adam,
    device=device,
    verbose=False
)
param_grid = {
    'optimizer__lr': [0.001, 0.01],
    'module__layer_sizes': [[108, 256, 256, 128, 1],
                            [108, 256, 256, 1],
                            [108, 128, 64, 1],
                            [108, 128, 128, 1]],
    'module__activation': [nn.ReLU, nn.Tanh, nn.Sigmoid],
    'batch_size': [10, 30, 50, 80, 100],
    'max_epochs': [10, 50, 80, 100]
}
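For context, this grid spans 2 x 4 x 3 x 5 x 4 = 480 combinations and n_iter=5 draws only five of them, so the draw itself has to be seeded. A sketch with sklearn's ParameterSampler (which RandomizedSearchCV uses internally) shows that a fixed random_state makes the five sampled candidates reproducible:

from sklearn.model_selection import ParameterSampler

# With a fixed random_state, the same five candidates are drawn on every run
for params in ParameterSampler(param_grid, n_iter=5, random_state=42):
    print(params)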
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=5,
                                   scoring='accuracy', n_jobs=4, cv=skf.split(X_train, y_train),
                                   random_state=42)
grid_result = random_search.fit(X_train.float(), torch.unsqueeze(y_train.float(), 1))
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
I have seen this question on Stack Overflow, but none of the answers solved my problem.
Each time I run the code, I get a different set of best parameters, and these different hyperparameters give different training and test accuracies.
I want to reproduce the result of the hyperparameter tuning. How can I get the same set of hyperparameters on every run?
Upvotes: 0
Views: 140