basilisk
basilisk

Reputation: 1277

What should I do if my regression model gets stuck at a high loss value?

I'm using a neural network for a regression problem where I have 3 features and I'm trying to predict one continuous value. I noticed that my network starts learning well, but after 10 epochs it gets stuck at a high loss value and cannot improve any further.

My real dataset has 40,000 data points. I don't know what else to try; I have tried almost everything I know for optimization, but none of it worked. I would appreciate it if someone could guide me on this. I'll post my code, but it may be too messy to follow. I'm sure there is no problem with my implementation. I'm using skorch/PyTorch and some scikit-learn functions:

# Take every column except the last two (bearing and distance) as the
# independent variables.
# Observed behaviour: with a small slice the model learns well, but from about
# 3000 data points onward it gets stuck at a high value -- the loss starts at
# 15, decreases nicely, and then plateaus once it reaches 9.
# With the whole dataset the loss starts at 47, decreases to 36 and then
# plateaus there too.
X = dataset.iloc[:3000, 0:-2].reset_index(drop=True).to_numpy().astype(np.float32)

# The last two columns are the output values; column 0 is used as the bearing
# target and column 1 as the distance target, each kept as an (n, 1) column.
y = dataset.iloc[:3000, -2:].reset_index(drop=True).to_numpy().astype(np.float32)
y_bearing = y[:, 0].reshape(-1, 1)
y_distance = y[:, 1].reshape(-1, 1)

# Split BEFORE scaling. Fitting the scaler on all rows lets the mean/std of
# the held-out rows leak into the training features and makes the validation
# score optimistic. One call splits all three arrays on the same row indices,
# so both targets share exactly the same train/test partition.
(X_train_raw, X_test_raw,
 y_br_train, y_br_test,
 y_dis_train, y_dis_test) = train_test_split(X,
                                             y_bearing,
                                             y_distance,
                                             test_size=0.1,
                                             random_state=42,
                                             shuffle=True)

# Normalize the input values using statistics from the training rows only.
scaler = StandardScaler()
X_br_train = scaler.fit_transform(X_train_raw)
X_br_test = scaler.transform(X_test_raw)

# Both targets are predicted from the same scaled feature matrices.
X_dis_train, X_dis_test = X_br_train, X_br_test

# Whole feature matrix scaled with the train-fitted scaler; kept because other
# code reads X_norm (e.g. its column count to size the network input).
X_norm = scaler.transform(X)

bearing_trainset = Dataset(X_br_train, y_br_train)
bearing_testset = Dataset(X_br_test, y_br_test)

distance_trainset = Dataset(X_dis_train, y_dis_train)
distance_testset = Dataset(X_dis_test, y_dis_test)


def root_mse(y_true, y_pred):
    """Return the square root of ``mean_squared_error(y_true, y_pred)``."""
    squared_error = mean_squared_error(y_true, y_pred)
    return np.sqrt(squared_error)


class RMSELoss(nn.Module):
    """Root-mean-squared-error loss, computed as ``sqrt(MSE(yhat, y) + eps)``.

    The derivative of ``sqrt`` is infinite at zero, so a plain ``sqrt(MSE)``
    produces NaN gradients whenever the predictions match the targets exactly.
    The small ``eps`` added under the root keeps the gradient finite.

    Args:
        eps: Non-negative constant added to the MSE before taking the square
            root. Pass ``0.0`` to get the unstabilised ``sqrt(MSE)``.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the stabilised RMSE between predictions and targets."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)


class AED(nn.Module):
    """Average Euclidean distance loss.

    The Euclidean distance between prediction and target is taken along the
    last dimension (one distance per sample) and then averaged over all
    samples. A single ``torch.dist(yhat, y)`` call would instead return one
    2-norm over the whole batch, which is not an average and grows with the
    batch size.

    For 1-D inputs there is only one vector, so the result is simply the
    Euclidean distance between ``yhat`` and ``y``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, yhat, y):
        """Return the mean per-sample Euclidean distance as a scalar tensor."""
        per_sample = torch.linalg.norm(yhat - y, dim=-1)
        return per_sample.mean()


def train(on_target,
          hidden_units,
          batch_size,
          epochs,
          optimizer,
          lr,
          regularisation_factor,
          train_shuffle):
    """Build and fit a skorch regressor for one of the two targets.

    Args:
        on_target: ``'bearing'`` or ``'distance'`` (case-insensitive); selects
            the datasets and the network class.
        hidden_units: Passed to the network as ``n_hidden``.
        batch_size: Passed to ``NeuralNetRegressor`` as ``batch_size``.
        epochs: Passed to ``NeuralNetRegressor`` as ``max_epochs``.
        optimizer: ``'adam'`` (case-insensitive) selects ``optim.Adam``; any
            other value selects ``optim.SGD``.
        lr: Learning rate.
        regularisation_factor: Passed to the optimizer as ``weight_decay``.
        train_shuffle: Passed as ``iterator_train__shuffle``.

    Returns:
        The ``NeuralNetRegressor`` after ``fit`` has been called on it.

    Raises:
        ValueError: If ``on_target`` is neither ``'bearing'`` nor
            ``'distance'``.
    """
    # Normalise once and reject unknown targets up front. Without this check
    # an unknown target would silently use the bearing datasets with no
    # network at all and only fail later inside skorch.
    target = on_target.lower()
    if target not in ('bearing', 'distance'):
        raise ValueError(
            f"on_target must be 'bearing' or 'distance', got {on_target!r}")
    is_distance = target == 'distance'

    trainset = distance_trainset if is_distance else bearing_trainset
    testset = distance_testset if is_distance else bearing_testset
    print(f"shape of trainset.X = {trainset.X.shape}, shape of trainset.y = {trainset.y.shape}")
    print(f"shape of testset.X = {testset.X.shape}, shape of testset.y = {testset.y.shape}")

    # Per-epoch metrics reported next to the training loss.
    mse = EpochScoring(scoring=mean_squared_error, lower_is_better=True, name='MSE')
    r2 = EpochScoring(scoring=r2_score, lower_is_better=False, name='R2')
    rmse = EpochScoring(scoring=make_scorer(root_mse), lower_is_better=True, name='RMSE')

    # The directory name uses on_target exactly as given (not lower-cased).
    checkpoint = Checkpoint(dirname=f'results/{on_target}/checkpoints')
    train_end_checkpoint = TrainEndCheckpoint(dirname=f'results/{on_target}/checkpoints')

    if is_distance:
        network = DistanceNetwork(n_features=X_norm.shape[1],
                                  n_hidden=hidden_units,
                                  n_out=1)
    else:
        # Size the output from the bearing target itself, not from y_distance.
        network = BearingNetwork(n_features=X_norm.shape[1],
                                 n_hidden=hidden_units,
                                 n_out=y_bearing.shape[1])

    model = NeuralNetRegressor(
        module=network,
        criterion=RMSELoss,
        device='cpu',
        batch_size=batch_size,
        lr=lr,
        optimizer=optim.Adam if optimizer.lower() == 'adam' else optim.SGD,
        optimizer__weight_decay=regularisation_factor,
        max_epochs=epochs,
        iterator_train__shuffle=train_shuffle,

        # Use the held-out test set as the fixed validation split.
        train_split=predefined_split(testset),
        callbacks=[mse, r2, rmse, checkpoint, train_end_checkpoint]
    )

    print(f"{'*' * 10} start training the {on_target} model {'*' * 10}")
    # The targets are carried by the dataset, hence y=None.
    model.fit(trainset, y=None)

    print(f"{'*' * 10} End Training the {on_target} Model {'*' * 10}")
    return model


if __name__ == '__main__':
    # Read the command-line options and hand each one to train() under the
    # keyword it expects.
    cli = parser.parse_args()
    train(
        on_target=cli.on_target,
        hidden_units=cli.hidden_units,
        batch_size=cli.batch_size,
        epochs=cli.epochs,
        optimizer=cli.optimizer,
        lr=cli.learning_rate,
        regularisation_factor=cli.regularisation_lambda,
        train_shuffle=cli.shuffle,
    )

And this is my network declaration:

class DistanceNetwork(nn.Module):
    """Separate neural network for predicting distance.

    A three-layer perceptron with LeakyReLU activations:
    ``n_features -> n_hidden -> 5 -> n_out``.

    Args:
        n_features: Number of input features per sample.
        n_hidden: Width of the first hidden layer.
        n_out: Number of output values per sample.
    """

    def __init__(self, n_features=5, n_hidden=16, n_out=1):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(n_features, n_hidden),
            nn.LeakyReLU(),
            nn.Linear(n_hidden, 5),  # second hidden layer is fixed at 5 units
            nn.LeakyReLU(),
            nn.Linear(5, n_out)
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the prediction.

        Without this method ``nn.Module.__call__`` raises
        ``NotImplementedError``, so the network could not be trained.
        """
        return self.model(x)

Here is the log during training: [enter image description here]

Upvotes: 4

Views: 635

Answers (0)

Related Questions