Reputation: 565
I'm trying to set up a BatchNormalization layer in C++.
The code as I have it looks like this:
mx::Symbol loadBatchNormalization(mx::Symbol previous, std::istream &file,
                                  const std::string &name,
                                  const Shape &inputShape, const Shape &outputShape,
                                  std::map<std::string, mx::NDArray> &args,
                                  bool tensorflow, bool debug)
{
    auto gammaShape_ = ReadShape(file);
    auto gamma_ = ReadFloats(file, sizeOf(gammaShape_));
    auto gammaShape = shape_(gammaShape_);
    mx::NDArray gamma { gamma_, gammaShape, ctx };

    auto betaShape_ = ReadShape(file);
    auto beta_ = ReadFloats(file, sizeOf(betaShape_));
    auto betaShape = shape_(betaShape_);
    mx::NDArray beta { beta_, betaShape, ctx };

    auto movingMeanShape_ = ReadShape(file);
    auto movingMean_ = ReadFloats(file, sizeOf(movingMeanShape_));
    auto movingMeanShape = shape_(movingMeanShape_);
    mx::NDArray movingMean { movingMean_, movingMeanShape, ctx };

    auto movingVarianceShape_ = ReadShape(file);
    auto movingVariance_ = ReadFloats(file, sizeOf(movingVarianceShape_));
    auto movingVarianceShape = shape_(movingVarianceShape_);
    mx::NDArray movingVariance { movingVariance_, movingVarianceShape, ctx };

    mx::Symbol gammaSymbol(name + "_gamma");
    mx::Symbol betaSymbol(name + "_beta");
    mx::Symbol movingMeanSymbol(name + "_movingMean");
    mx::Symbol movingVarianceSymbol(name + "_movingVariance");

    double eps = 0.001;
    mx_float momentum = 0.9; // should never be used?
    bool fix_gamma = false;
    bool use_global_stats = false;
    bool output_mean_var = false;
    int axis = 1;
    bool cudnn_off = false;

    mx::Symbol layer = mx::BatchNorm(
        name,
        previous,
        gammaSymbol,
        betaSymbol,
        movingMeanSymbol,
        movingVarianceSymbol,
        eps,
        momentum,
        fix_gamma,
        use_global_stats,
        output_mean_var,
        axis,
        cudnn_off
    );

    args[name + "_gamma"] = gamma;
    args[name + "_beta"] = beta;
    args[name + "_movingMean"] = movingMean;
    args[name + "_movingVariance"] = movingVariance;

    return layer;
}
In a nutshell: read the gamma, beta, movingMean and movingVariance arrays from the file, create placeholder symbols for them, and build the BatchNorm from those symbols.
But the BatchNorm layer outputs zeros, which makes me think I need to do something else.
Could anyone give me some clues about constructing a BatchNorm layer with previously trained weights?
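For context, here is roughly how the symbol and the args map get used afterwards. This is only a minimal sketch assuming the mxnet-cpp SimpleBind/Forward API and an input variable named "data", not my exact code:

// Minimal sketch (assumed mxnet-cpp API): bind the loaded network
// and run one forward pass in inference mode.
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

namespace mx = mxnet::cpp;

mx::NDArray runInference(mx::Symbol net,
                         std::map<std::string, mx::NDArray> &args,
                         const mx::NDArray &input,
                         const mx::Context &ctx)
{
    args["data"] = input; // assumes the network input variable is named "data"

    // Note: MXNet treats BatchNorm's moving mean/variance as auxiliary states,
    // so they may belong in the aux map rather than the args map.
    std::map<std::string, mx::NDArray> auxMap;

    mx::Executor *exec = net.SimpleBind(ctx, args,
                                        std::map<std::string, mx::NDArray>(),
                                        std::map<std::string, mx::OpReqType>(),
                                        auxMap);
    exec->Forward(false); // false = inference, so the moving statistics are used
    mx::NDArray out = exec->outputs[0].Copy(mx::Context::cpu());
    out.WaitToRead();
    delete exec;
    return out;
}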
Upvotes: -2
Views: 204
Reputation: 565
As of Jan 23, 2020, MXNet's BatchNorm does not seem to work correctly when constructed with the gamma, beta, movingMean and movingVariance from a Keras-MXNet trained network.
Look at the Keras source code for how their BatchNormalization is applied at prediction time.
A possible solution is something like:
mx::Symbol generateBatchNormalization(const std::string &name,
                                      mx::Symbol &inputSymbol_,
                                      mx::Symbol &gammaSymbol,
                                      mx::Symbol &betaSymbol,
                                      mx::Symbol &movingMeanSymbol,
                                      mx::Symbol &movingVarianceSymbol)
{
    // normalization = (inputSymbol - movingMeanSymbol) / mx::sqrt(movingVarianceSymbol + eps) * gammaSymbol + betaSymbol
    // Swap NCHW -> N,W,H,C so the per-channel parameters broadcast along the last axis.
    auto inputSymbol = mx::SwapAxis(inputSymbol_, 1, 3);
    auto n0 = mx::broadcast_sub(inputSymbol, movingMeanSymbol);
    double epsilon = 0.0001;
    auto n1 = mx::sqrt(movingVarianceSymbol + epsilon);
    auto n2 = mx::broadcast_div(n0, n1);
    auto n3 = mx::broadcast_mul(n2, gammaSymbol);
    auto n4 = mx::broadcast_add(n3, betaSymbol);
    // Swap back to the original NCHW layout.
    auto normalization = mx::SwapAxis(n4, 1, 3);
    return normalization;
}
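For example, the loader from the question could call this helper in place of mx::BatchNorm (a sketch only, reusing the names defined there). With this approach the four parameter arrays stay ordinary arguments supplied through args:

// Inside loadBatchNormalization, replacing the mx::BatchNorm call:
mx::Symbol layer = generateBatchNormalization(name,
                                              previous,
                                              gammaSymbol,
                                              betaSymbol,
                                              movingMeanSymbol,
                                              movingVarianceSymbol);
args[name + "_gamma"] = gamma;
args[name + "_beta"] = beta;
args[name + "_movingMean"] = movingMean;
args[name + "_movingVariance"] = movingVariance;
return layer;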
Upvotes: 0