Reputation: 565
I'm trying to set up a BatchNormalization layer in C++.
The code as I have it looks like this:
mx::Symbol loadBatchNormalization(mx::Symbol previous, std::istream &file,
                                  const std::string &name,
                                  const Shape &inputShape, const Shape &outputShape,
                                  std::map<std::string, mx::NDArray> &args,
                                  bool tensorflow, bool debug)
{
    auto gammaShape_ = ReadShape(file);
    auto gamma_ = ReadFloats(file, sizeOf(gammaShape_));
    auto gammaShape = shape_(gammaShape_);
    mx::NDArray gamma { gamma_, gammaShape, ctx };

    auto betaShape_ = ReadShape(file);
    auto beta_ = ReadFloats(file, sizeOf(betaShape_));
    auto betaShape = shape_(betaShape_);
    mx::NDArray beta { beta_, betaShape, ctx };

    auto movingMeanShape_ = ReadShape(file);
    auto movingMean_ = ReadFloats(file, sizeOf(movingMeanShape_));
    auto movingMeanShape = shape_(movingMeanShape_);
    mx::NDArray movingMean { movingMean_, movingMeanShape, ctx };

    auto movingVarianceShape_ = ReadShape(file);
    auto movingVariance_ = ReadFloats(file, sizeOf(movingVarianceShape_));
    auto movingVarianceShape = shape_(movingVarianceShape_);
    mx::NDArray movingVariance { movingVariance_, movingVarianceShape, ctx };

    mx::Symbol gammaSymbol(name + "_gamma");
    mx::Symbol betaSymbol(name + "_beta");
    mx::Symbol movingMeanSymbol(name + "_movingMean");
    mx::Symbol movingVarianceSymbol(name + "_movingVariance");

    double eps = 0.001;
    mx_float momentum = 0.9; // should never be used?
    bool fix_gamma = false;
    bool use_global_stats = false;
    bool output_mean_var = false;
    int axis = 1;
    bool cudnn_off = false;

    mx::Symbol layer = mx::BatchNorm(
        name,
        previous,
        gammaSymbol,
        betaSymbol,
        movingMeanSymbol,
        movingVarianceSymbol,
        eps,
        momentum,
        fix_gamma,
        use_global_stats,
        output_mean_var,
        axis,
        cudnn_off
    );

    args[name + "_gamma"] = gamma;
    args[name + "_beta"] = beta;
    args[name + "_movingMean"] = movingMean;
    args[name + "_movingVariance"] = movingVariance;

    return layer;
}
In a nutshell: read the gamma, beta, movingMean and movingVariance arrays from the file, create placeholder symbols for them, and build the BatchNorm from those symbols.
But the BatchNorm layer outputs zeros, which makes me think I need to do something else.
Could anyone give me some clues about constructing a BatchNorm layer with previously trained weights?
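For context, here is roughly how the symbol and the args map get used afterwards. This is only a minimal sketch assuming the mxnet-cpp SimpleBind/Forward API and an input variable named "data", not my exact code:

// Minimal sketch (assumed mxnet-cpp API): bind the loaded network
// and run one forward pass in inference mode.
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

namespace mx = mxnet::cpp;

mx::NDArray runInference(mx::Symbol net,
                         std::map<std::string, mx::NDArray> &args,
                         const mx::NDArray &input,
                         const mx::Context &ctx)
{
    args["data"] = input; // assumes the network input variable is named "data"

    // Note: MXNet treats BatchNorm's moving mean/variance as auxiliary states,
    // so they may belong in the aux map rather than the args map.
    std::map<std::string, mx::NDArray> auxMap;

    mx::Executor *exec = net.SimpleBind(ctx, args,
                                        std::map<std::string, mx::NDArray>(),
                                        std::map<std::string, mx::OpReqType>(),
                                        auxMap);
    exec->Forward(false); // false = inference, so the moving statistics are used
    mx::NDArray out = exec->outputs[0].Copy(mx::Context::cpu());
    out.WaitToRead();
    delete exec;
    return out;
}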
Upvotes: -2
Views: 204
Reputation: 565
As of Jan 23, 2020, MXNet's BatchNorm does not seem to work correctly when constructed with the gamma, beta, movingMean and movingVariance from a Keras-MXNet trained network.
Look at the Keras source code for how their BatchNormalization is applied at prediction time.
A possible solution is something like:
mx::Symbol generateBatchNormalization(const std::string &name,
                                      mx::Symbol &inputSymbol_,
                                      mx::Symbol &gammaSymbol,
                                      mx::Symbol &betaSymbol,
                                      mx::Symbol &movingMeanSymbol,
                                      mx::Symbol &movingVarianceSymbol)
{
    // normalization = (inputSymbol - movingMeanSymbol) / mx::sqrt(movingVarianceSymbol + eps) * gammaSymbol + betaSymbol
    // Swap NCHW -> N,W,H,C so the per-channel parameters broadcast along the last axis.
    auto inputSymbol = mx::SwapAxis(inputSymbol_, 1, 3);
    auto n0 = mx::broadcast_sub(inputSymbol, movingMeanSymbol);
    double epsilon = 0.0001;
    auto n1 = mx::sqrt(movingVarianceSymbol + epsilon);
    auto n2 = mx::broadcast_div(n0, n1);
    auto n3 = mx::broadcast_mul(n2, gammaSymbol);
    auto n4 = mx::broadcast_add(n3, betaSymbol);
    // Swap back to the original NCHW layout.
    auto normalization = mx::SwapAxis(n4, 1, 3);
    return normalization;
}
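For example, the loader from the question could call this helper in place of mx::BatchNorm (a sketch only, reusing the names defined there). With this approach the four parameter arrays stay ordinary arguments supplied through args:

// Inside loadBatchNormalization, replacing the mx::BatchNorm call:
mx::Symbol layer = generateBatchNormalization(name,
                                              previous,
                                              gammaSymbol,
                                              betaSymbol,
                                              movingMeanSymbol,
                                              movingVarianceSymbol);
args[name + "_gamma"] = gamma;
args[name + "_beta"] = beta;
args[name + "_movingMean"] = movingMean;
args[name + "_movingVariance"] = movingVariance;
return layer;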
Upvotes: 0