Joseph Wu
Joseph Wu

Reputation: 91

How to train model with TensorflowSharp

I am new to TensorFlowSharp. I have seen some examples of how to load a model and predict using TensorFlowSharp, but I cannot find examples of training a model with TensorFlowSharp. I gave it a try and am stuck at the Optimizer now. There are too many parameters for ApplyAdam, and I am not even sure this is the right function to use.

Following is my WORKING Tensorflow code.

# Load the feature matrix and the label column.
# FIX: the original assigned the dataY.csv read to dataX, so the labels
# were never loaded.
dataX = pd.read_csv('dataX.csv', sep = ',', header = None)
dataY = pd.read_csv('dataY.csv', sep = ',', header = None)
# Placeholders for a batch of inputs and binary targets.
# NOTE(review): `trainX`, `h0size`, `seed`, `learning_rate` and `init` are
# assumed to be defined earlier in the script — confirm.
x = tf.placeholder(tf.float32, [None, trainX.shape[1]]) 
y = tf.placeholder(tf.float32, [None, 1]) 
# Single linear layer: h = x @ W0 + b0, squashed through a sigmoid.
W0 = tf.Variable(tf.random_normal([trainX.shape[1], h0size], seed = seed))
b0 = tf.Variable(tf.random_normal([h0size], seed = seed))
h = tf.matmul(x, W0) + b0
pred = tf.sigmoid(h)
# Numerically stable sigmoid cross-entropy computed on the raw logits.
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = h))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(50):
        # FIX: feed the labels (dataY) for placeholder y; the original fed
        # dataX for both placeholders.
        _, c = sess.run([optimizer, cost], feed_dict ={x: dataX, y: dataY})

Following is my attempt to convert it to TensorFlowSharp

        // NOTE(review): this is the non-working attempt the question is about;
        // the answer below shows the corrected approach (manual gradients).
        using (var session = new TFSession())
        {
            var graph = session.Graph;
            //dataX = pd.read_csv('dataX.csv', sep = ',', header = None)
            //dataY = pd.read_csv('dataY.csv', sep = ',', header = None)
            float[,] aX = LoadCsv("dataX.csv");
            float[,] aY = LoadCsv("dataY.csv");
            TFTensor dataX = new TFTensor(aX);
            TFTensor dataY = new TFTensor(aY);
            //x = tf.placeholder(tf.float32, [None, trainX.shape[1]]) 
            //y = tf.placeholder(tf.float32, [None, 1]) 
            var x = graph.Placeholder(TFDataType.Float, new TFShape(dataX.Shape));
            var y = graph.Placeholder(TFDataType.Float, new TFShape(dataY.Shape));
            var W0 = graph.Variable(graph.RandomNormal(new TFShape(dataX.Shape[1], 1)));
            var b0 = graph.Variable(graph.RandomNormal(new TFShape(1)));
            var h = graph.Add(graph.MatMul(x, W0), b0);
            var pred = graph.Sigmoid(h);
            var cost = graph.ReduceMean(graph.SigmoidCrossEntropyWithLogits(y,h));
            //optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
            // Too many parameters for ApplyAdam, not even sure this is the right function to use. 
            // NOTE(review): the declaration below cannot compile — TFOutput is a
            // struct with no implicit conversion from numeric literals, and
            // ApplyAdam expects variable/slot tensors (m, v, beta powers) plus a
            // gradient produced by graph.AddGradients, not scalar constants.
            TFOutput var, m = 0, v = 0, beta1_power, beta2_power, lr = 0.01, beta1 = 0.9, beta2 = 0.999, epsilon = 0.00000001, grad;
            var optimizer = graph.ApplyAdam(var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad);
            for (int i = 0; i < 50 ; i++)
                session.GetRunner().Run(
                    inputs: new[] { optimizer },
                    inputValues: new[] { dataX, dataY }
                    );
}

Upvotes: 2

Views: 3661

Answers (1)

Art Kipel
Art Kipel

Reputation: 21

The TensorFlowSharp API doesn't provide optimizer classes yet. So to minimize the cost function you have to compute the gradients through the API and then manually update the trainable parameters of your model. Here is an example with separate classes for convenience:

/// <summary>
/// Logistic-regression model trained by plain gradient descent:
/// one LinearLayer, sigmoid cross-entropy cost, manual gradient updates.
/// </summary>
class Model : IDisposable
{
    // Session / graph that own every op created below.
    TFSession _session;

    TFGraph _graph;

    // Placeholders for the feature matrix and the label column.
    TFOutput _input;

    TFOutput _output;

    // Single linear layer producing the logits.
    LinearLayer _y_out;

    // Mean sigmoid cross-entropy over the batch.
    TFOutput _cost;

    TFTensor _dataX;

    TFTensor _dataY;

    GradientDescentOptimizer _gradientDescentOptimizer;


    public Model()
    {
        float[,] aX = LoadCsv("dataX.csv");
        float[,] aY = LoadCsv("dataY.csv");
        _dataX = new TFTensor(aX);
        _dataY = new TFTensor(aY);

        _session = new TFSession();

        _graph = _session.Graph;

        _input = _graph.Placeholder(TFDataType.Float);
        _output = _graph.Placeholder(TFDataType.Float);

        // FIX: the layer's input size is the number of feature columns,
        // i.e. Shape[1]; the original passed Shape[0], the row count
        // (compare trainX.shape[1] in the question's Python code).
        _y_out = new LinearLayer(_graph, _input, (int)_dataX.Shape[1], 1);

        // FIX: assign the _cost field — the original assigned an undeclared
        // local `cost`, which does not compile.
        // NOTE(review): arguments are passed as (logits, labels) here;
        // confirm against the TensorFlowSharp signature.
        _cost = _graph.ReduceMean(_graph.SigmoidCrossEntropyWithLogits(_y_out.Result, _output));

        _gradientDescentOptimizer = new GradientDescentOptimizer(_graph, _cost, _y_out.W, _y_out.b);
        _gradientDescentOptimizer.ApplyGradientDescent(_graph);

        var runner = _session.GetRunner();

        // FIX: run BOTH initializer ops — the original only targeted InitB,
        // leaving W without an initial value.
        runner.AddTarget(_y_out.InitW.Operation);
        runner.AddTarget(_y_out.InitB.Operation);

        runner.Run();
    }

    /// <summary>
    /// Runs one gradient-descent step over the full data set by fetching
    /// both ApplyGradientDescent update ops (W and b).
    /// </summary>
    public void TrainModelIteration()
    {
        var runner = _session.GetRunner();

        runner.AddInput(_input, _dataX);
        runner.AddInput(_output, _dataY);

        // Fetching an update op forces it to execute in this run.
        for (int i = 0; i < 2; i++)
        {
            runner.Fetch(_gradientDescentOptimizer.Updates[i]);
        }

        runner.Run();
    }


    public void Dispose()
    {
        _graph.Dispose();
        _session.Dispose();
    }
}


/// <summary>
/// A fully-connected layer (x * W + b) with randomly-initialized
/// weight and bias variables. The Assign ops exposed via InitW/InitB
/// must be run once before the layer is used.
/// </summary>
class LinearLayer
{
    // Output of the affine transform: MatMul(x, W) + b.
    public TFOutput Result { get; set; }

    // Weight matrix variable, shape [inSize, outSize].
    public TFOutput W { get; set; }

    // Bias vector variable, shape [outSize].
    public TFOutput b { get; set; }

    // Assign ops that give W and b their random initial values.
    public TFOutput InitW { get; set; }

    public TFOutput InitB { get; set; }

    public LinearLayer(TFGraph graph, TFOutput x, int inSize, int outSize)
    {
        // Weight variable plus its random-uniform initializer.
        var weightShape = new TFShape(inSize, outSize);
        W = graph.VariableV2(weightShape, TFDataType.Float);
        InitW = graph.Assign(
            W,
            graph.RandomUniform(graph.Const(weightShape), TFDataType.Float));

        // Bias variable plus its random-uniform initializer.
        var biasShape = new TFShape(outSize);
        b = graph.VariableV2(biasShape, TFDataType.Float);
        InitB = graph.Assign(
            b,
            graph.RandomUniform(graph.Const(biasShape), TFDataType.Float));

        // Affine transform of the input.
        Result = graph.Add(graph.MatMul(x, W), b);
    }
}


/// <summary>
/// Minimal gradient-descent optimizer: computes d(grad)/d(variable) for each
/// trainable variable via AddGradients, then builds one ApplyGradientDescent
/// update op per variable. Fetch the ops in Updates to perform a step.
/// </summary>
class GradientDescentOptimizer
{
    // Trainable variables, paired index-for-index with _gradients.
    private TFOutput[] _variables;

    // One ApplyGradientDescent op per variable; populated by ApplyGradientDescent().
    public TFOutput[] Updates { get; set; }

    private TFOutput[] _gradients;

    // `grad` is the scalar output to minimize (the cost); w and b are the
    // variables to train.
    public GradientDescentOptimizer(TFGraph graph, TFOutput grad, TFOutput w, TFOutput b)
    {
        // FIX: size the arrays to the actual number of variables — the
        // original allocated TFOutput[4] and left two slots unused.
        _variables = new TFOutput[] { w, b };

        _gradients = graph.AddGradients(new TFOutput[] { grad }, _variables);

        Updates = new TFOutput[_variables.Length];
    }

    /// <summary>
    /// Builds the update op variable[i] -= alpha * gradient[i] for every
    /// trainable variable.
    /// </summary>
    public void ApplyGradientDescent(TFGraph graph, float alpha = 0.01f)
    {
        TFOutput tfAlpha = graph.Const(alpha);

        for (int i = 0; i < _variables.Length; i++)
        {
            Updates[i] = graph.ApplyGradientDescent(_variables[i], tfAlpha, _gradients[i]);
        }
    }
}

Upvotes: 2

Related Questions