Reputation: 23
I am trying to export the weight and bias values from a model I got by adapting this example: https://github.com/decentralion/tf-dev-summit-tensorboard-tutorial.
At first I thought I would just print the variables to the screen, copy them into an Excel file, and then export them as a .csv file so that I could use them in MATLAB. But because there were so many weights, this wasn't a viable option, so I used NumPy to save them as .csv files instead. Everything was working fine, but the model doesn't seem to work when I run it in MATLAB. It is of course possible that I made an error in my MATLAB code.
BUT I've noticed that the values being printed by my code are not the same as the ones getting written to the .csv file.
As I am new to TensorFlow and Python, I wrote the code by "stitching" together different examples.
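From what I can tell, NumPy abbreviates and rounds large arrays when it prints them, while np.savetxt writes every element at full precision by default, so the console output and the .csv contents can look different even when the underlying values are identical. A minimal sketch of the difference (the array a is just a stand-in for a weight matrix):

import numpy as np

a = np.random.randn(1024, 10).astype(np.float32)  # stand-in for a weight matrix
print(a)                               # abbreviated with '...' and rounded to ~8 digits
np.savetxt("a.csv", a, delimiter=",")  # writes every element, default format '%.18e'
np.set_printoptions(threshold=np.inf, precision=18)
print(a)                               # now the console matches the file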
Here is the TensorFlow code:
# WITHOUT CONVOLUTION
import os
import os.path
import shutil
import tensorflow as tf
import numpy as np

LOGDIR = "/home/ubuntu/ml/tf-dev-summit-tensorboard-tutorial-master/mnist_NOCONV/"
LABELS = os.path.join(os.getcwd(), "labels_1024.tsv")
SPRITES = os.path.join(os.getcwd(), "sprite_1024.png")

### MNIST EMBEDDINGS ###
mnist = tf.contrib.learn.datasets.mnist.read_data_sets(train_dir=LOGDIR + "data", one_hot=True)

### Get a sprite and labels file for the embedding projector ###
if not (os.path.isfile(LABELS) and os.path.isfile(SPRITES)):
    print("Necessary data files were not found. Run this command from inside the "
          "repo provided at "
          "https://github.com/dandelionmane/tf-dev-summit-tensorboard-tutorial.")
    exit(1)

def fc_layer(input, size_in, size_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        act = tf.matmul(input, w) + b
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

def mnist_model(learning_rate, use_two_fc, use_two_conv, hparam):
    tf.reset_default_graph()
    sess = tf.Session()

    # Set up placeholders, and reshape the data
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image('input', x_image, 10)
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")

    # FC layer 1
    w1 = tf.Variable(tf.truncated_normal([784, 1024], stddev=0.1), name="W1")
    b1 = tf.Variable(tf.constant(0.1, shape=[1024]), name="B1")
    act1 = tf.matmul(x, w1) + b1
    tf.summary.histogram("weights", w1)
    tf.summary.histogram("biases", b1)
    tf.summary.histogram("activations", act1)
    relu = tf.nn.relu(act1)
    embedding_input = relu
    embedding_size = 1024
    tf.summary.histogram("fc1/relu", relu)

    # FC layer 2
    w2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1), name="W2")
    b2 = tf.Variable(tf.constant(0.1, shape=[10]), name="B2")
    logits = tf.matmul(relu, w2) + b2
    tf.summary.histogram("weights", w2)
    tf.summary.histogram("biases", b2)
    tf.summary.histogram("activations", logits)

    #if use_two_fc:
    #  fc1 = fc_layer(x, 784, 1024, "fc1")
    #  relu = tf.nn.relu(fc1)
    #  embedding_input = relu
    #  tf.summary.histogram("fc1/relu", relu)
    #  embedding_size = 1024
    #logits = fc_layer(relu, 1024, 10, "fc2")

    with tf.name_scope("xent"):
        xent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y), name="xent")
        tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all()

    embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
    assignment = embedding.assign(embedding_input)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(LOGDIR + hparam)
    writer.add_graph(sess.graph)

    config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
    embedding_config = config.embeddings.add()
    embedding_config.tensor_name = embedding.name
    embedding_config.sprite.image_path = SPRITES
    embedding_config.metadata_path = LABELS
    # Specify the width and height of a single thumbnail.
    embedding_config.sprite.single_image_dim.extend([28, 28])
    tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)

    for i in range(2001):
        batch = mnist.train.next_batch(100)
        if i % 5 == 0:
            [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: batch[0], y: batch[1]})
            writer.add_summary(s, i)
        if i % 500 == 0:
            sess.run(assignment, feed_dict={x: mnist.test.images[:1024], y: mnist.test.labels[:1024]})
            saver.save(sess, os.path.join(LOGDIR, "model.ckpt"), i)
        sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})

    # Get the values of the variables and write them to .csv files
    w1_val, b1_val, w2_val, b2_val = sess.run([w1, b1, w2, b2])
    np.savetxt("w1.csv", w1_val, delimiter=",")
    np.savetxt("b1.csv", b1_val, delimiter=",")
    np.savetxt("w2.csv", w2_val, delimiter=",")
    np.savetxt("b2.csv", b2_val, delimiter=",")

    tvars = tf.trainable_variables()
    tvars_vals = sess.run(tvars)
    print("\n \n \n")
    for var, val in zip(tvars, tvars_vals):
        print(var.name, val)  # Prints the name of the variable alongside its value.
    print("\n \n \n")

def main():
    # You can try adding some more learning rates
    learning_rate = 1E-4
    use_two_fc = True
    hparam = "OCR_2FC_NOCONV"
    print('Starting run for %s' % hparam)

    # Actually run with the new settings
    mnist_model(learning_rate, use_two_fc, False, hparam)

    print('Done training!')
    print('Run `tensorboard --logdir=%s --host localhost --port 8088` to see the results.' % LOGDIR)

if __name__ == '__main__':
    main()
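To rule out the export itself, the .csv files can be loaded back into NumPy and the forward pass re-run there; if that reproduces the TensorFlow accuracy, the weights were exported correctly and the problem must be on the MATLAB side. A sketch, assuming it runs at the end of the same script, where mnist is already loaded:

import numpy as np

w1_ = np.loadtxt("w1.csv", delimiter=",")
b1_ = np.loadtxt("b1.csv", delimiter=",")
w2_ = np.loadtxt("w2.csv", delimiter=",")
b2_ = np.loadtxt("b2.csv", delimiter=",")

X = mnist.test.images                         # shape (10000, 784), rows unrolled row-major
Y = mnist.test.labels                         # one-hot, because of one_hot=True above
hidden = np.maximum(np.dot(X, w1_) + b1_, 0)  # FC layer 1 + ReLU
logits = np.dot(hidden, w2_) + b2_            # FC layer 2
print("Accuracy from .csv weights:", np.mean(np.argmax(logits, 1) == np.argmax(Y, 1)))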
And the MATLAB code:
% This is a simple matlab representation of a mnist OCR system.
% Grayscale images are saved in a .csv format (as 2d arrays) in the
% folder '/slike'
clear
% Import the image
IMAGE = csvread("slike\img0.csv");
% Visualize it
image(IMAGE);
% Import the weights
w1 = csvread("mnist_NOCONV\w1.csv");
b1 = csvread("mnist_NOCONV\b1.csv");
w2 = csvread("mnist_NOCONV\w2.csv");
b2 = csvread("mnist_NOCONV\b2.csv");
% Unroll image to use it as an input vector
x = transpose(IMAGE(:));
% Fully connected layer 1
act1 = x*w1 + transpose(b1);
relu = ReLU(act1);
% Fully connected layer 2
act2 = relu*w2 + transpose(b2);
res = softmax(act2);
act2
res
The softmax function in MATLAB:
function f = softmax(X)
    assert(size(X,1) == 1); % X should be a row vector
    exps = exp(X - max(X)); % subtract max(X) to prevent overflow
    f = exps / sum(exps);
end
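Subtracting max(X) is safe because softmax is shift-invariant: exp(x_i - m) / sum_j exp(x_j - m) = exp(x_i) / sum_j exp(x_j) for any constant m, so the result is unchanged while exp() is kept from overflowing on large inputs.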
And the ReLU function:
% ReLU activation function. We will need this later.
function f = ReLU(X)
    f = arrayfun(@(x) ReLU0D(x), X);
end

% ReLU activation function for a scalar
function f = ReLU0D(x)
    if x < 0
        f = 0;
    else
        f = x;
    end
end
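(Side note: the same elementwise ReLU can be written as the single vectorized expression max(X, 0), which avoids the arrayfun call and behaves identically.)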
The result is that the MATLAB code behaves as it should (i.e., res sums to 1), but it just gets the wrong answer most of the time, even though the accuracy should be around 95% (as measured in TensorFlow). So what am I doing wrong?
EDIT: Visualizing the pictures in MATLAB works fine.
EDIT: I am adding the code I wrote to create the .csv pictures:
import gzip
import numpy as np

f = gzip.open('train-images-idx3-ubyte.gz', 'r')
image_size = 28
num_images = 20

f.read(16)  # skip the 16-byte IDX file header
buf = f.read(image_size * image_size * num_images)
data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
data = data.reshape(num_images, image_size, image_size, 1)

for i in range(num_images):
    string = 'img'
    string += str(i)
    string += ".csv"
    image = np.asarray(data[i]).squeeze()
    image = np.int32(image)
    np.savetxt(string, image, delimiter=",")
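One thing worth keeping in mind when moving images between the two environments: NumPy and TensorFlow unroll arrays row-major (C order) by default, while MATLAB's (:) operator unrolls column-major, so the same 2D image flattens into two different vectors. A toy illustration in Python:

import numpy as np

img = np.arange(4).reshape(2, 2)  # a 2x2 stand-in "image": [[0, 1], [2, 3]]
print(img.flatten())              # row-major, like the TensorFlow input: [0 1 2 3]
print(img.flatten(order="F"))     # column-major, like MATLAB's (:):      [0 2 1 3]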
Upvotes: 0
Views: 571
Reputation: 23
I found my mistake: I was unrolling the image the wrong way. MATLAB's IMAGE(:) unrolls a matrix column by column (column-major), while the network was trained on images unrolled row by row, so the image has to be transposed before unrolling. I added one line of code to the MATLAB script:
% This is a simple matlab representation of a mnist OCR system.
% Grayscale images are saved in a .csv format (as 2d arrays) in the
% folder '/slike'
clear
% Import the image
IMAGE = csvread("slike\img7.csv");
% Visualize it
image(IMAGE);
IMAGE = transpose(IMAGE); % -- ADDED THIS LINE -- !
% Unroll image to use it as an input vector
x = IMAGE(:);
% Import the weights
w1 = h5read('mnist_NOCONV\coefficients.h5', '/w1');
b1 = h5read('mnist_NOCONV\coefficients.h5', '/b1');
w2 = h5read('mnist_NOCONV\coefficients.h5', '/w2');
b2 = h5read('mnist_NOCONV\coefficients.h5', '/b2');
% Fully connected layer 1
act1 = w1*x + b1;
relu = ReLU(act1);
% Fully connected layer 2
act2 = w2*relu + b2;
res = softmax(act2);
act2
res
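For completeness, the post never shows how coefficients.h5 is written; the helper below is my own sketch of the export side using h5py (the function name and dataset layout are assumptions, not part of the original script). Note that HDF5 stores data row-major while MATLAB reads column-major, so h5read returns each matrix transposed (w1 comes back as 1024x784), which is why the script above computes w1*x instead of x*w1.

import h5py

def export_weights(sess, w1, b1, w2, b2, path="coefficients.h5"):
    # Evaluate the trained variables and write each one to its own HDF5 dataset.
    w1_val, b1_val, w2_val, b2_val = sess.run([w1, b1, w2, b2])
    with h5py.File(path, "w") as hf:
        hf.create_dataset("w1", data=w1_val)  # (784, 1024) on the Python side
        hf.create_dataset("b1", data=b1_val)  # (1024,)
        hf.create_dataset("w2", data=w2_val)  # (1024, 10)
        hf.create_dataset("b2", data=b2_val)  # (10,)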
Upvotes: 0