MilkKnight

Reputation: 1

memory issue in tensorflow

I am trying to build a Gaussian RBM model with TensorFlow, but the program uses too much memory.

gaussian_rbm.py

import tensorflow as tf
import math
import input_data
import numpy as np

def sample_prob(probs):
    return tf.nn.relu(
        tf.sign(
            probs - tf.random_uniform(tf.shape(probs))))

class RBM(object):
    """ represents a sigmoidal rbm """

    def __init__(self, name, input_size, output_size, gaussian_std_val=0.1):
        with tf.name_scope("rbm_" + name):
            self.weights = tf.Variable(
                tf.truncated_normal([input_size, output_size],
                    stddev=1.0 / math.sqrt(float(input_size))), name="weights")
            self.v_bias = tf.Variable(tf.zeros([input_size]), name="v_bias")
            self.h_bias = tf.Variable(tf.zeros([output_size]), name="h_bias")
            self.input = tf.placeholder("float", shape=[None, 784])

            #Gaussian
            def_a = 1/(np.sqrt(2)*gaussian_std_val)
            def_a = tf.constant(def_a, dtype=tf.float32)
            self.a = tf.Variable( tf.ones(shape=[input_size]) * def_a,
                                  name="a")


    def propup(self, visible):
        """ P(h|v) """
        return tf.nn.sigmoid(tf.matmul(visible, self.weights) + self.h_bias)

    def propdown(self, hidden):
        """ P(v|h) """
        # return tf.nn.sigmoid(tf.matmul(hidden, tf.transpose(self.weights)) + self.v_bias)
        return ( tf.matmul(hidden, tf.transpose(self.weights)) + self.v_bias ) / (2 * (self.a * self.a))

    def sample_h_given_v(self, v_sample):
        """ Generate a sample from the hidden layer """
        return sample_prob(self.propup(v_sample))

    def sample_v_given_h(self, h_sample):
        """ Generate a sample from the visible layer """
        return self.sample_gaussian(self.propdown(h_sample))

    def gibbs_hvh(self, h0_sample):
        """ A gibbs step starting from the hidden layer """
        v_sample = self.sample_v_given_h(h0_sample)
        h_sample = self.sample_h_given_v(v_sample)
        return [v_sample, h_sample]

    def gibbs_vhv(self, v0_sample):
        """ A gibbs step starting from the visible layer """
        h_sample = self.sample_h_given_v(v0_sample)
        v_sample = self.sample_v_given_h(h_sample)
        return  [h_sample, v_sample]

    def sample_gaussian(self, mean_field):
        return tf.random_normal(shape=tf.shape(mean_field),
                                mean=mean_field,
                                stddev=1.0 / (np.sqrt(2) * self.a))

    def cd1(self, learning_rate=0.1):
        " One step of contrastive divergence, with Rao-Blackwellization "
        h_start = self.sample_h_given_v(self.input)
        v_end = self.sample_v_given_h(h_start)
        h_end = self.sample_h_given_v(v_end)
        w_positive_grad = tf.matmul(tf.transpose(self.input), h_start)
        w_negative_grad = tf.matmul(tf.transpose(v_end), h_end)

        update_w = self.weights + (learning_rate * (w_positive_grad - w_negative_grad) / tf.to_float(tf.shape(self.input)[0]))

        update_vb = self.v_bias + (learning_rate * tf.reduce_mean(self.input - v_end, 0))

        update_hb = self.h_bias + (learning_rate * tf.reduce_mean(h_start - h_end, 0))

        return [update_w, update_vb, update_hb]

    def cal_err(self):
        err = self.input - self.gibbs_vhv(self.input)[1]
        return tf.reduce_mean(err * err)

test_mnist.py

import tensorflow as tf
import input_data
from gaussian_rbm import RBM

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels

rbm_modle = RBM(name="gaussian_rbm", input_size=784, output_size=1000)

sess = tf.Session()
init_op = tf.initialize_all_variables()
sess.run(init_op)

for i in range(100):
    print "step: %s"%i
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):

        rbm_modle.weights, rbm_modle.v_bias, rbm_modle.h_bias = \
            sess.run(rbm_modle.cd1(), feed_dict={rbm_modle.input : trX[start : end]})

        if start % 1280 == 0:
            print sess.run(rbm_modle.cal_err(), feed_dict={rbm_modle.input : teX})

the output is

run test_mnist.py
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:900] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
I tensorflow/core/common_runtime/gpu/gpu_init.cc:102] Found device 0 with properties:
name: GeForce GTX 560
major: 2 minor: 1
memoryClockRate (GHz) 1.62
pciBusID 0000:01:00.0
Total memory: 1018.69MiB
Free memory: 916.73MiB
I tensorflow/core/common_runtime/gpu/gpu_init.cc:126] DMA: 0
I tensorflow/core/common_runtime/gpu/gpu_init.cc:136] 0:   Y
I tensorflow/core/common_runtime/gpu/gpu_device.cc:684] Ignoring gpu device (device: 0, name: GeForce GTX 560, pci bus id: 0000:01:00.0) with Cuda compute capability 2.1. The minimum required Cuda capability is 3.5.
step: 0
0.0911714
0.0781856
0.0773076
0.0770751
0.0776582
0.0764748
0.0755164
0.0741131
0.0726497
0.0712237
0.0701839
0.0686315
0.0664856
0.0658309
0.0646239
0.0626652
0.0616178
0.0610061
0.0598332
0.0588843
0.0587477
0.0572056
0.0561556
0.0554848
Killed

Is there some way to monitor the memory? Can someone help me?

Upvotes: 0

Views: 4146

Answers (4)

Guillem Cucurull

Reputation: 1691

There may be a problem with the training loop that causes your computer to run out of memory.

For each iteration of your loop, you are calling:

sess.run(rbm_modle.cd1(), feed_dict={rbm_modle.input : trX[start : end]})

Inside the rbm_modle.cd1() function you are creating several new operations, such as tf.matmul(), so every time you call rbm_modle.cd1() new operations are added to the graph, which causes memory usage to grow with each iteration.

You should define all the operations once, before the loop, and then only run them inside the loop with sess.run(), without creating new ones.
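For example, something along these lines (a sketch only: the update_ops and err_op names are just illustrative, and it keeps the old-style API already used in the question, i.e. tf.assign and tf.initialize_all_variables):

import tensorflow as tf
import input_data
from gaussian_rbm import RBM

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, teX = mnist.train.images, mnist.test.images

rbm_modle = RBM(name="gaussian_rbm", input_size=784, output_size=1000)

# Build the graph once, before the loop: cd1() returns the new parameter
# values, and tf.assign writes them back into the variables in place.
new_w, new_vb, new_hb = rbm_modle.cd1()
update_ops = [tf.assign(rbm_modle.weights, new_w),
              tf.assign(rbm_modle.v_bias, new_vb),
              tf.assign(rbm_modle.h_bias, new_hb)]
err_op = rbm_modle.cal_err()

sess = tf.Session()
sess.run(tf.initialize_all_variables())

for i in range(100):
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
        # Only data is fed here; no new graph nodes are created inside the loop.
        sess.run(update_ops, feed_dict={rbm_modle.input: trX[start:end]})
        if start % 1280 == 0:
            print sess.run(err_op, feed_dict={rbm_modle.input: teX})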

Upvotes: 2

Shadow Template

Reputation: 113

Make sure there are no memory leaks by making your graph read-only before training with

tf.get_default_graph().finalize()

TensorFlow will then raise an exception whenever you try to add a new node.
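In the question's test_mnist.py, the call would go right after variable initialization and before the training loop, roughly like this (a sketch using the names from that script):

sess = tf.Session()
sess.run(tf.initialize_all_variables())

# Freeze the graph: from this point on, any attempt to add a node
# (such as calling rbm_modle.cd1() inside the training loop) raises an
# error instead of silently growing the graph and consuming memory.
tf.get_default_graph().finalize()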

Upvotes: 0

Thierry Barnier

Reputation: 47

The other answer seems correct: insufficient compute capability to run the latest versions of CUDA / TensorFlow.

However, the minimum requirement seems to be compute capability 3.0, as my GTX 770M is able to run TensorFlow 1.0 / CUDA 8.0 (see below).

Alternatively, you could try recompiling TensorFlow from source and including the 2.0 compute target during configuration (it proposes 3.5-5.5 by default).

Have a good day!!

+-----------------------------------------------------------------------------+
| NVIDIA-SMI 375.51                 Driver Version: 375.51                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce GTX 770M    Off  | 0000:01:00.0     N/A |                  N/A |
|100%   48C    P0    N/A /  N/A |   2819MiB /  3017MiB |     N/A      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID  Type  Process name                               Usage      |
|=============================================================================|
|    0                  Not Supported                                         |
+-----------------------------------------------------------------------------+

Upvotes: 0

j314erre

Reputation: 2827

You can monitor GPU memory with the command nvidia-smi.

It looks like your GPU does not support the later versions of CUDA required to run TensorFlow. You can check the list of CUDA-Enabled GeForce Products.

From your output it looks like TensorFlow is smart enough not to use the GPU, so either your model / batch size is too big for your RAM or you have a memory leak.

Try running the session with log_device_placement=True to see what TensorFlow is doing step by step, while running top to monitor memory:

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
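In the question's test_mnist.py, which keeps a long-lived session rather than a with-block, that would look roughly like this (sketch):

config = tf.ConfigProto(log_device_placement=True)
sess = tf.Session(config=config)
sess.run(tf.initialize_all_variables())
# TensorFlow now logs which device (CPU or GPU) each operation
# was placed on when the graph runs.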

Upvotes: 5
