convolution matrix filter size vs channel length

Question

I am a beginner in computer vision. I would like to understand how the filter size and the number of channels of a convolutional layer relate to the shape of its output image.

My aim is to understand the relationships that connect the filter size and the number of channels across the layers of a Convolutional Neural Network.

I am using the TensorFlow library, and I have found an example that applies a CNN to the CIFAR-10 data set.

The data set consists of:

data -- a 10000x3072 numpy array of uint8s. Each row of the array stores a 32x32 colour image. The first 1024 entries contain the red channel values, the next 1024 the green, and the final 1024 the blue. The image is stored in row-major order, so that the first 32 entries of the array are the red channel values of the first row of the image.
labels -- a list of 10000 numbers in the range 0-9.

The Code:

import tensorflow as tf


# Input images: any batch size, 32x32 pixels, 3 colour channels (RGB).
X = tf.placeholder(
    dtype=tf.float32,
    shape=[None, 32, 32, 3],
)

# One-hot encoded labels: any batch size, 10 classes.
y_true = tf.placeholder(
    dtype=tf.float32,
    shape=[None, 10],
)

# Dropout keep-probability, fed at run time.
hold_prob = tf.placeholder(dtype=tf.float32)

#  Helper Functions 



def init_weight(shape, name_W):
    """Create a weight variable drawn from a truncated normal distribution.

    Args:
        shape: Shape of the weight tensor to create.
        name_W: Name given to the resulting ``tf.Variable``.

    Returns:
        A ``tf.Variable`` initialised with truncated-normal values
        (standard deviation 0.1).
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name_W)

#  init bias

def init_bias(shape, name_b):
    """Create a bias variable whose entries all start at 0.1.

    Args:
        shape: Shape of the bias tensor to create.
        name_b: Name given to the resulting ``tf.Variable``.

    Returns:
        A ``tf.Variable`` filled with the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape), name=name_b)

#  convolution 2d

# Conv2D

def conv2d(X, W, name_conv):
    """Apply a 2-D convolution with unit strides and 'SAME' padding.

    Args:
        X: Input tensor laid out as [batch, height, width, channels].
        W: Filter tensor laid out as
            [filter height, filter width, channels in, channels out].
        name_conv: Name given to the convolution op.

    Returns:
        The result of ``tf.nn.conv2d`` applied to ``X`` with filter ``W``.
    """
    unit_strides = [1, 1, 1, 1]
    convolved = tf.nn.conv2d(
        X,
        W,
        strides=unit_strides,
        padding='SAME',
        name=name_conv,
    )
    return convolved

#  convolutional layer with activation and bias

# Convolutional Layers

def convolutional_layer(input_x, shape, name_W, name_b, name_conv):
    """Build a convolution + bias + ReLU layer.

    Args:
        input_x: Input tensor fed to the convolution.
        shape: Filter shape; it is passed to ``init_weight`` unchanged, and
            its fourth entry (``shape[3]``) sets the length of the bias.
        name_W: Name for the filter weight variable.
        name_b: Name for the bias variable.
        name_conv: Name for the convolution op.

    Returns:
        The ReLU activation of the biased convolution output.
    """
    filters = init_weight(shape=shape, name_W=name_W)
    # One bias value per output channel.
    biases = init_bias(shape=[shape[3]], name_b=name_b)

    pre_activation = conv2d(input_x, filters, name_conv=name_conv) + biases
    return tf.nn.relu(pre_activation)

#  pooling layer

# Pooling
def max_pooling_2by2(X):
    """Apply max pooling with a 2x2 window and a stride of 2.

    Args:
        X: Input tensor laid out as [batch, height, width, channels].

    Returns:
        The result of ``tf.nn.max_pool`` with 'SAME' padding.
    """
    # The same [1, 2, 2, 1] spec is used for both window size and stride:
    # pool over height and width only, never over batch or channels.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(X, ksize=window, strides=window, padding='SAME')

#  Fully connected Layer

#  Normal Layer (fully connected Layer)

def normal_full_layer(input_layer, size, name_W, name_b):
    """Build a fully connected (dense) layer without an activation.

    Args:
        input_layer: 2-D input tensor; its second dimension is read as the
            number of input features.
        size: Number of output units.
        name_W: Name for the weight variable.
        name_b: Name for the bias variable.

    Returns:
        ``input_layer @ W + b``, where ``W`` has shape
        ``[input features, size]`` and ``b`` has shape ``[size]``.
    """
    n_inputs = int(input_layer.get_shape()[1])
    weights = init_weight([n_inputs, size], name_W=name_W)
    biases = init_bias([size], name_b=name_b)

    projected = tf.matmul(input_layer, weights)
    return projected + biases

#  Create the Layers 



# First convolutional block.
# Filter shape [4, 4, 3, 32]: a 4x4 window over the 3 input (RGB) channels,
# producing 32 output channels. With stride 1 and 'SAME' padding the spatial
# size stays 32x32; the 2x2 max pool then halves it to 16x16.
convo_1 = convolutional_layer(
    X,
    shape=[4, 4, 3, 32],
    name_W="W_conv1",
    name_b="bias_Conv1",
    name_conv="Conv_1",
)
convo_1_pooling = max_pooling_2by2(convo_1)

# Second convolutional block.
# The "channels in" entry (32) must equal the "channels out" of the previous
# layer; this layer widens the feature maps to 64 channels, and the pool
# halves the spatial size again, from 16x16 to 8x8.
convo_2 = convolutional_layer(
    convo_1_pooling,
    shape=[4, 4, 32, 64],
    name_W="W_conv2",
    name_b="bias_Conv2",
    name_conv="Conv_2",
)
convo_2_pooling = max_pooling_2by2(convo_2)

# Flatten each 8x8x64 feature volume into a vector of 8 * 8 * 64 = 4096
# values so it can feed the fully connected layer.
convo_2_flat = tf.reshape(convo_2_pooling, [-1, 8 * 8 * 64])

# Fully connected hidden layer with 1024 units followed by ReLU.
full_layer_one = tf.nn.relu(
    normal_full_layer(
        convo_2_flat,
        1024,
        name_W="full_layer_W",
        name_b="full_layer_b",
    )
)

# Dropout uses the `hold_prob` placeholder created with the other
# placeholders near the top of the script. It is deliberately NOT re-created
# here: a second tf.placeholder would shadow the first and leave an orphaned
# node in the graph.
full_one_dropot = tf.nn.dropout(full_layer_one, keep_prob=hold_prob)

# Output layer: one logit per class (10 classes).
y_pred = normal_full_layer(full_one_dropot, 10, name_W='out_W', name_b='out_b')

# Loss: softmax cross-entropy between the one-hot labels and the logits,
# averaged over the batch.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        labels=y_true,
        logits=y_pred,
    )
)

# Optimizer: Adam with a learning rate of 0.001; `train` is the op that
# performs one optimisation step on the loss.
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)

# Op that initialises every global variable in the graph.
init = tf.global_variables_initializer()

#  Graph Session

steps = 5000

# Build the evaluation ops once, before the session starts. Creating them
# inside the training loop would add a fresh set of nodes to the graph on
# every evaluation, making the graph grow for the whole run.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

with tf.Session() as sess:
    sess.run(init)

    # tf.global_variables() replaces the deprecated tf.all_variables().
    print(tf.global_variables())

    for i in range(steps):
        # NOTE(review): `ch` is not defined in this snippet; presumably a
        # batch helper created elsewhere -- confirm.
        batch_x, batch_y = ch.next_batch(100)
        # print(convo_2_flat.eval(feed_dict={X:batch_x, y_true:batch_y, hold_prob:1.0}).shape)

        # One training step with dropout active (keep probability 0.5).
        sess.run(train, feed_dict={X: batch_x, y_true: batch_y, hold_prob: 0.5})

        #  PRINT OUT A MESSAGE EVERY 100 STEPS
        if i % 100 == 0:
            print('Currently on step {}'.format(i))
            print('Accuracy is:')
            # Evaluate with dropout disabled (keep probability 1.0).
            # NOTE(review): `training_images` / `training_labels` are not
            # defined in this snippet -- confirm where they come from.
            print(sess.run(acc, feed_dict={X: training_images,
                                           y_true: training_labels,
                                           hold_prob: 1.0}))
            print('\n')

I am wondering about the filter size. For example, the first convolutional layer has a filter size of (4 * 4) and 32 output channels; I would like to know why these numbers are chosen and how they cascade through the following layers.

Another example is the last layer:

tf.nn.relu(normal_full_layer(convo_2_flat,1024,name_W="full_layer_W",name_b="full_layer_b"))

It takes the output of the flattened layer and maps it to 1024 units. What is the reason behind that choice as well?

See also this image:

convolution matrix filter size vs channel length

Answers (1)

Related Questions