Training a model with TensorFlow, but the loss just won't go down?

I am training a model that classifies 62 alphanumeric characters. The loss drops drastically during the first few batches, then plateaus and never goes down any further. I don't know what is wrong or how to debug the model.

Here is a snapshot of the training log:

Here are some examples of the training data:

I use 4 conv layers followed by 1 fc layer, and the Adam optimizer to minimize log loss. I double-checked that the image labels are right, so I don't know what else could be wrong.

Here is the code:

import numpy as np
import tensorflow as tf
import os
from PIL import Image
import shutil
import time

# ---------------------------------------------------------------------------
# Graph definition: 4 x (3x3 conv + ReLU + 2x2 max-pool), then a 2x2 VALID
# convolution acting as the fully connected layer, softmax over 62 classes.
#
# `temp` is a running handle that every layer overwrites with its own output.
# `input` shadows the Python builtin, but the name is kept because train()
# feeds the placeholder under this name.
#
# NOTE: feed pixel values scaled to a small range such as [0, 1]; raw 0-255
# intensities make the loss stall after the first few batches.
# ---------------------------------------------------------------------------
input = temp = tf.placeholder(dtype='float32', shape=(None, 32, 32, 1), name='input')  # (None,32,32,1)
label = tf.placeholder(dtype='float32', shape=(None, 62))  # (None,62), one-hot

temp = tf.layers.conv2d(inputs=temp, filters=32, kernel_size=(3, 3), padding="SAME",
                        activation=tf.nn.relu,
                        kernel_initializer=tf.keras.initializers.he_normal())  # (None,32,32,32)
temp = tf.layers.max_pooling2d(temp, pool_size=[2, 2], strides=2)  # (None,16,16,32)

temp = tf.layers.conv2d(inputs=temp, filters=64, kernel_size=(3, 3), padding="SAME",
                        activation=tf.nn.relu,
                        kernel_initializer=tf.keras.initializers.he_normal())  # (None,16,16,64)
temp = tf.layers.max_pooling2d(temp, pool_size=[2, 2], strides=2)  # (None,8,8,64)

# NOTE: tf.layers.dropout defaults to training=False, so the two dropout
# layers below currently pass their input through unchanged. Wire a
# `training` flag through if dropout is actually wanted during training.
temp = tf.layers.conv2d(inputs=temp, filters=128, kernel_size=(3, 3), padding="SAME",
                        activation=tf.nn.relu,
                        kernel_initializer=tf.keras.initializers.he_normal())  # (None,8,8,128)
temp = tf.layers.dropout(inputs=temp, rate=0.2)
temp = tf.layers.max_pooling2d(temp, pool_size=[2, 2], strides=2)  # (None,4,4,128)

temp = tf.layers.conv2d(inputs=temp, filters=256, kernel_size=(3, 3), padding="SAME",
                        activation=tf.nn.relu,
                        kernel_initializer=tf.keras.initializers.he_normal())  # (None,4,4,256)
temp = tf.layers.dropout(inputs=temp, rate=0.2)
temp = tf.layers.max_pooling2d(temp, pool_size=[2, 2], strides=2)  # (None,2,2,256)

# A 2x2 VALID convolution over the 2x2 feature map is equivalent to a dense layer.
temp = tf.layers.conv2d(inputs=temp, filters=62, kernel_size=(2, 2), padding="VALID",
                        kernel_initializer=tf.keras.initializers.he_normal())  # fc (None,1,1,62)
output = temp = tf.layers.flatten(temp)  # (None,62) logits
output = tf.nn.softmax(output)  # (None,62) class probabilities

# Per-class log loss on the softmax output, summed over the 62 classes and
# averaged over the batch. Probabilities are clipped so tf.log never sees 0.
output_clip = tf.clip_by_value(output, 1e-7, 1 - 1e-7)
loss = tf.reduce_mean(
    tf.reduce_sum(-label * tf.log(output_clip) - (1 - label) * tf.log(1 - output_clip), axis=-1)
)  # scalar
optimizer = tf.train.AdamOptimizer().minimize(loss)

# Accuracy = fraction of samples whose arg-max prediction matches the label.
# Taking the mean of 0/1 floats avoids the previous int8 sum, which overflowed
# as soon as more than 127 samples in a batch were classified correctly.
indexoutput = tf.argmax(output, axis=-1)  # (None,)
labelindex = tf.argmax(label, axis=-1)  # (None,)
equals = tf.equal(indexoutput, labelindex)  # (None,) bool
acc = tf.reduce_mean(tf.cast(equals, dtype='float32'))  # scalar

def train(epochs):
    """Train the module-level graph and checkpoint it after every epoch.

    Training batches of 32 are read from ``./pics`` and a single validation
    batch from ``./tests`` (both relative to the current working directory).
    After every training step the validation batch is evaluated and a log
    line is printed. Training stops early when the end-of-epoch validation
    loss is no better than either of the two previous epochs; otherwise a
    checkpoint is written to ``model_logloss/captchabreak.ckpt``.

    Args:
        epochs: Maximum number of passes over the training directory.
    """
    saver = tf.train.Saver()
    lossrec = []  # validation loss at the end of each epoch, for early stopping
    # Saver.save() does not create a missing parent directory.
    os.makedirs("model_logloss", exist_ok=True)
    with tf.Session() as sess:
        # Variables must be initialised before the first run() unless a
        # checkpoint is restored instead.
        sess.run(tf.global_variables_initializer())
        # To resume from a checkpoint, restore here instead:
        # saver.restore(sess, os.path.join(os.getcwd(), 'model_logloss', 'captchabreak.ckpt'))
        valimg, vallabel = next(validategenerator(os.path.join(os.getcwd(), 'tests')))
        val_feed = {input: np.array(valimg), label: np.array(vallabel)}
        for i in range(epochs):
            valloss = None
            batches = traingenerator(os.path.join(os.getcwd(), 'pics'), 32)
            for j, (trainimg, trainlabel) in enumerate(batches):
                _, trainacc, trainloss = sess.run([optimizer, acc, loss],
                                                  feed_dict={input: trainimg, label: trainlabel})
                valacc, valloss = sess.run([acc, loss], feed_dict=val_feed)
                print("epoch:{} batch:{} trainloss:{:.4f} validateloss:{:.4f} trainacc:{:.2f} validateacc:{:.2f}"
                      .format(i, j, trainloss, valloss, trainacc, valacc))
            if valloss is None:
                # The training directory produced no batches; nothing to learn from.
                break
            lossrec.append(valloss)
            # Early stopping: current loss is no better than the previous two epochs.
            if len(lossrec) >= 3 and valloss >= lossrec[-2] and valloss >= lossrec[-3]:
                break
            saver.save(sess, "model_logloss/captchabreak.ckpt")

def traingenerator(path,batch_size):
    """Yield one pass over the images in `path` as (images, labels) batches.

    Args:
        path: Directory containing the training images.
        batch_size: Number of files per batch; 0 means "all files in one batch".

    Yields:
        The result of ``fetch`` for each consecutive slice of the directory
        listing. The last batch may be smaller than `batch_size`.
    """
    fs = os.listdir(path)
    if not fs:
        return  # empty directory: nothing to yield
    if batch_size == 0:
        batch_size = len(fs)
    # Step through the listing; the original loop never defined or advanced
    # `offset`, so it could not run.
    for offset in range(0, len(fs), batch_size):
        yield fetch(fs[offset:offset + batch_size], path)

def validategenerator(path):
    """Yield a single (images, labels) batch built from every file in `path`."""
    yield fetch(os.listdir(path), path)

# Class order used for the one-hot labels: digits, then upper case, then lower case.
_CLASS_CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"


def _one_hot_label(c):
    """Return the 62-way one-hot float32 vector for the alphanumeric character `c`."""
    idx = _CLASS_CHARS.find(c)
    if idx < 0:
        raise ValueError("cannot derive a label from character {!r}".format(c))
    lb = np.zeros(len(_CLASS_CHARS), dtype=np.float32)
    lb[idx] = 1
    return lb


def fetch(fs,path):
    """Load the image files `fs` from `path` and build their one-hot labels.

    Each image is resized to 32x32, converted to greyscale, thresholded at
    210 and scaled to the range [0, 1]. The label is taken from the first
    character of the file name.

    Args:
        fs: File names (not paths) inside `path`.
        path: Directory that contains the files.

    Returns:
        ``(images, labels)`` as NumPy arrays: images of shape (N, 32, 32, 1)
        and labels of shape (N, 62), both float32. Both arrays are empty when
        `fs` is empty.

    Raises:
        ValueError: If a file name does not start with an ASCII letter or digit.
    """
    imgs = []
    labels = []
    for fname in fs:
        fp = os.path.join(path, fname)
        # The context manager closes the underlying file once the resized
        # copy has been produced.
        with Image.open(fp) as raw:
            imp = raw.resize((32, 32)).convert('L')
        # Binarise: pixels brighter than 210 become 255, everything else 0.
        imp = imp.point(lambda p: 255 if p > 210 else 0)
        # Scale to [0, 1]; feeding raw 0-255 values is what stalled training.
        im = np.asarray(imp, dtype=np.float32) / 255.0
        imgs.append(np.expand_dims(im, axis=-1))
        labels.append(_one_hot_label(fname[0]))
    return np.array(imgs), np.array(labels)

if __name__ == "__main__":

Answers (1)


It's solved. I forgot to divide the pixel values by 255, so the network was being fed raw 0–255 intensities instead of inputs scaled to [0, 1].

