geek

Reputation: 65

Testing weighted categorical cross-entropy for multiple classes in Keras with the TensorFlow backend

I have an issue that seems to have no straightforward solution in Keras. My server runs Ubuntu 14.04 and Keras with the TensorFlow backend, and it has four NVIDIA GeForce GTX 1080 GPUs.

I am trying to test the best available implementation of weighted categorical cross-entropy (https://github.com/keras-team/keras/issues/2115, the version curiale posted on Jan 20, 2017). The code pasted below reproduces the error shown further down.

The input array Xtrain has shape (800, 40), where 800 is the number of samples and 40 is the input feature dimension; similarly, Xtest has shape (400, 40). This is a multiclass problem with three classes. The code below implements this setup, but it fails with an error that seems to indicate a mismatch between the batch size and a tensor shape on the GPU, which I am finding hard to resolve. Please provide some pointers.

import keras
from keras.models import Sequential, Model, load_model
from keras.layers.embeddings import Embedding
from keras.layers.core import Activation, Dense, Dropout, Reshape
from keras.optimizers import SGD, Adam, RMSprop
#from keras.layers import TimeDistributed,Merge, Conv1D, Conv2D, Flatten, MaxPooling2D, Conv2DTranspose, UpSampling2D, RepeatVector
#from keras.layers.recurrent import GRU, LSTM
#from keras.datasets.data_utils import get_file
#import tarfile
from functools import partial, update_wrapper
from keras.callbacks import TensorBoard
from time import time
from sklearn.model_selection import KFold
import numpy as np
from keras.callbacks import EarlyStopping
import tensorflow as tf
import scipy.io
from keras import backend as K
from keras.layers import Input, Lambda
import os
from keras import optimizers
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
#os.export CUDA_VISIBLE_DEVICES="0,1"
import keras, sys
from matplotlib import pyplot
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
#from keras.utils import np_utils
from itertools import product
from keras.layers import Input

def w_categorical_crossentropy(y_true, y_pred, weights):
    nb_cl = weights.shape[1]#len(weights[0,:])
    print weights.shape
    print nb_cl
    print y_pred
    print y_true
    final_mask = K.zeros_like(y_pred[:, 0])
    y_pred_max = K.max(y_pred, axis=1)#returns maximum value along an axis in a tensor
    y_pred_max = K.reshape(y_pred_max, (K.shape(y_pred)[0], 1))
    y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (weights[c_t, c_p] *y_pred_max_mat[:, c_p]*y_true[:, c_t])
    #ypred_tensor=K.constant(y_pred,dtype=K.set_floatx('float32'))
    #ytrue_tensor=K.constant(y_true,dtype=K.set_floatx('float32'))
    return K.categorical_crossentropy(y_true,y_pred) * final_mask

#def get_mat_data(add,in1,in2):
    # Assuming sample_matlab_file.mat has 2 matrices A and B
    #matData = scipy.io.loadmat(add)
    #matrixA = matData[in1]
    #matrixA1 = matData[in2]
    #matrixB = matData['Ytrain']
    #matrixB1 = matData['Ytest']
    #weights = matData['w']
    #matrixC = matData['Ytrainclassify']
    #matrixC1 = matData['Ytestclassify']
    #nfold = matData['nfold']
    #return matrixA, matrixA1, matrixB, matrixB1, weights, matrixC, matrixC1, nfold 
def wrapped_partial(func, *args, **kwargs):
    partial_func = partial(func, *args, **kwargs)
    update_wrapper(partial_func, func)
    return partial_func

def gen_model():
    input = Input(shape=(40,))  
    #m1=Sequential()
    # m1.add(conv_model)
    # #m1.add(Conv2D(15, (5,5), strides=(1, 1),activation='relu', input_shape=(1,30,125), kernel_initializer='glorot_uniform'))#temporal filters theano
    # m1.add(Dropout(0.2))
    # #m1.add(Conv2D(15, (5,1), strides=(1, 1),activation='relu',kernel_initializer='glorot_uniform'))#spatial filters
    # #m1.add(Dropout(0.2))
    # m1.add(Flatten())
    # m1.add(Dropout(0.2))
    x1 =(Dense(200,activation='relu',name='dense_1'))(input)
    x2 =(Dropout(0.2))(x1)
    x3 =(Dense(100,activation='relu',name='dense_2'))(x2)
    x4 =(Dropout(0.2))(x3)
    x5 =(Dense(3,activation='softmax',name='softmax_layer'))(x4)
    model = Model(input=input, output=[x5])
    return model

def main():
    #in1 = 'Xtrain'
    #in2 = 'Xtest'
    #add = '/home/tharun/all_mat_files/test_keras.mat'
    #Xtrain, Xtest, Ytrain, Ytest, weights, Ytrainclassify, Ytestclassify, nfold = get_mat_data(add,in1,in2)
    Ytrain = np.random.randint(3, size=(800, 1))
    Ytest = np.random.randint(3, size=(400, 1))
    Ytrainclassify = Ytrain
    Ytestclassify = Ytest
    Xtrain=np.random.rand(800,40)
    Xtest=np.random.rand(400,40)
    nb_classes = 3
    print Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape, Ytrainclassify.shape, Ytestclassify.shape
    wts = np.ones((3,3))
    print 'wts:' 
    print wts.shape
    # convert class vectors to binary class matrices
    Y_train = keras.utils.to_categorical(Ytrainclassify[:,None], nb_classes)
    Y_test = keras.utils.to_categorical(Ytestclassify[:,None], nb_classes)
    Xtrain=Xtrain.astype('float32')
    Xtest=Xtest.astype('float32')

    print Xtrain.shape
    print Y_train.shape
    print Xtest.shape
    print Y_test.shape
    ncce = wrapped_partial(w_categorical_crossentropy, weights=wts)
    batch_size = 10
    nb_classes = 3
    nb_epoch = 1
    model=gen_model()
    #model.compile(loss=ncce, optimizer="adam")
    model.summary()
    rms = SGD()
    model.compile(loss=ncce, optimizer=rms)

    model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
    model.evaluate(Xtest, Y_test)
    #print('Test score:', score[0])
    #print('Test accuracy:', score[1])

    #saving weights
    model.save('model_classify_weights.h5')

if __name__ == "__main__":
    main()

Error:

python /home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py 

/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
(800, 40) (400, 40) (800, 1) (400, 1) (1, 3) (800, 1) (400, 1)
wts:
(3, 3)
(800, 40)
(800, 3)
(400, 40)
(400, 3)
/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py:129: UserWarning: Update your `Model` call to the Keras 2 API: `Model(outputs=[<tf.Tenso..., inputs=Tensor("in...)`
  model = Model(input=input, output=[x5])
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 40)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 200)               8200      
_________________________________________________________________
dropout_1 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               20100     
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
softmax_layer (Dense)        (None, 3)                 303       
=================================================================
Total params: 28,603
Trainable params: 28,603
Non-trainable params: 0
_________________________________________________________________
(?, 3)
3
Tensor("softmax_layer_target:0", shape=(?, ?), dtype=float32)
[[array([1.41292294]) 1 1]
 [1 array([7.328564]) 1]
 [1 1 array([2.38611435])]]
/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py:176: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.
  model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
Epoch 1/1
2018-02-13 15:41:44.382214: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2018-02-13 15:41:44.758387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 0 with properties: 
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:05:00.0
totalMemory: 7.92GiB freeMemory: 7.42GiB
2018-02-13 15:41:44.992640: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 1 with properties: 
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:06:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.225696: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 2 with properties: 
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:09:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.458070: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 3 with properties: 
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:0a:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.461078: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Device peer to peer matrix
2018-02-13 15:41:45.461151: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1051] DMA: 0 1 2 3 
2018-02-13 15:41:45.461160: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 0:   Y Y Y Y 
2018-02-13 15:41:45.461165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 1:   Y Y Y Y 
2018-02-13 15:41:45.461170: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 2:   Y Y Y Y 
2018-02-13 15:41:45.461175: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1061] 3:   Y Y Y Y 
2018-02-13 15:41:45.461191: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1080, pci bus id: 0000:05:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461198: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:1) -> (device: 1, name: GeForce GTX 1080, pci bus id: 0000:06:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461204: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:2) -> (device: 2, name: GeForce GTX 1080, pci bus id: 0000:09:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461209: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:3) -> (device: 3, name: GeForce GTX 1080, pci bus id: 0000:0a:00.0, compute capability: 6.1)
Traceback (most recent call last):
  File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
    main()
  File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 176, in main
    model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1598, in fit
    validation_steps=validation_steps)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1183, in _fit_loop
    outs = f(ins_batch)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2273, in __call__
    **self.session_kwargs)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 889, in run
    run_metadata_ptr)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1120, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
    options, run_metadata)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [3] vs. [10]
     [[Node: training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss/softmax_layer_loss/mul_20"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape, training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape_1)]]
     [[Node: loss/mul/_19 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_806_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op u'training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs', defined at:
  File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
    main()
  File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 176, in main
    model.fit(Xtrain, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 1575, in fit
    self._make_train_function()
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 960, in _make_train_function
    loss=self.total_loss)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/optimizers.py", line 156, in get_updates
    grads = self.get_gradients(loss, params)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/optimizers.py", line 73, in get_gradients
    grads = K.gradients(loss, params)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2310, in gradients
    return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 581, in gradients
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 353, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 581, in <lambda>
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_grad.py", line 742, in _MulGrad
    rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 532, in _broadcast_gradient_args
    "BroadcastGradientArgs", s0=s0, s1=s1, name=name)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op u'loss/softmax_layer_loss/mul_20', defined at:
  File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 239, in <module>
    main()
  File "/home/tharun/keras_workshop/EEG_RxtimeDNN_regress_classify.py", line 174, in main
    model.compile(loss=ncce, optimizer=rms)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 850, in compile
    sample_weight, mask)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/training.py", line 466, in weighted
    score_array *= weights
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 894, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1117, in _mul_dispatch
    return gen_math_ops._mul(x, y, name=name)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2726, in _mul
    "Mul", x=x, y=y, name=name)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Incompatible shapes: [3] vs. [10]
     [[Node: training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss/softmax_layer_loss/mul_20"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape, training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape_1)]]
     [[Node: loss/mul/_19 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_806_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Upvotes: 1

Views: 4237

Answers (2)

geek

Reputation: 65

The issue is with the input label arrays (Ytrainclassify and Ytestclassify): they have to be flattened to 1-D class vectors of shape (Ytrainclassify.shape[0],) before one-hot encoding with to_categorical.
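The key change in isolation (a minimal sketch with dummy labels, assuming nb_classes = 3 and the (800, 1) label shape from the question):

import numpy as np
import keras

nb_classes = 3
Ytrainclassify = np.random.randint(3, size=(800, 1))                # column vector, shape (800, 1)

# flatten to a 1-D class vector before one-hot encoding
Ytrainclassify = Ytrainclassify.reshape(Ytrainclassify.shape[0],)   # shape (800,)
Y_train = keras.utils.to_categorical(Ytrainclassify, nb_classes)    # shape (800, 3)
print(Y_train.shape)

The full working script, which runs without errors, is shared below.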

import keras
from keras.models import Sequential, Model, load_model
from keras.layers.embeddings import Embedding
from keras.layers.core import Activation, Dense, Dropout, Reshape
from keras.optimizers import SGD, Adam, RMSprop
#from keras.layers import TimeDistributed,Merge, Conv1D, Conv2D, Flatten, MaxPooling2D, Conv2DTranspose, UpSampling2D, RepeatVector
#from keras.layers.recurrent import GRU, LSTM
#from keras.datasets.data_utils import get_file
#import tarfile
from functools import partial, update_wrapper
from keras.callbacks import TensorBoard
from time import time
from sklearn.model_selection import KFold
import numpy as np
from keras.callbacks import EarlyStopping
import tensorflow as tf
import scipy.io
from keras import backend as K
from keras.layers import Input, Lambda
import os
from keras import optimizers
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
#os.export CUDA_VISIBLE_DEVICES="0,1"
import keras, sys
from matplotlib import pyplot
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
#from keras.utils import np_utils
from itertools import product
from keras.layers import Input


# Custom loss function with costs

def w_categorical_crossentropy(y_true, y_pred, weights):
    nb_cl = weights.shape[1]#len(weights[0,:])
    print "dbg \n\n\n\n\n\n\n\n\n\n"
    print weights.shape
    print nb_cl
    print y_pred
    print y_true
    final_mask = K.zeros_like(y_pred[:, 0])
    print final_mask
    y_pred_max = K.max(y_pred, axis=1)#returns maximum value along an axis in a tensor
    print y_pred_max
    y_pred_max = K.reshape(y_pred_max, (K.shape(y_pred)[0], 1))
    print y_pred_max
    y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
    print y_pred_max_mat
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (weights[c_t, c_p] *y_pred_max_mat[:, c_p]*y_true[:, c_t])
    #ypred_tensor=K.constant(y_pred,dtype=K.set_floatx('float32'))
    #ytrue_tensor=K.constant(y_true,dtype=K.set_floatx('float32'))
    return K.categorical_crossentropy(y_true,y_pred) * final_mask
# def joint_classificatn_regressn_loss(x1,ytrn,x2,ytst,w):

#     return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

#def get_mat_data(add,in1,in2):
    # Assuming sample_matlab_file.mat has 2 matrices A and B
    #matData = scipy.io.loadmat(add)
    #matrixA = matData[in1]
    #matrixA1 = matData[in2]
    #matrixB = matData['Ytrain']
    #matrixB1 = matData['Ytest']
    #weights = matData['w']
    #matrixC = matData['Ytrainclassify']
    #matrixC1 = matData['Ytestclassify']
    #nfold = matData['nfold']
    #return matrixA, matrixA1, matrixB, matrixB1, weights, matrixC, matrixC1, nfold 
#load riemannian features from matlab
#phase I
#train and test three DNN models
# def cutomized_loss(args):
#     #A is from the training data
#     #S is the internal state
#     A, A, S, S = args
#     #customize your own loss components
#     loss1 = K.mean(K.square(A-A),axis=-1)
#     loss1 = K.mean(K.square(A-A),axis=-1)
def wrapped_partial(func, *args, **kwargs):
    partial_func = partial(func, *args, **kwargs)
    update_wrapper(partial_func, func)
    return partial_func

def gen_model():
    input = Input(shape=(40,))  
    #m1=Sequential()
    # m1.add(conv_model)
    # #m1.add(Conv2D(15, (5,5), strides=(1, 1),activation='relu', input_shape=(1,30,125), kernel_initializer='glorot_uniform'))#temporal filters theano
    # m1.add(Dropout(0.2))
    # #m1.add(Conv2D(15, (5,1), strides=(1, 1),activation='relu',kernel_initializer='glorot_uniform'))#spatial filters
    # #m1.add(Dropout(0.2))
    # m1.add(Flatten())
    # m1.add(Dropout(0.2))
    x1 =(Dense(200,activation='relu',name='dense_1'))(input)
    x2 =(Dropout(0.2))(x1)
    x3 =(Dense(100,activation='relu',name='dense_2'))(x2)
    x4 =(Dropout(0.2))(x3)
    x5 =(Dense(3,activation='softmax',name='softmax_layer'))(x4)
    model = Model(inputs=input, outputs=[x5])
    return model
    #x6 =(Dropout(0.2))(x5)

def main():
    # print command line arguments
    # for arg in sys.argv[1:]:
    #     print arg


    batch_size = 10
    nb_classes = 3
    nb_epoch = 1
    Ytrain = np.random.randint(3, size=(800, 1))
    Ytest = np.random.randint(3, size=(400, 1))
    Ytrainclassify = Ytrain
    Ytestclassify = Ytest
    Xtrain=np.random.rand(800,40)
    Xtest=np.random.rand(400,40)
    #add = '/home/tharun/all_mat_files/'#+sys.argv[1]
    #in1 = 'Xfff'
    #in2 = 'Xtestf'
    #in1 = 'Xtrain'
    #in2 = 'Xtest'
    #add = '/home/tharun/all_mat_files/test_keras.mat'
    #Xtrain, Xtest, Ytrain, Ytest, weights, Ytrainclassify, Ytestclassify, nfold = get_mat_data(add,in1,in2)


    wts = np.ones((3,3))
    #np.array([[1/weights[:,0], 1, 1],[1, 1/weights[:,1], 1],[1, 1, 1/weights[:,2]]])
    #y = np.bincount(Ytrain)
    #ii = np.nonzero(y)[0]
    #weight_indx = y[ii]

    # wts[0,0]=1/weights[0,0]
    # wts[1,1]=1/weights[0,1]
    # wts[2,2]=1/weights[0,2]
    print 'wts.shape:' 
    print wts.shape
    print wts
    ncce = wrapped_partial(w_categorical_crossentropy, weights=wts)

    Xtrain = Xtrain.astype('float32')
    Xtest = Xtest.astype('float32')
    nb_classes = 3
    print Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape, wts.shape,Ytrainclassify.shape, Ytestclassify.shape

    Ytestclassify = Ytestclassify.reshape(Ytestclassify.shape[0],)
    Ytrainclassify = Ytrainclassify.reshape(Ytrainclassify.shape[0],)
    # convert class vectors to binary class matrices
    Y_train = keras.utils.to_categorical(Ytrainclassify, nb_classes)
    Y_test = keras.utils.to_categorical(Ytestclassify, nb_classes)
    Xtrain=Xtrain.astype('float32')
    Xtest=Xtest.astype('float32')
    Y_train=Y_train.astype('float32')
    Y_test=Y_test.astype('float32')
    print Ytrainclassify.shape
    print Ytestclassify.shape
    print Xtrain.shape
    print Y_train.shape
    print Xtest.shape
    print Y_test.shape


    #weights = np.array(sys.argv[2:], dtype=np.float64)

    # m1.add(Dense(400,activation='relu'))
    # m1.add(Dropout(0.2))
    # m1.add(Dense(100,activation='relu'))
    # m1.add(Dropout(0.2))
    # m1.add(Dense(3, activation='softmax'))
    #parent model
    model=gen_model()
    #model.compile(loss=ncce, optimizer="adam")
    model.summary()
    rms = SGD()
    model.compile(loss=ncce, optimizer=rms)


    print Xtrain.shape
    print Y_train.shape
    print Xtest.shape
    print Y_test.shape

    model.fit(Xtrain, Y_train, batch_size=batch_size, epochs=nb_epoch)
    model.evaluate(Xtest, Y_test)
    #print('Test score:', score[0])
    #print('Test accuracy:', score[1])

    #saving weights
    model.save('model_classify_weights.h5')


if __name__ == "__main__":
    main()
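A practical note on reusing the saved model: because it was compiled with a custom loss, a plain load_model call will not know how to deserialize that loss. A minimal sketch of one way to reload it, assuming the wrapped_partial and w_categorical_crossentropy definitions from the script above are in scope:

import numpy as np
from keras.models import load_model

# Rebuild the wrapped loss so Keras can resolve it by name at load time
ncce = wrapped_partial(w_categorical_crossentropy, weights=np.ones((3, 3)))
model = load_model('model_classify_weights.h5',
                   custom_objects={'w_categorical_crossentropy': ncce})
# Alternatively, skip compilation if only predictions are needed:
# model = load_model('model_classify_weights.h5', compile=False)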

Upvotes: 1

Claude COULOMBE

Reputation: 3738

Did you check the labels in your input data to confirm that there really are only 3 classes and not 10? The second suspect is your w_categorical_crossentropy function... did you benchmark it on dummy data?
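For example, one quick way to benchmark the loss on dummy data is to feed it small constant tensors and check that it returns one value per sample (a minimal sketch, assuming the w_categorical_crossentropy function from the question is in scope and a uniform 3x3 weight matrix):

import numpy as np
from keras import backend as K

wts = np.ones((3, 3))                                        # dummy 3x3 class-weight matrix
y_true = K.constant(np.eye(3, dtype='float32'))              # three one-hot rows, shape (3, 3)
y_pred = K.constant(np.random.rand(3, 3).astype('float32'))  # fake predictions, shape (3, 3)

loss = w_categorical_crossentropy(y_true, y_pred, wts)       # the custom loss defined in the question
print(K.eval(loss))                                          # expect one loss value per sample, shape (3,)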

Upvotes: 1
