Reputation: 3982
I tried to solve the XOR task with two different approaches. The first one uses a Sequential model:
# First approach: a Sequential 2-2-1 sigmoid network that receives both XOR
# inputs as a single 2-element vector.
result = Sequential()
result.add(Dense(2, input_shape=(2,), activation='sigmoid'))
# NOTE(review): input_shape is repeated on this second layer; presumably only
# the first layer of a Sequential model needs it -- confirm before removing.
result.add(Dense(1, input_shape=(2,), activation='sigmoid'))
ada_grad = Adagrad(lr=0.1, epsilon=1e-08, decay=0.0)
# _loss_tensor is not defined in this snippet; it is whatever loss the asker
# declared elsewhere.
result.compile(optimizer=ada_grad, loss=_loss_tensor)
Based on the following test data:
# XOR truth table: each row of X is one (x1, x2) input pair.
X = np.array([ [1, 1], [1, 0], [0, 1], [0, 0] ])
# Targets are appended in the same order as the rows of X.
# Y_train is not defined in this snippet -- presumably a list created earlier.
Y_train.append(0)
Y_train.append(1)
Y_train.append(1)
Y_train.append(0)
When I train the model, the accuracy is about 1.
If I try to solve the same task in the following way:
# Second approach: the two XOR inputs enter through separate 1-element Input
# layers, each followed by its own single-unit sigmoid Dense layer.
first_input = Input(shape=(1,), name='x1')
input_dense = Dense(1, activation='sigmoid', )(first_input)
second_input = Input(shape=(1,), name='x2')
second_dense = Dense(1, activation='sigmoid', )(second_input)
# Join the two branch outputs, then apply a 2-unit and a 1-unit sigmoid layer.
merge_one = concatenate([input_dense, second_dense])
merge_one_dense2 = Dense(2, activation='sigmoid', )(merge_one)
merge_one_dense3 = Dense(1, activation='sigmoid', )(merge_one_dense2)
result = Model(inputs=[first_input, second_input], outputs=merge_one_dense3)
ada_grad = Adagrad(lr=0.1, epsilon=1e-08, decay=0.0)
# _loss_tensor is not defined in this snippet; it is whatever loss the asker
# declared elsewhere.
result.compile(optimizer=ada_grad, loss=_loss_tensor)
with the following test data:
# The same XOR truth table split column-wise: row i of X1 and row i of X2
# together form one (x1, x2) input pair.
X1 = np.array([ [1], [0], [1], [0] ])
X2 = np.array([ [1], [1], [0], [0] ])
# Targets are appended in the same row order as X1/X2.
# Y_train is not defined in this snippet -- presumably a list created earlier.
Y_train.append(0)
Y_train.append(1)
Y_train.append(1)
Y_train.append(0)
I get an accuracy of about 0.5.
Visually, both models look the same to me. The first model:
What have I missed or failed to consider?
UPD: After some investigation I can say that there is no difference between these models. The answers differ because I use different (random) initial conditions. If both models are started from the same initial conditions, the results are the same.
Upvotes: 1
Views: 1048
Reputation: 66
The second model is more complex than the first one, so maybe you should train it for more steps.
Here is my code; the results are simple_acc: 0.7923 and complex_acc: 0.7244. You can fine-tune it yourself.
#coding: utf-8
import numpy as np
from keras.layers import Input,Dense,merge
from keras.models import Sequential,Model
from keras.optimizers import Adagrad
def simple_model(lr=0.001):
    """Build and compile a 2-2-1 sigmoid network for the XOR task.

    Both XOR inputs are fed to the network as one 2-element vector.

    Args:
        lr: Learning rate handed to the Adagrad optimizer. The default keeps
            the value that was previously hard-coded.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    result = Sequential()
    result.add(Dense(2, input_shape=(2,), activation='sigmoid'))
    # Only the first layer needs input_shape; later layers take their input
    # size from the layer before them.
    result.add(Dense(1, activation='sigmoid'))
    ada_grad = Adagrad(lr=lr, epsilon=1e-08, decay=0.0)
    # The output is a sigmoid and the targets are 0/1, so binary
    # cross-entropy is the matching loss. Hinge loss is defined for -1/+1
    # targets and is a poor fit for 0/1 labels.
    result.compile(optimizer=ada_grad, loss='binary_crossentropy')
    return result
def complex_model(lr=0.001):
    """Build and compile the two-input functional network for the XOR task.

    Each XOR input has its own 1-element ``Input`` (named ``x1`` and ``x2``)
    followed by a single-unit sigmoid layer. The two branch outputs are
    concatenated and passed through a 2-unit and then a 1-unit sigmoid layer.

    Args:
        lr: Learning rate handed to the Adagrad optimizer. The default keeps
            the value that was previously hard-coded.

    Returns:
        The compiled Keras ``Model`` taking ``[x1, x2]`` as inputs.
    """
    # Imported locally so this function does not depend on the file-level
    # import list. `concatenate` is the Keras 2 functional replacement for
    # the legacy merge(..., mode='concat') call.
    from keras.layers import concatenate

    first_input = Input(shape=(1,), name='x1')
    input_dense = Dense(1, activation='sigmoid')(first_input)
    second_input = Input(shape=(1,), name='x2')
    second_dense = Dense(1, activation='sigmoid')(second_input)
    # axis=1 mirrors the former concat_axis=1: join along the feature axis.
    merge_one = concatenate([input_dense, second_dense], axis=1)
    merge_one_dense2 = Dense(2, activation='sigmoid')(merge_one)
    merge_one_dense3 = Dense(1, activation='sigmoid')(merge_one_dense2)
    result = Model(inputs=[first_input, second_input], outputs=merge_one_dense3)
    ada_grad = Adagrad(lr=lr, epsilon=1e-08, decay=0.0)
    # The output is a sigmoid and the targets are 0/1, so binary
    # cross-entropy is the matching loss. Hinge loss is defined for -1/+1
    # targets and is a poor fit for 0/1 labels.
    result.compile(optimizer=ada_grad, loss='binary_crossentropy')
    return result
def simple_data():
    """Return the XOR truth table for the single-input model.

    Returns:
        A ``(X, Y)`` tuple. ``X`` holds the four input pairs, one per row,
        and ``Y`` holds the XOR target of each row as a one-element row.
    """
    # Each entry pairs an (x1, x2) input with its XOR result.
    truth_table = [
        ((1, 1), 0),
        ((1, 0), 1),
        ((0, 1), 1),
        ((0, 0), 0),
    ]
    inputs = np.array([list(pair) for pair, _ in truth_table])
    targets = np.array([[label] for _, label in truth_table])
    return inputs, targets
def complex_data():
    """Return the XOR truth table split into one column array per input.

    Returns:
        A ``([X1, X2], Y)`` tuple. ``X1`` and ``X2`` are the first and second
        XOR inputs as column arrays, and ``Y`` is the matching target column.
    """
    # Each entry is (x1, x2, x1 XOR x2).
    truth_table = [
        (1, 1, 0),
        (0, 1, 1),
        (1, 0, 1),
        (0, 0, 0),
    ]
    first_col, second_col, target_col = (
        np.array([[value] for value in column])
        for column in zip(*truth_table)
    )
    return [first_col, second_col], target_col
def test_simple_model():
    """Train the single-input XOR model and return its evaluation result.

    The four XOR samples are used both for fitting and as validation data.

    Returns:
        Whatever ``model.evaluate`` reports on those same four samples.
    """
    features, targets = simple_data()
    net = simple_model()
    net.summary()
    # batch_size=4 puts the whole truth table into one batch per epoch.
    net.fit(features, targets, batch_size=4, epochs=1000, verbose=1,
            validation_data=(features, targets))
    return net.evaluate(features, targets, verbose=0)
def test_complex_model():
    """Train the two-input XOR model and return its evaluation result.

    The four XOR samples are used both for fitting and as validation data.

    Returns:
        Whatever ``model.evaluate`` reports on those same four samples.
    """
    features, targets = complex_data()
    net = complex_model()
    net.summary()
    # batch_size=4 puts the whole truth table into one batch per epoch.
    net.fit(features, targets, batch_size=4, epochs=1000, verbose=1,
            validation_data=(features, targets))
    return net.evaluate(features, targets, verbose=0)
def main():
    """Train both XOR models and print the value each evaluation returned."""
    simple_acc = test_simple_model()
    complex_acc = test_complex_model()
    # NOTE(review): despite the "acc" labels, these are whatever
    # model.evaluate returns -- the loss if the models are compiled without
    # metrics. Verify before reading them as accuracy.
    # A parenthesised single-argument print behaves the same as a Python 2
    # print statement and as the Python 3 print function.
    print('simple_acc: %.4f' % simple_acc)
    print('complex_acc: %.4f' % complex_acc)


if __name__ == '__main__':
    main()
Upvotes: 1