Reputation: 632
My input is an series of videos, 8500 in number. Each video is fed to LSTM as a series of 50 frames, each frame having 960 pixels. So the input dim is 8500,50,960 There are 487 possible output classes possible, so the output dimension is 8500,487.
But when I run the following code, I am getting these errors in keras.
Any help is greatly appreciated. Thanks!
(8500, 50, 960)
(8500, 487)
Creating model..
Adding first layer..
Adding second layer..
Adding output layer..
Traceback (most recent call last):
File "/Users/temp/PycharmProjects/detect_sport_video/build_model.py", line 68, in model.add(Dense(487, activation='softmax'))
File "/Users/temp/anaconda/lib/python2.7/site-packages/Keras-1.0.3-py2.7.egg/keras/models.py", line 146, in add output_tensor = layer(self.outputs[0])
File "/Users/temp/anaconda/lib/python2.7/site-packages/Keras-1.0.3-py2.7.egg/keras/engine/topology.py", line 441, in call self.assert_input_compatibility(x)
File "/Users/temp/anaconda/lib/python2.7/site-packages/Keras-1.0.3-py2.7.egg/keras/engine/topology.py", line 382, in assert_input_compatibility str(K.ndim(x)))
Exception: Input 0 is incompatible with layer dense_1: expected ndim=2, found ndim=3
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
from PIL import Image
import os
def atoi(video):
return int(video) if video.isdigit() else video
def natural_keys(video):
return [ atoi(c) for c in os.path.splitext(video) ]
input_data =np.zeros((8500,50,960))
video_index = 0
data = 'train'
video_list = sorted(os.listdir('/Users/temp/PycharmProjects/detect_sport_video/' + data + '_frame_resize1/'))
video_list.sort(key=natural_keys)
for video in video_list:
if video != '.DS_Store':
frame_index = 0
frame_list = sorted(os.listdir('/Users/temp/PycharmProjects/detect_sport_video/' + data + '_frame_resize1/' + video + '/'))
frame_list.sort(key=natural_keys)
for frame in frame_list:
image = np.asarray(Image.open('/Users/temp/PycharmProjects/detect_sport_video/' + data + '_frame_resize1/' + video + '/' + frame))
image = image.reshape(image.shape[0] * image.shape[1],3)
image = (image[:,0] + image[:,1] + image[:,2]) / 3
image = image.reshape(len(image),1)
image = image[:960]
image = image.T
input_data[video_index][frame_index] = image
frame_index += 1
video_index += 1
print input_data.shape
cnt = 1
output_classes = []
with open('/Users/temp/PycharmProjects/detect_sport_video/sports-1m-dataset/' + data + '_correct_links.txt') as input_file:
while cnt <= 8500:
output_classes.append(int(input_file.readline().split()[2]))
cnt += 1
output_data =np.zeros((8500,487))
output_index = 0
while(output_index < 8500):
output_data[output_index,output_classes[output_index]] = 1
output_index += 1
print output_data.shape
print("Creating model..")
model = Sequential()
print("Adding first layer..")
model.add(LSTM(100, return_sequences=True,
input_shape=(50, 960)))
print("Adding second layer..")
model.add(LSTM(100, return_sequences=True))
print("Adding output layer..")
model.add(Dense(487, activation='softmax'))
print "Compiling model.."
model.compile(loss='categorical_crossentropy',
optimizer='RMSprop',
metrics=['accuracy'])
print "Fitting model.."
model.fit(input_data,output_data,
batch_size=50, nb_epoch=100)
Also, If I try to print model.output_shape after adding every LSTM layer, the output I get is (None, 50, 200) but it should have been (None,200). Thats where the problem is. But I dont know why am getting (None,50,200). Any ideas?
Upvotes: 2
Views: 2469
Reputation: 66
print("Adding second layer..") model.add(LSTM(100, return_sequences=False))
Upvotes: 4