Lohith Ravi
Lohith Ravi

Reputation: 68

Keras TypeError: Cannot convert Type TensorType(float32, 3D) (of Variable Subtensor{:int64:}.0)

The following code snippet gives me a TensorType error:

TypeError: Cannot convert Type TensorType(float32, 3D) (of Variable Subtensor{:int64:}.0) into Type TensorType(float32, (False, False, True)). You can try to manually convert Subtensor{:int64:}.0 into a TensorType(float32, (False, False, True)).

This is a basic example from one of the tutorial websites I was reading. Can you please help me understand this error? I'm new to both machine learning and Keras.

import itertools
import numpy as np
# put together a model to predict 
from keras.layers import Input, Embedding, merge, Flatten, SimpleRNN
from keras.models import Model


# Toy corpus: eight three-word sentences, one per line of the literal below.
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('\n')
# One 0/1 label per sentence, in the same order as `sentences`; the trailing
# .T turns the single row into a column of shape (8, 1).
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T

# Tokenizer: strip surrounding whitespace, lower-case, split on single spaces.
lemma = lambda x: x.strip().lower().split(' ')
sentences_lemmatized = [lemma(sentence) for sentence in sentences]
# Vocabulary: every distinct token that occurs in the corpus.
words = set(itertools.chain(*sentences_lemmatized))
# i.e. the 8 words sam, is, red, hannah, not, green, bob, sarah
# (a set, so the iteration order is arbitrary)

# dictionaries for converting words to integers and vice versa
# Both are built by iterating the same unmodified set, so
# idx2word[word2idx[w]] == w for every word w.
word2idx = dict((v, i) for i, v in enumerate(words))
idx2word = list(words)

# convert the sentences to a numpy array of word indices
# (8 sentences x 3 tokens each)
to_idx = lambda x: [word2idx[word] for word in x]
sentences_idx = [to_idx(sentence) for sentence in sentences_lemmatized]
sentences_array = np.asarray(sentences_idx, dtype='int32')

# parameters for the model
sentence_maxlen = 3       # tokens per input sentence
n_words = len(words)      # vocabulary size passed to the Embedding layer
n_embed_dims = 5          # second argument of the Embedding layer

# Functional-API graph: integer word indices -> Embedding -> SimpleRNN.
input_sentence = Input(shape=(sentence_maxlen,), dtype='int32')
input_embedding = Embedding(n_words, n_embed_dims)(input_sentence)
# Earlier attempts, left commented out:
#color_prediction = SimpleRNN(init='uniform',output_dim=1,input_dim=3)(input_embedding)
#color_prediction = SimpleRNN(output_dim=1,input_dim=5, 
 #       init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None, return_sequences=False)(input_embedding);

# NOTE(review): the broadcastable pattern (False, False, True) in the reported
# TypeError suggests the size-1 output dimension of this layer is involved —
# confirm.
# NOTE(review): batch_input_shape=(10, 2, 3) does not correspond to
# sentence_maxlen=3 / n_embed_dims=5 — confirm whether Keras uses or ignores
# it when the layer is called on an existing tensor.
color_prediction = SimpleRNN(1, return_sequences=False, batch_input_shape=(10, 2, 3))(input_embedding);

predict_green = Model(input=[input_sentence], output=[color_prediction])
predict_green.compile(optimizer='sgd', loss='binary_crossentropy')

# fit the model to predict what color each person is
predict_green.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)
# layers[1] is presumably the Embedding layer (after the input layer);
# .W.get_value() reads its weights — verify for the Keras/backend in use.
embeddings = predict_green.layers[1].W.get_value()

# print out the embedding vector associated with each word
for i in range(n_words):
    print('{}: {}'.format(idx2word[i], embeddings[i]))

Upvotes: 1

Views: 1325

Answers (2)

ruoho ruotsi
ruoho ruotsi

Reputation: 1313

For reference this example code comes from http://benjaminbolte.com/blog/2016/keras-language-modeling.html and although from April 2016, it doesn't work on current versions of Keras==1.1.1, Theano==0.8.2

At the time of the blog post, I too had taken this code and modified it to use LSTMs, and I have also now noticed that it no longer runs with the latest Keras/Theano.

The main thing that I think you need, especially using the new Keras-1 functional API is a Dense layer at the end after SimpleRNN. You also need to ensure that your SimpleRNN has an output_dim of 3.

I made the minimum modifications needed to get your code working. Here is the updated code, which runs and produces the output shown after it:

import itertools
import numpy as np
# put together a model to predict
from keras.layers import Input, Embedding, merge, Flatten, Dense, SimpleRNN
from keras.models import Model


# Toy corpus: eight three-word sentences, one per line of the literal below.
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('\n')
# One 0/1 label per sentence, in the same order as `sentences`; the trailing
# .T turns the single row into a column of shape (8, 1).
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T

# Tokenizer: strip surrounding whitespace, lower-case, split on single spaces.
lemma = lambda x: x.strip().lower().split(' ')
sentences_lemmatized = [lemma(sentence) for sentence in sentences]
# Vocabulary: every distinct token that occurs in the corpus.
words = set(itertools.chain(*sentences_lemmatized))
# i.e. the 8 words sam, is, red, hannah, not, green, bob, sarah
# (a set, so the iteration order is arbitrary)

# dictionaries for converting words to integers and vice versa
# Both are built by iterating the same unmodified set, so
# idx2word[word2idx[w]] == w for every word w.
word2idx = dict((v, i) for i, v in enumerate(words))
idx2word = list(words)

# convert the sentences to a numpy array of word indices
# (8 sentences x 3 tokens each)
to_idx = lambda x: [word2idx[word] for word in x]
sentences_idx = [to_idx(sentence) for sentence in sentences_lemmatized]
sentences_array = np.asarray(sentences_idx, dtype='int32')

# parameters for the model
sentence_maxlen = 3       # tokens per input sentence
n_words = len(words)      # vocabulary size passed to the Embedding layer
n_embed_dims = 5          # second argument of the Embedding layer

# Functional-API graph: word indices -> Embedding -> SimpleRNN -> Dense.
input_sentence = Input(shape=(sentence_maxlen,), dtype='int32')
input_embedding = Embedding(n_words, n_embed_dims)(input_sentence)
# Earlier attempts from the original question, left commented out:
#color_prediction = SimpleRNN(init='uniform',output_dim=1,input_dim=3)(input_embedding)
#color_prediction = SimpleRNN(output_dim=1,input_dim=5,
 #       init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None, return_sequences=False)(input_embedding);

# The recurrent layer is given 3 output units instead of 1, and the single
# sigmoid prediction is produced by a separate Dense layer on top of it.
# NOTE(review): batch_input_shape=(10, 2, 3) does not correspond to
# sentence_maxlen=3 / n_embed_dims=5 — confirm whether Keras uses or ignores
# it when the layer is called on an existing tensor.
color_prediction = SimpleRNN(3, return_sequences=False, batch_input_shape=(10, 2, 3))(input_embedding);
output = Dense(1, activation='sigmoid')(color_prediction)

predict_green = Model(input=[input_sentence], output=[output])
predict_green.compile(optimizer='sgd', loss='binary_crossentropy')

# fit the model to predict what color each person is
predict_green.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)
# layers[1] is presumably the Embedding layer (after the input layer);
# .W.get_value() reads its weights — verify for the Keras/backend in use.
embeddings = predict_green.layers[1].W.get_value()

# print out the embedding vector associated with each word
for i in range(n_words):
    print('{}: {}'.format(idx2word[i], embeddings[i]))

output:

red: [-0.03866547 -0.04583547 -0.0314577  -0.04576075  0.00064603]
is: [-0.02093433 -0.00811194 -0.01167872 -0.02813761 -0.02160992]
bob: [-0.46798751 -0.01344876  0.36456427 -0.04407313  0.22842836]
sarah: [ 0.45210958  0.02854088 -0.34124625  0.0184452  -0.23039177]
green: [-0.00628944  0.04167764 -0.02662347 -0.01051781  0.03500782]
not: [-0.01113868 -0.01490347 -0.00235422 -0.03276222 -0.0263596 ]
sam: [ 0.45785579  0.04527023 -0.34266111 -0.01125967 -0.25254542]
hannah: [-0.45697126 -0.00369712  0.37587604 -0.04988953  0.23814654]

Upvotes: 0

lejinghu
lejinghu

Reputation: 11

I am new to machine learning and ran into the same problem. I changed the code as shown below and it ran, but I'm not sure it's correct.

import itertools
import os
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Input, Embedding, merge, Flatten, SimpleRNN
# Toy corpus: eight three-word sentences, one per line of the literal below.
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('\n')
# One 0/1 label per sentence, in the same order as `sentences`; the trailing
# .T turns the single row into a column of shape (8, 1).
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T
# Tokenizer: strip surrounding whitespace, lower-case, split on single spaces.
lemma = lambda x: x.strip().lower().split(' ')
sentences_lemmatized = [lemma(sentence) for sentence in sentences]
# Vocabulary: every distinct token that occurs in the corpus, i.e. the
# 8 words sam, is, red, hannah, not, green, bob, sarah (arbitrary set order).
words = set(itertools.chain(*sentences_lemmatized))
# dictionaries for converting words to integers and vice versa
# Both are built by iterating the same unmodified set, so
# idx2word[word2idx[w]] == w for every word w.
word2idx = dict((v, i) for i, v in enumerate(words))
idx2word = list(words)
# convert the sentences to a numpy array of word indices
# (8 sentences x 3 tokens each)
to_idx = lambda x: [word2idx[word] for word in x]
sentences_idx = [to_idx(sentence) for sentence in sentences_lemmatized]
sentences_array = np.asarray(sentences_idx, dtype='int32')
# parameters for the model
sentence_maxlen = 3       # tokens per input sentence
n_words = len(words)      # vocabulary size passed to the Embedding layer
n_embed_dims = 3          # second argument of the Embedding layer
# Sequential stack: Embedding -> SimpleRNN(3) -> Dense(1) -> sigmoid.
model = Sequential()
model.add(Embedding(n_words, n_embed_dims,input_length=sentence_maxlen))
model.add(SimpleRNN(3))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
model.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)
predictions = model.predict(sentences_array)
# print() call form: valid on both Python 2 and Python 3, and consistent
# with the print() call in the loop below.
print(predictions.shape)
# The Embedding layer was added first, so it is model.layers[0];
# .W.get_value() reads its weights — verify for the Keras/backend in use.
embeddings = model.layers[0].W.get_value()
# print out the embedding vector associated with each word
for i in range(n_words):
    print('{}: {}'.format(idx2word[i], embeddings[i]))

OUTPUT:

sarah: [-0.51089537 -0.30958903 -0.17312947]
sam: [-0.47487321 -0.33426151 -0.18260512]
hannah: [ 0.51548952  0.33343625  0.18121554]
is: [ 0.02989657 -0.02573686  0.01081978]
green: [ 0.0155487  -0.02551323  0.00846179]
not: [ 0.01339869 -0.02586824  0.01932905]
bob: [ 0.47654441  0.37283263  0.17969941]
red: [-0.02136148  0.04420395 -0.03119873]

Upvotes: 1

Related Questions