Reputation: 68
The following code snippet gives me a TensorType error:
TypeError: Cannot convert Type TensorType(float32, 3D) (of Variable Subtensor{:int64:}.0) into Type TensorType(float32, (False, False, True)). You can try to manually convert Subtensor{:int64:}.0 into a TensorType(float32, (False, False, True)).
This is a basic example from one of the tutorial websites I was reading. Can you please help me understand this error? I'm new to both machine learning and Keras.
import itertools
import numpy as np
# put together a model to predict
from keras.layers import Input, Embedding, merge, Flatten, SimpleRNN
from keras.models import Model
# Toy corpus: one three-word statement per line.
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('\n')

# One binary label per sentence, transposed into a column vector.
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T


def lemma(x):
    """Trim, lower-case and split a sentence on single spaces."""
    return x.strip().lower().split(' ')


sentences_lemmatized = [lemma(sentence) for sentence in sentences]

# Vocabulary: every distinct token that occurs in the corpus.
words = set(itertools.chain.from_iterable(sentences_lemmatized))

# Lookup tables between words and integer ids. Both walk the same set,
# so idx2word[word2idx[w]] == w for every word w.
word2idx = {word: index for index, word in enumerate(words)}
idx2word = list(words)


def to_idx(x):
    """Map a list of tokens to the list of their integer ids."""
    return [word2idx[word] for word in x]


# Encode the whole corpus as an int32 array with one row per sentence.
sentences_idx = [to_idx(tokens) for tokens in sentences_lemmatized]
sentences_array = np.asarray(sentences_idx, dtype='int32')
# Parameters for the model.
sentence_maxlen = 3  # every example sentence has exactly three tokens
n_words = len(words)  # vocabulary size
n_embed_dims = 5  # length of each learned word vector

# Integer word ids go in; the Embedding layer turns each id into a vector.
input_sentence = Input(shape=(sentence_maxlen,), dtype='int32')
input_embedding = Embedding(n_words, n_embed_dims)(input_sentence)

# Earlier attempts, kept for reference:
#color_prediction = SimpleRNN(init='uniform',output_dim=1,input_dim=3)(input_embedding)
#color_prediction = SimpleRNN(output_dim=1,input_dim=5,
# init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None, return_sequences=False)(input_embedding);

# NOTE(review): this is the configuration the question is about. It is left
# unchanged on purpose; the reported TypeError presumably originates from this
# single-unit SimpleRNN used directly as the model output.
color_prediction = SimpleRNN(1, return_sequences=False, batch_input_shape=(10, 2, 3))(input_embedding)
predict_green = Model(input=[input_sentence], output=[color_prediction])
predict_green.compile(optimizer='sgd', loss='binary_crossentropy')

# Fit the model to predict what color each person is.
predict_green.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)

# Read the trained weights of layer 1 (presumably the Embedding layer, with
# layer 0 being the input -- confirm against the Keras version in use).
embeddings = predict_green.layers[1].W.get_value()

# Print out the embedding vector associated with each word.
for i in range(n_words):
    print('{}: {}'.format(idx2word[i], embeddings[i]))
Upvotes: 1
Views: 1325
Reputation: 1313
For reference, this example code comes from http://benjaminbolte.com/blog/2016/keras-language-modeling.html and, although it dates from April 2016, it doesn't work on current versions (Keras==1.1.1, Theano==0.8.2).
At the time of the blog post, I too had taken this code and modified it to use LSTMs, and I have now also noticed that it no longer runs with the latest Keras/Theano.
The main thing that I think you need, especially when using the new Keras 1 functional API, is a `Dense` layer at the end, after the `SimpleRNN`. You also need to ensure that your `SimpleRNN` has an `output_dim` of 3.
I made the minimum changes needed to get your code working. Here is the updated code, which runs and produces the output shown after it:
import itertools
import numpy as np
# put together a model to predict
from keras.layers import Input, Embedding, merge, Flatten, Dense, SimpleRNN
from keras.models import Model
# Training corpus: eight short "<name> <is|not> <color>" statements.
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('\n')

# Target labels, one row per sentence (shape (8, 1) after the transpose).
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T


def lemma(x):
    """Return the lower-cased, space-separated tokens of sentence x."""
    return x.strip().lower().split(' ')


sentences_lemmatized = list(map(lemma, sentences))

# Set of all distinct tokens seen in the corpus.
words = {token for tokens in sentences_lemmatized for token in tokens}

# id -> word is simply the set laid out as a list; word -> id inverts it.
idx2word = list(words)
word2idx = dict(zip(idx2word, range(len(idx2word))))


def to_idx(x):
    """Translate a token list into its list of integer ids."""
    return [word2idx[word] for word in x]


# Convert the tokenised sentences to a numpy array of word ids.
sentences_idx = list(map(to_idx, sentences_lemmatized))
sentences_array = np.asarray(sentences_idx, dtype='int32')
# Parameters for the model.
sentence_maxlen = 3  # every example sentence has exactly three tokens
n_words = len(words)  # vocabulary size
n_embed_dims = 5  # length of each learned word vector

# Integer word ids go in; the Embedding layer turns each id into a vector.
input_sentence = Input(shape=(sentence_maxlen,), dtype='int32')
input_embedding = Embedding(n_words, n_embed_dims)(input_sentence)

# Earlier attempts from the question, kept for reference:
#color_prediction = SimpleRNN(init='uniform',output_dim=1,input_dim=3)(input_embedding)
#color_prediction = SimpleRNN(output_dim=1,input_dim=5,
# init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None, return_sequences=False)(input_embedding);

# The recurrent layer now has 3 units and a separate Dense layer produces the
# single sigmoid output, instead of a 1-unit SimpleRNN acting as the output.
# NOTE(review): batch_input_shape=(10, 2, 3) does not match the embedding
# output implied above (3 time steps of 5 features); it is presumably ignored
# because the layer is applied to an existing tensor -- confirm before reuse.
color_prediction = SimpleRNN(3, return_sequences=False, batch_input_shape=(10, 2, 3))(input_embedding)
output = Dense(1, activation='sigmoid')(color_prediction)
predict_green = Model(input=[input_sentence], output=[output])
predict_green.compile(optimizer='sgd', loss='binary_crossentropy')

# Fit the model to predict what color each person is.
predict_green.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)

# Read the trained weights of layer 1 (presumably the Embedding layer, with
# layer 0 being the input -- confirm against the Keras version in use).
embeddings = predict_green.layers[1].W.get_value()

# Print out the embedding vector associated with each word.
for i in range(n_words):
    print('{}: {}'.format(idx2word[i], embeddings[i]))
output:
red: [-0.03866547 -0.04583547 -0.0314577 -0.04576075 0.00064603]
is: [-0.02093433 -0.00811194 -0.01167872 -0.02813761 -0.02160992]
bob: [-0.46798751 -0.01344876 0.36456427 -0.04407313 0.22842836]
sarah: [ 0.45210958 0.02854088 -0.34124625 0.0184452 -0.23039177]
green: [-0.00628944 0.04167764 -0.02662347 -0.01051781 0.03500782]
not: [-0.01113868 -0.01490347 -0.00235422 -0.03276222 -0.0263596 ]
sam: [ 0.45785579 0.04527023 -0.34266111 -0.01125967 -0.25254542]
hannah: [-0.45697126 -0.00369712 0.37587604 -0.04988953 0.23814654]
Upvotes: 0
Reputation: 11
I am new to machine learning and ran into the same problem. I changed the code as shown below and it ran, but I'm not sure it's correct.
import itertools
import os
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Input, Embedding, merge, Flatten, SimpleRNN
# Example sentences, split into a list with one sentence per entry.
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('\n')

# Labels aligned with `sentences`, transposed to one label per row.
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T


def lemma(x):
    """Strip surrounding whitespace, lower-case, and split on spaces."""
    return x.strip().lower().split(' ')


sentences_lemmatized = []
for sentence in sentences:
    sentences_lemmatized.append(lemma(sentence))

# Collect the vocabulary by walking the sentences in order.
words = set()
for tokens in sentences_lemmatized:
    words.update(tokens)

# Dictionaries for converting words to integers and vice versa.
idx2word = list(words)
word2idx = {word: position for position, word in enumerate(idx2word)}


def to_idx(x):
    """Look up the integer id of every token in x."""
    return [word2idx[word] for word in x]


# Convert the sentences to a numpy array of ids, one row per sentence.
sentences_idx = [to_idx(tokens) for tokens in sentences_lemmatized]
sentences_array = np.asarray(sentences_idx, dtype='int32')
# Parameters for the model.
sentence_maxlen = 3  # every example sentence has exactly three tokens
n_words = len(words)  # vocabulary size
n_embed_dims = 3  # length of each learned word vector

# Embedding -> 3-unit SimpleRNN -> single sigmoid output.
model = Sequential()
model.add(Embedding(n_words, n_embed_dims, input_length=sentence_maxlen))
model.add(SimpleRNN(3))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Train on the encoded sentences against the is_green labels.
model.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)

# Parenthesised print so the script parses on both Python 2 and Python 3,
# matching the print(...) call used in the loop below.
predictions = model.predict(sentences_array)
print(predictions.shape)

# The Embedding layer was added first, so it is layers[0] of this model.
embeddings = model.layers[0].W.get_value()

# Print out the embedding vector associated with each word.
for i in range(n_words):
    print('{}: {}'.format(idx2word[i], embeddings[i]))
OUTPUT:
sarah: [-0.51089537 -0.30958903 -0.17312947]
sam: [-0.47487321 -0.33426151 -0.18260512]
hannah: [ 0.51548952 0.33343625 0.18121554]
is: [ 0.02989657 -0.02573686 0.01081978]
green: [ 0.0155487 -0.02551323 0.00846179]
not: [ 0.01339869 -0.02586824 0.01932905]
bob: [ 0.47654441 0.37283263 0.17969941]
red: [-0.02136148 0.04420395 -0.03119873]
Upvotes: 1