Reputation: 6197
I am trying to convert my Tensorflow graph to use a custom tensorflow estimator, but I am getting stuck defining the function for input_fn
; I am currently getting an error.
This is the function I use to generate my input data and labels
data_index = 0
epoch_index = 0
recEpoch_indexA = 0 #Used to help keep store of the total number of epoches with the models
def generate_batch(batch_size, inputCount):
global data_index, epoch_index
batch = np.ndarray(shape=(batch_size, inputCount), dtype=np.int32)
labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
n=0
while n < batch_size:
if len( set(my_data[data_index, 1]) ) >= inputCount:
labels[n,0] = my_data[data_index, 0]
batch[n] = random.sample( set(my_data[data_index, 1]), inputCount)
n = n+1
data_index = (data_index + 1) % len(my_data) #may have to do something like len my_data[:]
if data_index == 0:
epoch_index = epoch_index + 1
print('Completed %d Epochs' % epoch_index)
else:
data_index = (data_index + 1) % len(my_data)
if data_index == 0:
epoch_index = epoch_index + 1
print('Completed %d Epochs' % epoch_index)
return batch, labels
This is where I define my Estimator and attempt to do training
#Define the estimator
word2vecEstimator = tf.estimator.Estimator(
model_fn=my_model,
params={
'batch_size': 1024,
'embedding_size': 50,
'num_inputs': 5,
'num_sampled':128
})
batch_size = 16
num_inputs = 3
#Train with Estimator
word2vecEstimator.train(
input_fn=generate_batch(batch_size, num_inputs),
steps=10)
This is the error message that I get
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/lib/python3.6/inspect.py in getfullargspec(func)
1118 skip_bound_arg=False,
-> 1119 sigcls=Signature)
1120 except Exception as ex:
/usr/lib/python3.6/inspect.py in _signature_from_callable(obj, follow_wrapper_chains, skip_bound_arg, sigcls)
2185 if not callable(obj):
-> 2186 raise TypeError('{!r} is not a callable object'.format(obj))
2187
TypeError: (array([[1851833, 670357, 343012],
[ 993526, 431296, 935528],
[ 938067, 1155719, 2277388],
[ 534965, 1125669, 1665716],
[1412657, 2152211, 1176177],
[ 268114, 2097642, 2707258],
[1280762, 1516464, 453615],
[2545980, 2302607, 2421182],
[1706260, 2735027, 292652],
[1802025, 2949676, 653015],
[ 854228, 2626773, 225486],
[1747135, 1608478, 2503487],
[1326661, 272883, 2089444],
[3082922, 1359481, 621031],
[2636832, 1842777, 1979638],
[2512269, 1617986, 389356]], dtype=int32), array([[1175598],
[2528125],
[1870906],
[ 643521],
[2349752],
[ 754986],
[2277570],
[2121120],
[2384306],
[1881398],
[3046987],
[2505729],
[2908573],
[2438025],
[ 441422],
[2355625]], dtype=int32)) is not a callable object
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last)
<ipython-input-15-7acc939af001> in <module>()
5 word2vecEstimator.train(
6 input_fn=generate_batch(batch_size, num_inputs),
----> 7 steps=10)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
352
353 saving_listeners = _check_listeners_type(saving_listeners)
--> 354 loss = self._train_model(input_fn, hooks, saving_listeners)
355 logging.info('Loss for final step: %s.', loss)
356 return self
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1205 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1206 else:
-> 1207 return self._train_model_default(input_fn, hooks, saving_listeners)
1208
1209 def _train_model_default(self, input_fn, hooks, saving_listeners):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1232 features, labels, input_hooks = (
1233 self._get_features_and_labels_from_input_fn(
-> 1234 input_fn, model_fn_lib.ModeKeys.TRAIN))
1235 worker_hooks.extend(input_hooks)
1236 estimator_spec = self._call_model_fn(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _get_features_and_labels_from_input_fn(self, input_fn, mode)
1073 """Extracts the `features` and labels from return values of `input_fn`."""
1074 return estimator_util.parse_input_fn_result(
-> 1075 self._call_input_fn(input_fn, mode))
1076
1077 def _extract_batch_length(self, preds_evaluated):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_input_fn(self, input_fn, mode)
1151 ValueError: if `input_fn` takes invalid arguments.
1152 """
-> 1153 input_fn_args = function_utils.fn_args(input_fn)
1154 kwargs = {}
1155 if 'mode' in input_fn_args:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/function_utils.py in fn_args(fn)
54 if _is_callable_object(fn):
55 fn = fn.__call__
---> 56 args = tf_inspect.getfullargspec(fn).args
57 if _is_bounded_method(fn):
58 args.remove('self')
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/tf_inspect.py in getfullargspec(obj)
214 return next((d.decorator_argspec
215 for d in decorators
--> 216 if d.decorator_argspec is not None), _getfullargspec(target))
217
218
/usr/lib/python3.6/inspect.py in getfullargspec(func)
1123 # else. So to be fully backwards compatible, we catch all
1124 # possible exceptions here, and reraise a TypeError.
-> 1125 raise TypeError('unsupported callable') from ex
1126
1127 args = []
TypeError: unsupported callable
Here is a link to the Google Colab notebook for people to run on their own. For anyone looking to execute this, this will download a data file that is ~500 mbs.
https://colab.research.google.com/drive/1LjIz04xhRi5Fsw_Q3IzoG_5KkkXI3WFE
And here is the full code, from the notebook.
import math
import numpy as np
import random
import zipfile
import shutil
from collections import namedtuple
import os
import pprint
import tensorflow as tf
import pandas as pd
import pickle
from numpy import genfromtxt
!pip install -U -q PyDrive
from google.colab import files
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
vocabulary_size = 3096637 #updated 10-25-18 3096636
import gc
dl_id = '19yha9Scxq4zOdfPcw5s6L2lkYQWenApC' #updated 10-22-18
myDownload = drive.CreateFile({'id': dl_id})
myDownload.GetContentFile('Data.npy')
my_data = np.load('Data.npy')
#os.remove('Data.npy')
np.random.shuffle(my_data)
print(my_data[0:15])
data_index = 0
epoch_index = 0
recEpoch_indexA = 0 #Used to help keep store of the total number of epoches with the models
def generate_batch(batch_size, inputCount):
global data_index, epoch_index
batch = np.ndarray(shape=(batch_size, inputCount), dtype=np.int32)
labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
n=0
while n < batch_size:
if len( set(my_data[data_index, 1]) ) >= inputCount:
labels[n,0] = my_data[data_index, 0]
batch[n] = random.sample( set(my_data[data_index, 1]), inputCount)
n = n+1
data_index = (data_index + 1) % len(my_data) #may have to do something like len my_data[:]
if data_index == 0:
epoch_index = epoch_index + 1
print('Completed %d Epochs' % epoch_index)
else:
data_index = (data_index + 1) % len(my_data)
if data_index == 0:
epoch_index = epoch_index + 1
print('Completed %d Epochs' % epoch_index)
return batch, labels
def my_model( features, labels, mode, params):
# train_dataset = tf.placeholder(tf.int32, shape=[batch_size, num_inputs ])
# train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
train_dataset = features
train_labels = labels
batch_sizeE=params["batch_size"]
embedding_sizeE=params["embedding_size"]
num_inputsE=params["num_inputs"]
num_sampledE=params["num_sampled"]
epochCount = tf.get_variable( 'epochCount', initializer= 0) #to store epoch count to total # of epochs are known
update_epoch = tf.assign(epochCount, epochCount + 1)
embeddings = tf.get_variable( 'embeddings', dtype=tf.float32,
initializer= tf.random_uniform([vocabulary_size, embedding_sizeE], -1.0, 1.0, dtype=tf.float32) )
softmax_weights = tf.get_variable( 'softmax_weights', dtype=tf.float32,
initializer= tf.truncated_normal([vocabulary_size, embedding_sizeE],
stddev=1.0 / math.sqrt(embedding_sizeE), dtype=tf.float32 ) )
softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float32,
initializer= tf.zeros([vocabulary_size], dtype=tf.float32), trainable=False )
embed = tf.nn.embedding_lookup(embeddings, train_dataset) #train data set is
embed_reshaped = tf.reshape( embed, [batch_sizeE*num_inputs, embedding_sizeE] )
segments= np.arange(batch_size).repeat(num_inputs)
averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)
loss = tf.reduce_mean(
tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
sampled_values=tf.nn.uniform_candidate_sampler(true_classes=tf.cast(train_labels, tf.int64), num_sampled=num_sampled, num_true=1, unique=True, range_max=vocabulary_size, seed=None),
labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size))
optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)
saver = tf.train.Saver()
#Define the estimator
word2vecEstimator = tf.estimator.Estimator(
model_fn=my_model,
params={
'batch_size': 1024,
'embedding_size': 50,
'num_inputs': 5,
'num_sampled':128
})
batch_size = 16
num_inputs = 3
#Train with Estimator
word2vecEstimator.train(
input_fn=generate_batch(batch_size, num_inputs),
steps=10)
Upvotes: 0
Views: 998
Reputation: 11
There's no way of correcting the function, cuz it can never be implemented using Tensorflow. The input_fn() function must return Tensors, not numpy arrays, cuz the input_fn() is a function constructing the graph, and it maybe is just called once when building the graph. In this context, the numpy array is just a constant value. It may seem to be weird, But it's the truth. You need to understand the mechanism of Tensorflow: the STATIC COMPUTE GRAPH!
Upvotes: 1
Reputation: 6197
Answer here
Tensorflow error : unsupported callable
train method accepts the input function, so it should be input_fn, not input_fn().
Upvotes: 0