Reputation: 55
I'm trying to implement simple k-means clustering using TensorFlow 2.0. My understanding is that functions decorated with @tf.function,
including those containing for loops, are converted to graph code by AutoGraph.
Please let me know what is causing the ValueError.
tf_kmeans.py
import tensorflow as tf
import numpy as np
from typeguard import typechecked
from typing import Union
@tf.function
def train_kmeans(X: Union[tf.Tensor, np.ndarray],
                 k: Union[int, tf.Tensor],
                 n_iter: Union[int, tf.Tensor] = 10) -> (tf.Tensor, tf.Tensor):
    """Run k-means on the rows of ``X`` (version from the question).

    NOTE: this is the snippet that produces the ValueError shown in the
    traceback below; it is kept as posted so the question stays reproducible.

    Args:
        X: training data; converted to a float32 tensor and checked to be 2-D.
        k: number of centroids; the first ``k`` rows of a shuffled copy of
            ``X`` are used as the initial centroids.
        n_iter: number of assignment/update iterations (defaults to 10).

    Returns:
        ``(centroids, clusters)``: the centroids converted back to a tensor,
        and the per-row cluster indices (held in a ``tf.Variable``).
    """
    X = tf.convert_to_tensor(X)
    X = tf.cast(X, tf.float32)
    assert len(tf.shape(X)) == 2, "Training data X must be represented as 2D array only"
    m = tf.shape(X)[0]
    k = tf.convert_to_tensor(k, dtype=tf.int64)
    # Initialise the centroids with k randomly chosen rows of X.
    random_select = tf.random.shuffle(X)
    init_centroids = random_select[:k]
    # NOTE: the traceback points at this line -- creating a tf.Variable inside
    # the tf.function-decorated body is what raises "tried to create variables
    # on non-first call".
    centroids = tf.Variable(init_centroids)
    clusters = tf.zeros([m, ], dtype=tf.int64)
    clusters = tf.Variable(clusters)
    for _ in tf.range(n_iter):
        # Pairwise differences via broadcasting: axis 0 indexes centroids,
        # axis 1 indexes rows of X.
        squared_diffs = tf.square(X[None, :, :] - centroids[:, None, :])
        euclidean_dists = tf.reduce_sum(squared_diffs, axis=-1) ** 0.5
        # Assign every row to the centroid with the smallest distance.
        clusters.assign(tf.argmin(euclidean_dists, axis=0))
        # selector[c] is a boolean mask over rows that belong to cluster c.
        selector = tf.range(k)[:, None] == clusters[None, :]
        for c in tf.range(k):
            select = selector[c]
            points = X[select]
            # New centroid = mean of the rows currently assigned to cluster c.
            mean_points = tf.reduce_mean(points, axis=0)
            centroids[c].assign(mean_points)
    centroids = tf.convert_to_tensor(centroids)
    return centroids, clusters
The following code is used to call the function:
tf_kmeans_test.py
import numpy as np
import tensorflow as tf

import tf_kmeans

# Eight 2-D sample points used as the training set.
X = np.array([
    [2., 10.],
    [2., 5.],
    [8., 4.],
    [5., 8.],
    [7., 5.],
    [6., 4.],
    [1., 2.],
    [4., 9.],
])
# Number of clusters to fit.
k = 3

# Fit k-means and show the resulting centroids and per-point assignments.
centroids, clusters = tf_kmeans.train_kmeans(X, k)
print(centroids)
print(clusters)
Error message is provided below.
2020-04-11 00:58:33.140511: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-04-11 00:58:33.217765: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7ffc4f310c50 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-04-11 00:58:33.217798: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
WARNING:tensorflow:From /Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Traceback (most recent call last):
File "tf_kmeans_test.py", line 15, in <module>
centroids, clusters = tf_kmeans.train_kmeans(X, k)
File "/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 580, in __call__
result = self._call(*args, **kwds)
File "/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 708, in _call
return function_lib.defun(fn_with_cond)(*canon_args, **canon_kwds)
File "/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2419, in __call__
graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
File "/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2777, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2667, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py", line 981, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py", line 968, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:700 fn_with_cond *
functools.partial(self._concrete_stateful_fn._filtered_call, # pylint: disable=protected-access
/Users/swg/Repositories/UG-Courses/CSE2705/tf_kmeans.py:21 train_kmeans *
centroids = tf.Variable(init_centroids)
/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/ops/variables.py:261 __call__ **
return cls._variable_v2_call(*args, **kwargs)
/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/ops/variables.py:255 _variable_v2_call
shape=shape)
/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/ops/variables.py:66 getter
return captured_getter(captured_previous, **kwargs)
/Users/swg/opt/homebrew/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:511 invalid_creator_scope
"tf.function-decorated function tried to create "
ValueError: tf.function-decorated function tried to create variables on non-first call.
If the tf.function
decorator is removed, the code works perfectly fine because autograph is not executed in that case.
Thanks in advance.
Upvotes: 1
Views: 1134
Reputation: 3764
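I'm assuming you want an instance of tf.Variable only so that you can use assign.
However, a tf.function is only allowed to create variables the first time it is traced,
so when using tf.function you should always create variables outside the function and pass them in,
and use built-in TensorFlow data structures (such as tf.TensorArray) inside it.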
For example, your code with minimal changes, without tf.Variable
objects would be:
import tensorflow as tf
import numpy as np
from typeguard import typechecked
from typing import Union
@tf.function
def train_kmeans(X: Union[tf.Tensor, np.ndarray],
                 k: Union[int, tf.Tensor],
                 n_iter: Union[int, tf.Tensor] = 10) -> (tf.Tensor, tf.Tensor):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    No ``tf.Variable`` is created inside the function, so it can be traced by
    ``tf.function``; per-iteration centroids are collected in a
    ``tf.TensorArray`` instead.

    Args:
        X: training data of shape ``(n_samples, n_features)``; it is converted
            to a float32 tensor.
        k: number of clusters. The first ``k`` rows of a shuffled copy of
            ``X`` are used as the initial centroids.
        n_iter: number of assignment/update iterations (defaults to 10).

    Returns:
        ``(centroids, clusters)`` where ``centroids`` is a float32 tensor of
        shape ``(k, n_features)`` and ``clusters`` is an int64 tensor holding
        the centroid index assigned to each row of ``X``.
    """
    X = tf.convert_to_tensor(X)
    X = tf.cast(X, tf.float32)
    assert len(tf.shape(X)) == 2, "Training data X must be represented as 2D array only"

    # Keep the caller's value of k: it is used as the TensorArray size below,
    # while the int64 tensor version is used for slicing and tf.range.
    num_centers = k
    m = tf.shape(X)[0]
    k = tf.convert_to_tensor(k, dtype=tf.int64)

    # Initialise the centroids with k randomly chosen rows of X.
    random_select = tf.random.shuffle(X)
    centroids = random_select[:k]
    clusters = tf.zeros([m, ], dtype=tf.int64)

    for _ in tf.range(n_iter):
        # Pairwise distances via broadcasting: axis 0 indexes centroids,
        # axis 1 indexes rows of X.
        squared_diffs = tf.square(X[None, :, :] - centroids[:, None, :])
        euclidean_dists = tf.reduce_sum(squared_diffs, axis=-1) ** 0.5
        clusters = tf.argmin(euclidean_dists, axis=0)
        # selector[c] is a boolean mask over the rows assigned to cluster c.
        selector = tf.range(k)[:, None] == clusters[None, :]

        # One slot per centroid; the element shape follows X's feature
        # dimension instead of being hard-coded to 2.
        new_centroids = tf.TensorArray(tf.float32, size=num_centers,
                                       element_shape=X.shape[1:])
        for c in tf.range(k):
            select = selector[c]
            centroid = tf.reduce_mean(X[select], axis=0)
            # The mean of an empty cluster is NaN and would poison every later
            # distance computation; keep the previous centroid in that case.
            centroid = tf.where(tf.reduce_any(select), centroid, centroids[c])
            # TensorArray.write returns the updated array; the result must be
            # re-assigned or the write is not threaded through the graph.
            new_centroids = new_centroids.write(tf.cast(c, tf.int32), centroid)
        # Stack the k per-cluster vectors into a (k, n_features) tensor.
        centroids = new_centroids.stack()

    return centroids, clusters
Upvotes: 1