Reputation: 2195
I'm developing a Convolutional Autoencoder with TensorFlow 2.1.
This is the code:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, Dense, Flatten, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm


class ConvAutoencoder:
    def __init__(self, input_shape, latent_dim):
        self.input_shape = input_shape
        self.latent_dim = latent_dim
        self.__create_model()

    def __create_model(self):
        # Define Encoder
        encoder_input = Input(shape=self.input_shape, name='encoder_input')
        x = Conv2D(filters=16, kernel_size=5, activation='relu', padding='same')(encoder_input)
        x = Conv2D(filters=32, kernel_size=3, strides=2, activation='relu', padding='same')(x)
        x = Conv2D(filters=64, kernel_size=3, strides=2, activation='relu', padding='same')(x)
        x = Conv2D(filters=128, kernel_size=2, strides=2, activation='relu', padding='same')(x)
        last_conv_shape = x.shape
        x = Flatten()(x)
        x = Dense(256, activation='relu')(x)
        x = Dense(units=self.latent_dim, name='encoded_rep')(x)
        self.encoder = Model(encoder_input, x, name='encoder_model')
        self.encoder.summary()

        # Define Decoder
        decoder_input = Input(shape=self.latent_dim, name='decoder_input')
        x = Dense(units=256)(decoder_input)
        x = Dense(units=(last_conv_shape[1] * last_conv_shape[2] * last_conv_shape[3]), activation='relu')(x)
        x = Reshape(target_shape=(last_conv_shape[1], last_conv_shape[2], last_conv_shape[3]))(x)
        x = Conv2DTranspose(filters=128, kernel_size=2, activation='relu', padding='same')(x)
        x = Conv2DTranspose(filters=64, kernel_size=3, strides=2, activation='relu', padding='same')(x)
        x = Conv2DTranspose(filters=32, kernel_size=3, strides=2, activation='relu', padding='same')(x)
        x = Conv2DTranspose(filters=16, kernel_size=5, strides=2, activation='relu', padding='same')(x)
        x = Conv2DTranspose(filters=self.input_shape[2], kernel_size=5, activation='sigmoid', padding='same')(x)
        self.decoder = Model(decoder_input, x, name='decoder_model')
        self.decoder.summary()

        # Define Autoencoder from encoder input to decoder output
        self.autoencoder = Model(encoder_input, self.decoder(self.encoder(encoder_input)))
        self.optimizer = Adam()
        self.autoencoder.summary()


@tf.function
def compute_loss(model, batch):
    decoded = model.autoencoder(batch)
    return tf.reduce_mean(tf.reduce_sum(tf.square(batch - decoded), axis=[1, 2, 3]))


@tf.function
def train(train_data, model, epochs=2, batch_size=32):
    for epoch in range(epochs):
        for i in tqdm(range(0, len(train_data), batch_size)):
            batch = train_data[i: i + batch_size]
            with tf.GradientTape() as tape:
                loss = compute_loss(model, batch)
            gradients = tape.gradient(loss, model.autoencoder.trainable_variables)
            model.optimizer.apply_gradients(zip(gradients, model.autoencoder.trainable_variables))


if __name__ == "__main__":
    img_dim = 64
    channels = 1
    (x_train, _), (x_test, _) = mnist.load_data()

    # Resize images to (img_dim x img_dim)
    x_train = np.array([cv2.resize(img, (img_dim, img_dim)) for img in x_train])
    x_test = np.array([cv2.resize(img, (img_dim, img_dim)) for img in x_test])

    # Normalize images
    x_train = x_train.astype('float32') / 255.
    x_test = x_test.astype('float32') / 255.

    # Reshape datasets for tensorflow
    x_train = x_train.reshape((-1, img_dim, img_dim, channels))
    x_test = x_test.reshape((-1, img_dim, img_dim, channels))

    # Create autoencoder and fit the model
    autoenc = ConvAutoencoder(input_shape=(img_dim, img_dim, channels), latent_dim=4)

    # Train autoencoder
    train(train_data=x_train, model=autoenc, epochs=2, batch_size=32)
Now, the problems are two:

1. train(), which is decorated with @tf.function, is called twice. This doesn't happen without the @tf.function decorator.
2. Training runs out of memory, since the whole dataset is loaded at once.

What am I doing wrong?

Other info:
There's nothing much else to add, but StackOverflow is forcing me to write something.
Upvotes: 3
Views: 1235
Reputation: 1836
For your first problem: when you use @tf.function, the function is first executed and traced. Eager execution is disabled in this context, so every tf.* method just defines a tf.Operation node that produces a symbolic tf.Tensor output; the plain Python code itself only runs during that tracing step.
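As a minimal standalone sketch of that trace-then-execute behavior (this is just an illustration, not your autoencoder code):

import tensorflow as tf

@tf.function
def f(x):
    print("Tracing")       # runs only while the function is being traced (Python execution)
    tf.print("Executing")  # runs every time the generated graph is executed
    return x * 2

f(tf.constant(1.0))  # prints "Tracing" and "Executing" (the first call builds the graph)
f(tf.constant(2.0))  # prints only "Executing" (the traced graph is reused)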
Code debugging 1:

# Train autoencoder
train(train_data=x_train, model=autoenc, epochs=5, batch_size=32)

Note: epochs increased to 5, with a shorter dataset, for better debugging.
Train Function:
@tf.function
def train(train_data, model, epochs=2, batch_size=32):
    for epoch in range(epochs):
        print("Python execution: ", epoch)    ## This line only prints during Python execution (tracing)
        tf.print("Graph execution: ", epoch)  ## This line only prints during Graph execution
        # for i in tqdm(range(0, len(train_data), batch_size)):  ## RAISES ERROR inside tf.function
        for i in range(0, len(train_data), batch_size):
            batch = train_data[i: i + batch_size]
            with tf.GradientTape() as tape:
                loss = compute_loss(model, batch)
            gradients = tape.gradient(loss, model.autoencoder.trainable_variables)
            model.optimizer.apply_gradients(zip(gradients, model.autoencoder.trainable_variables))
Debugging your original code with Python print() and TensorFlow tf.print() shows that the function looks like it is "executed" twice: the first call is the tracing and execution that build the graph, while every succeeding call to the function already uses the generated AutoGraph. Observing this, it is better to keep the epoch loop outside the training function when optimizing with @tf.function.
Code debugging 2:

# Train autoencoder
epochs = 5
print('Loop Training using Dataset (Epochs : {})'.format(epochs))
for epoch in range(epochs):
    train(train_data=x_train, model=autoenc, batch_size=32)
Train Function:
@tf.function
def train(train_data, model, batch_size=32):
    print("Python execution")    ## This line only prints during Python execution (tracing)
    tf.print("Graph execution")  ## This line only prints during Graph execution
    # for i in tqdm(range(0, len(train_data), batch_size)):
    for i in range(0, len(train_data), batch_size):
        batch = train_data[i: i + batch_size]
        with tf.GradientTape() as tape:
            loss = compute_loss(model, batch)
        gradients = tape.gradient(loss, model.autoencoder.trainable_variables)
        model.optimizer.apply_gradients(zip(gradients, model.autoencoder.trainable_variables))
    print("#################")  # For debugging purposes
With this modified flow you can still see that the function is "executed" twice (the trace plus the first execution), and the training then runs for the 5 epochs using the AutoGraph that was built. Every succeeding call to the train function is executed as a graph, resulting in a shorter execution time thanks to TensorFlow's optimizations.
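If you want to see this effect in isolation, here is a small, self-contained timing sketch (independent of the autoencoder; step is just a placeholder function) showing the one-time tracing cost on the first call:

import time
import tensorflow as tf

@tf.function
def step(x):
    # Arbitrary graph work standing in for a training step
    return tf.reduce_sum(tf.square(x))

x = tf.random.normal((1024, 1024))

start = time.perf_counter()
step(x)  # first call: tracing + graph construction + execution
print("first call :", time.perf_counter() - start)

start = time.perf_counter()
step(x)  # succeeding calls: graph execution only
print("second call:", time.perf_counter() - start)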
For your second problem, with regards to running out of memory: you could try using tf.data dataset generators rather than loading your entire dataset into memory. You can read more about this in the tf.data guide.
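As a rough sketch of that idea, assuming the x_train, compute_loss and autoenc objects from your code, you could let a tf.data pipeline do the batching and feed the batches to a per-batch train step (one possible wiring, not the only one):

import tensorflow as tf

# Let tf.data handle shuffling, batching and prefetching instead of
# slicing the NumPy array by hand inside the @tf.function.
train_ds = (tf.data.Dataset.from_tensor_slices(x_train)
            .shuffle(buffer_size=1024)
            .batch(32)
            .prefetch(tf.data.experimental.AUTOTUNE))

@tf.function
def train_step(model, batch):
    # One optimization step on a single batch
    with tf.GradientTape() as tape:
        loss = compute_loss(model, batch)
    gradients = tape.gradient(loss, model.autoencoder.trainable_variables)
    model.optimizer.apply_gradients(zip(gradients, model.autoencoder.trainable_variables))
    return loss

epochs = 5
for epoch in range(epochs):
    for batch in train_ds:
        loss = train_step(autoenc, batch)

For data that genuinely does not fit in memory, you would swap from_tensor_slices for something like tf.data.Dataset.from_generator or a TFRecord-based dataset; the training loop itself stays the same.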
Upvotes: 3