Reputation: 31
import numpy as np
import tensorflow as tf
from tf_agents.agents.reinforce import reinforce_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import suite_gym, tf_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.networks import actor_distribution_network
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.trajectories import trajectory
from tf_agents.utils import common
tf.compat.v1.enable_v2_behavior()
env_name='CartPole-v0'
num_iterations=1
collect_episodes_per_iteration=2
replay_buffer_capacity=2000
fc_layer_params=(100, )
learning_rate=1e-3
log_interval=5
num_eval_episodes=10
eval_interval=10
env=suite_gym.load(env_name)
env.reset()
time_step=env.reset()
train_py_env=suite_gym.load(env_name)
train_env=tf_py_environment.TFPyEnvironment(train_py_env)
actor_net=actor_distribution_network.ActorDistributionNetwork(train_env.observation_spec(), train_env.action_spec(), fc_layer_params=fc_layer_params)
optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
train_step_counter=tf.compat.v2.Variable(0)
tf_agent=reinforce_agent.ReinforceAgent(train_env.time_step_spec(),
train_env.action_spec(),
actor_network=actor_net,
optimizer=optimizer,
normalize_returns=True,
train_step_counter=train_step_counter)
tf_agent.initialize()
eval_policy=tf_agent.policy
collect_policy=tf_agent.collect_policy
replay_buffer=tf_uniform_replay_buffer.TFUniformReplayBuffer(
data_spec=tf_agent.collect_data_spec,
batch_size=train_env.batch_size,
max_length=replay_buffer_capacity
)
tf_agent.train=common.function(tf_agent.train)
def collect_episode(environment, policy, num_episodes):
  episode_counter=0
  environment.reset()
  while episode_counter<num_episodes:
    time_step=environment.current_time_step()
    action_step=policy.action(time_step)
    next_time_step=environment.step(action_step.action)
    traj=trajectory.from_transition(time_step, action_step, next_time_step)
    replay_buffer.add_batch(traj)
    if traj.is_boundary():
      episode_counter+=1
collect_episode(train_env, tf_agent.collect_policy, 1)
experience=replay_buffer.gather_all()
from copy import copy
for _ in range(num_iterations):
  collect_episode(train_env, tf_agent.collect_policy, collect_episodes_per_iteration)
  before=copy(tf_agent.trainable_variables)
  experience=replay_buffer.gather_all()
  train_loss=tf_agent.train(experience)
  replay_buffer.clear()
  after=copy(tf_agent.trainable_variables)
  print('before==after?', before==after)
https://www.tensorflow.org/agents/tutorials/6_reinforce_tutorial
I was following the TF-Agents REINFORCE tutorial (linked above), and I noticed that after running
before=copy(tf_agent.trainable_variables)
tf_agent.train(experience)
after=copy(tf_agent.trainable_variables)
'before' should be different from 'after', but (before == after) always evaluates to True.
I am very confused about this. I thought the gradients might be zero, but that seems unreasonable because the loss keeps decreasing from one training step to the next.
The gradient-tape step in the reinforce_agent module looks correct to me.
I can't find the problem... even tf_agent.policy.trainable_variables stays the same before and after a training step.
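Here is a minimal, self-contained sketch of the comparison I am doing (the single variable is just a stand-in for the agent's weights):
import tensorflow as tf
from copy import copy

v=tf.Variable([1.0, 2.0])
before=copy((v,))        # copy of a tuple of variables, like tf_agent.trainable_variables
v.assign([3.0, 4.0])     # stand-in for a training update that changes the weights
print(before==(v,))      # still prints True, even though the values changed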
Upvotes: 3
Views: 170
Reputation: 815
The reason for this is that tf_agent.trainable_variables
is a tuple of tf.Variable objects. copy only makes a shallow copy, so before
still holds references to the very same live variables, and those variables
are updated in place as the agent trains; you never snapshot their values.
To actually see the difference, take value snapshots with tf.identity:
before = []
for element in tf_agent.trainable_variables:
  before.append(tf.identity(element))

tf_agent.train(experience)

after = []
for element in tf_agent.trainable_variables:
  after.append(tf.identity(element))

# Compare the snapshots value by value; an elementwise tensor comparison
# can't be collapsed to a single Python bool with ==.
print(all(tf.reduce_all(tf.equal(b, a)).numpy() for b, a in zip(before, after)))
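If you prefer to compare on the NumPy side, an equivalent sketch (assuming eager execution is enabled, as in the tutorial) is to copy the values out with .numpy() and compare the arrays:
import numpy as np

before = [v.numpy() for v in tf_agent.trainable_variables]   # value snapshots, not references
tf_agent.train(experience)
after = [v.numpy() for v in tf_agent.trainable_variables]

# False as soon as any weight tensor changed during the train step
print(all(np.array_equal(b, a) for b, a in zip(before, after)))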
Upvotes: 1