Reputation: 1568
I have a custom Gym environment whose action space consists of three continuous components and one discrete component. I would like to apply a reinforcement-learning algorithm to it, but I am not sure which one to use.
Below is the environment code. The action space essentially sets parameters within the gas network, which is evaluated via self.func,
and the observation space consists of the pressure results at the nodes and the velocity results of the elements:
import numpy as np
import gymnasium as gym
import simtools as st

class GasNetworkEnv(gym.Env):
    def __init__(self, map_, qcorr_bounds, pset_bounds, cs_ctrl_bounds,
                 obs_size, func, func_args):
        super().__init__()
        self.action_space = gym.spaces.Dict({
            # continuous flow-correction parameter
            'qcorr': gym.spaces.Box(
                low=np.array([qcorr_bounds[0]]),
                high=np.array([qcorr_bounds[1]]),
                dtype=np.float64),
            # continuous pressure set point
            'pset': gym.spaces.Box(
                low=np.array([pset_bounds[0]]),
                high=np.array([pset_bounds[1]]),
                dtype=np.float64),
            # one continuous control value per compressor station
            'cs_ctrl': gym.spaces.Box(
                low=np.repeat(cs_ctrl_bounds[0], len(map_)),
                high=np.repeat(cs_ctrl_bounds[1], len(map_)),
                dtype=np.float64),
            # discrete on/off flag per compressor unit across all stations
            'cs_state': gym.spaces.MultiBinary(
                sum(len(map_[k].no) for k in map_))})
        self.observation_space = gym.spaces.Box(
            low=-1e5, high=1e5, shape=(obs_size,), dtype=np.float64)
        self.func = func
        self.func_args = func_args

    def step(self, action):
        # call the objective function; score is to be minimized
        # (higher is worse), so the reward is its negative
        node_results, element_results, score = self.func(action, self.func_args)
        reward = -score
        # observation: node pressures followed by element velocities
        observation = np.concatenate((node_results, element_results))
        # termination conditions (currently: no termination)
        terminated = False
        truncated = False
        return observation, reward, terminated, truncated, {}

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        # use the seeded generator from gym.Env rather than np.random
        initial_observation = self.np_random.uniform(
            low=self.observation_space.low,
            high=self.observation_space.high,
            size=self.observation_space.shape)
        return initial_observation, {}
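For completeness, here is a minimal sketch of how I exercise the environment with random actions. dummy_func, _Station, map_, and all the bounds below are placeholders standing in for the real simulation, not my actual setup:

import numpy as np

# Placeholder objective standing in for the real gas-network simulation:
# returns dummy node pressures, dummy element velocities, and a score.
def dummy_func(action, func_args):
    node_results = np.zeros(4)
    element_results = np.zeros(4)
    score = float(np.abs(action['qcorr']).sum())
    return node_results, element_results, score

# Placeholder station object; the env only reads the .no attribute
# (list of compressor units per station).
class _Station:
    no = [0, 1]

map_ = {'cs1': _Station(), 'cs2': _Station()}
env = GasNetworkEnv(map_=map_,
                    qcorr_bounds=(-1.0, 1.0),
                    pset_bounds=(20.0, 80.0),
                    cs_ctrl_bounds=(0.0, 1.0),
                    obs_size=8,  # 4 node results + 4 element results
                    func=dummy_func,
                    func_args=None)

obs, info = env.reset(seed=0)
for _ in range(5):
    action = env.action_space.sample()  # random draw from the Dict space
    obs, reward, terminated, truncated, info = env.step(action)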
Upvotes: 0
Views: 35