import numpy as np
import gym
class NormalizeActionWrapper(gym.Wrapper):
    """
    Wrapper that normalizes a continuous action space to [-1, 1].

    The wrapped environment advertises an action space of [-1, 1] on every
    dimension; actions are linearly rescaled back to the original
    [low, high] bounds before being passed to the underlying environment.

    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env):
        # Retrieve the original (unnormalized) action space
        action_space = env.action_space
        assert isinstance(action_space, gym.spaces.Box), "This wrapper only works with continuous action space (spaces.Box)"
        # Remember the original bounds so actions can be rescaled back
        self.low, self.high = action_space.low, action_space.high
        # We modify the action space, so all actions will lie in [-1, 1]
        env.action_space = gym.spaces.Box(
            low=-1, high=1, shape=action_space.shape, dtype=np.float32
        )
        # Call the parent constructor, so we can access self.env later
        super(NormalizeActionWrapper, self).__init__(env)

    def rescale_action(self, scaled_action):
        """
        Rescale the action from [-1, 1] to [low, high].

        :param scaled_action: (np.ndarray) The action to rescale.
        :return: (np.ndarray) The rescaled action.
        """
        # Linear map: -1 -> low, +1 -> high (elementwise over the Box bounds)
        return self.low + (0.5 * (scaled_action + 1.0) * (self.high - self.low))

    def reset(self):
        """
        Reset the environment.

        :return: the first observation of the new episode
        """
        return self.env.reset()

    def step(self, action):
        """
        Rescale the action and step the wrapped environment.

        :param action: (np.ndarray) Action in [-1, 1] taken by the agent
        :return: observation, reward, whether the episode is over,
            additional information
        :rtype: (np.ndarray, float, bool, dict)
        """
        # Rescale action from [-1, 1] to original [low, high] interval
        rescaled_action = self.rescale_action(action)
        obs, reward, done, info = self.env.step(rescaled_action)
        return obs, reward, done, info