Source code for machin.frame.noise.action_space_noise

from typing import Tuple, Iterable, Union, Dict, Any
import torch as t

from .generator import OrnsteinUhlenbeckNoiseGen


DEFAULT_OU_GEN = None

NoiseParam = Union[Iterable[Tuple], Tuple]


[docs]def add_uniform_noise_to_action(
    action: t.Tensor, noise_param: NoiseParam = (0.0, 1.0), ratio: float = 1.0
):
    """
    Add uniform noise to action tensor.

    Hint:
        The innermost tuple contains: ``(uniform_min, uniform_max)``

        If ``noise_param`` is ``Tuple[float, float]``, then the same uniform
        noise will be added to ``action[*, :]``.

        If ``noise_param`` is ``Iterable[Tuple[float, float]]``, then for each
        ``action[*, i]`` slice i, uniform noise with ``noise_param[i]`` will
        be added respectively.

    Args:
        action: Raw action.
        noise_param: Param of the uniform noise.
        ratio: Sampled noise is multiplied with this ratio.

    Returns:
        Action with uniform noise.
    """
    if isinstance(noise_param[0], tuple):
        if len(noise_param) != action.shape[-1]:
            raise ValueError(
                "Noise param length doesn't match " "the last dimension of action"
            )
        noise = t.rand(action.shape, device=action.device)
        for i in range(action.shape[-1]):
            noi_p = noise_param[i]
            noise.view(-1, noise.shape[-1])[:, i] *= noi_p[1] - noi_p[0]
            noise.view(-1, noise.shape[-1])[:, i] += noi_p[0]
    else:
        noise_param = noise_param  # type: Tuple[float, float]
        noise = (
            t.rand(action.shape, device=action.device)
            * (noise_param[1] - noise_param[0])
            + noise_param[0]
        )
    return action + noise * ratio


[docs]def add_clipped_normal_noise_to_action(
    action: t.Tensor, noise_param: NoiseParam = (0.0, 1.0, -1.0, 1.0), ratio=1.0
):
    """
    Add clipped normal noise to action tensor.

    Hint:
        The innermost tuple contains:
        ``(normal_mean, normal_sigma, clip_min, clip_max)``

        If ``noise_param`` is ``Tuple[float, float, float, float]``,
        then the same clipped normal noise will be added to ``action[*, :]``.

        If ``noise_param`` is ``Iterable[Tuple[float, float, float, float]]``,
        then for each ``action[*, i]`` slice i, clipped normal noise with
        ``noise_param[i]`` will be applied respectively.

    Args:
        action: Raw action
        noise_param: Param of the normal noise.
        ratio: Sampled noise is multiplied with this ratio.

    Returns:
        Action with uniform noise.
    """
    if isinstance(noise_param[0], tuple):
        if len(noise_param) != action.shape[-1]:
            raise ValueError(
                "Noise param length doesn't match " "the last dimension of action"
            )
        noise = t.rand(action.shape, device=action.device)
        for i in range(action.shape[-1]):
            noi_p = noise_param[i]
            noise.view(-1, noise.shape[-1])[:, i] *= noi_p[1] - noi_p[0]
            noise.view(-1, noise.shape[-1])[:, i] += noi_p[0]
            noise.view(-1, noise.shape[-1])[:, i].clamp(noi_p[2], noi_p[3])
    else:
        noise_param = noise_param  # type: Tuple[float, float, float, float]
        noise = (
            t.rand(action.shape, device=action.device)
            * (noise_param[1] - noise_param[0])
            + noise_param[0]
        )
        noise.clamp(noise_param[2], noise_param[3])
    return action + noise * ratio


[docs]def add_normal_noise_to_action(action: t.Tensor, noise_param=(0.0, 1.0), ratio=1.0):
    """
    Add normal noise to action tensor.

    Hint:
        The innermost tuple contains:
        ``(normal_mean, normal_sigma)``

        If ``noise_param`` is ``Tuple[float, float]``,
        then the same normal noise will be added to ``action[*, :]``.

        If ``noise_param`` is ``Iterable[Tuple[float, float]]``,
        then for each ``action[*, i]`` slice i, clipped normal noise with
        ``noise_param[i]`` will be applied respectively.

    Args:
        action: Raw action
        noise_param: Param of the normal noise.
        ratio: Sampled noise is multiplied with this ratio.

    Returns:
        Action with normal noise.
    """
    if isinstance(noise_param[0], tuple):
        if len(noise_param) != action.shape[-1]:
            raise ValueError(
                "Noise param length doesn't match " "the last dimension of action"
            )
        noise = t.randn(action.shape, device=action.device)
        for i in range(action.shape[-1]):
            noi_p = noise_param[i]
            noise.view(-1, noise.shape[-1])[:, i] *= noi_p[1]
            noise.view(-1, noise.shape[-1])[:, i] += noi_p[0]
    else:
        noise = (
            t.rand(action.shape, device=action.device) * noise_param[1] + noise_param[0]
        )
    return action + noise * ratio


[docs]def add_ou_noise_to_action(
    action: t.Tensor, noise_param: Dict[str, Any] = None, ratio=1.0, reset=False
):
    """
    Add Ornstein-Uhlenbeck noise to action tensor.

    Warning:
        Ornstein-Uhlenbeck noise generator is shared. And you cannot
        specify OU noise of different distributions
        for each of the last dimension of your action.

    Args:
        action: Raw action
        noise_param: :class:`.OrnsteinUhlenbeckGen` params. Used as
            keyword arguments of the generator. Will only be effective if
            ``reset`` is ``True``.
        ratio: Sampled noise is multiplied with this ratio.
        reset: Whether to reset the default Ornstein-Uhlenbeck noise generator.

    Returns:
        Action with Ornstein-Uhlenbeck noise.
    """
    global DEFAULT_OU_GEN
    if reset:
        DEFAULT_OU_GEN = None
    if DEFAULT_OU_GEN is None:
        DEFAULT_OU_GEN = OrnsteinUhlenbeckNoiseGen(action.shape, **noise_param)
        DEFAULT_OU_GEN.reset()
    return action + DEFAULT_OU_GEN(action.device) * ratio