# Source code for machin.frame.noise.param_space_noise

from typing import Callable, Tuple, Any, Dict
import torch as t
import torch.nn as nn

from machin.utils.logging import default_logger as logger
from machin.utils.helper_classes import Switch
from .generator import NormalNoiseGen


class AdaptiveParamNoise:
    def __init__(
        self,
        initial_stddev: float = 0.1,
        desired_action_stddev: float = 0.1,
        adoption_coefficient: float = 1.01,
    ):
        """
        Implements the adaptive parameter space method in
        `<<Parameter space noise for exploration>> \
<https://arxiv.org/pdf/1706.01905.pdf>`_.

        Hint:
            Let :math:`\\theta` be the standard deviation of noise,
            :math:`\\alpha` be the adoption coefficient, and
            :math:`\\delta` be the desired action standard deviation, then:

            :math:`\\theta_{k+1} = \\left \\{ \
                \\begin{array}{ll} \
                    \\alpha \\theta_k \
                        & if\\ d(\\pi,\\tilde{\\pi})\\leq\\delta, \\\\ \
                    \\frac{1}{\\alpha} \\theta_k & otherwise, \
                \\end{array} \
            \\right. \\ `

            Noise is directly applied to network parameters.

        Args:
            initial_stddev: Initial noise standard deviation.
            desired_action_stddev: Desired standard deviation for actions,
                used by :meth:`adapt` as the threshold on the distance
                between the noisy action and the clean action.
            adoption_coefficient: Adoption coefficient, the factor by which
                the noise standard deviation is multiplied or divided on
                every call to :meth:`adapt`.
        """

        self.initial_stddev = initial_stddev
        self.desired_action_stddev = desired_action_stddev
        self.adoption_coefficient = adoption_coefficient

        # The adapted value; ``initial_stddev`` is kept only for ``__repr__``.
        self.current_stddev = initial_stddev

    def adapt(self, distance: float):
        """
        Update noise standard deviation according to distance.

        Args:
            distance: Current distance between the noisy action and clean
                action.
        """
        if distance > self.desired_action_stddev:
            # Noisy actions drifted too far from clean ones: decrease stddev.
            self.current_stddev /= self.adoption_coefficient
        else:
            # Noisy actions are within the desired range: increase stddev.
            self.current_stddev *= self.adoption_coefficient

    def get_dev(self) -> float:
        """
        Returns:
            Current noise standard deviation.
        """
        return self.current_stddev

    def __repr__(self):
        fmt = "AdaptiveParamNoise(i_std={}, da_std={}, adpt_coeff={})"
        return fmt.format(
            self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient
        )


def _add_perturb_hook(
    module, perturb_switch, reset_switch, perturb_gen, debug_backward
):
    """
    Register the hooks which swap noisy / original parameters on ``module``.

    A forward pre-hook is registered on ``module`` and one tensor hook is
    registered on every parameter returned by ``module.named_parameters()``.

    Args:
        module: Module whose parameters are perturbed in-place.
        perturb_switch: Object with a ``get()`` method; when it returns a
            true value the forward pass uses noisy parameters, otherwise
            the saved original parameters are swapped back in.
        reset_switch: Object with a ``get()`` method; when it returns a true
            value (and perturbation is on) new noise is generated on every
            forward pass instead of reusing the stored noisy parameters.
        perturb_gen: Callable invoked as ``perturb_gen(shape, device)``,
            its result is detached and added to the parameter.
        debug_backward: If true, print a message every time a parameter
            hook is executed.

    Returns:
        A tuple of the forward pre-hook handle and a list with one handle
        per parameter hook.
    """
    org_params = {}
    noisy_params = {}

    def perturb_pre_hook(*_):
        with t.no_grad():
            if perturb_switch.get():
                if noisy_params and not reset_switch.get():
                    # Use generated noisy parameters.
                    for p_name, p_value in module.named_parameters():
                        if t.is_tensor(p_value):
                            p_value.set_(noisy_params[p_name])
                else:
                    # Generate noisy parameters if they don't exist,
                    # or if a reset was requested.
                    org_params.clear()
                    noisy_params.clear()
                    for p_name, p_value in module.named_parameters():
                        if t.is_tensor(p_value):
                            org_params[p_name] = p_value.clone()
                            p_value += perturb_gen(
                                p_value.shape, p_value.device
                            ).detach()
                            noisy_params[p_name] = p_value.clone()
            elif org_params:
                # Perturbation is off: use original parameters, if any
                # were saved by a previous noisy forward pass.
                for p_name, p_value in module.named_parameters():
                    if t.is_tensor(p_value):
                        p_value.set_(org_params[p_name])

    def make_post_hook(p_name, p_value):
        # Bind the name and tensor per hook. A closure defined directly in
        # the loop below would read the loop variables lazily, so every hook
        # would operate on the last parameter only.
        def perturb_post_hook(*_):  # pragma: no cover
            # pytest will not detect execution by autograd engine
            # Called before backward update, swap noisy parameters out,
            # so gradients are applied to original parameters.
            if debug_backward:
                print(f"Backward swapped for {p_name}!")
            with t.no_grad():
                if org_params and t.is_tensor(p_value):
                    p_value.set_(org_params[p_name])

        return perturb_post_hook

    pre_hook_handle = module.register_forward_pre_hook(perturb_pre_hook)

    post_hook_handles = [
        param_value.register_hook(make_post_hook(param_name, param_value))
        for param_name, param_value in module.named_parameters()
    ]

    return pre_hook_handle, post_hook_handles


# noinspection PyTypeChecker
def perturb_model(
    model: nn.Module,
    perturb_switch: Switch,
    reset_switch: Switch,
    distance_func: Callable = lambda x, y: t.dist(x, y, 2).mean().item(),
    desired_action_stddev: float = 0.5,
    noise_generator: Any = NormalNoiseGen,
    noise_generator_args: Tuple = (),
    noise_generator_kwargs: Dict = None,
    noise_generate_function: Callable = None,
    debug_backward=False,
):
    """
    Give model's parameters a little perturbation. Implements
    `<<Parameter space noise for exploration>> \
<https://arxiv.org/pdf/1706.01905.pdf>`_.

    Note:
        Only parameters of type ``t.Tensor`` and gettable from
        ``model.named_parameters()`` will be perturbed.

        Original parameters will be automatically swapped in during the
        backward pass, and you can safely call optimizers afterwards.

    Hint:
        1. ``noise_generator`` must accept (shape, \\*args) in its ``__init__``
        function, where shape is the required shape. it also needs to have
        ``__call__(device=None)`` which produce a noise tensor on the specified
        device when invoked.

        2. ``noise_generate_function`` must accept (shape, device, std:float)
        and return a noise tensor on the specified device.

    Example:
        In order to use this function to perturb your model, you need to::

            from machin.utils.helper_classes import Switch
            from machin.frame.noise.param_space_noise import perturb_model
            from machin.utils.visualize import visualize_graph
            import torch as t

            dims = 5

            t.manual_seed(0)
            model = t.nn.Linear(dims, dims)
            optim = t.optim.Adam(model.parameters(), 1e-3)
            p_switch, r_switch = Switch(), Switch()
            cancel = perturb_model(model, p_switch, r_switch)

            # you should keep this switch on if you do one training step after
            # every sampling step. otherwise you may turn it off in one episode
            # and turn it on in the next to speed up training.
            r_switch.on()

            # turn off/on the perturbation switch to see the difference
            p_switch.on()

            # do some sampling
            action = model(t.ones([dims]))

            # in order to let parameter noise adapt to generate noisy actions
            # within ``desired_action_stddev``, you must periodically
            # use the original model to generate some actions:
            p_switch.off()
            action = model(t.ones([dims]))

            # visualize will not show any leaf noise tensors
            # because they are created in t.no_grad() context
            # and added in-place.
            visualize_graph(action, exit_after_vis=False)

            # do some training
            loss = (action - t.ones([dims])).sum()
            loss.backward()
            optim.step()
            print(model.weight)

            # clear hooks
            cancel()

    Args:
        model: Neural network model.
        perturb_switch: The switch used to enable perturbation. If switch is
            set to ``False`` (off), then during the forward process, original
            parameters are used.
        reset_switch: The switch used to reset perturbation noise. If switch is
            set to ``True`` (on), and ``perturb_switch`` is also on, then during
            every forward process, a new set of noise is applied to each param.
            If only ``perturb_switch`` is on, then the same set of noisy
            parameters is used in the forward process and they **will not be
            updated**.
        distance_func: Distance function, accepts two tensors produced by
            ``model`` (one is noisy), return the distance as float. Used
            to compare the distance between actions generated by
            noisy parameters and original parameters.
        desired_action_stddev: Desired action standard deviation.
        noise_generator: Noise generator class.
        noise_generator_args: Additional args other than shape of the noise
            generator.
        noise_generator_kwargs: Additional kwargs other than shape of the noise
            generator.
        noise_generate_function: Noise generation function, mutually exclusive
            with ``noise_generator`` and ``noise_generator_args``. When given,
            it takes precedence over ``noise_generator``.
        debug_backward: Print a message if the backward hook is correctly
            executed.

    Returns:
        A deregister function with no arguments, which removes all hooks
        applied on your model by this function.
    """
    # Holds at most one noisy and one clean model output until both exist.
    tmp_action = {}
    hook_handles = []

    param_noise_spec = AdaptiveParamNoise(desired_action_stddev=desired_action_stddev)

    # Resolve the optional kwargs once, instead of lazily inside the generator.
    generator_kwargs = {} if noise_generator_kwargs is None else noise_generator_kwargs

    def param_noise_class_gen(shape, device):
        # Build a generator for this shape and scale its sample by the
        # current adaptive standard deviation.
        gen = noise_generator(shape, *noise_generator_args, **generator_kwargs)
        return gen(device) * param_noise_spec.get_dev()

    def param_noise_custom_gen_wrapper(shape, device):
        # The custom function receives the current standard deviation and
        # is responsible for scaling the noise itself.
        std_dev = param_noise_spec.get_dev()
        return noise_generate_function(shape, device, std_dev)

    if noise_generate_function is not None:
        param_noise_gen = param_noise_custom_gen_wrapper
    else:
        param_noise_gen = param_noise_class_gen

    def perturb_adjust_hook(_model, _input, output):
        if perturb_switch.get():
            tmp_action["with_noise"] = output.clone()
        else:
            tmp_action["without_noise"] = output.clone()
        if "with_noise" in tmp_action and "without_noise" in tmp_action:
            # Compute distance between two actions generated by
            # noisy parameters and original parameters.
            with t.no_grad():
                dist = distance_func(
                    tmp_action["with_noise"], tmp_action["without_noise"]
                )
                tmp_action.clear()
                param_noise_spec.adapt(dist)
                logger.info(f"Current output distance: {dist}")
                logger.info(f"Current param noise stddev: {param_noise_spec.get_dev()}")

    # Noise generation happens in pre-forward and noise adjust happens
    # in post-forward
    hook_handles.append(model.register_forward_hook(perturb_adjust_hook))

    pre, post = _add_perturb_hook(
        model, perturb_switch, reset_switch, param_noise_gen, debug_backward
    )
    hook_handles.append(pre)
    hook_handles += post

    def cancel():
        for hh in hook_handles:
            hh.remove()

    return cancel