diff --git a/ml-agents/mlagents/trainers/tests/torch/test_decoders.py b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py
new file mode 100644
index 0000000000..aa417edd05
--- /dev/null
+++ b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py
@@ -0,0 +1,31 @@
+import pytest
+import torch
+
+from mlagents.trainers.torch.decoders import ValueHeads
+
+
+def test_valueheads():
+    stream_names = [f"reward_signal_{num}" for num in range(5)]
+    input_size = 5
+    batch_size = 4
+
+    # Test default 1 value per head
+    value_heads = ValueHeads(stream_names, input_size)
+    input_data = torch.ones((batch_size, input_size))
+    value_out, _ = value_heads(input_data)  # Note: mean value will be removed shortly
+
+    for stream_name in stream_names:
+        assert value_out[stream_name].shape == (batch_size,)
+
+    # Test that inputting the wrong size input will throw an error
+    with pytest.raises(Exception):
+        value_out = value_heads(torch.ones((batch_size, input_size + 2)))
+
+    # Test multiple values per head (e.g. discrete Q function)
+    output_size = 4
+    value_heads = ValueHeads(stream_names, input_size, output_size)
+    input_data = torch.ones((batch_size, input_size))
+    value_out, _ = value_heads(input_data)
+
+    for stream_name in stream_names:
+        assert value_out[stream_name].shape == (batch_size, output_size)
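Note on what this test implies about the module: the shape assertions suggest ValueHeads is essentially one linear head per reward stream, squeezing the trailing dimension only in the 1-value case. A minimal sketch under that assumption (the real module in decoders.py also returns a mean value, which the test discards with `_`):

    import torch
    from torch import nn

    class ValueHeadsSketch(nn.Module):
        """Illustrative only; the actual ValueHeads may differ in detail."""

        def __init__(self, stream_names, input_size, output_size=1):
            super().__init__()
            # One linear head per reward stream, e.g. "reward_signal_0", ...
            self.value_heads = nn.ModuleDict(
                {name: nn.Linear(input_size, output_size) for name in stream_names}
            )

        def forward(self, hidden):
            # squeeze(-1) turns (batch, 1) into (batch,) but leaves (batch, k) alone,
            # matching both shape assertions in test_valueheads.
            return {name: head(hidden).squeeze(-1) for name, head in self.value_heads.items()}
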
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distributions.py b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
new file mode 100644
index 0000000000..6637eb159b
--- /dev/null
+++ b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
@@ -0,0 +1,141 @@
+import pytest
+import torch
+
+from mlagents.trainers.torch.distributions import (
+    GaussianDistribution,
+    MultiCategoricalDistribution,
+    GaussianDistInstance,
+    TanhGaussianDistInstance,
+    CategoricalDistInstance,
+)
+
+
+@pytest.mark.parametrize("tanh_squash", [True, False])
+@pytest.mark.parametrize("conditional_sigma", [True, False])
+def test_gaussian_distribution(conditional_sigma, tanh_squash):
+    torch.manual_seed(0)
+    hidden_size = 16
+    act_size = 4
+    sample_embedding = torch.ones((1, hidden_size))
+    gauss_dist = GaussianDistribution(
+        hidden_size,
+        act_size,
+        conditional_sigma=conditional_sigma,
+        tanh_squash=tanh_squash,
+    )
+
+    # Make sure backprop works
+    force_action = torch.zeros((1, act_size))
+    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)
+
+    for _ in range(50):
+        dist_inst = gauss_dist(sample_embedding)[0]
+        if tanh_squash:
+            assert isinstance(dist_inst, TanhGaussianDistInstance)
+        else:
+            assert isinstance(dist_inst, GaussianDistInstance)
+        log_prob = dist_inst.log_prob(force_action)
+        loss = torch.nn.functional.mse_loss(log_prob, -2 * torch.ones(log_prob.shape))
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+    for prob in log_prob.flatten():
+        assert prob == pytest.approx(-2, abs=0.1)
+
+
+def test_multi_categorical_distribution():
+    torch.manual_seed(0)
+    hidden_size = 16
+    act_size = [3, 3, 4]
+    sample_embedding = torch.ones((1, hidden_size))
+    multi_cat_dist = MultiCategoricalDistribution(hidden_size, act_size)
+
+    # Make sure backprop works
+    optimizer = torch.optim.Adam(multi_cat_dist.parameters(), lr=3e-3)
+
+    def create_test_prob(size: int) -> torch.Tensor:
+        test_prob = torch.tensor(
+            [[1.0 - 0.01 * (size - 1)] + [0.01] * (size - 1)]
+        )  # High prob for first action
+        return test_prob.log()
+
+    for _ in range(100):
+        dist_insts = multi_cat_dist(sample_embedding, masks=torch.ones((1, sum(act_size))))
+        loss = 0
+        for i, dist_inst in enumerate(dist_insts):
+            assert isinstance(dist_inst, CategoricalDistInstance)
+            log_prob = dist_inst.all_log_prob()
+            test_log_prob = create_test_prob(act_size[i])
+            # Force log_probs to match the high probability for the first action generated by
+            # create_test_prob
+            loss += torch.nn.functional.mse_loss(log_prob, test_log_prob)
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+    for dist_inst, size in zip(dist_insts, act_size):
+        # Check that the log probs are close to the fake ones that we generated.
+        test_log_probs = create_test_prob(size)
+        for _prob, _test_prob in zip(
+            dist_inst.all_log_prob().flatten().tolist(),
+            test_log_probs.flatten().tolist(),
+        ):
+            assert _prob == pytest.approx(_test_prob, abs=0.1)
+
+    # Test masks
+    masks = []
+    for branch in act_size:
+        masks += [0] * (branch - 1) + [1]
+    masks = torch.tensor([masks])
+    dist_insts = multi_cat_dist(sample_embedding, masks=masks)
+    for dist_inst in dist_insts:
+        log_prob = dist_inst.all_log_prob()
+        assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
+
+
+def test_gaussian_dist_instance():
+    torch.manual_seed(0)
+    act_size = 4
+    dist_instance = GaussianDistInstance(
+        torch.zeros(1, act_size), torch.ones(1, act_size)
+    )
+    action = dist_instance.sample()
+    assert action.shape == (1, act_size)
+    for log_prob in dist_instance.log_prob(torch.zeros((1, act_size))).flatten():
+        # Log prob of standard normal at 0
+        assert log_prob == pytest.approx(-0.919, abs=0.01)
+
+    for ent in dist_instance.entropy().flatten():
+        # Entropy of a standard normal (constant, independent of the sample point)
+        assert ent == pytest.approx(2.83, abs=0.01)
+
+
+def test_tanh_gaussian_dist_instance():
+    torch.manual_seed(0)
+    act_size = 4
+    dist_instance = TanhGaussianDistInstance(
+        torch.zeros(1, act_size), torch.ones(1, act_size)
+    )
+    for _ in range(10):
+        action = dist_instance.sample()
+        assert action.shape == (1, act_size)
+        assert torch.max(action) < 1.0 and torch.min(action) > -1.0
+
+
+def test_categorical_dist_instance():
+    torch.manual_seed(0)
+    act_size = 4
+    test_prob = torch.tensor(
+        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
+    )  # High prob for first action
+    dist_instance = CategoricalDistInstance(test_prob)
+
+    for _ in range(10):
+        action = dist_instance.sample()
+        assert action.shape == (1,)
+        assert action < act_size
+
+    # Make sure the first action has a higher probability than the others.
+    prob_first_action = dist_instance.log_prob(torch.tensor([0]))
+
+    for i in range(1, act_size):
+        assert dist_instance.log_prob(torch.tensor([i])) < prob_first_action
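The tanh-squash assertions above (actions strictly inside (-1, 1)) follow from the usual change-of-variables correction for a tanh-squashed Gaussian. A hand-rolled sketch of that per-dimension log-prob, assuming the standard formulation (TanhGaussianDistInstance itself may compute it differently, e.g. with a different epsilon scheme):

    import torch

    def tanh_gaussian_log_prob(mean, std, squashed_action, eps=1e-7):
        # Invert the squash: u = atanh(a), clamped away from +/-1 for stability.
        a = squashed_action.clamp(-1 + eps, 1 - eps)
        u = 0.5 * (torch.log1p(a) - torch.log1p(-a))
        base_log_prob = torch.distributions.Normal(mean, std).log_prob(u)
        # Jacobian of tanh: log |d tanh(u) / du| = log(1 - tanh(u)^2)
        correction = torch.log(1.0 - torch.tanh(u) ** 2 + eps)
        return base_log_prob - correction
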
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
new file mode 100644
index 0000000000..7f77b3d72a
--- /dev/null
+++ b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
@@ -0,0 +1,110 @@
+import torch
+from unittest import mock
+import pytest
+
+from mlagents.trainers.torch.encoders import (
+    VectorEncoder,
+    VectorAndUnnormalizedInputEncoder,
+    Normalizer,
+    SimpleVisualEncoder,
+    ResNetVisualEncoder,
+    NatureVisualEncoder,
+)
+
+
+# This test will also reveal issues with states not being saved in the state_dict.
+def compare_models(module_1, module_2):
+    is_same = True
+    for key_item_1, key_item_2 in zip(
+        module_1.state_dict().items(), module_2.state_dict().items()
+    ):
+        # Compare the tensors in the state_dict, not the keys.
+        is_same = torch.equal(key_item_1[1], key_item_2[1]) and is_same
+    return is_same
+
+
+def test_normalizer():
+    input_size = 2
+    norm = Normalizer(input_size)
+
+    # With the step count starting at 1, these three inputs should produce
+    # a running mean of 0.5 and a running variance of 2.
+    vec_input1 = torch.tensor([[1, 1]])
+    vec_input2 = torch.tensor([[1, 1]])
+    vec_input3 = torch.tensor([[0, 0]])
+    norm.update(vec_input1)
+    norm.update(vec_input2)
+    norm.update(vec_input3)
+
+    # Test normalization
+    for val in norm(vec_input1)[0]:
+        assert val == pytest.approx(0.707, abs=0.001)
+
+    # Test copy normalization
+    norm2 = Normalizer(input_size)
+    assert not compare_models(norm, norm2)
+    norm2.copy_from(norm)
+    assert compare_models(norm, norm2)
+    for val in norm2(vec_input1)[0]:
+        assert val == pytest.approx(0.707, abs=0.001)
+
+
+@mock.patch("mlagents.trainers.torch.encoders.Normalizer")
+def test_vector_encoder(mock_normalizer):
+    mock_normalizer_inst = mock.Mock()
+    mock_normalizer.return_value = mock_normalizer_inst
+    input_size = 64
+    hidden_size = 128
+    num_layers = 3
+    normalize = False
+    vector_encoder = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    output = vector_encoder(torch.ones((1, input_size)))
+    assert output.shape == (1, hidden_size)
+
+    normalize = True
+    vector_encoder = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    new_vec = torch.ones((1, input_size))
+    vector_encoder.update_normalization(new_vec)
+
+    mock_normalizer.assert_called_with(input_size)
+    mock_normalizer_inst.update.assert_called_with(new_vec)
+
+    vector_encoder2 = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    vector_encoder.copy_normalization(vector_encoder2)
+    mock_normalizer_inst.copy_from.assert_called_with(mock_normalizer_inst)
+
+
+@mock.patch("mlagents.trainers.torch.encoders.Normalizer")
+def test_vector_and_unnormalized_encoder(mock_normalizer):
+    mock_normalizer_inst = mock.Mock()
+    mock_normalizer.return_value = mock_normalizer_inst
+    input_size = 64
+    unnormalized_size = 32
+    hidden_size = 128
+    num_layers = 3
+    normalize = True
+    mock_normalizer_inst.return_value = torch.ones((1, input_size))
+    vector_encoder = VectorAndUnnormalizedInputEncoder(
+        input_size, hidden_size, unnormalized_size, num_layers, normalize
+    )
+    # Make sure normalizer is only called on input_size
+    mock_normalizer.assert_called_with(input_size)
+    normal_input = torch.ones((1, input_size))
+
+    unnormalized_input = torch.ones((1, unnormalized_size))
+    output = vector_encoder(normal_input, unnormalized_input)
+    mock_normalizer_inst.assert_called_with(normal_input)
+    assert output.shape == (1, hidden_size)
+
+
+@pytest.mark.parametrize("image_size", [(36, 36, 3), (84, 84, 4), (256, 256, 5)])
+@pytest.mark.parametrize(
+    "vis_class", [SimpleVisualEncoder, ResNetVisualEncoder, NatureVisualEncoder]
+)
+def test_visual_encoder(vis_class, image_size):
+    num_outputs = 128
+    enc = vis_class(image_size[0], image_size[1], image_size[2], num_outputs)
+    # Note: NCHW not NHWC
+    sample_input = torch.ones((1, image_size[2], image_size[0], image_size[1]))
+    encoding = enc(sample_input)
+    assert encoding.shape == (1, num_outputs)
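Why compare_models catches unsaved state, and why the encoders.py hunk below switches to register_buffer: plain tensor attributes on an nn.Module are silently excluded from state_dict(), while registered buffers are saved and loaded. A small self-contained illustration:

    import torch
    from torch import nn

    class WithBuffer(nn.Module):
        def __init__(self):
            super().__init__()
            self.register_buffer("steps", torch.tensor(1))  # appears in state_dict

    class WithoutBuffer(nn.Module):
        def __init__(self):
            super().__init__()
            self.steps = torch.tensor(1)  # invisible to state_dict / checkpoints

    print(WithBuffer().state_dict().keys())     # odict_keys(['steps'])
    print(WithoutBuffer().state_dict().keys())  # odict_keys([])
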
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
new file mode 100644
index 0000000000..25c7a6c05e
--- /dev/null
+++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
@@ -0,0 +1,165 @@
+import pytest
+import torch
+import numpy as np
+
+from mlagents.trainers.settings import EncoderType
+from mlagents.trainers.torch.utils import ModelUtils
+from mlagents.trainers.exception import UnityTrainerException
+from mlagents.trainers.torch.encoders import (
+    VectorEncoder,
+    VectorAndUnnormalizedInputEncoder,
+)
+from mlagents.trainers.torch.distributions import (
+    CategoricalDistInstance,
+    GaussianDistInstance,
+)
+
+
+def test_min_visual_size():
+    # Make sure each EncoderType has an entry in MIN_RESOLUTION_FOR_ENCODER
+    assert set(ModelUtils.MIN_RESOLUTION_FOR_ENCODER.keys()) == set(EncoderType)
+
+    for encoder_type in EncoderType:
+        good_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type]
+        vis_input = torch.ones((1, 3, good_size, good_size))
+        ModelUtils._check_resolution_for_encoder(good_size, good_size, encoder_type)
+        enc_func = ModelUtils.get_encoder_for_type(encoder_type)
+        enc = enc_func(good_size, good_size, 3, 1)
+        enc.forward(vis_input)
+
+        # Anything under the min size should raise an exception. If not, decrease the min size!
+        with pytest.raises(Exception):
+            bad_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type] - 1
+            vis_input = torch.ones((1, 3, bad_size, bad_size))
+
+            with pytest.raises(UnityTrainerException):
+                # Make sure we'd hit a friendly error during model setup time.
+                ModelUtils._check_resolution_for_encoder(bad_size, bad_size, encoder_type)
+
+            enc = enc_func(bad_size, bad_size, 3, 1)
+            enc.forward(vis_input)
+
+
+@pytest.mark.parametrize("unnormalized_inputs", [0, 1])
+@pytest.mark.parametrize("num_visual", [0, 1, 2])
+@pytest.mark.parametrize("num_vector", [0, 1, 2])
+@pytest.mark.parametrize("normalize", [True, False])
+@pytest.mark.parametrize("encoder_type", [EncoderType.SIMPLE, EncoderType.NATURE_CNN])
+def test_create_encoders(
+    encoder_type, normalize, num_vector, num_visual, unnormalized_inputs
+):
+    vec_obs_shape = (5,)
+    vis_obs_shape = (84, 84, 3)
+    obs_shapes = []
+    for _ in range(num_vector):
+        obs_shapes.append(vec_obs_shape)
+    for _ in range(num_visual):
+        obs_shapes.append(vis_obs_shape)
+    h_size = 128
+    num_layers = 3
+    vis_enc, vec_enc = ModelUtils.create_encoders(
+        obs_shapes, h_size, num_layers, encoder_type, unnormalized_inputs, normalize
+    )
+    vec_enc = list(vec_enc)
+    vis_enc = list(vis_enc)
+    assert len(vec_enc) == (
+        1 if unnormalized_inputs + num_vector > 0 else 0
+    )  # There's always at most one vector encoder.
+    assert len(vis_enc) == num_visual
+
+    if unnormalized_inputs > 0:
+        assert isinstance(vec_enc[0], VectorAndUnnormalizedInputEncoder)
+    elif num_vector > 0:
+        assert isinstance(vec_enc[0], VectorEncoder)
+
+    for enc in vis_enc:
+        assert isinstance(enc, ModelUtils.get_encoder_for_type(encoder_type))
+
+
+def test_list_to_tensor():
+    # Test converting pure list
+    unconverted_list = [[1, 2], [1, 3], [1, 4]]
+    tensor = ModelUtils.list_to_tensor(unconverted_list)
+    # Should be equivalent to torch.tensor conversion
+    assert torch.equal(tensor, torch.tensor(unconverted_list))
+
+    # Test converting pure numpy array
+    np_list = np.asarray(unconverted_list)
+    tensor = ModelUtils.list_to_tensor(np_list)
+    # Should be equivalent to torch.tensor conversion
+    assert torch.equal(tensor, torch.tensor(unconverted_list))
+
+    # Test converting list of numpy arrays
+    list_of_np = [np.asarray(_el) for _el in unconverted_list]
+    tensor = ModelUtils.list_to_tensor(list_of_np)
+    # Should be equivalent to torch.tensor conversion
+    assert torch.equal(tensor, torch.tensor(unconverted_list))
+
+
+def test_break_into_branches():
+    # Test normal multi-branch case
+    all_actions = torch.tensor([[1, 2, 3, 4, 5, 6]])
+    action_size = [2, 1, 3]
+    broken_actions = ModelUtils.break_into_branches(all_actions, action_size)
+    assert len(action_size) == len(broken_actions)
+    for i, _action in enumerate(broken_actions):
+        assert _action.shape == (1, action_size[i])
+
+    # Test 1-branch case
+    action_size = [6]
+    broken_actions = ModelUtils.break_into_branches(all_actions, action_size)
+    assert len(broken_actions) == 1
+    assert broken_actions[0].shape == (1, 6)
+
+
+def test_actions_to_onehot():
+    all_actions = torch.tensor([[1, 0, 2], [1, 0, 2]])
+    action_size = [2, 1, 3]
+    oh_actions = ModelUtils.actions_to_onehot(all_actions, action_size)
+    expected_result = [
+        torch.tensor([[0, 1], [0, 1]]),
+        torch.tensor([[1], [1]]),
+        torch.tensor([[0, 0, 1], [0, 0, 1]]),
+    ]
+    for res, exp in zip(oh_actions, expected_result):
+        assert torch.equal(res, exp)
+
+
+def test_get_probs_and_entropy():
+    # Test continuous
+    # Add two dists to the list. This isn't done in the code but we'd like to support it.
+    dist_list = [
+        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
+        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
+    ]
+    action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
+    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
+        action_list, dist_list
+    )
+    assert log_probs.shape == (1, 2, 2)
+    assert entropies.shape == (1, 2, 2)
+    assert all_probs is None
+
+    for log_prob in log_probs.flatten():
+        # Log prob of standard normal at 0
+        assert log_prob == pytest.approx(-0.919, abs=0.01)
+
+    for ent in entropies.flatten():
+        # Entropy of a standard normal (constant, independent of the sample point)
+        assert ent == pytest.approx(2.83, abs=0.01)
+
+    # Test discrete
+    # Add two dists to the list.
+    act_size = 2
+    test_prob = torch.tensor(
+        [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)
+    )  # High prob for first action
+    dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
+    action_list = [torch.tensor([0]), torch.tensor([1])]
+    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
+        action_list, dist_list
+    )
+    assert all_probs.shape == (len(dist_list) * act_size,)
+    assert entropies.shape == (len(dist_list),)
+    # Make sure the first action has a higher log probability than the others.
+    assert log_probs.flatten()[0] > log_probs.flatten()[1]
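For a concrete feel of the two branch utilities tested above, this is roughly what they compute, sketched with plain torch ops (the ModelUtils implementations may differ in detail):

    import numpy as np
    import torch

    all_actions = torch.tensor([[1, 2, 3, 4, 5, 6]])
    action_size = [2, 1, 3]

    # break_into_branches: slice the flat tensor at the branch boundaries.
    offsets = np.cumsum([0] + action_size)
    branches = [all_actions[:, offsets[i]:offsets[i + 1]] for i in range(len(action_size))]
    # -> [tensor([[1, 2]]), tensor([[3]]), tensor([[4, 5, 6]])]

    # actions_to_onehot: one-hot each action column against its own branch size.
    discrete_actions = torch.tensor([[1, 0, 2], [1, 0, 2]])
    onehots = [
        torch.nn.functional.one_hot(discrete_actions[:, i], size)
        for i, size in enumerate(action_size)
    ]
    # -> shapes [(2, 2), (2, 1), (2, 3)], matching expected_result in the test.
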
diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
index bdca1a0382..c83ae4649e 100644
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -1,4 +1,5 @@
 import abc
+from typing import List
 import torch
 from torch import nn
 import numpy as np
@@ -114,13 +115,12 @@ def entropy(self):
 class GaussianDistribution(nn.Module):
     def __init__(
         self,
-        hidden_size,
-        num_outputs,
-        conditional_sigma=False,
-        tanh_squash=False,
-        **kwargs
+        hidden_size: int,
+        num_outputs: int,
+        conditional_sigma: bool = False,
+        tanh_squash: bool = False,
     ):
-        super().__init__(**kwargs)
+        super().__init__()
         self.conditional_sigma = conditional_sigma
         self.mu = nn.Linear(hidden_size, num_outputs)
         self.tanh_squash = tanh_squash
@@ -133,7 +133,7 @@ def __init__(
             torch.zeros(1, num_outputs, requires_grad=True)
         )
 
-    def forward(self, inputs):
+    def forward(self, inputs: torch.Tensor) -> List[DistInstance]:
         mu = self.mu(inputs)
         if self.conditional_sigma:
             log_sigma = torch.clamp(self.log_sigma(inputs), min=-20, max=2)
@@ -146,12 +146,12 @@ def forward(self, inputs):
 
 
 class MultiCategoricalDistribution(nn.Module):
-    def __init__(self, hidden_size, act_sizes):
+    def __init__(self, hidden_size: int, act_sizes: List[int]):
         super().__init__()
         self.act_sizes = act_sizes
-        self.branches = self.create_policy_branches(hidden_size)
+        self.branches = self._create_policy_branches(hidden_size)
 
-    def create_policy_branches(self, hidden_size):
+    def _create_policy_branches(self, hidden_size: int) -> nn.ModuleList:
         branches = []
         for size in self.act_sizes:
             branch_output_layer = nn.Linear(hidden_size, size)
@@ -159,13 +159,13 @@ def create_policy_branches(self, hidden_size):
             branches.append(branch_output_layer)
         return nn.ModuleList(branches)
 
-    def mask_branch(self, logits, mask):
+    def _mask_branch(self, logits: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
         raw_probs = torch.nn.functional.softmax(logits, dim=-1) * mask
         normalized_probs = raw_probs / torch.sum(raw_probs, dim=-1).unsqueeze(-1)
         normalized_logits = torch.log(normalized_probs + EPSILON)
         return normalized_logits
 
-    def split_masks(self, masks):
+    def _split_masks(self, masks: torch.Tensor) -> List[torch.Tensor]:
         split_masks = []
         for idx, _ in enumerate(self.act_sizes):
             start = int(np.sum(self.act_sizes[:idx]))
@@ -173,13 +173,13 @@ def split_masks(self, masks):
             split_masks.append(masks[:, start:end])
         return split_masks
 
-    def forward(self, inputs, masks):
+    def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> List[DistInstance]:
         # Todo - Support multiple branches in mask code
         branch_distributions = []
-        masks = self.split_masks(masks)
+        masks = self._split_masks(masks)
         for idx, branch in enumerate(self.branches):
             logits = branch(inputs)
-            norm_logits = self.mask_branch(logits, masks[idx])
+            norm_logits = self._mask_branch(logits, masks[idx])
             distribution = CategoricalDistInstance(norm_logits)
             branch_distributions.append(distribution)
         return branch_distributions
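The _mask_branch logic above is a masked-softmax renormalization: zero out disallowed actions, rescale the survivors to sum to 1, then return to log space. Numerically (using 1e-7 to stand in for the EPSILON constant):

    import torch

    logits = torch.tensor([[1.0, 1.0, 1.0]])
    mask = torch.tensor([[1.0, 1.0, 0.0]])  # third action disallowed

    raw_probs = torch.nn.functional.softmax(logits, dim=-1) * mask  # [1/3, 1/3, 0]
    norm_probs = raw_probs / raw_probs.sum(dim=-1, keepdim=True)    # [0.5, 0.5, 0]
    norm_logits = torch.log(norm_probs + 1e-7)  # EPSILON keeps log(0) finite
    # Masked action gets log(1e-7) ~ -16.1; allowed actions get log(0.5).

This is also why the mask test in test_distributions expects a log-prob of ~0 for the single allowed action: with every other action masked out, the survivor is renormalized to probability 1.
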
diff --git a/ml-agents/mlagents/trainers/torch/encoders.py b/ml-agents/mlagents/trainers/torch/encoders.py
index 8676193532..dd9543987e 100644
--- a/ml-agents/mlagents/trainers/torch/encoders.py
+++ b/ml-agents/mlagents/trainers/torch/encoders.py
@@ -9,9 +9,9 @@ class Normalizer(nn.Module):
 
     def __init__(self, vec_obs_size: int):
         super().__init__()
-        self.normalization_steps = torch.tensor(1)
-        self.running_mean = torch.zeros(vec_obs_size)
-        self.running_variance = torch.ones(vec_obs_size)
+        self.register_buffer("normalization_steps", torch.tensor(1))
+        self.register_buffer("running_mean", torch.zeros(vec_obs_size))
+        self.register_buffer("running_variance", torch.ones(vec_obs_size))
 
     def forward(self, inputs: torch.Tensor) -> torch.Tensor:
         normalized_state = torch.clamp(
@@ -33,9 +33,10 @@ def update(self, vector_input: torch.Tensor) -> None:
         new_variance = self.running_variance + (
             input_to_new_mean * input_to_old_mean
         ).sum(0)
-        self.running_mean = new_mean
-        self.running_variance = new_variance
-        self.normalization_steps = total_new_steps
+        # Update in-place so the new values land in the registered buffers
+        self.running_mean.data.copy_(new_mean.data)
+        self.running_variance.data.copy_(new_variance.data)
+        self.normalization_steps.data.copy_(total_new_steps.data)
 
     def copy_from(self, other_normalizer: "Normalizer") -> None:
         self.normalization_steps.data.copy_(other_normalizer.normalization_steps.data)
diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py
index 600c7eb8d9..86e3e15b39 100644
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
@@ -40,11 +40,9 @@ def get_encoder_for_type(encoder_type: EncoderType) -> nn.Module:
 
     @staticmethod
     def _check_resolution_for_encoder(
-        vis_in: torch.Tensor, vis_encoder_type: EncoderType
+        height: int, width: int, vis_encoder_type: EncoderType
    ) -> None:
         min_res = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type]
-        height = vis_in.shape[1]
-        width = vis_in.shape[2]
         if height < min_res or width < min_res:
             raise UnityTrainerException(
                 f"Visual observation resolution ({width}x{height}) is too small for"
@@ -80,6 +78,9 @@ def create_encoders(
         vector_size = 0
         for i, dimension in enumerate(observation_shapes):
             if len(dimension) == 3:
+                ModelUtils._check_resolution_for_encoder(
+                    dimension[0], dimension[1], vis_encode_type
+                )
                 visual_encoders.append(
                     visual_encoder_class(
                         dimension[0], dimension[1], dimension[2], h_size
@@ -136,6 +137,14 @@ def break_into_branches(
     def actions_to_onehot(
         discrete_actions: torch.Tensor, action_size: List[int]
     ) -> List[torch.Tensor]:
+        """
+        Takes a tensor of discrete actions and converts it into a list of one-hot
+        encodings, one per action branch.
+        :param discrete_actions: Actions in integer form.
+        :param action_size: List of branch sizes. Must be of the same length as the
+            last dimension of discrete_actions.
+        :return: List of one-hot tensors, one representing each branch.
+        """
         onehot_branches = [
             torch.nn.functional.one_hot(_act.T, action_size[i])
             for i, _act in enumerate(discrete_actions.T)
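For reference on the Normalizer.update arithmetic the encoders.py hunk preserves: it is a batched running mean/variance update in the style of Welford's algorithm, where running_variance accumulates a sum of squared deviations that the module apparently divides by the step count when normalizing. A standalone sketch; the variable names are mine, and the mean-update line sits outside the hunk above, so it is reconstructed here from the standard algorithm:

    import torch

    def running_update(mean, var_sum, steps, batch):
        # steps counts samples seen so far; var_sum accumulates the sum of
        # squared deviations (variance = var_sum / steps when normalizing).
        new_steps = steps + batch.shape[0]
        input_to_old_mean = batch - mean
        new_mean = mean + (input_to_old_mean / new_steps).sum(0)
        input_to_new_mean = batch - new_mean
        # Mirrors the visible line: running_variance += sum((x - new_mean)(x - old_mean))
        new_var_sum = var_sum + (input_to_new_mean * input_to_old_mean).sum(0)
        return new_mean, new_var_sum, new_steps

With the test_normalizer inputs ([1, 1], [1, 1], [0, 0]) and the step count starting at 1, this style of update yields the ~0.707 normalized value the test asserts.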