Skip to content

Commit

Permalink
Add unit tests for wsmf.metamodels
Browse files Browse the repository at this point in the history
  • Loading branch information
azoz01 committed Aug 3, 2024
1 parent 8a61358 commit d75e800
Show file tree
Hide file tree
Showing 24 changed files with 926 additions and 64 deletions.
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
dataset2vec
dataset2vec==1.0.0
openml==0.14.2
loguru==0.7.2
optuna==3.6.1
Expand All @@ -7,4 +7,5 @@ scikit-learn==1.4.2
pytest==8.2.1
pymfe==0.4.3
seaborn==0.13.2
tensorboard==2.17.0
tensorboard==2.17.0
numpy==1.26.4
File renamed without changes.
File renamed without changes.
92 changes: 92 additions & 0 deletions test/wsmf/metamodels/data/test_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import pytest
from torch import Tensor

from wsmf.metamodels.data import EncoderHpoDataset


def test_d2v_hpo_dataset_has_proper_length():
    """The dataset length equals the number of wrapped datasets."""
    # Given: two (X, y) pairs with matching landmarker entries.
    datasets = {
        "dataset1": (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]])),
        "dataset2": (Tensor([[7, 8, 9, 10], [4, 5, 6, 11]]), Tensor([[1], [0]])),
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
    }

    # When
    wrapped = EncoderHpoDataset(datasets, landmarkers)

    # Then
    assert len(wrapped) == 2


def test_d2v_hpo_dataset_has_proper_dataset_names():
    """`dataset_names` lists the wrapped dataset keys."""
    # Given: two (X, y) pairs with matching landmarker entries.
    datasets = {
        "dataset1": (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]])),
        "dataset2": (Tensor([[7, 8, 9, 10], [4, 5, 6, 11]]), Tensor([[1], [0]])),
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
    }

    # When
    wrapped = EncoderHpoDataset(datasets, landmarkers)

    # Then
    assert wrapped.dataset_names == ["dataset1", "dataset2"]


def test_d2v_hpo_dataset_returns_proper_data_on_index():
    """Indexing by name yields that dataset's X, y and landmarkers."""
    # Given
    expected_X = Tensor([[7, 8, 9, 10], [4, 5, 6, 11]])
    expected_y = Tensor([[1], [0]])
    expected_landmarkers = Tensor([-1, -2, -3])
    datasets = {
        "dataset1": (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]])),
        "dataset2": (expected_X, expected_y),
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": expected_landmarkers,
    }

    # When
    wrapped = EncoderHpoDataset(datasets, landmarkers)
    actual_X, actual_y, actual_landmarkers = wrapped["dataset2"]

    # Then
    assert (actual_X == expected_X).all()
    assert (actual_y == expected_y).all()
    assert (actual_landmarkers == expected_landmarkers).all()


def test_d2v_hpo_dataset_fail_when_inconsistent_data_sizes():
    """Construction fails when landmarker keys don't match dataset keys."""
    # Given: one dataset but landmarkers for two names.
    datasets = {
        "dataset1": (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]])),
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
    }

    # Then
    with pytest.raises(AssertionError):
        EncoderHpoDataset(datasets, landmarkers)
87 changes: 87 additions & 0 deletions test/wsmf/metamodels/data/test_landmarker_reconstruction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from unittest.mock import Mock, patch

from torch import Tensor

from wsmf.metamodels.data import (
EncoderHpoDataset,
LandmarkerReconstructionLoader,
)


@patch("numpy.random.choice")
def test_landmarker_reconstruction_loader_returns_proper_sample(
    choice_mock: Mock,
):
    """A sample carries the dataset's X, y and its landmarkers."""
    # Given: random sampling is pinned so indices [0, 1] are chosen.
    choice_mock.return_value = [0, 1]
    expected_X = Tensor([[1, 2, 3], [4, 5, 6]])
    expected_y = Tensor([[0], [1]])
    datasets = {"dataset1": (expected_X, expected_y)}
    landmarkers = {"dataset1": Tensor([1, 2, 3])}

    # When
    wrapped = EncoderHpoDataset(datasets, landmarkers)
    loader = LandmarkerReconstructionLoader(wrapped, 1)
    sample = next(iter(loader))[0]

    # Then
    assert (sample[0] == expected_X).all()
    assert (sample[1] == expected_y).all()
    assert (sample[2] == landmarkers["dataset1"]).all()


def test_landmarker_reconstruction_loader_returns_proper_batch_size():
    """The loader groups samples into batches of the requested size."""
    # Given
    datasets = {
        "dataset1": (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]])),
        "dataset2": (Tensor([[7, 8, 9, 10], [4, 5, 6, 11]]), Tensor([[1], [0]])),
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
    }

    # When: batch size of 2 over two datasets.
    wrapped = EncoderHpoDataset(datasets, landmarkers)
    loader = LandmarkerReconstructionLoader(wrapped, 2)
    first_batch = next(iter(loader))

    # Then
    assert len(first_batch) == 2


def test_landmarker_reconstruction_loader_returns_all_datasets():
    """With batch size 1, every dataset appears once, in order."""
    # Given
    dataset1 = (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]]))
    dataset2 = (Tensor([[7, 8, 9, 10], [4, 5, 6, 11]]), Tensor([[1], [0]]))
    datasets = {"dataset1": dataset1, "dataset2": dataset2}
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
    }

    # When
    wrapped = EncoderHpoDataset(datasets, landmarkers)
    loader = LandmarkerReconstructionLoader(wrapped, 1)
    batches = list(loader)

    # Then: each single-sample batch holds one (X, y, landmarkers) triple.
    for batch_idx, (expected_X, expected_y) in enumerate([dataset1, dataset2]):
        sample = batches[batch_idx][0]
        assert (sample[0] == expected_X).all()
        assert (sample[1] == expected_y).all()
        assert (sample[2] == landmarkers[f"dataset{batch_idx + 1}"]).all()
88 changes: 88 additions & 0 deletions test/wsmf/metamodels/data/test_metric_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from unittest.mock import Mock, patch

import numpy as np
from torch import Tensor

from wsmf.metamodels.data import EncoderHpoDataset, EncoderMetricLearningLoader


@patch("numpy.random.choice")
def test_encoder_metric_loader_calculates_sample_properly(choice_mock: Mock):
    """A sample pairs two datasets with their landmarker distance."""
    # Given: sampling is pinned so datasets 0 and 1 are paired.
    choice_mock.return_value = [0, 1]
    dataset1 = (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]]))
    dataset2 = (Tensor([[7, 8, 9, 10], [4, 5, 6, 11]]), Tensor([[1], [0]]))
    # NOTE(review): dataset3's X has 2 rows while its y has 3 — looks like a
    # fixture typo; it is unused by the assertions below, so harmless here.
    dataset3 = (
        Tensor([[7, 8, 9, 10, 11], [4, 5, 6, 11, 12]]),
        Tensor([[1], [0], [1]]),
    )
    datasets = {
        "dataset1": dataset1,
        "dataset2": dataset2,
        "dataset3": dataset3,
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
        "dataset3": Tensor([-1, -1, -1]),
    }

    # When
    wrapped = EncoderHpoDataset(datasets, landmarkers)
    loader = EncoderMetricLearningLoader(wrapped, 1, 1)
    sample = next(iter(loader))[0]

    # Then: 56/3 = (2^2 + 4^2 + 6^2) / 3, i.e. the mean squared difference
    # of the two landmarker vectors.
    assert (sample[0] == dataset1[0]).all()
    assert (sample[1] == dataset1[1]).all()
    assert (sample[2] == dataset2[0]).all()
    assert (sample[3] == dataset2[1]).all()
    assert np.isclose(sample[4], 56 / 3)


def test_encoder_metric_loader_returns_proper_number_of_batches():
    """The loader yields exactly the configured number of batches."""
    # Given
    datasets = {
        "dataset1": (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]])),
        "dataset2": (Tensor([[7, 8, 9, 10], [4, 5, 6, 11]]), Tensor([[1], [0]])),
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
    }

    # When: configured for 16 batches of 32 samples.
    wrapped = EncoderHpoDataset(datasets, landmarkers)
    loader = EncoderMetricLearningLoader(wrapped, 16, 32)

    # Then
    assert len(list(loader)) == 16


def test_encoder_metric_loader_returns_batch_with_proper_size():
    """Each batch holds the configured number of samples."""
    # Given
    datasets = {
        "dataset1": (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]])),
        "dataset2": (Tensor([[7, 8, 9, 10], [4, 5, 6, 11]]), Tensor([[1], [0]])),
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
    }

    # When: configured for 16 batches of 32 samples.
    wrapped = EncoderHpoDataset(datasets, landmarkers)
    loader = EncoderMetricLearningLoader(wrapped, 16, 32)
    first_batch = next(iter(loader))

    # Then
    assert len(first_batch) == 32
42 changes: 42 additions & 0 deletions test/wsmf/metamodels/data/test_repeatable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from torch import Tensor

from wsmf.metamodels.data import (
EncoderHpoDataset,
EncoderMetricLearningLoader,
GenericRepeatableD2vLoader,
)


def test_encoder_metric_loader_calculates_sample_properly():
    """Iterating the repeatable loader twice yields identical batches."""
    # NOTE(review): the name looks copy-pasted from test_metric_loader.py;
    # this actually checks GenericRepeatableD2vLoader's repeatability. Kept
    # as-is to preserve the collected test ID.
    # Given
    datasets = {
        "dataset1": (Tensor([[1, 2, 3], [4, 5, 6]]), Tensor([[0], [1]])),
        "dataset2": (Tensor([[7, 8, 9, 10], [4, 5, 6, 11]]), Tensor([[1], [0]])),
        "dataset3": (
            Tensor([[7, 8, 9, 10, 11], [4, 5, 6, 11, 12]]),
            Tensor([[1], [0], [1]]),
        ),
    }
    landmarkers = {
        "dataset1": Tensor([1, 2, 3]),
        "dataset2": Tensor([-1, -2, -3]),
        "dataset3": Tensor([-1, -1, -1]),
    }

    # When: wrap a 2-batch loader and materialize it twice.
    wrapped = EncoderHpoDataset(datasets, landmarkers)
    base_loader = EncoderMetricLearningLoader(wrapped, 2, 1)
    repeatable = GenericRepeatableD2vLoader(base_loader)
    first_pass = list(repeatable)
    second_pass = list(repeatable)

    # Then: the first three tensors of each sample match across passes.
    for batch_idx in range(2):
        for tensor_idx in range(3):
            assert (
                first_pass[batch_idx][0][tensor_idx]
                == second_pass[batch_idx][0][tensor_idx]
            ).all()
19 changes: 19 additions & 0 deletions test/wsmf/metamodels/networks/test_d2v_metric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from unittest.mock import Mock, patch

from torch import Tensor, rand

from wsmf.metamodels.networks import Dataset2VecMetricLearning


@patch("dataset2vec.Dataset2Vec.forward")
def test_d2v_metric_forward(dataset2vec_mock: Mock):
    """The metric meta-model returns the Dataset2Vec encoder's output."""
    # Given: the underlying encoder is stubbed with a fixed embedding.
    expected_encoding = Tensor([1.0, 2.0, 3.0])
    dataset2vec_mock.return_value = expected_encoding
    meta_model = Dataset2VecMetricLearning()
    X, y = rand((10, 5)), rand(10, 1)

    # When
    actual_encoding = meta_model(X, y)

    # Then
    assert (actual_encoding == expected_encoding).all()
39 changes: 39 additions & 0 deletions test/wsmf/metamodels/networks/test_d2v_reconstruction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from unittest.mock import Mock

from torch import Size, Tensor, rand

from wsmf.metamodels.networks import Dataset2VecForLandmarkerReconstruction


def test_meta_model_returns_output_of_proper_dimensionality():
    """Reconstructed landmarkers have the configured output size."""
    # Given: a reconstruction model configured for 4 landmarkers.
    landmarker_count = 4
    meta_model = Dataset2VecForLandmarkerReconstruction(landmarker_count)
    X, y = rand((10, 5)), rand((10, 1))

    # When
    reconstruction = meta_model(X, y)

    # Then
    assert reconstruction.shape == Size([landmarker_count])


def test_meta_model_uses_reconstructor():
    """The model feeds the encoder's embedding into the reconstructor."""
    # Given: both submodules stubbed with distinguishable outputs.
    meta_model = Dataset2VecForLandmarkerReconstruction(3)
    encoding = Tensor([1, 2, 3])
    reconstruction = Tensor([4, 5, 6])
    encoder_mock = Mock(return_value=encoding)
    reconstructor_mock = Mock(return_value=reconstruction)
    meta_model.dataset2vec.forward = encoder_mock
    meta_model.landmarker_reconstructor.forward = reconstructor_mock
    X, y = rand((10, 5)), rand((10, 1))

    # When
    output = meta_model(X, y)

    # Then: output comes from the reconstructor, the encoder saw (X, y),
    # and the reconstructor saw the encoder's embedding.
    assert (output == reconstruction).all()
    encoder_args = encoder_mock.call_args
    reconstructor_args = reconstructor_mock.call_args
    assert (encoder_args[0][0] == X).all()
    assert (encoder_args[0][1] == y).all()
    assert (reconstructor_args[0][0] == encoding).all()
Loading

0 comments on commit d75e800

Please sign in to comment.