Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Add GPU test workflow #48

Merged
merged 8 commits into from
May 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,21 @@ jobs:
- name: Run pretrained tests
run: |
make docker-test-run DOCKER_TAG=$DOCKER_TAG ARGS='test-pretrained'

gpu_checks:
  # Runs on a self-hosted runner carrying the GPU label.
  runs-on: [self-hosted, GPU]

  steps:
  - uses: actions/checkout@v2

  - name: Set Docker tag
    # Export DOCKER_TAG to the following steps through the $GITHUB_ENV
    # environment file. The older `::set-env` workflow command is disabled
    # by GitHub (CVE-2020-15228) and would leave DOCKER_TAG unset.
    run: |
      echo "DOCKER_TAG=$GITHUB_SHA" >> "$GITHUB_ENV"

  - name: Build test image
    # Tag the image with the commit SHA set above.
    run: |
      make docker-test-image DOCKER_TAG=$DOCKER_TAG

  - name: Run GPU tests
    # Invokes the `gpu-test` Makefile target inside the test image.
    run: |
      make docker-test-run DOCKER_TAG=$DOCKER_TAG ARGS='gpu-test'
1 change: 0 additions & 1 deletion Dockerfile.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ FROM python:3.7
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

ENV PATH /usr/local/nvidia/bin/:$PATH
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

# Tell nvidia-docker the driver spec that we need as well as to
Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@ format :

.PHONY : typecheck
typecheck :
mypy allennlp_models --ignore-missing-imports --no-strict-optional --no-site-packages
mypy allennlp_models tests --ignore-missing-imports --no-strict-optional --no-site-packages

# Run pytest on everything except tests marked `pretrained_model_test`.
# `-rf` prints a short summary of failed tests; `--durations=40` reports the
# 40 slowest test durations.
.PHONY : test
test :
pytest --color=yes -rf --durations=40 -m "not pretrained_model_test"

# Run only the tests selected by the `gpu` marker (`-m gpu`), with verbose
# output (`-v`) and a short summary of failed tests (`-rf`).
.PHONY : gpu-test
gpu-test :
pytest --color=yes -v -rf -m gpu

# Run pytest on everything except tests marked `pretrained_model_test` while
# measuring coverage of allennlp_models/ (pytest-cov `--cov`), with
# .coveragerc as the coverage configuration file.
.PHONY : test-with-cov
test-with-cov :
pytest --color=yes -rf --cov-config=.coveragerc --cov=allennlp_models/ --durations=40 -m "not pretrained_model_test"
Expand Down
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
[pytest]
testpaths = tests/
python_classes = Test* *Test
log_format = %(asctime)s - %(levelname)s - %(name)s - %(message)s
log_level = DEBUG
markers =
pretrained_model_test
java
gpu: marks tests that need at least one GPU
filterwarnings =
# Note: When a warning matches more than one option in the list,
# the action for the _last_ matching option is performed.
Expand Down
2 changes: 1 addition & 1 deletion tests/coref/coref_model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _test_coref_model_can_train_save_and_load(
)
# fmt: on
self.ensure_model_can_train_save_and_load(self.param_file, overrides=overrides)
self.tearDown()
self.teardown_method()
self.setup_method()

def test_coref_bert_model_can_train_save_and_load(self):
Expand Down
15 changes: 0 additions & 15 deletions tests/rc/qanet/qanet_model_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import torch
import pytest
from flaky import flaky
import numpy
from numpy.testing import assert_almost_equal
Expand All @@ -8,9 +6,7 @@
from allennlp.common.testing import ModelTestCase
from allennlp.data import DatasetReader, Vocabulary
from allennlp.data import Batch
from allennlp.data import DataLoader
from allennlp.models import Model
from allennlp.training import Trainer

from tests import FIXTURES_ROOT

Expand Down Expand Up @@ -52,17 +48,6 @@ def test_forward_pass_runs_correctly(self):
def test_model_can_train_save_and_load(self):
self.ensure_model_can_train_save_and_load(self.param_file, tolerance=1e-4)

@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Need multiple GPUs.")
def test_multigpu_qanet(self):
params = Params.from_file(self.param_file)
vocab = Vocabulary.from_instances(self.instances)
model = Model.from_params(vocab=vocab, params=params["model"]).cuda()
optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9)
self.instances.index_with(model.vocab)
loader = DataLoader(self.instances, batch_size=4)
trainer = Trainer(model, optimizer, loader, num_epochs=2, cuda_device=[0, 1])
trainer.train()

def test_batch_predictions_are_consistent(self):
# The same issue as the bidaf test case.
# The CNN encoder has problems with this kind of test - it's not properly masked yet, so
Expand Down
4 changes: 2 additions & 2 deletions tests/rc/qanet/stacked_self_attention_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

from allennlp.common.testing import AllenNlpTestCase
from allennlp.common.testing import AllenNlpTestCase, requires_multi_gpu

from allennlp_models.rc.qanet.stacked_self_attention import StackedSelfAttentionEncoder

Expand Down Expand Up @@ -37,7 +37,7 @@ def test_stacked_self_attention_can_run_foward(self):
encoder_output = encoder(inputs, None)
assert list(encoder_output.size()) == [3, 5, 12]

@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Need multiple GPUs.")
@requires_multi_gpu
def test_stacked_self_attention_can_run_foward_on_multiple_gpus(self):
encoder = StackedSelfAttentionEncoder(
input_dim=9,
Expand Down
4 changes: 2 additions & 2 deletions tests/syntax/srl/bert_srl_model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ def setup_method(self):
FIXTURES_ROOT / "syntax" / "srl" / "conll_2012",
)

def tearDown(self):
def teardown_method(self):
self.monkeypatch.undo()
self.monkeypatch.undo()
super().tearDown()
super().teardown_method()

def test_bert_srl_model_can_train_save_and_load(self):
ignore_grads = {"bert_model.pooler.dense.weight", "bert_model.pooler.dense.bias"}
Expand Down