From 2eac7f48a28b8438140ab11da1225fa867a57b1f Mon Sep 17 00:00:00 2001 From: Brian Chen Date: Mon, 11 Dec 2023 20:22:13 -0800 Subject: [PATCH] Add GPU path, fast path, parallelism and 1.6 --- .buildkite/pipeline.yml | 4 +- .github/workflows/CI.yml | 10 +- test/convnet_tests.jl | 225 ++++++++++++++++++++------------------- test/mixer_tests.jl | 27 ++--- test/model_tests.jl | 21 ++-- test/runtests.jl | 6 +- test/vit_tests.jl | 5 +- 7 files changed, 162 insertions(+), 136 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 5471c98e..acae7852 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -19,7 +19,9 @@ steps: Pkg.rm("ReTestItems") # not compatible with 1.6 end' env: - GROUP: "CUDA" # TODO there are zero tests under this group + GROUP: "All" + TEST_FAST: true + TEST_WORKERS: 4 if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 180 matrix: diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index a5ac46ac..33105f52 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -26,11 +26,13 @@ jobs: runs-on: ${{ matrix.os }} env: GROUP: ${{ matrix.suite }} + TEST_FAST: ${{ matrix.version != '1' || matrix.os != 'ubuntu-latest' }} strategy: fail-fast: false matrix: version: - - '1' # Replace this with the minimum Julia version that your package supports. + - '1.6' # Replace this with the minimum Julia version that your package supports. 
+ - '1' - 'nightly' os: - ubuntu-latest @@ -61,13 +63,15 @@ jobs: - uses: julia-actions/cache@v1 - uses: julia-actions/julia-buildpkg@v1 + - name: Setup test env for 1.6 + if: ${{ matrix.version == '1.6' }} + run: | + julia --color=yes --depwarn=yes --project=./test -e 'using Pkg; Pkg.rm("ReTestItems")' - name: Run tests uses: julia-actions/julia-runtest@v1 continue-on-error: ${{ !(matrix.version == '1' && matrix.os == 'ubuntu-latest') && matrix.version == 'nightly' }} with: coverage: ${{ matrix.version == '1' && matrix.os == 'ubuntu-latest' }} - # run: | - # julia --color=yes --depwarn=yes --project=./test -e 'include("test/retest.jl"); retest(${{ matrix.suite }})' - uses: actions/upload-artifact@v3 with: name: coverage-${{ hashFiles('**/*.cov') }} diff --git a/test/convnet_tests.jl b/test/convnet_tests.jl index fb7acb9d..49069195 100644 --- a/test/convnet_tests.jl +++ b/test/convnet_tests.jl @@ -1,5 +1,5 @@ @testitem "AlexNet" setup=[TestModels] begin - model = AlexNet() + model = AlexNet() |> gpu @test size(model(x_256)) == (1000, 1) @test_throws ArgumentError AlexNet(pretrain = true) @test gradtest(model, x_256) @@ -7,11 +7,12 @@ end @testitem "VGG" setup=[TestModels] begin - @testset "VGG($sz, batchnorm=$bn)" for sz in [11, 13, 16, 19], bn in [true, false] - m = VGG(sz, batchnorm = bn) + sizes = TEST_FAST ? [11] : [11, 13, 16, 19] + @testset "VGG($sz, batchnorm=$bn)" for sz in sizes, bn in [true, false] + m = VGG(sz; batchnorm = bn) |> gpu @test size(m(x_224)) == (1000, 1) if (VGG, sz, bn) in PRETRAINED_MODELS - @test acctest(VGG(sz, batchnorm = bn, pretrain = true)) + @test acctest(VGG(sz; batchnorm = bn, pretrain = true)) else @test_throws ArgumentError VGG(sz, batchnorm = bn, pretrain = true) end @@ -22,11 +23,12 @@ end @testitem "ResNet" setup=[TestModels] begin # Tests for pretrained ResNets - @testset "ResNet($sz)" for sz in [18, 34, 50, 101, 152] - m = ResNet(sz) + sizes = TEST_FAST ? 
[18] : [18, 34, 50, 101, 152] + @testset "ResNet($sz)" for sz in sizes + m = ResNet(sz) |> gpu @test size(m(x_224)) == (1000, 1) if (ResNet, sz) in PRETRAINED_MODELS - @test acctest(ResNet(sz, pretrain = true)) + @test acctest(ResNet(sz; pretrain = true)) else @test_throws ArgumentError ResNet(sz, pretrain = true) end @@ -38,7 +40,7 @@ end [2, 2, 2, 2], [3, 4, 6, 3], [3, 4, 23, 3], - [3, 8, 36, 3] + [3, 8, 36, 3], ] @testset for layers in layer_list drop_list = [ @@ -57,15 +59,15 @@ end end end - @testitem "WideResNet" setup=[TestModels] begin - @testset "WideResNet($sz)" for sz in [50, 101] - m = WideResNet(sz) + sizes = TEST_FAST ? [50] : [50, 101] + @testset "WideResNet($sz)" for sz in sizes + m = WideResNet(sz) |> gpu @test size(m(x_224)) == (1000, 1) @test gradtest(m, x_224) _gc() if (WideResNet, sz) in PRETRAINED_MODELS - @test acctest(WideResNet(sz, pretrain = true)) + @test acctest(WideResNet(sz; pretrain = true)) else @test_throws ArgumentError WideResNet(sz, pretrain = true) end @@ -73,29 +75,32 @@ end end @testitem "ResNeXt" setup=[TestModels] begin - @testset for depth in [50, 101, 152] - @testset for cardinality in [32, 64] - @testset for base_width in [4, 8] - m = ResNeXt(depth; cardinality, base_width) - @test size(m(x_224)) == (1000, 1) - if (ResNeXt, depth, cardinality, base_width) in PRETRAINED_MODELS - @test acctest(ResNeXt(depth; cardinality, base_width, pretrain = true)) - else - @test_throws ArgumentError ResNeXt(depth; cardinality, base_width, pretrain = true) - end - @test gradtest(m, x_224) - _gc() - end + depths = TEST_FAST ? [50] : [50, 101, 152] + cardinalities = TEST_FAST ? [32] : [32, 64] + base_widths = TEST_FAST ? 
[4] : [4, 8] + @testset for depth in depths, cardinality in cardinalities, base_width in base_widths + m = ResNeXt(depth; cardinality, base_width) |> gpu + @test size(m(x_224)) == (1000, 1) + if (ResNeXt, depth, cardinality, base_width) in PRETRAINED_MODELS + @test acctest(ResNeXt(depth; cardinality, base_width, pretrain = true)) + else + @test_throws ArgumentError ResNeXt(depth; + cardinality, + base_width, + pretrain = true) end + @test gradtest(m, x_224) + _gc() end end @testitem "SEResNet" setup=[TestModels] begin - @testset for depth in [18, 34, 50, 101, 152] - m = SEResNet(depth) + depths = TEST_FAST ? [18] : [18, 34, 50, 101, 152] + @testset for depth in depths + m = SEResNet(depth) |> gpu @test size(m(x_224)) == (1000, 1) if (SEResNet, depth) in PRETRAINED_MODELS - @test acctest(SEResNet(depth, pretrain = true)) + @test acctest(SEResNet(depth; pretrain = true)) else @test_throws ArgumentError SEResNet(depth, pretrain = true) end @@ -105,26 +110,26 @@ end end @testitem "SEResNeXt" setup=[TestModels] begin - @testset for depth in [50, 101, 152] - @testset for cardinality in [32, 64] - @testset for base_width in [4, 8] - m = SEResNeXt(depth; cardinality, base_width) - @test size(m(x_224)) == (1000, 1) - if (SEResNeXt, depth, cardinality, base_width) in PRETRAINED_MODELS - @test acctest(SEResNeXt(depth, pretrain = true)) - else - @test_throws ArgumentError SEResNeXt(depth, pretrain = true) - end - @test gradtest(m, x_224) - _gc() - end + depths = TEST_FAST ? [50] : [50, 101, 152] + cardinalities = TEST_FAST ? [32] : [32, 64] + base_widths = TEST_FAST ? 
[4] : [4, 8] + @testset for depth in depths, cardinality in cardinalities, base_width in base_widths + m = SEResNeXt(depth; cardinality, base_width) |> gpu + @test size(m(x_224)) == (1000, 1) + if (SEResNeXt, depth, cardinality, base_width) in PRETRAINED_MODELS + @test acctest(SEResNeXt(depth; pretrain = true)) + else + @test_throws ArgumentError SEResNeXt(depth, pretrain = true) end + @test gradtest(m, x_224) + _gc() end end @testitem "Res2Net" setup=[TestModels] begin - @testset for (base_width, scale) in [(26, 4), (48, 2), (14, 8), (26, 6), (26, 8)] - m = Res2Net(50; base_width, scale) + configs = TEST_FAST ? [(26, 4)] : [(26, 4), (48, 2), (14, 8), (26, 6), (26, 8)] + @testset for (base_width, scale) in configs + m = Res2Net(50; base_width, scale) |> gpu @test size(m(x_224)) == (1000, 1) if (Res2Net, 50, base_width, scale) in PRETRAINED_MODELS @test acctest(Res2Net(50; base_width, scale, pretrain = true)) @@ -134,25 +139,28 @@ end @test gradtest(m, x_224) _gc() end - @testset for (base_width, scale) in [(26, 4)] - m = Res2Net(101; base_width, scale) - @test size(m(x_224)) == (1000, 1) - if (Res2Net, 101, base_width, scale) in PRETRAINED_MODELS - @test acctest(Res2Net(101; base_width, scale, pretrain = true)) - else - @test_throws ArgumentError Res2Net(101; base_width, scale, pretrain = true) + + if !TEST_FAST + @testset for (base_width, scale) in [(26, 4)] + m = Res2Net(101; base_width, scale) |> gpu + @test size(m(x_224)) == (1000, 1) + if (Res2Net, 101, base_width, scale) in PRETRAINED_MODELS + @test acctest(Res2Net(101; base_width, scale, pretrain = true)) + else + @test_throws ArgumentError Res2Net(101; base_width, scale, pretrain = true) + end + @test gradtest(m, x_224) + _gc() end - @test gradtest(m, x_224) - _gc() end end @testitem "Res2NeXt" setup=[TestModels] begin @testset for depth in [50, 101] - m = Res2NeXt(depth) + m = Res2NeXt(depth) |> gpu @test size(m(x_224)) == (1000, 1) if (Res2NeXt, depth) in PRETRAINED_MODELS - @test acctest(Res2NeXt(depth, 
pretrain = true)) + @test acctest(Res2NeXt(depth; pretrain = true)) else @test_throws ArgumentError Res2NeXt(depth, pretrain = true) end @@ -162,14 +170,15 @@ end end @testitem "EfficientNet" setup=[TestModels] begin - @testset "EfficientNet($config)" for config in [:b0, :b1, :b2, :b3, :b4, :b5,] #:b6, :b7, :b8] + configs = TEST_FAST ? [:b0] : [:b0, :b1, :b2, :b3, :b4, :b5] #:b6, :b7, :b8] + @testset "EfficientNet($config)" for config in configs # preferred image resolution scaling r = Metalhead.EFFICIENTNET_GLOBAL_CONFIGS[config][1] - x = rand(Float32, r, r, 3, 1) - m = EfficientNet(config) + x = rand(Float32, r, r, 3, 1) |> gpu + m = EfficientNet(config) |> gpu @test size(m(x)) == (1000, 1) if (EfficientNet, config) in PRETRAINED_MODELS - @test acctest(EfficientNet(config, pretrain = true)) + @test acctest(EfficientNet(config; pretrain = true)) else @test_throws ArgumentError EfficientNet(config, pretrain = true) end @@ -179,11 +188,12 @@ end @testitem "EfficientNetv2" setup=[TestModels] begin - @testset for config in [:small, :medium, :large] # :xlarge] + configs = TEST_FAST ? 
[:small] : [:small, :medium, :large] # :xlarge] + @testset for config in configs + m = EfficientNetv2(config) |> gpu @test size(m(x_224)) == (1000, 1) if (EfficientNetv2, config) in PRETRAINED_MODELS - @test acctest(EfficientNetv2(config, pretrain = true)) + @test acctest(EfficientNetv2(config; pretrain = true)) else @test_throws ArgumentError EfficientNetv2(config, pretrain = true) end @@ -194,10 +204,10 @@ end @testitem "GoogLeNet" setup=[TestModels] begin @testset for bn in [true, false] - m = GoogLeNet(batchnorm = bn) + m = GoogLeNet(; batchnorm = bn) |> gpu @test size(m(x_224)) == (1000, 1) if (GoogLeNet, bn) in PRETRAINED_MODELS - @test acctest(GoogLeNet(batchnorm = bn, pretrain = true)) + @test acctest(GoogLeNet(; batchnorm = bn, pretrain = true)) else @test_throws ArgumentError GoogLeNet(batchnorm = bn, pretrain = true) end @@ -207,12 +217,12 @@ end end @testitem "Inception" setup=[TestModels] begin - x_299 = rand(Float32, 299, 299, 3, 2) + x_299 = rand(Float32, 299, 299, 3, 2) |> gpu @testset "$Model" for Model in [Inceptionv3, Inceptionv4, InceptionResNetv2, Xception] - m = Model() + m = Model() |> gpu @test size(m(x_299)) == (1000, 2) if Model in PRETRAINED_MODELS - @test acctest(Model(pretrain = true)) + @test acctest(Model(; pretrain = true)) else @test_throws ArgumentError Model(pretrain = true) end @@ -222,10 +232,10 @@ end end @testitem "SqueezeNet" setup=[TestModels] begin - m = SqueezeNet() + m = SqueezeNet() |> gpu @test size(m(x_224)) == (1000, 1) if SqueezeNet in PRETRAINED_MODELS - @test acctest(SqueezeNet(pretrain = true)) + @test acctest(SqueezeNet(; pretrain = true)) else @test_throws ArgumentError SqueezeNet(pretrain = true) end @@ -234,11 +244,12 @@ end end @testitem "DenseNet" setup=[TestModels] begin - @testset for sz in [121, 161, 169, 201] - m = DenseNet(sz) + sizes = TEST_FAST ? 
[121] : [121, 161, 169, 201] + @testset for sz in sizes + m = DenseNet(sz) |> gpu @test size(m(x_224)) == (1000, 1) if (DenseNet, sz) in PRETRAINED_MODELS - @test acctest(DenseNet(sz, pretrain = true)) + @test acctest(DenseNet(sz; pretrain = true)) else @test_throws ArgumentError DenseNet(sz, pretrain = true) end @@ -248,16 +259,16 @@ end end @testsetup module TestMobileNets - export WIDTH_MULTS - const WIDTH_MULTS = [0.5, 0.75, 1.0, 1.3] +export WIDTH_MULTS +const WIDTH_MULTS = get(ENV, "TEST_FAST", "false") == "true" ? [0.5] : [0.5, 0.75, 1.0, 1.3] end -@testitem "MobileNetsV1" setup=[TestModels, TestMobileNets] begin +@testitem "MobileNetV1" setup=[TestModels, TestMobileNets] begin @testset for width_mult in WIDTH_MULTS - m = MobileNetv1(width_mult) + m = MobileNetv1(width_mult) |> gpu @test size(m(x_224)) == (1000, 1) if (MobileNetv1, width_mult) in PRETRAINED_MODELS - @test acctest(MobileNetv1(pretrain = true)) + @test acctest(MobileNetv1(; pretrain = true)) else @test_throws ArgumentError MobileNetv1(pretrain = true) end @@ -267,11 +278,11 @@ end @testitem "MobileNetv2" setup=[TestModels, TestMobileNets] begin - @testset for width_mult in WIDTH_MULTS - m = MobileNetv2(width_mult) + @testset for width_mult in WIDTH_MULTS + m = MobileNetv2(width_mult) |> gpu @test size(m(x_224)) == (1000, 1) if (MobileNetv2, width_mult) in PRETRAINED_MODELS - @test acctest(MobileNetv2(pretrain = true)) + @test acctest(MobileNetv2(; pretrain = true)) else @test_throws ArgumentError MobileNetv2(pretrain = true) end @test gradtest(m, x_224) end end @@ -279,42 +290,39 @@ end end - @testitem "MobileNetv3" setup=[TestModels, TestMobileNets] begin - @testset for width_mult in WIDTH_MULTS - @testset for config in [:small, :large] - m = MobileNetv3(config; width_mult) - @test size(m(x_224)) == (1000, 1) - if (MobileNetv3, config, width_mult) in PRETRAINED_MODELS - @test acctest(MobileNetv3(config; pretrain = true)) - else - @test_throws ArgumentError MobileNetv3(config; pretrain = true) - end - @test 
gradtest(m, x_224) - _gc() + configs = TEST_FAST ? [:small] : [:small, :large] + @testset for width_mult in WIDTH_MULTS, config in configs + m = MobileNetv3(config; width_mult) |> gpu + @test size(m(x_224)) == (1000, 1) + if (MobileNetv3, config, width_mult) in PRETRAINED_MODELS + @test acctest(MobileNetv3(config; pretrain = true)) + else + @test_throws ArgumentError MobileNetv3(config; pretrain = true) end + @test gradtest(m, x_224) + _gc() end end @testitem "MNASNet" setup=[TestModels, TestMobileNets] begin - @testset for width_mult in WIDTH_MULTS - @testset for config in [:A1, :B1] - m = MNASNet(config; width_mult) - @test size(m(x_224)) == (1000, 1) - if (MNASNet, config, width_mult) in PRETRAINED_MODELS - @test acctest(MNASNet(config; pretrain = true)) - else - @test_throws ArgumentError MNASNet(config; pretrain = true) - end - @test gradtest(m, x_224) - _gc() + @testset for width_mult in WIDTH_MULTS, config in [:A1, :B1] + m = MNASNet(config; width_mult) |> gpu + @test size(m(x_224)) == (1000, 1) + if (MNASNet, config, width_mult) in PRETRAINED_MODELS + @test acctest(MNASNet(config; pretrain = true)) + else + @test_throws ArgumentError MNASNet(config; pretrain = true) end + @test gradtest(m, x_224) + _gc() end end @testitem "ConvNeXt" setup=[TestModels] begin - @testset for config in [:small, :base, :large, :tiny, :xlarge] - m = ConvNeXt(config) + configs = TEST_FAST ? [:small] : [:small, :base, :large, :tiny, :xlarge] + @testset for config in configs + m = ConvNeXt(config) |> gpu @test size(m(x_224)) == (1000, 1) @test gradtest(m, x_224) _gc() @@ -322,8 +330,9 @@ end end @testitem "ConvMixer" setup=[TestModels] begin - @testset for config in [:small, :base, :large] - m = ConvMixer(config) + configs = TEST_FAST ? 
[:small] : [:small, :base, :large] + @testset for config in configs + m = ConvMixer(config) |> gpu @test size(m(x_224)) == (1000, 1) @test gradtest(m, x_224) _gc() @@ -332,11 +341,11 @@ end @testitem "UNet" setup=[TestModels] begin encoder = Metalhead.backbone(ResNet(18)) - model = UNet((256, 256), 3, 10, encoder) + model = UNet((256, 256), 3, 10, encoder) |> gpu @test size(model(x_256)) == (256, 256, 10, 1) @test gradtest(model, x_256) - model = UNet() + model = UNet() |> gpu @test size(model(x_256)) == (256, 256, 3, 1) _gc() end diff --git a/test/mixer_tests.jl b/test/mixer_tests.jl index dd52d5aa..1a03f33f 100644 --- a/test/mixer_tests.jl +++ b/test/mixer_tests.jl @@ -1,26 +1,29 @@ @testitem "MLP-Mixer" setup=[TestModels] begin - @testset for config in [:small, :base, :large] - m, x = MLPMixer(config), x_224 - @test size(m(x)) == (1000, 1) - @test gradtest(m, x) + configs = TEST_FAST ? [:small] : [:small, :base, :large] + @testset for config in configs + m = MLPMixer(config) |> gpu + @test size(m(x_224)) == (1000, 1) + @test gradtest(m, x_224) _gc() end end @testitem "ResMLP" setup=[TestModels] begin - @testset for config in [:small, :base, :large] - m, x = ResMLP(config), x_224 - @test size(m(x)) == (1000, 1) - @test gradtest(m, x) + configs = TEST_FAST ? [:small] : [:small, :base, :large] + @testset for config in configs + m = ResMLP(config) |> gpu + @test size(m(x_224)) == (1000, 1) + @test gradtest(m, x_224) _gc() end end @testitem "gMLP" setup=[TestModels] begin - @testset for config in [:small, :base, :large] - m, x = gMLP(config), x_224 - @test size(m(x)) == (1000, 1) - @test gradtest(m, x) + configs = TEST_FAST ? 
[:small] : [:small, :base, :large] + @testset for config in configs + m = gMLP(config) |> gpu + @test size(m(x_224)) == (1000, 1) + @test gradtest(m, x_224) _gc() end end \ No newline at end of file diff --git a/test/model_tests.jl b/test/model_tests.jl index 88690955..c50e6279 100644 --- a/test/model_tests.jl +++ b/test/model_tests.jl @@ -1,8 +1,9 @@ @testsetup module TestModels using Metalhead, Images -using Flux: Zygote +using Flux: gradient, gpu export PRETRAINED_MODELS, + TEST_FAST, _gc, gradtest, normalize_imagenet, @@ -12,7 +13,8 @@ export PRETRAINED_MODELS, TEST_LBLS, acctest, x_224, - x_256 + x_256, + gpu const PRETRAINED_MODELS = [ # (DenseNet, 121), @@ -40,14 +42,15 @@ const PRETRAINED_MODELS = [ (VGG, 19, false), ] +const TEST_FAST = get(ENV, "TEST_FAST", "false") == "true" + function _gc() GC.safepoint() return GC.gc(true) end function gradtest(model, input) - y, pb = Zygote.pullback(model, input) - pb(ones(Float32, size(y))) + gradient(model, input) # if we make it to here with no error, success! 
return true end @@ -62,17 +65,19 @@ end const TEST_PATH = download("https://cdn.pixabay.com/photo/2015/05/07/11/02/guitar-756326_960_720.jpg") const TEST_IMG = imresize(Images.load(TEST_PATH), (224, 224)) # CHW -> WHC -const TEST_X = permutedims(convert(Array{Float32}, channelview(TEST_IMG)), (3, 2, 1)) |> normalize_imagenet +const TEST_X = let img_array = convert(Array{Float32}, channelview(TEST_IMG)) + permutedims(img_array, (3, 2, 1)) |> normalize_imagenet |> gpu +end # ImageNet labels const TEST_LBLS = readlines(download("https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt")) function acctest(model) - ypred = model(TEST_X) |> vec + ypred = gpu(model)(TEST_X) |> vec top5 = TEST_LBLS[sortperm(ypred; rev = true)] return "acoustic guitar" in top5 end -const x_224 = rand(Float32, 224, 224, 3, 1) -const x_256 = rand(Float32, 256, 256, 3, 1) +const x_224 = rand(Float32, 224, 224, 3, 1) |> gpu +const x_256 = rand(Float32, 256, 256, 3, 1) |> gpu end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index b4e3c626..6bd2e47b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,7 +7,9 @@ const name_filter = test_group == "All" ? nothing : Regex(test_group) @static if VERSION >= v"1.7" using ReTestItems - runtests(Metalhead; name = name_filter) + verbose_results = get(ENV, "CI", "false") == "true" + nworkers = parse(Int, get(ENV, "TEST_WORKERS", "0")) + runtests(Metalhead; name = name_filter, verbose_results, nworkers) else using TestItemRunner function testitem_filter(ti) @@ -17,5 +19,5 @@ end # Not sure why this needs to be split into a separate conditional... 
@static if VERSION < v"1.7" - @run_package_tests filter=testitem_filter + @run_package_tests filter = testitem_filter end \ No newline at end of file diff --git a/test/vit_tests.jl b/test/vit_tests.jl index e38d1fb8..eb9969be 100644 --- a/test/vit_tests.jl +++ b/test/vit_tests.jl @@ -1,6 +1,7 @@ @testitem "ViT" setup=[TestModels] begin - @testset for config in [:tiny, :small, :base, :large, :huge] # :giant, :gigantic] - m = ViT(config) + configs = TEST_FAST ? [:tiny] : [:tiny, :small, :base, :large, :huge] # :giant, :gigantic] + @testset for config in configs + m = ViT(config) |> gpu @test size(m(x_224)) == (1000, 1) @test gradtest(m, x_224) _gc()