From 5faf79d23ac21861def8bb92c425fba89101ed1b Mon Sep 17 00:00:00 2001
From: Dennis Yatunin <dyatun@gmail.com>
Date: Fri, 21 Jul 2023 17:38:45 -0700
Subject: [PATCH] Add operator_matrix to MatrixFields

---
 .buildkite/pipeline.yml                       |  10 +
 src/MatrixFields/MatrixFields.jl              |   2 +
 src/MatrixFields/band_matrix_row.jl           |   9 +
 src/MatrixFields/lazy_operators.jl            |  84 ++
 src/MatrixFields/matrix_multiplication.jl     |   2 +-
 src/MatrixFields/operator_matrices.jl         | 845 ++++++++++++++++++
 src/Operators/finitedifference.jl             |   9 +-
 src/Operators/spectralelement.jl              |  10 +-
 .../MatrixFields/matrix_field_broadcasting.jl |  69 +-
 test/MatrixFields/matrix_field_test_utils.jl  |  89 ++
 test/MatrixFields/operator_matrices.jl        | 188 ++++
 test/runtests.jl                              |   1 +
 12 files changed, 1248 insertions(+), 70 deletions(-)
 create mode 100644 src/MatrixFields/lazy_operators.jl
 create mode 100644 src/MatrixFields/operator_matrices.jl
 create mode 100644 test/MatrixFields/matrix_field_test_utils.jl
 create mode 100644 test/MatrixFields/operator_matrices.jl

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 2a48440d08..63cee89cd1 100755
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -528,6 +528,16 @@ steps:
           slurm_gpus: 1
           slurm_mem: 20GB
 
+      - label: "Unit: operator matrices (CPU)"
+        key: unit_operator_matrices_cpu
+        command: "julia --color=yes --check-bounds=yes --project=test test/MatrixFields/operator_matrices.jl"
+
+      - label: "Unit: operator matrices (GPU)"
+        key: unit_operator_matrices_gpu
+        command: "julia --color=yes --project=test test/MatrixFields/operator_matrices.jl"
+        agents:
+          slurm_gpus: 1
+
   - group: "Unit: Hypsography"
     steps:
 
diff --git a/src/MatrixFields/MatrixFields.jl b/src/MatrixFields/MatrixFields.jl
index 2ca9ec15e0..e38c7b7fe4 100644
--- a/src/MatrixFields/MatrixFields.jl
+++ b/src/MatrixFields/MatrixFields.jl
@@ -52,6 +52,8 @@ include("band_matrix_row.jl")
 include("rmul_with_projection.jl")
 include("matrix_shape.jl")
 include("matrix_multiplication.jl")
+include("lazy_operators.jl")
+include("operator_matrices.jl")
 include("field2arrays.jl")
 
 const ColumnwiseBandMatrixField{V, S} = Fields.Field{
diff --git a/src/MatrixFields/band_matrix_row.jl b/src/MatrixFields/band_matrix_row.jl
index 114e37767a..ec23717f0a 100644
--- a/src/MatrixFields/band_matrix_row.jl
+++ b/src/MatrixFields/band_matrix_row.jl
@@ -12,6 +12,8 @@ several aliases for commonly used subtypes of `BandMatrixRow`:
 - `TridiagonalMatrixRow(entry_1, entry_2, entry_3)`
 - `QuaddiagonalMatrixRow(entry_1, entry_2, entry_3, entry_4)`
 - `PentadiagonalMatrixRow(entry_1, entry_2, entry_3, entry_4, entry_5)`
+It is also possible to construct band matrix rows from other band matrix rows,
+e.g., `QuaddiagonalMatrixRow(BidiagonalMatrixRow(entry_1, entry_2))`.
 """
 struct BandMatrixRow{ld, bw, T} # bw is the bandwidth (the number of diagonals)
     entries::NTuple{bw, T}
@@ -25,6 +27,13 @@ BandMatrixRow{ld}(entries::Vararg{Any, bw}) where {ld, bw} =
 BandMatrixRow{ld, bw}(entries::Vararg{Any, bw}) where {ld, bw} =
     BandMatrixRow{ld, bw, rpromote_type(map(typeof, entries)...)}(entries)
 
+BandMatrixRow{ld, bw}(row::BandMatrixRow) where {ld, bw} =
+    BandMatrixRow{ld, bw, eltype(row)}(row)
+BandMatrixRow{ld, bw, T}(row::BandMatrixRow) where {ld, bw, T} =
+    convert(BandMatrixRow{ld, bw, T}, row)
+
+BandMatrixRow{ld, 0}() where {ld} = BandMatrixRow{ld, 0, Nothing}(())
+
 const DiagonalMatrixRow{T} = BandMatrixRow{0, 1, T}
 const BidiagonalMatrixRow{T} = BandMatrixRow{-1 + half, 2, T}
 const TridiagonalMatrixRow{T} = BandMatrixRow{-1, 3, T}
diff --git a/src/MatrixFields/lazy_operators.jl b/src/MatrixFields/lazy_operators.jl
new file mode 100644
index 0000000000..4ae18c69f2
--- /dev/null
+++ b/src/MatrixFields/lazy_operators.jl
@@ -0,0 +1,84 @@
+abstract type AbstractLazyOperator end
+
+abstract type AbstractLazyOperatorStyle <: Base.Broadcast.BroadcastStyle end
+struct LazyOperatorStyle <: AbstractLazyOperatorStyle end
+struct LazyOperatorStyleWithSpaceAvailable <: AbstractLazyOperatorStyle end
+
+Base.Broadcast.broadcastable(lazy_op::AbstractLazyOperator) = lazy_op
+
+Base.Broadcast.BroadcastStyle(::Type{<:AbstractLazyOperator}) =
+    LazyOperatorStyle()
+
+# Broadcasting over an AbstractLazyOperator and either a Ref or a Tuple
+Base.Broadcast.BroadcastStyle(
+    style::AbstractLazyOperatorStyle,
+    ::Union{Base.Broadcast.AbstractArrayStyle{0}, Base.Broadcast.Style{Tuple}},
+) = style
+
+# Broadcasting over an AbstractLazyOperator and at least one Field
+Base.Broadcast.BroadcastStyle(
+    ::AbstractLazyOperatorStyle,
+    ::Fields.AbstractFieldStyle,
+) = LazyOperatorStyleWithSpaceAvailable()
+
+# Broadcasting over more than one AbstractLazyOperator
+Base.Broadcast.BroadcastStyle(
+    ::LazyOperatorStyle,
+    ::LazyOperatorStyleWithSpaceAvailable,
+) = LazyOperatorStyleWithSpaceAvailable()
+
+Base.Broadcast.materialize(::Base.Broadcast.Broadcasted{LazyOperatorStyle}) =
+    error("Cannot materialize lazy operator broadcast because it does not \
+           contain any Fields with spatial information")
+
+Base.Broadcast.materialize(
+    bc::Base.Broadcast.Broadcasted{LazyOperatorStyleWithSpaceAvailable},
+) = Base.Broadcast.materialize(remove_lazy_operators(largest_space(bc), bc))
+
+Base.Broadcast.materialize!(
+    ::AbstractLazyOperatorStyle,
+    dest::Fields.Field,
+    bc::Base.Broadcast.Broadcasted,
+) = Base.Broadcast.materialize!(dest, remove_lazy_operators(axes(dest), bc))
+
+remove_lazy_operators(_, arg) = arg
+remove_lazy_operators(space, bc::Base.Broadcast.Broadcasted) =
+    Base.Broadcast.broadcasted(
+        bc.f,
+        map(arg -> remove_lazy_operators(space, arg), bc.args)...,
+        # remove_lazy_operators_args(space, bc.args...)...,
+    )
+remove_lazy_operators(_, lazy_op::AbstractLazyOperator) =
+    error("Missing method: remove_lazy_operators(space, ::$(typeof(lazy_op)))")
+
+# remove_lazy_operators_args(_) = ()
+# remove_lazy_operators_args(space, arg, args...) = (
+#     remove_lazy_operators(space, arg),
+#     remove_lazy_operators_args(space, args...)...,
+# )
+
+largest_space(_) = nothing
+largest_space(field::Fields.Field) = axes(field)
+largest_space(bc::Base.Broadcast.Broadcasted) = largest_space(bc.args...)
+largest_space(arg, args...) =
+    larger_space(largest_space(arg), largest_space(args...))
+
+larger_space(space1, ::Nothing) = space1
+larger_space(::Nothing, space2) = space2
+larger_space(space1::S, ::S) where {S} = space1 # Neither space is larger.
+larger_space(
+    space1::Spaces.ExtrudedFiniteDifferenceSpace,
+    ::Spaces.ExtrudedFiniteDifferenceSpace,
+) = space1 # The staggering does not matter here, so neither space is larger.
+larger_space(
+    space1::Spaces.FiniteDifferenceSpace,
+    ::Spaces.FiniteDifferenceSpace,
+) = space1 # The staggering does not matter here, so neither space is larger.
+larger_space(
+    space1::Spaces.ExtrudedFiniteDifferenceSpace,
+    ::Spaces.AbstractSpectralElementSpace,
+) = space1 # The types indicate that space2 is a subspace of space1.
+larger_space(
+    ::Spaces.AbstractSpectralElementSpace,
+    space2::Spaces.ExtrudedFiniteDifferenceSpace,
+) = space2 # The types indicate that space1 is a subspace of space2.
diff --git a/src/MatrixFields/matrix_multiplication.jl b/src/MatrixFields/matrix_multiplication.jl
index 5ad6a5393c..b0fa506369 100644
--- a/src/MatrixFields/matrix_multiplication.jl
+++ b/src/MatrixFields/matrix_multiplication.jl
@@ -266,7 +266,7 @@ function multiply_matrix_at_index(
             if is_interior || ld1_or_boundary_ld1 <= d <= ud1_or_boundary_ud1
                 @inbounds Operators.getidx(space, matrix2, loc, idx + d, hidx)
             else
-                rzero(eltype(matrix2)) # This value is never used.
+                zero(eltype(matrix2)) # This value is never used.
             end
         end # The rows are precomputed to avoid recomputing them multiple times.
         matrix2_rows_wrapper = BandMatrixRow{ld1}(matrix2_rows...)
diff --git a/src/MatrixFields/operator_matrices.jl b/src/MatrixFields/operator_matrices.jl
new file mode 100644
index 0000000000..e3a53a6b81
--- /dev/null
+++ b/src/MatrixFields/operator_matrices.jl
@@ -0,0 +1,845 @@
+# Note: This list must be kept up-to-date with finitedifference.jl.
+const OneArgFDOperatorWithCenterInput = Union{
+    Operators.InterpolateC2F,
+    Operators.LeftBiasedC2F,
+    Operators.RightBiasedC2F,
+    Operators.GradientC2F,
+    Operators.DivergenceC2F,
+    Operators.CurlC2F,
+}
+const OneArgFDOperatorWithFaceInput = Union{
+    Operators.InterpolateF2C,
+    Operators.LeftBiasedF2C,
+    Operators.RightBiasedF2C,
+    Operators.GradientF2C,
+    Operators.DivergenceF2C,
+}
+const TwoArgFDOperatorWithCenterInput = Union{
+    Operators.WeightedInterpolateC2F,
+    Operators.UpwindBiasedProductC2F,
+    Operators.Upwind3rdOrderBiasedProductC2F,
+    Operators.AdvectionC2C,
+    Operators.FluxCorrectionC2C,
+}
+const TwoArgFDOperatorWithFaceInput = Union{
+    Operators.WeightedInterpolateF2C,
+    Operators.AdvectionF2F,
+    Operators.FluxCorrectionF2F,
+}
+const NonlinearFDOperator = Union{
+    Operators.FCTBorisBook,
+    Operators.FCTZalesak,
+    Operators.SetBoundaryOperator,
+}
+const ErroneousFDOperator = Union{
+    Operators.LeftBiased3rdOrderC2F,
+    Operators.LeftBiased3rdOrderF2C,
+    Operators.RightBiased3rdOrderC2F,
+    Operators.RightBiased3rdOrderF2C,
+}
+
+const OneArgFDOperator =
+    Union{OneArgFDOperatorWithCenterInput, OneArgFDOperatorWithFaceInput}
+const TwoArgFDOperator =
+    Union{TwoArgFDOperatorWithCenterInput, TwoArgFDOperatorWithFaceInput}
+
+const FDOperatorWithCenterInput =
+    Union{OneArgFDOperatorWithCenterInput, TwoArgFDOperatorWithCenterInput}
+const FDOperatorWithFaceInput =
+    Union{OneArgFDOperatorWithFaceInput, TwoArgFDOperatorWithFaceInput}
+
+operator_input_space(
+    ::FDOperatorWithCenterInput,
+    space::Spaces.FiniteDifferenceSpace,
+) = Spaces.CenterFiniteDifferenceSpace(space)
+operator_input_space(
+    ::FDOperatorWithCenterInput,
+    space::Spaces.ExtrudedFiniteDifferenceSpace,
+) = Spaces.CenterExtrudedFiniteDifferenceSpace(space)
+operator_input_space(
+    ::FDOperatorWithFaceInput,
+    space::Spaces.FiniteDifferenceSpace,
+) = Spaces.FaceFiniteDifferenceSpace(space)
+operator_input_space(
+    ::FDOperatorWithFaceInput,
+    space::Spaces.ExtrudedFiniteDifferenceSpace,
+) = Spaces.FaceExtrudedFiniteDifferenceSpace(space)
+
+################################################################################
+
+const AffineBoundaryCondition = Union{
+    Operators.SetValue,
+    Operators.SetGradient,
+    Operators.SetDivergence,
+    Operators.SetCurl,
+}
+
+const LowerEmptyMatrixRow{T} = BandMatrixRow{-1 + half, 0, T}       #    _
+const UpperEmptyMatrixRow{T} = BandMatrixRow{half, 0, T}            #    _
+const LowerDiagonalMatrixRow{T} = BandMatrixRow{-1 + half, 1, T}    # -0.5
+const UpperDiagonalMatrixRow{T} = BandMatrixRow{half, 1, T}         #  0.5
+const LowerBidiagonalMatrixRow{T} = BandMatrixRow{-2 + half, 2, T}  # -1.5, -0.5
+const UpperBidiagonalMatrixRow{T} = BandMatrixRow{half, 2, T}       #  0.5,  1.5
+const LowerTridiagonalMatrixRow{T} = BandMatrixRow{-2 + half, 3, T} # -1.5, -0.5, 0.5
+const UpperTridiagonalMatrixRow{T} = BandMatrixRow{-1 + half, 3, T} # -0.5,  0.5, 1.5
+
+const C3{T} = Geometry.Covariant3Vector{T}
+const CT3{T} = Geometry.Contravariant3Vector{T}
+const CT12_CT12{T} = Geometry.Axis2Tensor{
+    T,
+    Tuple{Geometry.Contravariant12Axis, Geometry.Contravariant12Axis},
+}
+
+################################################################################
+
+"""
+    operator_matrix(op)
+
+Constructs a new operator or operator-like object that generates the matrix
+applied by `op` to its argument. That is, if `op_matrix = operator_matrix(op)`,
+then either `@. op(arg) == @. op_matrix ⋅ arg` or
+`@. op(args..., arg) = @. op_matrix(args...) ⋅ arg`, depending on whether `op`
+takes one or more arguments. This will throw an error if `op` is not a linear
+operator.
+"""
+operator_matrix(op::OneArgFDOperator) = LazyOneArgFDOperatorMatrix(op)
+operator_matrix(op::TwoArgFDOperator) = GetFDOperatorMatrix(op)
+operator_matrix(::O) where {O <: NonlinearFDOperator} = error(
+    "$(O.name.name) applies a nonlinear transformation to its argument, so it \
+     cannot be represented by a matrix",
+)
+operator_matrix(::O) where {O <: ErroneousFDOperator} = error(
+    "$(O.name.name) always throws an AssertionError when it is used, so its \
+     operator matrix has not been implemented",
+)
+operator_matrix(::O) where {O <: Operators.AbstractOperator} =
+    error("operator_matrix has not been defined for $(O.name.name)")
+
+struct LazyOneArgFDOperatorMatrix{O <: OneArgFDOperator} <: AbstractLazyOperator
+    op::O
+end
+
+# Since we don't have any args, we need to use a lazy operator to add one.
+remove_lazy_operators(space, lazy_op::LazyOneArgFDOperatorMatrix) =
+    Base.Broadcast.broadcasted(
+        GetFDOperatorMatrix(lazy_op.op),
+        Fields.local_geometry_field(operator_input_space(lazy_op.op, space)),
+    )
+
+struct GetFDOperatorMatrix{O <: Operators.FiniteDifferenceOperator} <:
+       Operators.FiniteDifferenceOperator
+    op::O
+end
+function GetFDOperatorMatrix(op::O) where {O}
+    has_affine_bc = any(op.bcs) do bc
+        bc isa AffineBoundaryCondition && bc.val != rzero(typeof(bc.val))
+    end
+    has_affine_bc && error(
+        "$O applies an affine transformation to its argument because of the \
+         boundary conditions it has been assigned, so it cannot be represented \
+         by a matrix",
+    )
+    return GetFDOperatorMatrix{O}(op)
+end
+
+# Since we already have the first arg, we can modify Base.broadcasted to add the
+# second arg.
+Base.Broadcast.broadcasted(
+    op_matrix::GetFDOperatorMatrix{<:TwoArgFDOperator},
+    arg,
+) = Base.Broadcast.broadcasted(
+    op_matrix,
+    arg,
+    Fields.local_geometry_field(operator_input_space(op_matrix.op, axes(arg))),
+)
+
+Operators.has_boundary(
+    op_matrix::GetFDOperatorMatrix,
+    lbw::Operators.LeftBoundaryWindow{name},
+) where {name} = Operators.has_boundary(op_matrix.op, lbw)
+Operators.has_boundary(
+    op_matrix::GetFDOperatorMatrix,
+    rbw::Operators.RightBoundaryWindow{name},
+) where {name} = Operators.has_boundary(op_matrix.op, rbw)
+
+Operators.get_boundary(
+    op_matrix::GetFDOperatorMatrix,
+    lbw::Operators.LeftBoundaryWindow{name},
+) where {name} = Operators.get_boundary(op_matrix.op, lbw)
+Operators.get_boundary(
+    op_matrix::GetFDOperatorMatrix,
+    rbw::Operators.RightBoundaryWindow{name},
+) where {name} = Operators.get_boundary(op_matrix.op, rbw)
+
+Operators.stencil_interior_width(op_matrix::GetFDOperatorMatrix, args...) =
+    Operators.stencil_interior_width(op_matrix.op, args...)
+
+Operators.left_interior_idx(
+    space::Spaces.AbstractSpace,
+    op_matrix::GetFDOperatorMatrix,
+    bc::Operators.AbstractBoundaryCondition,
+    args...,
+) = Operators.left_interior_idx(space, op_matrix.op, bc, args...)
+
+Operators.right_interior_idx(
+    space::Spaces.AbstractSpace,
+    op_matrix::GetFDOperatorMatrix,
+    bc::Operators.AbstractBoundaryCondition,
+    args...,
+) = Operators.right_interior_idx(space, op_matrix.op, bc, args...)
+
+Operators.return_space(op_matrix::GetFDOperatorMatrix, spaces...) =
+    Operators.return_space(op_matrix.op, spaces...)
+
+Operators.return_eltype(op_matrix::GetFDOperatorMatrix, args...) = typeof(
+    op_matrix_interior_row(op_matrix.op, Geometry.undertype(eltype(args[end]))),
+)
+
+Base.@propagate_inbounds Operators.stencil_interior(
+    op_matrix::GetFDOperatorMatrix,
+    loc,
+    space,
+    idx,
+    hidx,
+    args...,
+) = op_matrix_interior_row(
+    op_matrix.op,
+    space,
+    loc,
+    idx,
+    hidx,
+    args[1:(end - 1)]...,
+)
+
+Base.@propagate_inbounds Operators.stencil_left_boundary(
+    op_matrix::GetFDOperatorMatrix,
+    bc::Operators.AbstractBoundaryCondition,
+    loc,
+    space,
+    idx,
+    hidx,
+    args...,
+) = op_matrix_first_row(
+    op_matrix.op,
+    bc,
+    space,
+    loc,
+    idx,
+    hidx,
+    args[1:(end - 1)]...,
+)
+
+Base.@propagate_inbounds Operators.stencil_right_boundary(
+    op_matrix::GetFDOperatorMatrix,
+    bc::Operators.AbstractBoundaryCondition,
+    loc,
+    space,
+    idx,
+    hidx,
+    args...,
+) = op_matrix_last_row(
+    op_matrix.op,
+    bc,
+    space,
+    loc,
+    idx,
+    hidx,
+    args[1:(end - 1)]...,
+)
+
+op_matrix_interior_row(op, space, loc, idx, hidx, args...) =
+    op_matrix_interior_row(op, Spaces.undertype(space))
+op_matrix_first_row(op, bc, space, loc, idx, hidx, args...) =
+    op_matrix_first_row(op, bc, Spaces.undertype(space))
+op_matrix_last_row(op, bc, space, loc, idx, hidx, args...) =
+    op_matrix_last_row(op, bc, Spaces.undertype(space))
+
+uses_extrapolate(op) = any(bc -> bc isa Operators.Extrapolate, op.bcs)
+
+Base.@propagate_inbounds ct3_data(velocity, space, loc, idx, hidx) =
+    Geometry.contravariant3(
+        Operators.getidx(space, velocity, loc, idx, hidx),
+        Geometry.LocalGeometry(space, idx, hidx),
+    )
+
+op_matrix_interior_row(
+    ::Union{Operators.InterpolateC2F, Operators.InterpolateF2C},
+    ::Type{FT},
+) where {FT} = BidiagonalMatrixRow(FT(1), FT(1)) / 2
+op_matrix_first_row(
+    ::Operators.InterpolateC2F,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(FT(0))
+op_matrix_last_row(
+    ::Operators.InterpolateC2F,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(FT(0))
+op_matrix_first_row(
+    ::Operators.InterpolateC2F,
+    ::Operators.SetGradient,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(FT(1))
+op_matrix_last_row(
+    ::Operators.InterpolateC2F,
+    ::Operators.SetGradient,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(FT(1))
+op_matrix_first_row(
+    ::Operators.InterpolateC2F,
+    ::Operators.Extrapolate,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(FT(1))
+op_matrix_last_row(
+    ::Operators.InterpolateC2F,
+    ::Operators.Extrapolate,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(FT(1))
+
+op_matrix_interior_row(
+    ::Union{Operators.LeftBiasedC2F, Operators.LeftBiasedF2C},
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(FT(1))
+op_matrix_first_row(
+    ::Operators.LeftBiasedC2F,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = LowerEmptyMatrixRow()
+op_matrix_first_row(
+    ::Operators.LeftBiasedF2C,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(FT(0))
+
+op_matrix_interior_row(
+    ::Union{Operators.RightBiasedC2F, Operators.RightBiasedF2C},
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(FT(1))
+op_matrix_last_row(
+    ::Operators.RightBiasedC2F,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = UpperEmptyMatrixRow()
+op_matrix_last_row(
+    ::Operators.RightBiasedF2C,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(FT(0))
+
+Operators.return_eltype(
+    ::GetFDOperatorMatrix{<:Operators.WeightedInterpolationOperator},
+    weight,
+    _,
+) = BidiagonalMatrixRow{eltype(weight)}
+Base.@propagate_inbounds function op_matrix_interior_row(
+    ::Operators.WeightedInterpolationOperator,
+    space,
+    loc,
+    idx,
+    hidx,
+    weight,
+)
+    w⁻ = Operators.getidx(space, weight, loc, idx - half, hidx)
+    w⁺ = Operators.getidx(space, weight, loc, idx + half, hidx)
+    denominator = radd(w⁻, w⁺)
+    return BidiagonalMatrixRow(rdiv(w⁻, denominator), rdiv(w⁺, denominator))
+end
+op_matrix_first_row(
+    ::Operators.WeightedInterpolateC2F,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(FT(0))
+op_matrix_last_row(
+    ::Operators.WeightedInterpolateC2F,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(FT(0))
+op_matrix_first_row(
+    ::Operators.WeightedInterpolateC2F,
+    ::Operators.SetGradient,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(FT(1))
+op_matrix_last_row(
+    ::Operators.WeightedInterpolateC2F,
+    ::Operators.SetGradient,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(FT(1))
+op_matrix_first_row(
+    ::Operators.WeightedInterpolateC2F,
+    ::Operators.Extrapolate,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(FT(1))
+op_matrix_last_row(
+    ::Operators.WeightedInterpolateC2F,
+    ::Operators.Extrapolate,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(FT(1))
+
+function Operators.return_eltype(
+    op_matrix::GetFDOperatorMatrix{<:Operators.UpwindBiasedProductC2F},
+    velocity,
+    _,
+)
+    T = CT3{eltype(eltype(velocity))}
+    return uses_extrapolate(op_matrix.op) ? QuaddiagonalMatrixRow{T} :
+           BidiagonalMatrixRow{T}
+end
+Base.@propagate_inbounds function op_matrix_interior_row(
+    op::Operators.UpwindBiasedProductC2F,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx, hidx))
+    av³ = CT3(abs(v³.u³))
+    matrix_row = BidiagonalMatrixRow(v³ + av³, v³ - av³) / 2
+    return uses_extrapolate(op) ? QuaddiagonalMatrixRow(matrix_row) : matrix_row
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.UpwindBiasedProductC2F,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx, hidx))
+    av³ = CT3(abs(v³.u³))
+    return UpperDiagonalMatrixRow(v³ - av³) / 2
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.UpwindBiasedProductC2F,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx, hidx))
+    av³ = CT3(abs(v³.u³))
+    return LowerDiagonalMatrixRow(v³ + av³) / 2
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.UpwindBiasedProductC2F,
+    ::Operators.Extrapolate,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx + 1, hidx))
+    av³ = CT3(abs(v³.u³))
+    return UpperBidiagonalMatrixRow(v³ + av³, v³ - av³) / 2
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.UpwindBiasedProductC2F,
+    ::Operators.Extrapolate,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx - 1, hidx))
+    av³ = CT3(abs(v³.u³))
+    return LowerBidiagonalMatrixRow(v³ + av³, v³ - av³) / 2
+end
+
+Operators.return_eltype(
+    ::GetFDOperatorMatrix{<:Operators.Upwind3rdOrderBiasedProductC2F},
+    velocity,
+    _,
+) = QuaddiagonalMatrixRow{CT3{eltype(eltype(velocity))}}
+Base.@propagate_inbounds function op_matrix_interior_row(
+    ::Operators.Upwind3rdOrderBiasedProductC2F,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx, hidx))
+    av³ = CT3(abs(v³.u³))
+    return QuaddiagonalMatrixRow(-v³ - av³, 7v³ + 3av³, 7v³ - 3av³, -v³ + av³) /
+           12
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.Upwind3rdOrderBiasedProductC2F,
+    ::Operators.FirstOrderOneSided,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx, hidx))
+    av³ = CT3(abs(v³.u³))
+    return UpperTridiagonalMatrixRow(8v³ + 4av³, 5v³ - 5av³, -v³ + av³) / 12
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.Upwind3rdOrderBiasedProductC2F,
+    ::Operators.FirstOrderOneSided,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx, hidx))
+    av³ = CT3(abs(v³.u³))
+    return LowerTridiagonalMatrixRow(-v³ - av³, 5v³ + 5av³, 8v³ - 4av³) / 12
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.Upwind3rdOrderBiasedProductC2F,
+    ::Operators.ThirdOrderOneSided,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx, hidx))
+    return UpperTridiagonalMatrixRow(4v³, 10v³, -2v³) / 12
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.Upwind3rdOrderBiasedProductC2F,
+    ::Operators.ThirdOrderOneSided,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³ = CT3(ct3_data(velocity, space, loc, idx, hidx))
+    return LowerTridiagonalMatrixRow(-2v³, 10v³, 4v³) / 12
+end
+
+Operators.return_eltype(
+    ::GetFDOperatorMatrix{<:Operators.AdvectionOperator},
+    velocity,
+    _,
+) = TridiagonalMatrixRow{eltype(eltype(velocity))}
+Base.@propagate_inbounds function op_matrix_interior_row(
+    ::Operators.AdvectionC2C,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³⁻_data = ct3_data(velocity, space, loc, idx - half, hidx)
+    v³⁺_data = ct3_data(velocity, space, loc, idx + half, hidx)
+    return TridiagonalMatrixRow(-v³⁻_data, v³⁻_data - v³⁺_data, v³⁺_data) / 2
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.AdvectionC2C,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³⁻_data = ct3_data(velocity, space, loc, idx - half, hidx)
+    v³⁺_data = ct3_data(velocity, space, loc, idx + half, hidx)
+    return BandMatrixRow{0}(2 * v³⁻_data - v³⁺_data, v³⁺_data) / 2
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.AdvectionC2C,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³⁻_data = ct3_data(velocity, space, loc, idx - half, hidx)
+    v³⁺_data = ct3_data(velocity, space, loc, idx + half, hidx)
+    return BandMatrixRow{-1}(-v³⁻_data, v³⁻_data - 2 * v³⁺_data) / 2
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.AdvectionC2C,
+    ::Operators.Extrapolate,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³⁺_data = ct3_data(velocity, space, loc, idx + half, hidx)
+    return BandMatrixRow{0}(-v³⁺_data, v³⁺_data)
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.AdvectionC2C,
+    ::Operators.Extrapolate,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    v³⁻_data = ct3_data(velocity, space, loc, idx - half, hidx)
+    return BandMatrixRow{-1}(-v³⁻_data, v³⁻_data)
+end
+Base.@propagate_inbounds function op_matrix_interior_row(
+    ::Operators.AdvectionF2F,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    FT = Spaces.undertype(space)
+    v³_data = ct3_data(velocity, space, loc, idx, hidx)
+    return TridiagonalMatrixRow(-v³_data, FT(0), v³_data) / 2
+end
+Base.@propagate_inbounds function op_matrix_interior_row(
+    ::Union{Operators.FluxCorrectionC2C, Operators.FluxCorrectionF2F},
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    av³⁻_data = abs(ct3_data(velocity, space, loc, idx - half, hidx))
+    av³⁺_data = abs(ct3_data(velocity, space, loc, idx + half, hidx))
+    return TridiagonalMatrixRow(av³⁻_data, -av³⁻_data - av³⁺_data, av³⁺_data)
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Union{Operators.FluxCorrectionC2C, Operators.FluxCorrectionF2F},
+    ::Operators.Extrapolate,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    av³⁺_data = abs(ct3_data(velocity, space, loc, idx + half, hidx))
+    return BandMatrixRow{0}(-av³⁺_data, av³⁺_data)
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Union{Operators.FluxCorrectionC2C, Operators.FluxCorrectionF2F},
+    ::Operators.Extrapolate,
+    space,
+    loc,
+    idx,
+    hidx,
+    velocity,
+)
+    av³⁻_data = abs(ct3_data(velocity, space, loc, idx - half, hidx))
+    return BandMatrixRow{-1}(av³⁻_data, -av³⁻_data)
+end
+
+function op_matrix_interior_row(
+    op::Operators.GradientOperator,
+    ::Type{FT},
+) where {FT}
+    matrix_row = BidiagonalMatrixRow(-C3(FT(1)), C3(FT(1)))
+    return uses_extrapolate(op) ? QuaddiagonalMatrixRow(matrix_row) : matrix_row
+end
+op_matrix_first_row(
+    ::Operators.GradientC2F,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(C3(FT(2)))
+op_matrix_last_row(
+    ::Operators.GradientC2F,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(-C3(FT(2)))
+op_matrix_first_row(
+    ::Operators.GradientC2F,
+    ::Operators.SetGradient,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(C3(FT(0)))
+op_matrix_last_row(
+    ::Operators.GradientC2F,
+    ::Operators.SetGradient,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(C3(FT(0)))
+op_matrix_first_row(
+    ::Operators.GradientF2C,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = BidiagonalMatrixRow(C3(FT(0)), C3(FT(1)))
+op_matrix_last_row(
+    ::Operators.GradientF2C,
+    ::Operators.SetValue,
+    ::Type{FT},
+) where {FT} = BidiagonalMatrixRow(-C3(FT(1)), C3(FT(0)))
+op_matrix_first_row(
+    ::Operators.GradientF2C,
+    ::Operators.Extrapolate,
+    ::Type{FT},
+) where {FT} = UpperTridiagonalMatrixRow(C3(FT(0)), -C3(FT(1)), C3(FT(1)))
+op_matrix_last_row(
+    ::Operators.GradientF2C,
+    ::Operators.Extrapolate,
+    ::Type{FT},
+) where {FT} = LowerTridiagonalMatrixRow(-C3(FT(1)), C3(FT(1)), C3(FT(0)))
+
+function Operators.return_eltype(
+    op_matrix::GetFDOperatorMatrix{<:Operators.DivergenceOperator},
+    lg,
+)
+    T = adjoint_type(C3{Geometry.undertype(eltype(lg))})
+    return uses_extrapolate(op_matrix.op) ? QuaddiagonalMatrixRow{T} :
+           BidiagonalMatrixRow{T}
+end
+Base.@propagate_inbounds function op_matrix_interior_row(
+    op::Operators.DivergenceOperator,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    invJ = Geometry.LocalGeometry(space, idx, hidx).invJ
+    J⁻ = Geometry.LocalGeometry(space, idx - half, hidx).J
+    J⁺ = Geometry.LocalGeometry(space, idx + half, hidx).J
+    matrix_row = BidiagonalMatrixRow(-C3(J⁻)', C3(J⁺)') * invJ
+    return uses_extrapolate(op) ? QuaddiagonalMatrixRow(matrix_row) : matrix_row
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.DivergenceC2F,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    invJ = Geometry.LocalGeometry(space, idx, hidx).invJ
+    J⁺ = Geometry.LocalGeometry(space, idx + half, hidx).J
+    return UpperDiagonalMatrixRow(C3(J⁺)') * 2 * invJ
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.DivergenceC2F,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    invJ = Geometry.LocalGeometry(space, idx, hidx).invJ
+    J⁻ = Geometry.LocalGeometry(space, idx - half, hidx).J
+    return LowerDiagonalMatrixRow(-C3(J⁻)') * 2 * invJ
+end
+op_matrix_first_row(
+    ::Operators.DivergenceC2F,
+    ::Operators.SetDivergence,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(C3(FT(0))')
+op_matrix_last_row(
+    ::Operators.DivergenceC2F,
+    ::Operators.SetDivergence,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(C3(FT(0))')
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.DivergenceF2C,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    FT = Spaces.undertype(space)
+    invJ = Geometry.LocalGeometry(space, idx, hidx).invJ
+    J⁺ = Geometry.LocalGeometry(space, idx + half, hidx).J
+    return BidiagonalMatrixRow(C3(FT(0))', C3(J⁺)') * invJ
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.DivergenceF2C,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    FT = Spaces.undertype(space)
+    invJ = Geometry.LocalGeometry(space, idx, hidx).invJ
+    J⁻ = Geometry.LocalGeometry(space, idx - half, hidx).J
+    return BidiagonalMatrixRow(-C3(J⁻)', C3(FT(0))') * invJ
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.DivergenceF2C,
+    ::Operators.Extrapolate,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    FT = Spaces.undertype(space)
+    invJ = Geometry.LocalGeometry(space, idx + 1, hidx).invJ
+    J⁻ = Geometry.LocalGeometry(space, idx + 1 - half, hidx).J
+    J⁺ = Geometry.LocalGeometry(space, idx + 1 + half, hidx).J
+    return UpperTridiagonalMatrixRow(C3(FT(0))', -C3(J⁻)', C3(J⁺)') * invJ
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.DivergenceF2C,
+    ::Operators.Extrapolate,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    FT = Spaces.undertype(space)
+    invJ = Geometry.LocalGeometry(space, idx - 1, hidx).invJ
+    J⁻ = Geometry.LocalGeometry(space, idx - 1 - half, hidx).J
+    J⁺ = Geometry.LocalGeometry(space, idx - 1 + half, hidx).J
+    return LowerTridiagonalMatrixRow(-C3(J⁻)', C3(J⁺)', C3(FT(0))') * invJ
+end
+
+Operators.return_eltype(
+    ::GetFDOperatorMatrix{<:Operators.CurlFiniteDifferenceOperator},
+    lg,
+) = BidiagonalMatrixRow{CT12_CT12{Geometry.undertype(eltype(lg))}}
+Base.@propagate_inbounds function op_matrix_interior_row(
+    ::Operators.CurlFiniteDifferenceOperator,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    invJ = Geometry.LocalGeometry(space, idx, hidx).invJ
+    tensor = CT12_CT12(SMatrix{2, 2}(0, 1, -1, 0))
+    return BidiagonalMatrixRow(-tensor, tensor) * invJ
+end
+Base.@propagate_inbounds function op_matrix_first_row(
+    ::Operators.CurlC2F,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    invJ = Geometry.LocalGeometry(space, idx, hidx).invJ
+    tensor = CT12_CT12(SMatrix{2, 2}(0, 1, -1, 0))
+    return UpperDiagonalMatrixRow(tensor) * 2 * invJ
+end
+Base.@propagate_inbounds function op_matrix_last_row(
+    ::Operators.CurlC2F,
+    ::Operators.SetValue,
+    space,
+    loc,
+    idx,
+    hidx,
+)
+    invJ = Geometry.LocalGeometry(space, idx, hidx).invJ
+    tensor = CT12_CT12(SMatrix{2, 2}(0, 1, -1, 0))
+    return LowerDiagonalMatrixRow(-tensor) * 2 * invJ
+end
+op_matrix_first_row(
+    ::Operators.CurlC2F,
+    ::Operators.SetCurl,
+    ::Type{FT},
+) where {FT} = UpperDiagonalMatrixRow(CT12_CT12(SMatrix{2, 2, FT}(0, 0, 0, 0)))
+op_matrix_last_row(
+    ::Operators.CurlC2F,
+    ::Operators.SetCurl,
+    ::Type{FT},
+) where {FT} = LowerDiagonalMatrixRow(CT12_CT12(SMatrix{2, 2, FT}(0, 0, 0, 0)))
diff --git a/src/Operators/finitedifference.jl b/src/Operators/finitedifference.jl
index 79b4d0c599..4abbc9b014 100644
--- a/src/Operators/finitedifference.jl
+++ b/src/Operators/finitedifference.jl
@@ -2418,7 +2418,7 @@ Base.@propagate_inbounds function stencil_left_boundary(
 )
     @assert idx == left_center_boundary_idx(space)
     Geometry.project(
-        Geometery.Covariant3Axis(),
+        Geometry.Covariant3Axis(),
         stencil_interior(op, loc, space, idx + 1, hidx, arg),
         Geometry.LocalGeometry(space, idx, hidx),
     )
@@ -3326,7 +3326,12 @@ Base.@propagate_inbounds function setidx!(
 end
 
 function Base.Broadcast.broadcasted(op::FiniteDifferenceOperator, args...)
-    Base.Broadcast.broadcasted(StencilStyle(), op, args...)
+    args′ = map(Base.Broadcast.broadcastable, args)
+    style = Base.Broadcast.result_style(
+        StencilStyle(),
+        Base.Broadcast.combine_styles(args′...),
+    )
+    Base.Broadcast.broadcasted(style, op, args′...)
 end
 
 function Base.Broadcast.broadcasted(
diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl
index 373c229533..4fc493d3cc 100644
--- a/src/Operators/spectralelement.jl
+++ b/src/Operators/spectralelement.jl
@@ -105,8 +105,14 @@ Adapt.adapt_structure(to, sbc::SpectralBroadcasted{Style}) where {Style} =
 return_space(::SpectralElementOperator, space) = space
 
 
-Base.Broadcast.broadcasted(op::SpectralElementOperator, args...) =
-    Base.Broadcast.broadcasted(SpectralStyle(), op, args...)
+function Base.Broadcast.broadcasted(op::SpectralElementOperator, args...)
+    args′ = map(Base.Broadcast.broadcastable, args)
+    style = Base.Broadcast.result_style(
+        SpectralStyle(),
+        Base.Broadcast.combine_styles(args′...),
+    )
+    Base.Broadcast.broadcasted(style, op, args′...)
+end
 
 Base.Broadcast.broadcasted(
     ::SpectralStyle,
diff --git a/test/MatrixFields/matrix_field_broadcasting.jl b/test/MatrixFields/matrix_field_broadcasting.jl
index 5b0d4aee5e..18a00d6315 100644
--- a/test/MatrixFields/matrix_field_broadcasting.jl
+++ b/test/MatrixFields/matrix_field_broadcasting.jl
@@ -3,28 +3,8 @@ using JET
 import CUDA
 import BandedMatrices: band
 import LinearAlgebra: I, mul!
-import Random: seed!
-
-using ClimaCore.MatrixFields
-import ClimaCore:
-    Geometry, Domains, Meshes, Topologies, Hypsography, Spaces, Fields
-import ClimaComms
-
-# Using @benchmark from BenchmarkTools is extremely slow; it appears to keep
-# triggering recompilations and allocating a lot of memory in the process.
-# This macro returns the minimum time (in seconds) required to run the
-# expression after it has been compiled.
-macro benchmark(expression)
-    return quote
-        $(esc(expression)) # Compile the expression first. Use esc for hygiene.
-        best_time = Inf
-        start_time = time_ns()
-        while time_ns() - start_time < 1e8 # Benchmark for 0.1 s (1e8 ns).
-            best_time = min(best_time, @elapsed $(esc(expression)))
-        end
-        best_time
-    end
-end
+
+include("matrix_field_test_utils.jl")
 
 # This function is used for benchmarking ref_set_result!.
 function call_array_func(
@@ -178,49 +158,8 @@ function test_matrix_broadcast_against_reference(;
     end
 end
 
-function random_test_fields(::Type{FT}) where {FT}
-    velem = 20 # This should be big enough to test high-bandwidth matrices.
-    helem = npoly = 1 # These should be small enough for the tests to be fast.
-
-    comms_ctx = ClimaComms.SingletonCommsContext()
-    hdomain = Domains.SphereDomain(FT(10))
-    hmesh = Meshes.EquiangularCubedSphere(hdomain, helem)
-    htopology = Topologies.Topology2D(comms_ctx, hmesh)
-    quad = Spaces.Quadratures.GLL{npoly + 1}()
-    hspace = Spaces.SpectralElementSpace2D(htopology, quad)
-    vdomain = Domains.IntervalDomain(
-        Geometry.ZPoint(FT(0)),
-        Geometry.ZPoint(FT(10));
-        boundary_tags = (:bottom, :top),
-    )
-    vmesh = Meshes.IntervalMesh(vdomain, nelems = velem)
-    vtopology = Topologies.IntervalTopology(comms_ctx, vmesh)
-    vspace = Spaces.CenterFiniteDifferenceSpace(vtopology)
-    sfc_coord = Fields.coordinate_field(hspace)
-    hypsography =
-        comms_ctx.device isa ClimaComms.CUDADevice ? Hypsography.Flat() :
-        Hypsography.LinearAdaption(
-            @. cosd(sfc_coord.lat) + cosd(sfc_coord.long) + 1
-        ) # TODO: FD operators don't currently work with hypsography on GPUs.
-    center_space =
-        Spaces.ExtrudedFiniteDifferenceSpace(hspace, vspace, hypsography)
-    face_space = Spaces.FaceExtrudedFiniteDifferenceSpace(center_space)
-    ᶜcoord = Fields.coordinate_field(center_space)
-    ᶠcoord = Fields.coordinate_field(face_space)
-
-    seed!(1) # ensures reproducibility
-    ᶜᶜmat = map(c -> DiagonalMatrixRow(ntuple(_ -> rand(FT), 1)...), ᶜcoord)
-    ᶜᶠmat = map(c -> BidiagonalMatrixRow(ntuple(_ -> rand(FT), 2)...), ᶜcoord)
-    ᶠᶠmat = map(c -> TridiagonalMatrixRow(ntuple(_ -> rand(FT), 3)...), ᶠcoord)
-    ᶠᶜmat = map(c -> QuaddiagonalMatrixRow(ntuple(_ -> rand(FT), 4)...), ᶠcoord)
-    ᶜvec = map(c -> rand(FT), ᶜcoord)
-    ᶠvec = map(c -> rand(FT), ᶠcoord)
-
-    return ᶜᶜmat, ᶜᶠmat, ᶠᶠmat, ᶠᶜmat, ᶜvec, ᶠvec
-end
-
 @testset "Scalar Matrix Field Broadcasting" begin
-    ᶜᶜmat, ᶜᶠmat, ᶠᶠmat, ᶠᶜmat, ᶜvec, ᶠvec = random_test_fields(Float64)
+    ᶜᶜmat, ᶜᶠmat, ᶠᶠmat, ᶠᶜmat, ᶜvec, ᶠvec = random_matrix_fields(Float64)
 
     test_matrix_broadcast_against_array_reference(;
         test_name = "diagonal matrix times vector",
@@ -686,7 +625,7 @@ end
 end
 
 @testset "Non-scalar Matrix Field Broadcasting" begin
-    ᶜᶜmat, ᶜᶠmat, ᶠᶠmat, ᶠᶜmat, ᶜvec, ᶠvec = random_test_fields(Float64)
+    ᶜᶜmat, ᶜᶠmat, ᶠᶠmat, ᶠᶜmat, ᶜvec, ᶠvec = random_matrix_fields(Float64)
 
     ᶜlg = Fields.local_geometry_field(ᶜvec)
     ᶠlg = Fields.local_geometry_field(ᶠvec)
diff --git a/test/MatrixFields/matrix_field_test_utils.jl b/test/MatrixFields/matrix_field_test_utils.jl
new file mode 100644
index 0000000000..dae3c393a2
--- /dev/null
+++ b/test/MatrixFields/matrix_field_test_utils.jl
@@ -0,0 +1,89 @@
+import Random: seed!
+import ClimaComms
+import ClimaCore:
+    Geometry, Domains, Meshes, Topologies, Hypsography, Spaces, Fields
+using ClimaCore.MatrixFields
+
+# Using @benchmark from BenchmarkTools is extremely slow; it appears to keep
+# triggering recompilations and allocating a lot of memory in the process.
+# This macro returns the minimum time (in seconds) required to run the
+# expression after it has been compiled.
+macro benchmark(expression)
+    return quote
+        $(esc(expression)) # Compile the expression first. Use esc for hygiene.
+        best_time = Inf
+        start_time = time_ns()
+        while time_ns() - start_time < 1e8 # Benchmark for 0.1 s (1e8 ns).
+            best_time = min(best_time, @elapsed $(esc(expression)))
+        end
+        best_time
+    end
+end
+
+function coordinate_fields(::Type{FT}) where {FT}
+    velem = 20 # This should be big enough to test high-bandwidth matrices.
+    helem = npoly = 1 # These should be small enough for the tests to be fast.
+
+    comms_ctx = ClimaComms.SingletonCommsContext()
+    hdomain = Domains.SphereDomain(FT(10))
+    hmesh = Meshes.EquiangularCubedSphere(hdomain, helem)
+    htopology = Topologies.Topology2D(comms_ctx, hmesh)
+    quad = Spaces.Quadratures.GLL{npoly + 1}()
+    hspace = Spaces.SpectralElementSpace2D(htopology, quad)
+    vdomain = Domains.IntervalDomain(
+        Geometry.ZPoint(FT(0)),
+        Geometry.ZPoint(FT(10));
+        boundary_tags = (:bottom, :top),
+    )
+    vmesh = Meshes.IntervalMesh(vdomain, nelems = velem)
+    vtopology = Topologies.IntervalTopology(comms_ctx, vmesh)
+    vspace = Spaces.CenterFiniteDifferenceSpace(vtopology)
+    sfc_coord = Fields.coordinate_field(hspace)
+    hypsography =
+        comms_ctx.device isa ClimaComms.CUDADevice ? Hypsography.Flat() :
+        Hypsography.LinearAdaption(
+            @. cosd(sfc_coord.lat) + cosd(sfc_coord.long) + 1
+        ) # TODO: FD operators don't currently work with hypsography on GPUs.
+    center_space =
+        Spaces.ExtrudedFiniteDifferenceSpace(hspace, vspace, hypsography)
+    face_space = Spaces.FaceExtrudedFiniteDifferenceSpace(center_space)
+    ᶜcoord = Fields.coordinate_field(center_space)
+    ᶠcoord = Fields.coordinate_field(face_space)
+
+    return ᶜcoord, ᶠcoord
+end
+
+nested_value(value1, value2, value3) =
+    (; a = (), b = value1, c = (value2, (; d = (value3,)), (;)))
+
+function random_matrix_fields(::Type{FT}) where {FT}
+    ᶜcoord, ᶠcoord = coordinate_fields(FT)
+
+    seed!(1) # ensures reproducibility
+    ᶜᶜmat = map(_ -> DiagonalMatrixRow(ntuple(_ -> rand(FT), 1)...), ᶜcoord)
+    ᶜᶠmat = map(_ -> BidiagonalMatrixRow(ntuple(_ -> rand(FT), 2)...), ᶜcoord)
+    ᶠᶠmat = map(_ -> TridiagonalMatrixRow(ntuple(_ -> rand(FT), 3)...), ᶠcoord)
+    ᶠᶜmat = map(_ -> QuaddiagonalMatrixRow(ntuple(_ -> rand(FT), 4)...), ᶠcoord)
+    ᶜvec = map(_ -> rand(FT), ᶜcoord)
+    ᶠvec = map(_ -> rand(FT), ᶠcoord)
+
+    return ᶜᶜmat, ᶜᶠmat, ᶠᶠmat, ᶠᶜmat, ᶜvec, ᶠvec
+end
+
+function random_fields(::Type{FT}) where {FT}
+    ᶜcoord, ᶠcoord = coordinate_fields(FT)
+
+    seed!(1) # ensures reproducibility
+    ᶜscalar = map(_ -> rand(FT), ᶜcoord)
+    ᶠscalar = map(_ -> rand(FT), ᶠcoord)
+    ᶜnested = map(_ -> nested_value(rand(FT), rand(FT), rand(FT)), ᶜcoord)
+    ᶠnested = map(_ -> nested_value(rand(FT), rand(FT), rand(FT)), ᶠcoord)
+    ᶜuvw_vector =
+        map(_ -> Geometry.UVWVector(rand(FT), rand(FT), rand(FT)), ᶜcoord)
+    ᶠuvw_vector =
+        map(_ -> Geometry.UVWVector(rand(FT), rand(FT), rand(FT)), ᶠcoord)
+    ᶜc12_vector =
+        map(_ -> Geometry.Covariant12Vector(rand(FT), rand(FT)), ᶜcoord)
+
+    ᶜscalar, ᶠscalar, ᶜnested, ᶠnested, ᶜuvw_vector, ᶠuvw_vector, ᶜc12_vector
+end
diff --git a/test/MatrixFields/operator_matrices.jl b/test/MatrixFields/operator_matrices.jl
new file mode 100644
index 0000000000..62cddcaf48
--- /dev/null
+++ b/test/MatrixFields/operator_matrices.jl
@@ -0,0 +1,188 @@
+using Test
+using JET
+import CUDA
+import BandedMatrices: band
+import LinearAlgebra: I, mul!
+
+import ClimaCore.Operators:
+    SetValue,
+    SetGradient,
+    SetDivergence,
+    SetCurl,
+    Extrapolate,
+    FirstOrderOneSided,
+    ThirdOrderOneSided,
+    InterpolateC2F,
+    InterpolateF2C,
+    LeftBiasedC2F,
+    LeftBiasedF2C,
+    RightBiasedC2F,
+    RightBiasedF2C,
+    WeightedInterpolateC2F,
+    WeightedInterpolateF2C,
+    UpwindBiasedProductC2F,
+    Upwind3rdOrderBiasedProductC2F,
+    AdvectionC2C,
+    AdvectionF2F,
+    FluxCorrectionC2C,
+    FluxCorrectionF2F,
+    SetBoundaryOperator,
+    GradientC2F,
+    GradientF2C,
+    DivergenceC2F,
+    DivergenceF2C,
+    CurlC2F,
+    return_eltype
+import ClimaCore.RecursiveApply: rzero
+
+include("matrix_field_test_utils.jl")
+
+apply_op(::Nothing, op, args) = @. op(args...)
+apply_op(outer_op, op, args) = @. outer_op(op(args...))
+
+apply_op!(output, ::Nothing, op, args) = (@. output = op(args...); nothing)
+apply_op!(output, outer_op, op, args) =
+    (@. output = outer_op(op(args...)); nothing)
+
+apply_op_matrix(::Nothing, op_matrix, args::Tuple{Any}) = @. op_matrix ⋅ args[1]
+apply_op_matrix(outer_op, op_matrix, args::Tuple{Any}) =
+    @. outer_op(op_matrix ⋅ args[1])
+apply_op_matrix(::Nothing, op_matrix, args::Tuple{Any, Any}) =
+    @. op_matrix(args[1]) ⋅ args[2]
+apply_op_matrix(outer_op, op_matrix, args::Tuple{Any, Any}) =
+    @. outer_op(op_matrix(args[1]) ⋅ args[2])
+
+apply_op_matrix!(output, ::Nothing, op_matrix, args::Tuple{Any}) =
+    (@. output = op_matrix ⋅ args[1]; nothing)
+apply_op_matrix!(output, outer_op, op_matrix, args::Tuple{Any}) =
+    (@. output = outer_op(op_matrix ⋅ args[1]); nothing)
+apply_op_matrix!(output, ::Nothing, op_matrix, args::Tuple{Any, Any}) =
+    (@. output = op_matrix(args[1]) ⋅ args[2]; nothing)
+apply_op_matrix!(output, outer_op, op_matrix, args::Tuple{Any, Any}) =
+    (@. output = outer_op(op_matrix(args[1]) ⋅ args[2]); nothing)
+
+function test_op_matrix(
+    ::Type{Op},
+    ::Type{BC},
+    args,
+    requires_boundary_values = false,
+) where {Op, BC}
+    FT = Spaces.undertype(axes(args[end]))
+    op_bc = if BC <: SetValue
+        BC(rzero(eltype(args[end])))
+    elseif BC <: SetGradient
+        BC(zero(Geometry.Covariant3Vector{FT}))
+    elseif BC <: SetDivergence
+        BC(zero(FT))
+    elseif BC <: SetCurl
+        BC(zero(Geometry.Contravariant12Vector{FT}))
+    else
+        BC()
+    end
+    op = if isnothing(op_bc)
+        Op()
+    elseif Op <: Union{LeftBiasedC2F, LeftBiasedF2C}
+        Op(; bottom = op_bc)
+    elseif Op <: Union{RightBiasedC2F, RightBiasedF2C}
+        Op(; top = op_bc)
+    else
+        Op(; top = op_bc, bottom = op_bc)
+    end
+    op_matrix = MatrixFields.operator_matrix(op)
+    outer_op = if requires_boundary_values
+        outer_op_bc = SetValue(rzero(return_eltype(op, args...)))
+        SetBoundaryOperator(; top = outer_op_bc, bottom = outer_op_bc)
+    else
+        nothing
+    end
+
+    result = apply_op_matrix(outer_op, op_matrix, args)
+    ref_result = apply_op(outer_op, op, args)
+
+    max_error = maximum(abs.(parent(result) .- parent(ref_result)))
+    max_eps_error = ceil(Int, max_error / eps(FT))
+
+    best_time = @benchmark apply_op_matrix!(result, outer_op, op_matrix, args)
+    best_ref_time = @benchmark apply_op!(result, outer_op, op, args)
+    allocs = @allocated apply_op_matrix!(result, outer_op, op_matrix, args)
+    ref_allocs = @allocated apply_op!(result, outer_op, op, args)
+
+    op_str = typeof(op).name.name
+    bc_str = isempty(op.bcs) ? "" : " ($(eltype(op.bcs).name.name))"
+    @info "$op_str$bc_str:\n\tMaximum Error = $max_eps_error eps \n\t\
+           Performance Penalty = $(round(Int, best_time / best_ref_time)) \
+           ($(round(best_time; sigdigits = 2)) s per op matrix vs. \
+           $(round(best_ref_time; sigdigits = 2)) s per original op)"
+
+    @test max_eps_error <= 4
+    @test allocs == ref_allocs == 0
+    @test_opt apply_op(outer_op, op, args)
+    @test_opt apply_op_matrix(outer_op, op_matrix, args)
+    @test_opt apply_op!(result, outer_op, op, args)
+    @test_opt apply_op_matrix!(result, outer_op, op_matrix, args)
+end
+
+@testset "operator_matrix Unit Tests" begin
+    FT = Float64
+    ᶜscalar, ᶠscalar, ᶜnested, ᶠnested, ᶜuvw_vector, ᶠuvw_vector, ᶜc12_vector =
+        random_fields(FT)
+
+    test_op_matrix(InterpolateC2F, Nothing, (ᶜnested,), true)
+    test_op_matrix(InterpolateC2F, SetValue, (ᶜnested,))
+    test_op_matrix(InterpolateC2F, SetGradient, (ᶜnested,))
+    test_op_matrix(InterpolateC2F, Extrapolate, (ᶜnested,))
+    test_op_matrix(InterpolateF2C, Nothing, (ᶠnested,))
+    test_op_matrix(LeftBiasedC2F, Nothing, (ᶜnested,), true)
+    test_op_matrix(LeftBiasedC2F, SetValue, (ᶜnested,))
+    test_op_matrix(LeftBiasedF2C, Nothing, (ᶠnested,))
+    test_op_matrix(LeftBiasedF2C, SetValue, (ᶠnested,))
+    test_op_matrix(RightBiasedC2F, Nothing, (ᶜnested,), true)
+    test_op_matrix(RightBiasedC2F, SetValue, (ᶜnested,))
+    test_op_matrix(RightBiasedF2C, Nothing, (ᶠnested,))
+    test_op_matrix(RightBiasedF2C, SetValue, (ᶠnested,))
+    test_op_matrix(WeightedInterpolateC2F, Nothing, (ᶜnested, ᶜnested), true)
+    test_op_matrix(WeightedInterpolateC2F, SetValue, (ᶜnested, ᶜnested))
+    test_op_matrix(WeightedInterpolateC2F, SetGradient, (ᶜnested, ᶜnested))
+    test_op_matrix(WeightedInterpolateC2F, Extrapolate, (ᶜnested, ᶜnested))
+    test_op_matrix(WeightedInterpolateF2C, Nothing, (ᶠnested, ᶠnested))
+    test_op_matrix(
+        UpwindBiasedProductC2F,
+        Nothing,
+        (ᶠuvw_vector, ᶜscalar),
+        true,
+    )
+    test_op_matrix(UpwindBiasedProductC2F, SetValue, (ᶠuvw_vector, ᶜscalar))
+    test_op_matrix(UpwindBiasedProductC2F, Extrapolate, (ᶠuvw_vector, ᶜscalar))
+    test_op_matrix(
+        Upwind3rdOrderBiasedProductC2F,
+        FirstOrderOneSided,
+        (ᶠuvw_vector, ᶜscalar),
+        true,
+    )
+    test_op_matrix(
+        Upwind3rdOrderBiasedProductC2F,
+        ThirdOrderOneSided,
+        (ᶠuvw_vector, ᶜscalar),
+        true,
+    )
+    test_op_matrix(AdvectionC2C, SetValue, (ᶠuvw_vector, ᶜnested))
+    test_op_matrix(AdvectionC2C, Extrapolate, (ᶠuvw_vector, ᶜnested))
+    test_op_matrix(AdvectionF2F, Nothing, (ᶠuvw_vector, ᶠnested), true)
+    test_op_matrix(FluxCorrectionC2C, Extrapolate, (ᶠuvw_vector, ᶜnested))
+    test_op_matrix(FluxCorrectionF2F, Extrapolate, (ᶜuvw_vector, ᶠnested))
+    test_op_matrix(GradientC2F, Nothing, (ᶜscalar,), true)
+    test_op_matrix(GradientC2F, SetValue, (ᶜscalar,))
+    test_op_matrix(GradientC2F, SetGradient, (ᶜscalar,))
+    test_op_matrix(GradientF2C, Nothing, (ᶠscalar,))
+    test_op_matrix(GradientF2C, SetValue, (ᶠscalar,))
+    test_op_matrix(GradientF2C, Extrapolate, (ᶠscalar,))
+    test_op_matrix(DivergenceC2F, Nothing, (ᶜuvw_vector,), true)
+    test_op_matrix(DivergenceC2F, SetValue, (ᶜuvw_vector,))
+    test_op_matrix(DivergenceC2F, SetDivergence, (ᶜuvw_vector,))
+    test_op_matrix(DivergenceF2C, Nothing, (ᶠuvw_vector,))
+    test_op_matrix(DivergenceF2C, SetValue, (ᶠuvw_vector,))
+    test_op_matrix(DivergenceF2C, Extrapolate, (ᶠuvw_vector,))
+    test_op_matrix(CurlC2F, Nothing, (ᶜc12_vector,), true)
+    test_op_matrix(CurlC2F, SetValue, (ᶜc12_vector,))
+    test_op_matrix(CurlC2F, SetCurl, (ᶜc12_vector,))
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 28967cdd3c..bc0e5c68b6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -78,6 +78,7 @@ if !Sys.iswindows()
     @safetestset "MatrixFields - BandMatrixRow" begin @time include("MatrixFields/band_matrix_row.jl") end
     @safetestset "MatrixFields - rmul_with_projection" begin @time include("MatrixFields/rmul_with_projection.jl") end
     @safetestset "MatrixFields - field2arrays" begin @time include("MatrixFields/field2arrays.jl") end
+    @safetestset "MatrixFields - operator matrices" begin @time include("MatrixFields/operator_matrices.jl") end
     # now part of buildkite
     # @safetestset "MatrixFields - matrix field broadcasting" begin @time include("MatrixFields/matrix_field_broadcasting.jl") end