diff --git a/.gitignore b/.gitignore
index a87ed2d..7b013f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ multid-static
 trace.*
 *.log
 *problem_3D_100.npy
+D/multigrid_old
diff --git a/D/GSRBBenchmark.d b/D/GSRBBenchmark.d
index 6209667..f67f8b8 100644
--- a/D/GSRBBenchmark.d
+++ b/D/GSRBBenchmark.d
@@ -20,7 +20,7 @@ void main(string[] argv)
     void warmup()
     {
         auto UF1 = npyload!(double, 2)(i.default_path);
-        GS_RB!(double, 2, iterations, iterations + 10, 1e-8, SweepType.field)(UF1[1].slice, UF1[0].slice, 1);
+        GS_RB!(SweepType.ndslice)(UF1[1].slice, UF1[0].slice, 1, iterations, iterations + 10, 1e-8);
     }
 
     const uint dim = getDim(i.path);
@@ -32,13 +32,16 @@ void main(string[] argv)
     switch (i.sweep)
     {
     case "slice":
-        GS_RB!(double, 2, iterations, iterations + 10, 1e-8, SweepType.slice)(UF[1].slice, UF[0].slice, 1);
+        GS_RB!(SweepType.slice)(UF[1].slice, UF[0].slice, 1, iterations, iterations + 10, 1e-8);
         break;
     case "naive":
-        GS_RB!(double, 2, iterations, iterations + 10, 1e-8, SweepType.naive)(UF[1].slice, UF[0].slice, 1);
+        GS_RB!(SweepType.naive)(UF[1].slice, UF[0].slice, 1, iterations, iterations + 10, 1e-8);
+        break;
+    case "field":
+        GS_RB!(SweepType.field)(UF[1].slice, UF[0].slice, 1, iterations, iterations + 10, 1e-8);
         break;
     default:
-        GS_RB!(double, 2, iterations, iterations + 10, 1e-8, SweepType.field)(UF[1].slice, UF[0].slice, 1);
+        GS_RB!(SweepType.ndslice)(UF[1].slice, UF[0].slice, 1, iterations, iterations + 10, 1e-8);
 
     }
     i.print_time();
diff --git a/D/app.d b/D/app.d
index 741e383..29cde66 100644
--- a/D/app.d
+++ b/D/app.d
@@ -19,7 +19,7 @@ void main(string[] argv)
     void warmup()
     {
         auto UF1 = npyload!(double, 2)(i.default_path);
-        poisson_multigrid!(double, 2, 2, 2)(UF1[1].slice, UF1[0].slice, 0, 1, 1);
+        poisson_multigrid(UF1[1].slice, UF1[0].slice, 0, 1, 2, 2, 1);
     }
 
     const uint dim = getDim(i.path);
@@ -30,19 +30,19 @@ void main(string[] argv)
         auto UF = npyload!(double, 1)(i.path);
         warmup();
         i.wait_till();
-        poisson_multigrid!(double, 1, 2, 2)(UF[1].slice, UF[0].slice, 0, 1, 100, i.sweep);
+        poisson_multigrid(UF[1].slice, UF[0].slice, 0, 1, 2, 2, 100, i.sweep);
         break;
     case 2:
         auto UF = npyload!(double, 2)(i.path);
         warmup();
         i.wait_till();
-        poisson_multigrid!(double, 2, 2, 2)(UF[1].slice, UF[0].slice, 0, 1, 100, i.sweep);
+        poisson_multigrid(UF[1].slice, UF[0].slice, 0, 1, 2, 2, 100, i.sweep);
         break;
     case 3:
         auto UF = npyload!(double, 3)(i.path);
         warmup();
         i.wait_till();
-        poisson_multigrid!(double, 3, 2, 2)(UF[1].slice, UF[0].slice, 0, 1, 100, i.sweep);
+        poisson_multigrid(UF[1].slice, UF[0].slice, 0, 1, 2, 2, 100, i.sweep);
         break;
     default:
         throw new Exception("wrong dimension!");
diff --git a/D/dub.json b/D/dub.json
index 938035a..377ff60 100644
--- a/D/dub.json
+++ b/D/dub.json
@@ -2,10 +2,9 @@
   "name": "multid",
 
   "dependencies": {
-    "mir-algorithm": "~>3.9.6",
+    "mir-algorithm": "~>3.10.11",
     "mir-random": "~>2.2.14",
-    "numir": "~>2.0.5",
-    "pretty_array": "~>1.0.2"
+    "numir": "~>2.0.5"
   },
   "configurations": [
     {
@@ -13,7 +12,7 @@
       "targetName": "multigrid",
       "mainSourceFile": "app.d",
       "compiler": "ldc",
-      "dflags-ldc": ["-mcpu=native", "--boundscheck=off"],
+      "dflags-ldc": ["-mcpu=native"],
       "targetType": "executable"
     },
     {
@@ -22,13 +21,13 @@
       "targetType": "executable",
       "targetName": "multid-static",
       "compiler": "ldc",
-      "dflags-ldc": ["--static"]
+      "dflags-ldc": ["-mcpu=native", "--static"]
     },
     {
       "name": "gsrb",
       "mainSourceFile": "GSRBBenchmark.d",
       "targetName": "gsrb",
-      "dflags-ldc": ["-mcpu=native", "--boundscheck=off"],
+      "dflags-ldc": ["-mcpu=native"],
       "targetType": "executable",
       "compiler": "ldc"
     }
diff --git a/D/source/loadproblem.d b/D/source/loadproblem.d
index 85795dc..ce8c4ec 100644
--- a/D/source/loadproblem.d
+++ b/D/source/loadproblem.d
@@ -1,10 +1,10 @@
 module loadproblem;
 
+import mir.conv : to;
 import mir.ndslice;
 import numir.io;
 import std.regex;
 import std.stdio;
-import std.conv : to;
 
 // /++
 // Implementation of npy loader
diff --git a/D/source/multid/gaussseidel/redblack.d b/D/source/multid/gaussseidel/redblack.d
index 62cc13b..7b138a4 100644
--- a/D/source/multid/gaussseidel/redblack.d
+++ b/D/source/multid/gaussseidel/redblack.d
@@ -1,27 +1,18 @@
 module multid.gaussseidel.redblack;
 
+import mir.math: fastmath, approxEqual;
+import mir.algorithm.iteration: Chequer, all;
+import mir.ndslice : slice, uninitSlice, sliced, Slice, strided;
+import multid.gaussseidel.sweep;
 import multid.tools.apply_poisson : compute_residual;
 import multid.tools.norm : nrmL2;
-
-import mir.ndslice : slice, sliced, Slice, strided;
-import std.traits : isFloatingPoint;
-
-import multid.gaussseidel.sweep;
 import std.experimental.logger;
-
-/++
-    red is for even indicies
-    black for the odd
-+/
-enum Color
-{
-    red = 1u,
-    black = 0u
-}
+import std.traits : isFloatingPoint;
 
 /++ enum to differentiate between sweep types +/
 enum SweepType
 {
+    ndslice = "ndslice",
     field = "field",
     slice = "slice",
     naive = "naive"
@@ -38,27 +29,55 @@ it solves AU = F, with A being a poisson matrix like this
         1 .. 1 1 1 1
 so the borders of U remain unchanged
 Params:
-    U = slice of dimension Dim
     F = slice of dimension Dim
+    U = slice of dimension Dim
+    R = slice of dimension Dim
     h = the distance between the grid points
 Returns: U
 +/
-Slice!(T*, Dim) GS_RB(T, size_t Dim, size_t max_iter = 10_000_000, size_t norm_iter = 1_000,
-        double eps = 1e-8, SweepType sweeptype = SweepType.field)(in Slice!(T*, Dim) F, Slice!(T*, Dim) U, in T h)
-        if (1 <= Dim && Dim <= 3 && isFloatingPoint!T)
+
+Slice!(T*, Dim) GS_RB(SweepType sweeptype = SweepType.ndslice, T, size_t Dim)(
+    Slice!(const(T)*, Dim) F,
+    Slice!(T*, Dim) U,
+    const T h,
+    size_t max_iter = 10_000_000,
+    size_t norm_iter = 1_000,
+    double eps = 1e-8,
+    )
+    if ((1 <= Dim && Dim <= 8) && isFloatingPoint!T)
+{
+    auto R = U.shape.slice!T;
+    T norm;
+    auto it = GS_RB!sweeptype(F, U, R, h, norm, max_iter, norm_iter, eps);
+    logf("GS_RB converged after %d iterations with %e error", it, norm);
+    return U;
+}
+
+@nogc @fastmath
+size_t GS_RB(SweepType sweeptype = SweepType.ndslice, T, size_t Dim)(
+    Slice!(const(T)*, Dim) F,
+    Slice!(T*, Dim) U,
+    Slice!(T*, Dim) R, //residual
+    const T h,
+    out T norm,
+    size_t max_iter = 10_000_000,
+    size_t norm_iter = 1_000,
+    double eps = 1e-8,
+    )
+    if ((1 <= Dim && Dim <= 8) && isFloatingPoint!T)
 {
     mixin("alias sweep = sweep_" ~ sweeptype ~ ";");
 
     const T h2 = h * h;
-    T norm = 0;
-    size_t it = 0;
-
+    size_t it;
+    norm = 0;
     while (it < max_iter)
     {
         it++;
         if (it % norm_iter == 0)
         {
-            norm = compute_residual!(T, Dim)(F, U, h).nrmL2;
+            compute_residual(R, F, U, h);
+            norm = R.nrmL2;
             if (norm <= eps)
             {
                 break;
@@ -66,12 +85,11 @@ Slice!(T*, Dim) GS_RB(T, size_t Dim, size_t max_iter = 10_000_000, size_t norm_i
 
         }
         // rote Halbiteration
-        sweep!(T, Dim, Color.red)(F, U, h2);
+        sweep(Chequer.red, F, U, h2);
         // schwarze Halbiteration
-        sweep!(T, Dim, Color.black)(F, U, h2);
+        sweep(Chequer.black, F, U, h2);
     }
-    logf("GS_RB converged after %d iterations with %e error", it, norm);
-    return U;
+    return it;
 }
 
 unittest
@@ -80,7 +98,7 @@ unittest
     auto U1 = slice!double([N], 1.0);
     auto F1 = slice!double([N], 0.0);
     F1[1] = 1;
-    GS_RB!(double, 1, 1)(F1, U1, 1.0);
+    GS_RB(F1, U1, 1.0, 1);
     assert(U1 == [1.0, 1.0 / 2.0, 1.0].sliced);
 
     auto U2 = slice!double([N, N], 1.0);
@@ -89,16 +107,17 @@ unittest
 
     auto expected = slice!double([N, N], 1.0);
     expected[1, 1] = 3.0 / 4.0;
-    GS_RB!(double, 2, 1)(F2, U2, 1.0);
+    GS_RB(F2, U2, 1.0, 1);
     assert(expected == U2);
 
     auto U3 = slice!double([N, N, N], 1.0);
     auto F3 = slice!double([N, N, N], 0.0);
     F3[1, 1, 1] = 1;
-    GS_RB!(double, 3, 1)(F3, U3, 1.0);
+    GS_RB(F3, U3, 1.0, 1);
 
     auto expected3 = slice!double([N, N, N], 1.0);
-    expected3[1, 1, 1] = 5.0 / 6.0;
+    expected3[1, 1, 1] = 5.0;
+    expected3[1, 1, 1] *= 1 / 6.0;
     assert(expected3 == U3);
 
 }
@@ -114,19 +133,24 @@ unittest
     auto U = randomMatrix!(double, 1)(N);
     auto U1 = U.dup;
     auto U2 = U.dup;
-    const auto F = slice!double([N], 1.0);
+    auto U3 = U.dup;
+    const F = slice!double([N], 1.0);
 
-    sweep_naive!(double, 1, Color.red)(F, U, h2);
-    sweep_field!(double, 1, Color.red)(F, U1, h2);
-    sweep_slice!(double, 1, Color.red)(F, U2, h2);
+    sweep_naive(Chequer.red, F, U, h2);
+    sweep_field(Chequer.red, F, U1, h2);
+    sweep_slice(Chequer.red, F, U2, h2);
+    sweep_ndslice(Chequer.red, F, U3, h2);
     assert(U == U1);
     assert(U1 == U2);
+    assert(all!approxEqual(U, U3));
 
-    sweep_naive!(double, 1, Color.black)(F, U, h2);
-    sweep_field!(double, 1, Color.black)(F, U1, h2);
-    sweep_slice!(double, 1, Color.black)(F, U2, h2);
+    sweep_naive(Chequer.black, F, U, h2);
+    sweep_field(Chequer.black, F, U1, h2);
+    sweep_slice(Chequer.black, F, U2, h2);
+    sweep_ndslice(Chequer.black, F, U3, h2);
     assert(U == U1);
     assert(U1 == U2);
+    assert(all!approxEqual(U, U3));
 
 }
 
@@ -141,19 +165,24 @@ unittest
     auto U = randomMatrix!(double, 2)(N);
     auto U1 = U.dup;
     auto U2 = U.dup;
-    const auto F = slice!double([N, N], 1.0);
+    auto U3 = U.dup;
+    const F = slice!double([N, N], 1.0);
 
-    sweep_naive!(double, 2, Color.red)(F, U, h2);
-    sweep_field!(double, 2, Color.red)(F, U1, h2);
-    sweep_slice!(double, 2, Color.red)(F, U2, h2);
+    sweep_naive(Chequer.red, F, U, h2);
+    sweep_field(Chequer.red, F, U1, h2);
+    sweep_slice(Chequer.red, F, U2, h2);
+    sweep_ndslice(Chequer.red, F, U3, h2);
     assert(U == U1);
     assert(U1 == U2);
+    assert(all!approxEqual(U, U3));
 
-    sweep_naive!(double, 2, Color.black)(F, U, h2);
-    sweep_field!(double, 2, Color.black)(F, U1, h2);
-    sweep_slice!(double, 2, Color.black)(F, U2, h2);
+    sweep_naive(Chequer.black, F, U, h2);
+    sweep_field(Chequer.black, F, U1, h2);
+    sweep_slice(Chequer.black, F, U2, h2);
+    sweep_ndslice(Chequer.black, F, U3, h2);
     assert(U == U1);
     assert(U1 == U2);
+    assert(all!approxEqual(U, U3));
 }
 
 unittest
@@ -165,18 +194,25 @@ unittest
     auto U = randomMatrix!(double, 3)(N);
     auto U1 = U.dup;
     auto U2 = U.dup;
-    const auto F = slice!double([N, N, N], 1.0);
+    auto U3 = U.dup;
+    const F = slice!double([N, N, N], 1.0);
     const double h2 = 1.0;
 
-    sweep_naive!(double, 3, Color.red)(F, U, h2);
-    sweep_field!(double, 3, Color.red)(F, U1, h2);
-    sweep_slice!(double, 3, Color.red)(F, U2, h2);
+    sweep_naive(Chequer.red, F, U, h2);
+    sweep_field(Chequer.red, F, U1, h2);
+    sweep_slice(Chequer.red, F, U2, h2);
+    sweep_ndslice(Chequer.red, F, U3, h2);
+    // import std.stdio;
+    // writeln(U - U1);
     assert(U == U1);
     assert(U1 == U2);
+    assert(all!approxEqual(U, U3));
 
-    sweep_naive!(double, 3, Color.black)(F, U, h2);
-    sweep_field!(double, 3, Color.black)(F, U1, h2);
-    sweep_slice!(double, 3, Color.black)(F, U2, h2);
+    sweep_naive(Chequer.black, F, U, h2);
+    sweep_field(Chequer.black, F, U1, h2);
+    sweep_slice(Chequer.black, F, U2, h2);
+    sweep_ndslice(Chequer.black, F, U3, h2);
     assert(U == U1);
     assert(U1 == U2);
+    assert(all!approxEqual(U, U3));
 }
diff --git a/D/source/multid/gaussseidel/sweep.d b/D/source/multid/gaussseidel/sweep.d
index a44dcb5..a28c9b9 100644
--- a/D/source/multid/gaussseidel/sweep.d
+++ b/D/source/multid/gaussseidel/sweep.d
@@ -1,7 +1,19 @@
 module multid.gaussseidel.sweep;
 
-import mir.ndslice : slice, sliced, Slice, strided;
-import multid.gaussseidel.redblack : Color;
+import mir.math: fastmath;
+import mir.algorithm.iteration: Chequer, each;
+import mir.ndslice : assumeSameShape, slice, sliced, Slice, SliceKind, strided, dropBorders, withNeighboursSum, zip;
+
+
+@nogc @fastmath
+void sweep_ndslice(T, size_t N)(Chequer color, Slice!(const(T)*, N) F, Slice!(T*, N) U, const T h2) nothrow
+{
+    // find the naive implementation R/B order
+    color = N % 2 ? cast(Chequer)!color : color;
+    assumeSameShape(F, U);
+    color.each!(z => z.a.a = (T(1) / (2 * N)) * (z.a.b - h2 * z.b))
+        (U.withNeighboursSum.zip!true(F.dropBorders));
+}
 
 /++
 This is a sweep implementation for 1D
@@ -12,15 +24,15 @@ Params:
     U  = slice of dimension Dim
     h2 = the squared distance between the grid points
 +/
-@nogc
-void sweep_field(T, size_t Dim : 1, Color color)(in Slice!(T*, 1) F, Slice!(T*, 1) U, in T h2) nothrow
+@nogc @fastmath
+void sweep_field(T)(Chequer color, Slice!(const(T)*, 1) F, Slice!(T*, 1) U, const T h2) nothrow
 {
-    const auto N = F.shape[0];
+    const N = F.shape[0];
     auto UF = U.field;
     auto FF = F.field;
     for (size_t i = 2u - color; i < N - 1u; i += 2u)
     {
-        UF[i] = (UF[i - 1u] + UF[i + 1u] - FF[i] * h2) / 2.0;
+        UF[i] = (UF[i - 1u] + UF[i + 1u] - FF[i] * h2) / 2;
     }
 }
 
@@ -33,11 +45,11 @@ Params:
     U  = slice of dimension Dim
     h2 = the squared distance between the grid points
 +/
-@nogc
-void sweep_field(T, size_t Dim : 2, Color color)(in Slice!(T*, 2) F, Slice!(T*, 2) U, in T h2) nothrow
+@nogc @fastmath
+void sweep_field(T)(Chequer color, Slice!(const(T)*, 2) F, Slice!(T*, 2) U, const T h2) nothrow
 {
-    const auto m = F.shape[0];
-    const auto n = F.shape[1];
+    const m = F.shape[0];
+    const n = F.shape[1];
     auto UF = U.field;
     auto FF = F.field;
 
@@ -51,7 +63,7 @@ void sweep_field(T, size_t Dim : 2, Color color)(in Slice!(T*, 2) F, Slice!(T*,
                     UF[flatindex - m] +
                     UF[flatindex + m] +
                     UF[flatindex - 1] +
-                    UF[flatindex + 1] - h2 * FF[flatindex]) / cast(T) 4;
+                    UF[flatindex + 1] - h2 * FF[flatindex]) / 4;
         }
     }
 }
@@ -65,19 +77,19 @@ Params:
     U  = slice of dimension Dim
     h2 = the squared distance between the grid points
 +/
-@nogc
-void sweep_field(T, size_t Dim : 3, Color color)(in Slice!(T*, 3) F, Slice!(T*, 3) U, in T h2) nothrow
+@nogc @fastmath
+void sweep_field(T)(Chequer color, Slice!(const(T)*, 3) F, Slice!(T*, 3) U, const T h2) nothrow
 {
-    const auto m = F.shape[0];
-    const auto n = F.shape[1];
-    const auto l = F.shape[2];
+    const m = F.shape[0];
+    const n = F.shape[1];
+    const l = F.shape[2];
     auto UF = U.field;
     auto FF = F.field;
     foreach (i; 1 .. m - 1)
     {
         foreach (j; 1 .. n - 1)
         {
-            const auto flatindex2d = i * (n * l) + j * l;
+            const flatindex2d = i * (n * l) + j * l;
             for (size_t k = 1u + (i + j + 1 + color) % 2; k < l - 1u; k += 2)
             {
                 const flatindex = flatindex2d + k;
@@ -87,146 +99,150 @@ void sweep_field(T, size_t Dim : 3, Color color)(in Slice!(T*, 3) F, Slice!(T*,
                         UF[flatindex - l] +
                         UF[flatindex + l] +
                         UF[flatindex - 1] +
-                        UF[flatindex + 1] - h2 * FF[flatindex]) / 6.0;
+                        UF[flatindex + 1] - h2 * FF[flatindex]) * (T(1) / 6);
             }
         }
     }
 }
 
+private struct SweepKernel(T, size_t Dim)
+{
+    import std.meta: Repeat;
+
+    T h2;
+
+    this(T h2)
+    {
+        this.h2 = h2;
+    }
+
+    @fastmath
+    void opCall()(ref scope T r, ref scope const Repeat!(2 * Dim, T) neighbors, ref scope const T f) const
+    {
+        T sum = neighbors[0];
+        foreach (ref neighbor; neighbors[1 .. $])
+            sum += neighbor;
+        r = (sum - f * h2) * (T(1) / neighbors.length);
+    }
+}
+
 /++ slow sweep for 1D +/
-@nogc
-void sweep_slice(T, size_t Dim : 1, Color color)(in Slice!(T*, 1) F, Slice!(T*, 1) U, in T h2) nothrow
+@nogc @fastmath
+void sweep_slice(T)(Chequer color, Slice!(const(T)*, 1) F, Slice!(T*, 1) U, const T h2) nothrow
 {
-    U[2 - color .. $ - 1].strided!0(2)[] = (
-            U[1 - color .. $ - 2].strided!0(2) + U[3 - color .. $].strided!0(
-            2) - h2 * F[2 - color .. $ - 1].strided!0(2)) / 2.0;
+    assumeSameShape(F, U);
+    auto kernel = SweepKernel!(T, 1)(h2);
+
+    each!kernel(
+        U[2 - color .. $ - 1].strided(2),
+        U[1 - color .. $ - 2].strided(2),
+        U[3 - color .. $].strided(2),
+        F[2 - color .. $ - 1].strided(2));
 }
 
 /++ slow sweep for 2D +/
-@nogc
-void sweep_slice(T, size_t Dim : 2, Color color)(in Slice!(T*, 2) F, Slice!(T*, 2) U, in T h2) nothrow
+@nogc @fastmath
+void sweep_slice(T)(Chequer color, Slice!(const(T)*, 2) F, Slice!(T*, 2) U, const T h2) nothrow
 {
-    const auto m = F.shape[0];
-    const auto n = F.shape[1];
-    auto strideU = U[1 .. m - 1, 1 + color .. n - 1].strided!(0, 1)(2, 2);
-    strideU[] = U[0 .. m - 2, 1 + color .. n - 1].strided!(0, 1)(2, 2);
-    strideU[] += U[2 .. m, 1 + color .. n - 1].strided!(0, 1)(2, 2);
-    strideU[] += U[1 .. m - 1, color .. n - 2].strided!(0, 1)(2, 2);
-    strideU[] += U[1 .. m - 1, 2 + color .. n].strided!(0, 1)(2, 2);
-    strideU[] -= F[1 .. m - 1, 1 + color .. n - 1].strided!(0, 1)(2, 2) * h2;
-    strideU[] /= cast(T) 4;
-
-    strideU = U[2 .. m - 1, 2 - color .. n - 1].strided!(0, 1)(2, 2);
-    strideU[] = U[1 .. m - 2, 2 - color .. n - 1].strided!(0, 1)(2, 2);
-    strideU[] += U[3 .. m, 2 - color .. n - 1].strided!(0, 1)(2, 2);
-    strideU[] += U[2 .. m - 1, 1 - color .. n - 2].strided!(0, 1)(2, 2);
-    strideU[] += U[2 .. m - 1, 3 - color .. n].strided!(0, 1)(2, 2);
-    strideU[] -= F[2 .. m - 1, 2 - color .. n - 1].strided!(0, 1)(2, 2) * h2;
-    strideU[] /= cast(T) 4;
+    assumeSameShape(F, U);
+    auto kernel = SweepKernel!(T, 2)(h2);
+
+    each!kernel(
+        U[1 .. $ - 1, 1 + color .. $ - 1].strided(2),
+        U[0 .. $ - 2, 1 + color .. $ - 1].strided(2),
+        U[2 .. $, 1 + color .. $ - 1].strided(2),
+        U[1 .. $ - 1, color .. $ - 2].strided(2),
+        U[1 .. $ - 1, 2 + color .. $].strided(2),
+        F[1 .. $ - 1, 1 + color .. $ - 1].strided(2));
+
+    each!kernel(
+        U[2 .. $ - 1, 2 - color .. $ - 1].strided(2),
+        U[1 .. $ - 2, 2 - color .. $ - 1].strided(2),
+        U[3 .. $, 2 - color .. $ - 1].strided(2),
+        U[2 .. $ - 1, 1 - color .. $ - 2].strided(2),
+        U[2 .. $ - 1, 3 - color .. $].strided(2),
+        F[2 .. $ - 1, 2 - color .. $ - 1].strided(2));
 }
+
 /++ slow sweep for 3D +/
-@nogc
-void sweep_slice(T, size_t Dim : 3, Color color)(in Slice!(T*, 3) F, Slice!(T*, 3) U, in T h2) nothrow
+@nogc @fastmath
+void sweep_slice(T)(Chequer color, Slice!(const(T)*, 3) F, Slice!(T*, 3) U, const T h2) nothrow
 {
-    const auto m = F.shape[0];
-    const auto n = F.shape[1];
-    const auto o = F.shape[2];
-
-    auto strideU = U[2 .. m - 1, 1 .. n - 1, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] = U[1 .. m - 2, 1 .. n - 1, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[3 .. m, 1 .. n - 1, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m - 1, 0 .. n - 2, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m - 1, 2 .. n, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m - 1, 1 .. n - 1, color .. o - 2].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m - 1, 1 .. n - 1, 2 + color .. o].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] -= F[2 .. m - 1, 1 .. n - 1, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2) * h2;
-    strideU[] /= cast(T) 6;
-
-    strideU = U[1 .. m - 1, 1 .. n - 1, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] = U[0 .. m - 2, 1 .. n - 1, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m, 1 .. n - 1, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[1 .. m - 1, 0 .. n - 2, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[1 .. m - 1, 2 .. n, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[1 .. m - 1, 1 .. n - 1, 1 - color .. o - 2].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[1 .. m - 1, 1 .. n - 1, 3 - color .. o].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] -= F[1 .. m - 1, 1 .. n - 1, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2) * h2;
-    strideU[] /= cast(T) 6;
-
-    strideU = U[1 .. m - 1, 2 .. n - 1, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] = U[0 .. m - 2, 2 .. n - 1, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m, 2 .. n - 1, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[1 .. m - 1, 1 .. n - 2, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[1 .. m - 1, 3 .. n, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[1 .. m - 1, 2 .. n - 1, color .. o - 2].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[1 .. m - 1, 2 .. n - 1, 2 + color .. o].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] -= F[1 .. m - 1, 2 .. n - 1, 1 + color .. o - 1].strided!(0, 1, 2)(2, 2, 2) * h2;
-    strideU[] /= cast(T) 6;
-
-    strideU = U[2 .. m - 1, 2 .. n - 1, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] = U[1 .. m - 2, 2 .. n - 1, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[3 .. m, 2 .. n - 1, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m - 1, 1 .. n - 2, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m - 1, 3 .. n, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m - 1, 2 .. n - 1, 1 - color .. o - 2].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] += U[2 .. m - 1, 2 .. n - 1, 3 - color .. o].strided!(0, 1, 2)(2, 2, 2);
-    strideU[] -= F[2 .. m - 1, 2 .. n - 1, 2 - color .. o - 1].strided!(0, 1, 2)(2, 2, 2) * h2;
-    strideU[] /= cast(T) 6;
-
+    assumeSameShape(F, U);
+    auto kernel = SweepKernel!(T, 3)(h2);
+
+    each!kernel(
+        U[2 .. $ - 1, 1 .. $ - 1, 1 + color .. $ - 1].strided(2),
+        U[1 .. $ - 2, 1 .. $ - 1, 1 + color .. $ - 1].strided(2),
+        U[3 .. $, 1 .. $ - 1, 1 + color .. $ - 1].strided(2),
+        U[2 .. $ - 1, 0 .. $ - 2, 1 + color .. $ - 1].strided(2),
+        U[2 .. $ - 1, 2 .. $, 1 + color .. $ - 1].strided(2),
+        U[2 .. $ - 1, 1 .. $ - 1, color .. $ - 2].strided(2),
+        U[2 .. $ - 1, 1 .. $ - 1, 2 + color .. $].strided(2),
+        F[2 .. $ - 1, 1 .. $ - 1, 1 + color .. $ - 1].strided(2));
+
+    each!kernel(
+        U[1 .. $ - 1, 1 .. $ - 1, 2 - color .. $ - 1].strided(2),
+        U[0 .. $ - 2, 1 .. $ - 1, 2 - color .. $ - 1].strided(2),
+        U[2 .. $, 1 .. $ - 1, 2 - color .. $ - 1].strided(2),
+        U[1 .. $ - 1, 0 .. $ - 2, 2 - color .. $ - 1].strided(2),
+        U[1 .. $ - 1, 2 .. $, 2 - color .. $ - 1].strided(2),
+        U[1 .. $ - 1, 1 .. $ - 1, 1 - color .. $ - 2].strided(2),
+        U[1 .. $ - 1, 1 .. $ - 1, 3 - color .. $].strided(2),
+        F[1 .. $ - 1, 1 .. $ - 1, 2 - color .. $ - 1].strided(2));
+
+    each!kernel(
+        U[1 .. $ - 1, 2 .. $ - 1, 1 + color .. $ - 1].strided(2),
+        U[0 .. $ - 2, 2 .. $ - 1, 1 + color .. $ - 1].strided(2),
+        U[2 .. $, 2 .. $ - 1, 1 + color .. $ - 1].strided(2),
+        U[1 .. $ - 1, 1 .. $ - 2, 1 + color .. $ - 1].strided(2),
+        U[1 .. $ - 1, 3 .. $, 1 + color .. $ - 1].strided(2),
+        U[1 .. $ - 1, 2 .. $ - 1, color .. $ - 2].strided(2),
+        U[1 .. $ - 1, 2 .. $ - 1, 2 + color .. $].strided(2),
+        F[1 .. $ - 1, 2 .. $ - 1, 1 + color .. $ - 1].strided(2));
+
+    each!kernel(
+        U[2 .. $ - 1, 2 .. $ - 1, 2 - color .. $ - 1].strided(2),
+        U[1 .. $ - 2, 2 .. $ - 1, 2 - color .. $ - 1].strided(2),
+        U[3 .. $, 2 .. $ - 1, 2 - color .. $ - 1].strided(2),
+        U[2 .. $ - 1, 1 .. $ - 2, 2 - color .. $ - 1].strided(2),
+        U[2 .. $ - 1, 3 .. $, 2 - color .. $ - 1].strided(2),
+        U[2 .. $ - 1, 2 .. $ - 1, 1 - color .. $ - 2].strided(2),
+        U[2 .. $ - 1, 2 .. $ - 1, 3 - color .. $].strided(2),
+        F[2 .. $ - 1, 2 .. $ - 1, 2 - color .. $ - 1].strided(2));
 }
 
 /++ naive sweep for 1D +/
-@nogc
-void sweep_naive(T, size_t Dim : 1, Color color)(const Slice!(T*, 1) F, Slice!(T*, 1) U, T h2) nothrow
+@nogc @fastmath
+void sweep_naive(T)(Chequer color, Slice!(const(T)*, 1) F, Slice!(T*, 1) U, const T h2) nothrow
 {
-
-    const auto n = F.shape[0];
-    foreach (i; 1 .. n - 1)
-    {
+    foreach (i; 1 .. F.length - 1)
         if (i % 2 == color)
-        {
-            U[i] = (U[i - 1u] + U[i + 1u] - F[i] * h2) / 2.0;
-        }
-    }
-
+            U[i] = (U[i - 1u] + U[i + 1u] - F[i] * h2) / 2;
 }
 /++ naive sweep for 2D +/
-@nogc
-void sweep_naive(T, size_t Dim : 2, Color color)(const Slice!(T*, 2) F, Slice!(T*, 2) U, T h2) nothrow
+@nogc @fastmath
+void sweep_naive(T)(Chequer color, Slice!(const(T)*, 2) F, Slice!(T*, 2) U, const T h2) nothrow
 {
-    const auto n = F.shape[0];
-    const auto m = F.shape[1];
-
-    foreach (i; 1 .. n - 1)
-    {
-        foreach (j; 1 .. m - 1)
-        {
-            if ((i + j) % 2 == color)
-            {
-                U[i, j] = (U[i - 1, j] + U[i + 1, j] + U[i, j - 1] + U[i, j + 1] - h2 * F[i, j]) / 4.0;
-            }
-        }
-    }
+    foreach (i; 1 .. F.length!0 - 1)
+    foreach (j; 1 .. F.length!1 - 1)
+        if ((i + j) % 2 == color)
+            U[i, j] = (U[i - 1, j] + U[i + 1, j] + U[i, j - 1] + U[i, j + 1] - h2 * F[i, j]) / 4;
 }
 /++ naive sweep for 3D +/
-@nogc
-void sweep_naive(T, size_t Dim : 3, Color color)(const Slice!(T*, 3) F, Slice!(T*, 3) U, T h2) nothrow
+@nogc @fastmath
+void sweep_naive(T)(Chequer color, Slice!(const(T)*, 3) F, Slice!(T*, 3) U, const T h2) nothrow
 {
-    const auto n = F.shape[0];
-    const auto m = F.shape[1];
-    const auto l = F.shape[2];
-    for (size_t i = 1u; i < n - 1u; i++)
-    {
-        for (size_t j = 1u; j < m - 1u; j++)
-        {
-            for (size_t k = 1u; k < l - 1u; k++)
-            {
-                if ((i + j + k) % 2 == color)
-                {
-                    U[i, j, k] = (U[i - 1, j, k] + U[i + 1, j, k] + U[i, j - 1,
-                            k] + U[i, j + 1, k] + U[i, j, k - 1] + U[i, j, k + 1] - h2 * F[i, j, k]) / 6.0;
-                }
-            }
-        }
-    }
+    foreach (i; 1 .. F.length!0 - 1)
+    foreach (j; 1 .. F.length!1 - 1)
+    foreach (k; 1 .. F.length!2 - 1)
+        if ((i + j + k) % 2 == color)
+            U[i, j, k] = (T(1) / 6) *
+                ( U[i - 1, j, k]
+                + U[i + 1, j, k]
+                + U[i, j - 1, k]
+                + U[i, j + 1, k]
+                + U[i, j, k - 1]
+                + U[i, j, k + 1]
+                - h2 * F[i, j, k]);
 }
-
diff --git a/D/source/multid/multigrid/cycle.d b/D/source/multid/multigrid/cycle.d
index 32b0ed0..6acca32 100644
--- a/D/source/multid/multigrid/cycle.d
+++ b/D/source/multid/multigrid/cycle.d
@@ -1,67 +1,76 @@
 module multid.multigrid.cycle;
 
-import mir.ndslice : Slice, slice;
-import std.exception : enforce;
-import std.math : log2;
-import std.conv : to;
-import std.traits : isFloatingPoint;
-import multid.multigrid.prolongation : prolongation;
+import mir.conv : to;
+import mir.exception : enforce;
+import mir.math : log2;
+import mir.ndslice : Slice, slice, iota, sliced, uninitSlice;
 import multid.gaussseidel.redblack : SweepType;
+import multid.multigrid.prolongation : prolongation;
+import std.traits : isFloatingPoint;
 
 /++
     Abstract base class for the Cycles it implements the base MG sheme
 +/
-class Cycle(T, size_t Dim) if (1 <= Dim && Dim <= 3 && isFloatingPoint!T)
+class Cycle(T, size_t Dim) if (1 <= Dim && isFloatingPoint!T)
 {
 protected:
     uint mu, l;
-    Slice!(T*, Dim) F;
+    Slice!(const(T)*, Dim) initialF;
+    T[] Rdata;
+    Slice!(T*, Dim + 1)[] temp;
+
+    @nogc final auto R(size_t[Dim] shape)
+    {
+        return Rdata[0 .. shape.iota.elementCount].sliced(shape);
+    }
+
     T h;
 
-    abstract Slice!(T*, Dim) presmooth(Slice!(T*, Dim) F, Slice!(T*, Dim) U, T current_h);
-    abstract Slice!(T*, Dim) postsmooth(Slice!(T*, Dim) F, Slice!(T*, Dim) U, T current_h);
-    abstract Slice!(T*, Dim) compute_residual(Slice!(T*, Dim) F, Slice!(T*, Dim) U, T current_h);
-    abstract Slice!(T*, Dim) solve(Slice!(T*, Dim) F, Slice!(T*, Dim) U, T current_h);
-    abstract Slice!(T*, Dim) restriction(Slice!(T*, Dim) U);
+    abstract @nogc void presmooth(Slice!(const(T)*, Dim) F, Slice!(T*, Dim) U, T current_h);
+    abstract @nogc void postsmooth(Slice!(const(T)*, Dim) F, Slice!(T*, Dim) U, T current_h);
+    abstract @nogc Slice!(T*, Dim) compute_residual(Slice!(const(T)*, Dim) F, Slice!(const(T)*, Dim) U, T current_h);
+    abstract @nogc void solve(Slice!(const(T)*, Dim) F, Slice!(T*, Dim) U, T current_h);
+    abstract @nogc void restriction(Slice!(T*, Dim) res, Slice!(const(T)*, Dim) U);
 
-    Slice!(T*, Dim) compute_correction(Slice!(T*, Dim) r, uint l, T current_h)
+    @nogc void compute_correction(Slice!(T*, Dim) e, Slice!(const(T)*, Dim) r, uint d, T current_h)
     {
-        auto e = slice!T(r.shape, 0);
+        e[] = 0;
         foreach (_; 0 .. mu)
         {
-            e = do_cycle(r, e, l, current_h);
-
+            do_cycle(r, e, d, current_h);
         }
-        return e;
     }
 
     /++ adds the correction vector to the U +/
-    Slice!(T*, Dim) add_correction(Slice!(T*, Dim) U, Slice!(T*, Dim) e)
+    @nogc void add_correction(Slice!(T*, Dim) U, Slice!(const(T)*, Dim) e)
     {
-
-        U.field[] += e.field[];
-        return U;
+        U[] += e;
     }
 
-    Slice!(T*, Dim) do_cycle(Slice!(T*, Dim) F, Slice!(T*, Dim) U, uint l, T current_h)
+    @nogc void do_cycle(Slice!(const(T)*, Dim) F, Slice!(T*, Dim) U, uint d, T current_h)
     {
-        if (l <= 1 || U.shape[0] <= 1)
+        if (d + 1 >= l || U.length <= 1)
         {
-            return solve(F, U, current_h);
+            solve(F, U, current_h);
+            return;
         }
 
-        U = presmooth(F, U, current_h);
+        presmooth(F, U, current_h);
 
         auto r = compute_residual(F, U, current_h);
 
-        r = restriction(r);
+        auto res = temp[d][0];
+        restriction(res, r);
+
+        auto cor = temp[d][1];
+        compute_correction(cor, res, d + 1, current_h * 2);
 
-        auto e = compute_correction(r, l - 1, current_h * 2);
+        auto e = R(U.shape);
+        prolongation(e, cor);
 
-        e = prolongation!(T, Dim)(e, U.shape);
-        U = add_correction(U, e);
+        add_correction(U, e);
 
-        return postsmooth(F, U, current_h);
+        postsmooth(F, U, current_h);
     }
 
 public:
@@ -71,25 +80,37 @@ public:
         F = Dim-slice as righthandsite
         mu = indicator for type of cycle
         l = the depth of the multigrid cycle if it is set to 0, the maxmium depth is choosen
-        h = is the distance between the grid points if set to 0 1 / F.shape[0] is used
+        h = is the distance between the grid points if set to 0 1 / F.length is used
     +/
-    this(Slice!(T*, Dim) F, uint mu, uint l, T h)
+    this(Slice!(const(T)*, Dim) F, uint mu, uint l, T h)
     {
-        enforce(l == 0 || log2(F.shape[0]) > l, "l is to big for F");
-        this.F = F;
+        auto ls = F.shape[0].to!double.log2;
+        enforce!"l is to big for F"(l == 0 || ls > l);
+        this.initialF = F;
+        this.Rdata = F.shape.uninitSlice!double.field;
         this.l = l;
         this.h = h != 0 ? h : 1.0 / F.shape[0];
         this.mu = mu;
         if (this.l == 0)
         {
-            this.l = F.shape[0].log2.to!uint - 1;
+            this.l = ls.to!uint - 1;
         }
+        auto m = F.length;
+        
+        if (m > 1) do
+        {
+            m = m / 2 + 1;
+            size_t[Dim + 1] shape = m;
+            shape[0] = 2;
+            temp ~= shape.uninitSlice!double;
+        }
+        while(m > 2 && temp.length + 1 < this.l);
     }
 
     /++
         This computes the residual
     +/
-    Slice!(T*, Dim) residual(Slice!(T*, Dim) F, Slice!(T*, Dim) U)
+    @nogc Slice!(T*, Dim) residual(Slice!(const(T)*, Dim) F, Slice!(const(T)*, Dim) U)
     {
         return compute_residual(F, U, this.h);
     }
@@ -97,13 +118,13 @@ public:
     /++
         The actual function to caculate a cycle
     +/
-    Slice!(T*, Dim) cycle(Slice!(T*, Dim) U)
+    @nogc void cycle(Slice!(T*, Dim) U)
     {
-        return do_cycle(this.F, U, this.l, this.h);
+        do_cycle(this.initialF, U, 0, this.h);
     }
 
     /++ Computes the l2 norm of U and the inital F+/
-    abstract T norm(Slice!(T*, Dim) U);
+    @nogc abstract T norm(Slice!(const(T)*, Dim) U);
 }
 
 /++ Poisson Cycle:
@@ -113,41 +134,54 @@ public:
     v2 = number of postsmoothing steps
     eps = the epsilon the is used in the cycle esspecially in the solve step as stopcriteria
 +/
-class PoissonCycle(T, size_t Dim, uint v1, uint v2, SweepType sweep = SweepType.field,
-        T eps = 1e-8) : Cycle!(T, Dim) if (1 <= Dim && Dim <= 3 && isFloatingPoint!T)
+final class PoissonCycle(
+    T,
+    size_t Dim,
+    SweepType sweep = SweepType.ndslice,
+) : Cycle!(T, Dim)
+    if (1 <= Dim && (Dim <= 3 || SweepType.ndslice && Dim <= 8) && isFloatingPoint!T)
 {
     import multid.gaussseidel.redblack : GS_RB;
 
 protected:
-    override Slice!(T*, Dim) presmooth(Slice!(T*, Dim) F, Slice!(T*, Dim) U, T current_h)
-    {
+    uint v1;
+    uint v2;
+    T eps = 1e-8;
 
-        return GS_RB!(T, Dim, v1, 1_000, eps, sweep)(F, U, current_h);
+    override void presmooth(Slice!(const(T)*, Dim) F, Slice!(T*, Dim) U, T current_h)
+    {
+        auto r = R(F.shape);
+        T norm;
+        auto it = GS_RB!sweep(F, U, r, current_h, norm, v1, 1_000, eps);
     }
 
-    override Slice!(T*, Dim) postsmooth(Slice!(T*, Dim) F, Slice!(T*, Dim) U, T current_h)
+    override void postsmooth(Slice!(const(T)*, Dim) F, Slice!(T*, Dim) U, T current_h)
     {
-
-        return GS_RB!(T, Dim, v2, 1_000, eps, sweep)(F, U, current_h);
+        auto r = R(F.shape);
+        T norm;
+        auto it = GS_RB!sweep(F, U, r, current_h, norm, v2, 1_000, eps);
     }
 
-    override Slice!(T*, Dim) compute_residual(Slice!(T*, Dim) F, Slice!(T*, Dim) U, T current_h)
+    override Slice!(T*, Dim) compute_residual(Slice!(const(T)*, Dim) F, Slice!(const(T)*, Dim) U, T current_h)
     {
         import ap = multid.tools.apply_poisson;
 
-        return ap.compute_residual!(T, Dim)(F, U, current_h);
+        auto r = R(F.shape);
+        ap.compute_residual!(T, Dim)(r, F, U, current_h);
+        return r;
     }
 
-    override Slice!(T*, Dim) solve(Slice!(T*, Dim) F, Slice!(T*, Dim) U, T current_h)
+    override void solve(Slice!(const(T)*, Dim) F, Slice!(T*, Dim) U, T current_h)
     {
-        return GS_RB!(T, Dim, 100_000, 5, eps, sweep)(F, U, current_h);
+        auto r = R(F.shape);
+        T norm;
+        auto it = GS_RB!sweep(F, U, r, current_h, norm, 100_000, 5, eps);
     }
 
-    override Slice!(T*, Dim) restriction(Slice!(T*, Dim) U)
+    override void restriction(Slice!(T*, Dim) res, Slice!(const(T)*, Dim) U)
     {
         import multid.multigrid.restriction : weighted_restriction;
-
-        return weighted_restriction!(T, Dim)(U);
+        return weighted_restriction(res, U);
     }
 
 public:
@@ -156,25 +190,34 @@ public:
         F = Dim-slice as righthandside
         mu = indicator for type of cycle
         l = the depth of the multigrid cycle if it is set to 0, the maxmium depth is choosen
-        h = is the distance between the grid points if set to 0 1 / F.shape[0] is used
+        h = is the distance between the grid points if set to 0 1 / F.length is used
     +/
-    this(Slice!(T*, Dim) F, uint mu, uint l, T h)
+    this(Slice!(const(T)*, Dim) F,
+        uint mu,
+        uint l,
+        T h,
+        uint v1,
+        uint v2,
+    T eps = 1e-8)
     {
         super(F, mu, l, h);
+        this.v1 = v1;
+        this.v2 = v2;
+        this.eps = eps;
     }
 
-    override T norm(Slice!(T*, Dim) U)
+    override T norm(Slice!(const(T)*, Dim) U)
     {
         import multid.tools.norm : nrmL2;
 
-        auto res = residual(F, U);
+        auto res = residual(initialF, U);
         return nrmL2(res);
     }
 }
 
 unittest
 {
-    import std.algorithm : all;
+    import mir.algorithm.iteration : all;
     import multid.tools.util : randomMatrix;
 
     const size_t N = 10;
@@ -191,21 +234,27 @@ unittest
     import multid.tools.apply_poisson : compute_residual;
     import multid.tools.norm : nrmL2;
 
-    const auto norm_before = compute_residual(F, U, h).nrmL2;
+    const norm_before = compute_residual(F, U, h).nrmL2;
     F[0][0 .. $] = 1.0;
     F[1 .. $, 0] = 1.0;
     F[$ - 1][1 .. $] = 0.0;
     F[1 .. $, $ - 1] = 0.0;
-    auto p = new PoissonCycle!(double, 2, 2, 2)(F, 2, 0, h);
+    auto p = new PoissonCycle!(double, 2)(F, 2, 0, h, 2, 2);
     p.cycle(U);
 
-    const auto norm_after = compute_residual(F, U, h).nrmL2;
+    const norm_after = compute_residual(F, U, h).nrmL2;
 
     assert(U[0][0 .. $].all!"a == 1.0");
     assert(U[1 .. $, 0].all!"a == 1.0");
-    assert(U[$ - 1][1 .. $].all!"a== 0.0");
+    assert(U[$ - 1][1 .. $].all!"a == 0.0");
     assert(U[1 .. $, $ - 1].all!"a == 0.0");
 
     // it should be at least a bit smaller than before
     assert(norm_after <= norm_before);
 }
+
+unittest
+{
+    // check we can compile 5-dimensional algorithm, 6-dim and higher are quite slow
+    alias PC5 = PoissonCycle!(double, 5, SweepType.ndslice);
+}
diff --git a/D/source/multid/multigrid/multigrid.d b/D/source/multid/multigrid/multigrid.d
index d1a2e14..d178432 100644
--- a/D/source/multid/multigrid/multigrid.d
+++ b/D/source/multid/multigrid/multigrid.d
@@ -12,11 +12,11 @@ Method to run some multigrid steps for abstract cycle
 Slice!(T*, Dim) multigrid(T, size_t Dim)(Cycle!(T, Dim) cycle, Slice!(T*, Dim) U, size_t iter_cycle, double eps)
 {
     //scale the epsilon with the number of gridpoints
-    eps *= U.shape[0] * U.shape[0];
+    eps *= U.elementCount;
     foreach (i; 1 .. iter_cycle + 1)
     {
 
-        U = cycle.cycle(U);
+        cycle.cycle(U);
         auto norm = cycle.norm(U);
         logf("Residual has a L2-Norm of %f after %d iterations", norm, i);
         if (norm <= eps)
@@ -42,21 +42,32 @@ Params:
 
 Returns: U
 +/
-Slice!(T*, Dim) poisson_multigrid(T, size_t Dim, uint v1, uint v2)(
-        Slice!(T*, Dim) F, Slice!(T*, Dim) U, uint level, uint mu, size_t iter_cycles,
-        string sweep = "field", T eps = 1e-6, T h = 0)
+Slice!(T*, Dim) poisson_multigrid(T, size_t Dim)(
+        Slice!(T*, Dim) F,
+        Slice!(T*, Dim) U,
+        uint level,
+        uint mu,
+        uint v1,
+        uint v2,
+        size_t iter_cycles,
+        string sweep = "ndslice",
+        T eps = 1e-6,
+        T h = 0)
 {
     Cycle!(T, Dim) cycle;
     switch (sweep)
     {
     case "slice":
-        cycle = new PoissonCycle!(T, Dim, v1, v2, SweepType.slice)(F, mu, level, h);
+        cycle = new PoissonCycle!(T, Dim, SweepType.slice)(F, mu, level, h, v1, v2);
         break;
     case "naive":
-        cycle = new PoissonCycle!(T, Dim, v1, v2, SweepType.naive)(F, mu, level, h);
+        cycle = new PoissonCycle!(T, Dim, SweepType.naive)(F, mu, level, h, v1, v2);
+        break;
+    case "field":
+        cycle = new PoissonCycle!(T, Dim, SweepType.field)(F, mu, level, h, v1, v2);
         break;
     default:
-        cycle = new PoissonCycle!(T, Dim, v1, v2, SweepType.field)(F, mu, level, h);
+        cycle = new PoissonCycle!(T, Dim, SweepType.ndslice)(F, mu, level, h, v1, v2);
     }
     return multigrid!(T, Dim)(cycle, U, iter_cycles, eps);
 }
@@ -87,9 +98,9 @@ unittest
     F[$ - 1][1 .. $] = 0.0;
     F[1 .. $, $ - 1] = 0.0;
     auto U1 = U.dup;
-    poisson_multigrid!(double, 2, 2, 2)(F, U, 0, 2, 100, "field", 1e-9);
+    poisson_multigrid(F, U, 0, 2, 2, 2, 100, "field", 1e-9);
 
-    GS_RB!(double, 2)(F, U1, h);
+    GS_RB(F, U1, h);
 
     import numir : approxEqual;
 
diff --git a/D/source/multid/multigrid/prolongation.d b/D/source/multid/multigrid/prolongation.d
index ea9fa98..7dcd06e 100644
--- a/D/source/multid/multigrid/prolongation.d
+++ b/D/source/multid/multigrid/prolongation.d
@@ -1,6 +1,8 @@
 module multid.multigrid.prolongation;
 
-import mir.ndslice : slice, Slice;
+import mir.functional: naryFun;
+import mir.math: fastmath;
+import mir.ndslice;
 import numir : approxEqual;
 
 /++
@@ -10,318 +12,32 @@ This is the implementation of a prolongation
         fine_shape = the shape of the returned grid
     Returns: the finer grid with interpolated values in between
 +/
-Slice!(T*, Dim) prolongation(T, size_t Dim)(in Slice!(T*, Dim) e, in size_t[Dim] fine_shape)
+
+Slice!(T*, Dim) prolongation(T, size_t Dim)(Slice!(const(T)*, Dim) e, size_t[Dim] fine_shape)
 {
     auto w = slice!T(fine_shape);
-    immutable end = e.shape[0] - (fine_shape[0] + 1) % 2;
-    auto WF = w.field;
-    auto EF = e.field;
+    prolongation(w, e);
+    return w;
+}
 
-    static if (Dim == 1)
+@nogc @fastmath
+void prolongation(IteratorA, IteratorB, size_t N, SliceKind aKind, SliceKind bKind)(Slice!(IteratorA, N, aKind) a, Slice!(IteratorB, N, bKind) b)
+{
+    static if (N == 1)
     {
-        for (size_t i = 1; i < end; i++)
-        {
-            w.field[2 * i - 1] = (e.field[i - 1] + e.field[i]) / 2;
-            w.field[2 * i] = e.field[i];
-        }
-        w.field[$ - 1] = e.field[$ - 1];
-        w.field[0] = e.field[0];
-    }
-    else static if (Dim == 2)
-    {
-        immutable NW = fine_shape[1];
-        immutable NE = e.shape[1];
-
-        auto idxe = (size_t i, size_t j) => i * NE + j;
-        auto idxw = (size_t i, size_t j) => i * NW + j;
-
-        foreach (i; 0 .. end - 1)
-        {
-            foreach (j; 0 .. end - 1)
-            {
-                // the value that is copied
-                WF[idxw(2 * i, 2 * j)] = EF[idxe(i, j)];
-                // the value next a copied one
-                WF[idxw(2 * i, 2 * j + 1)] = (EF[idxe(i, j)] + EF[idxe(i, j + 1)]) / 2;
-                // the value below a copied one
-                WF[idxw(2 * i + 1, 2 * j)] = (EF[idxe(i + 1, j)] + EF[idxe(i, j)]) / 2;
-                // interpolation
-                WF[idxw(2 * i + 1, 2 * j + 1)] = (
-                        EF[idxe(i, j)] +
-                        EF[idxe(i, j + 1)] +
-                        EF[idxe(i + 1, j)] +
-                        EF[idxe(i + 1, j + 1)]) / 4;
-            }
-            WF[idxw(2 * i, 2 * (end - 1))] = EF[idxe(i, end - 1)];
-            WF[idxw(2 * i + 1, 2 * (end - 1))] = (EF[idxe(i + 1, end - 1)] +
-                    EF[idxe(i, end - 1)]) / 2;
-
-            // this is for the last row and the last colomn
-            WF[idxw(2 * (end - 1), 2 * i)] = EF[idxe(end - 1, i)];
-            WF[idxw(2 * (end - 1), 2 * i + 1)] = (EF[idxe(end - 1, i)] + EF[idxe(end - 1, i + 1)]) / 2;
-        }
-        WF[$ - 1] = EF[$ - 1];
-
-        // Since we restrict always to N//2 + 1 we need to handle the case if
-        // the finer grid is even sized, because that means between the last
-        // and the forelast is no new colomn that needs to be calculated
-        if (fine_shape[0] % 2 == 0)
-        {
-            foreach (j; 0 .. end - 1)
-            {
-                WF[idxw(NW - 1, 2 * j)] = EF[idxe(NE - 1, j)];
-                WF[idxw(NW - 1, 2 * j + 1)] = (EF[idxe(NE - 1, j)] + EF[idxe(NE - 1, j + 1)]) / 2;
-
-                WF[idxw(2 * j, NW - 1)] = EF[idxe(j, NE - 1)];
-
-                WF[idxw(2 * j + 1, NW - 1)] = (
-                        EF[idxe(j, NE - 1)] +
-                        EF[idxe(j + 1, NE - 1)]) / 2;
-            }
-            w[$ - 2 .. $, $ - 2 .. $] = e[$ - 2 .. $, $ - 2 .. $];
-        }
-    }
-    else static if (Dim == 3)
-    {
-        immutable MW = fine_shape[0];
-        immutable NW = fine_shape[1];
-        immutable OW = fine_shape[2];
-
-        immutable ME = e.shape[0];
-        immutable NE = e.shape[1];
-        immutable OE = e.shape[2];
-
-        auto idxe = (size_t i, size_t j, size_t k) => i * (ME * NE) + j * NE + k;
-        auto idxw = (size_t i, size_t j, size_t k) => i * (MW * NW) + j * NW + k;
-
-        WF[$ - 1] = EF[$ - 1];
-
-        foreach (i; 0 .. end - 1)
-        {
-            foreach (j; 0 .. end - 1)
-            {
-                foreach (k; 0 .. end - 1)
-                {
-                    WF[idxw(2 * i, 2 * j, 2 * k)] = EF[idxe(i, j, k)];
-
-                    WF[idxw(2 * i, 2 * j, 2 * k + 1)] = (EF[idxe(i, j, k)] + EF[idxe(i, j, k + 1)]) / 2;
-                    WF[idxw(2 * i, 2 * j + 1, 2 * k)] = (EF[idxe(i, j, k)] + EF[idxe(i, j + 1, k)]) / 2;
-
-                    WF[idxw(2 * i + 1, 2 * j, 2 * k)] = (EF[idxe(i, j, k)] + EF[idxe(i + 1, j, k)]) / 2;
-
-                    WF[idxw(2 * i, 2 * j + 1, 2 * k + 1)] = (
-                            EF[idxe(i, j, k)] +
-                            EF[idxe(i, j + 1, k)] +
-                            EF[idxe(i, j, k + 1)] +
-                            EF[idxe(i, j + 1, k + 1)]) / 4;
-
-                    WF[idxw(2 * i + 1, 2 * j + 1, 2 * k)] = (
-                            EF[idxe(i, j, k)] +
-                            EF[idxe(i, j + 1, k)] +
-                            EF[idxe(i + 1, j, k)] +
-                            EF[idxe(i + 1, j + 1, k)]) / 4;
-
-                    WF[idxw(2 * i + 1, 2 * j, 2 * k + 1)] = (
-                            EF[idxe(i, j, k)] +
-                            EF[idxe(i, j, k + 1)] +
-                            EF[idxe(i + 1, j, k)] +
-                            EF[idxe(i + 1, j, k + 1)]) / 4;
-
-                    WF[idxw(2 * i + 1, 2 * j + 1, 2 * k + 1)] = (
-                            EF[idxe(i, j, k)] +
-                            EF[idxe(i, j + 1, k)] +
-                            EF[idxe(i, j, k + 1)] +
-                            EF[idxe(i, j + 1, k + 1)] +
-                            EF[idxe(i + 1, j, k)] +
-                            EF[idxe(i + 1, j, k + 1)] +
-                            EF[idxe(i + 1, j + 1, k)] +
-                            EF[idxe(i + 1, j + 1, k + 1)]) / 8;
-
-                }
-
-                WF[idxw(2 * i, 2 * j, 2 * (end - 1))] = EF[idxe(i, j, end - 1)];
-
-                WF[idxw(2 * i, 2 * j + 1, 2 * (end - 1))] = (
-                        EF[idxe(i, j, (end - 1))] + EF[idxe(i, j + 1, (end - 1))]) / 2;
-
-                WF[idxw(2 * i, 2 * (end - 1), 2 * j + 1)] = (
-                        EF[idxe(i, (end - 1), j)] + EF[idxe(i, (end - 1), j + 1)]) / 2;
-
-                WF[idxw(2 * (end - 1), 2 * i, 2 * j)] = EF[idxe(end - 1, i, j)];
-
-                WF[idxw(2 * (end - 1), 2 * i, 2 * j + 1)] = (
-                        EF[idxe(end - 1, i, j)] + EF[idxe(end - 1, i, j + 1)]) / 2;
-
-                WF[idxw(2 * i, 2 * (end - 1), 2 * j)] = EF[idxe(i, end - 1, j)];
-
-                WF[idxw(2 * (end - 1), 2 * i + 1, 2 * j)] = (
-                        EF[idxe(end - 1, i, j)] + EF[idxe(end - 1, i + 1, j)]) / 2;
-
-                WF[idxw(2 * i + 1, 2 * j, 2 * (end - 1))] = (
-                        EF[idxe(i, j, (end - 1))] + EF[idxe(i + 1, j, (end - 1))]) / 2;
-
-                WF[idxw(2 * i + 1, 2 * (end - 1), 2 * j)] = (
-                        EF[idxe(i, (end - 1), j)] + EF[idxe(i + 1, (end - 1), j)]) / 2;
-
-                WF[idxw(2 * (end - 1), 2 * i + 1, 2 * j + 1)] = (
-                        EF[idxe((end - 1), i, j)] +
-                        EF[idxe((end - 1), i + 1, j)] +
-                        EF[idxe((end - 1), i, j + 1)] +
-                        EF[idxe((end - 1), i + 1, j + 1)]) / 4;
-
-                WF[idxw(2 * i + 1, 2 * j + 1, 2 * (end - 1))] = (
-                        EF[idxe(i, j, (end - 1))] +
-                        EF[idxe(i, j + 1, (end - 1))] +
-                        EF[idxe(i + 1, j, (end - 1))] +
-                        EF[idxe(i + 1, j + 1, (end - 1))]) / 4;
-
-                WF[idxw(2 * i + 1, 2 * (end - 1), 2 * j + 1)] = (
-                        EF[idxe(i, (end - 1), j)] +
-                        EF[idxe(i, (end - 1), j + 1)] +
-                        EF[idxe(i + 1, (end - 1), j)] +
-                        EF[idxe(i + 1, (end - 1), j + 1)]) / 4;
-
-            }
-
-            WF[idxw(2 * i, 2 * (end - 1), 2 * (end - 1))] = EF[idxe(i, end - 1, end - 1)];
-
-            WF[idxw(2 * i + 1, 2 * (end - 1), 2 * (end - 1))] = (
-                    EF[idxe(i, end - 1, end - 1)] + EF[idxe(i + 1, end - 1, end - 1)]) / 2;
-
-            WF[idxw(2 * (end - 1), 2 * i, 2 * (end - 1))] = EF[idxe(end - 1, i, end - 1)];
-
-            WF[idxw(2 * (end - 1), 2 * i + 1, 2 * (end - 1))] = (
-                    EF[idxe(end - 1, i, end - 1)] + EF[idxe(end - 1, i + 1, end - 1)]) / 2;
-
-            WF[idxw(2 * (end - 1), 2 * (end - 1), 2 * i)] = EF[idxe(end - 1, end - 1, i)];
-
-            WF[idxw(2 * (end - 1), 2 * (end - 1), 2 * i + 1)] = (
-                    EF[idxe(end - 1, end - 1, i)] + EF[idxe(end - 1, end - 1, i + 1)]) / 2;
-        }
-        // Since we restrict always to N//2 + 1 we need to handle the case if
-        // the finer grid is even sized, because that means between the last
-        // and the forelast is no new colomn that needs to be calculated
-        if (fine_shape[0] % 2 == 0) // != e.shape[0] % 2)
-        {
-
-            foreach (i; 0 .. end - 1)
-            {
-                foreach (j; 0 .. end - 1)
-                {
-                    WF[idxw(2 * i, 2 * j, OW - 1)] = EF[idxe(i, j, OE - 1)];
-                    WF[idxw(2 * i, MW - 1, 2 * j)] = EF[idxe(i, ME - 1, j)];
-                    WF[idxw(NW - 1, 2 * i, 2 * j)] = EF[idxe(NE - 1, i, j)];
-
-                    WF[idxw(2 * i, 2 * j + 1, OW - 1)] = (
-                            EF[idxe(i, j, OE - 1)] +
-                            EF[idxe(i, j + 1, OE - 1)]) / 2;
-
-                    WF[idxw(2 * i, MW - 1, 2 * j + 1)] = (
-                            EF[idxe(i, ME - 1, j)] +
-                            EF[idxe(i, ME - 1, j + 1)]) / 2;
-
-                    WF[idxw(2 * i + 1, 2 * j, OW - 1)] = (
-                            EF[idxe(i, j, OE - 1)] +
-                            EF[idxe(i + 1, j, OE - 1)]) / 2;
-
-                    WF[idxw(2 * i + 1, MW - 1, 2 * j)] = (
-                            EF[idxe(i, ME - 1, j)] +
-                            EF[idxe(i + 1, ME - 1, j)]) / 2;
-
-                    WF[idxw(2 * i + 1, MW - 1, 2 * j + 1)] = (
-                            EF[idxe(i, ME - 1, j)] +
-                            EF[idxe(i + 1, ME - 1, j)] +
-                            EF[idxe(i, ME - 1, j + 1)] +
-                            EF[idxe(i + 1, ME - 1, j + 1)]) / 4;
-
-                    WF[idxw(2 * i + 1, 2 * j + 1, OW - 1)] = (
-                            EF[idxe(i, j, OE - 1)] +
-                            EF[idxe(i, j + 1, OE - 1)] +
-                            EF[idxe(i + 1, j, OE - 1)] +
-                            EF[idxe(i + 1, j + 1, OE - 1)]) / 4;
-
-                    WF[idxw(NW - 1, 2 * i + 1, 2 * j)] = (
-                            EF[idxe(NE - 1, i, j)] +
-                            EF[idxe(NE - 1, i + 1, j)]) / 2;
-
-                    WF[idxw(NW - 1, 2 * i, 2 * j + 1)] = (
-                            EF[idxe(NE - 1, i, j)] +
-                            EF[idxe(NE - 1, i, j + 1)]) / 2;
-
-                    WF[idxw(NW - 1, 2 * i + 1, 2 * j + 1)] = (
-                            EF[idxe(NE - 1, i, j)] +
-                            EF[idxe(NE - 1, i, j + 1)] +
-                            EF[idxe(NE - 1, i + 1, j)] +
-                            EF[idxe(NE - 1, i + 1, j + 1)]) / 4;
-                }
-
-                WF[idxw(2 * i, 2 * (end - 1), OW - 1)] = EF[idxe(i, (end - 1), OE - 1)];
-                WF[idxw(2 * i, MW - 1, 2 * (end - 1))] = EF[idxe(i, ME - 1, (end - 1))];
-
-                WF[idxw(2 * (end - 1), MW - 1, 2 * i)] = EF[idxe((end - 1), ME - 1, i)];
-                WF[idxw(NW - 1, 2 * (end - 1), 2 * i)] = EF[idxe(NE - 1, (end - 1), i)];
-
-                WF[idxw(NW - 1, 2 * i, 2 * (end - 1))] = EF[idxe(NE - 1, i, (end - 1))];
-                WF[idxw(2 * (end - 1), 2 * i, OW - 1)] = EF[idxe((end - 1), i, OE - 1)];
-
-                WF[idxw(2 * i, MW - 1, OW - 1)] = EF[idxe(i, ME - 1, OE - 1)];
-                WF[idxw(NW - 1, 2 * i, OW - 1)] = EF[idxe(NE - 1, i, OE - 1)];
-                WF[idxw(NW - 1, MW - 1, 2 * i)] = EF[idxe(NE - 1, ME - 1, i)];
-
-                WF[idxw(2 * (end - 1), MW - 1, 2 * i + 1)] = (
-                        EF[idxe((end - 1), ME - 1, i)] +
-                        EF[idxe((end - 1), ME - 1, i + 1)]) / 2;
-
-                WF[idxw(2 * (end - 1), 2 * i + 1, OW - 1)] = (
-                        EF[idxe((end - 1), i, OE - 1)] +
-                        EF[idxe((end - 1), i + 1, OE - 1)]) / 2;
-
-                WF[idxw((NW - 1), 2 * i + 1, OW - 1)] = (
-                        EF[idxe((NE - 1), i, OE - 1)] +
-                        EF[idxe((NE - 1), i + 1, OE - 1)]) / 2;
-
-                WF[idxw(NW - 1, MW - 1, 2 * i + 1)] = (
-                        EF[idxe(NE - 1, ME - 1, i)] +
-                        EF[idxe(NE - 1, ME - 1, i + 1)]) / 2;
-
-                WF[idxw(2 * i + 1, MW - 1, OW - 1)] = (
-                        EF[idxe(i, ME - 1, OE - 1)] +
-                        EF[idxe(i + 1, ME - 1, OE - 1)]) / 2;
-
-                WF[idxw(2 * i + 1, 2 * (end - 1), OW - 1)] = (
-                        EF[idxe(i, (end - 1), OE - 1)] +
-                        EF[idxe(i + 1, (end - 1), OE - 1)]) / 2;
-
-                WF[idxw(2 * i + 1, MW - 1, 2 * (end - 1))] = (EF[idxe(i, ME - 1, (end - 1))] +
-                        EF[idxe(i + 1, ME - 1, (end - 1))]) / 2;
-
-                WF[idxw(NW - 1, 2 * i + 1, 2 * (end - 1))] = (
-                        EF[idxe(NE - 1, i, (end - 1))] +
-                        EF[idxe(NE - 1, i + 1, (end - 1))]) / 2;
-
-                WF[idxw(NW - 1, 2 * (end - 1), 2 * i + 1)] = (
-                        EF[idxe(NE - 1, (end - 1), i)] +
-                        EF[idxe(NE - 1, (end - 1), i + 1)]) / 2;
-            }
-
-            WF[idxw(2 * (end - 1), MW - 1, OW - 1)] = EF[idxe((end - 1), ME - 1, OE - 1)];
-            WF[idxw(NW - 1, 2 * (end - 1), OW - 1)] = EF[idxe(NE - 1, (end - 1), OE - 1)];
-            WF[idxw(NW - 1, MW - 1, 2 * (end - 1))] = EF[idxe(NE - 1, ME - 1, (end - 1))];
-
-            WF[idxw(2 * (end - 1), MW - 1, 2 * (end - 1))] = EF[idxe((end - 1), ME - 1, (end - 1))];
-            WF[idxw(2 * (end - 1), 2 * (end - 1), OW - 1)] = EF[idxe((end - 1), (end - 1), OE - 1)];
-            WF[idxw(NW - 1, 2 * (end - 1), 2 * (end - 1))] = EF[idxe(NE - 1, end - 1, (end - 1))];
-            WF[idxw(2 * (end - 1), 2 * (end - 1), 2 * (end - 1))] =
-                EF[idxe((end - 1), (end - 1), (end - 1))];
-
-        }
-
+        alias expand = naryFun!"a = b";
+        alias apply = naryFun!"b = a / 2";
     }
     else
     {
-        static assert(false, Dim.stringof ~ " is not a supported dimension!");
+        alias expand = prolongation;
+        alias apply = each!"b = a / 2";
     }
-    return w;
+
+    import mir.functional: reverseArgs;
+    import multid.multigrid.restriction;
+    restriction!(reverseArgs!expand)(b.byDim!0, a.byDim!0);
+    each!apply(a.byDim!0.slide!(3, "a + c").stride, a.byDim!0[1 .. $ - 1].stride);
 }
 
 // Tests 1D
@@ -331,17 +47,17 @@ unittest
 
     auto a = [0, 2, 4, 6, 8].sliced!double;
     auto correct = 9.iota.slice;
-    const auto ret = prolongation!(double, 1)(a, correct.shape);
+    auto ret = prolongation!(double, 1)(a, correct.shape);
     assert(ret == correct);
 
     auto a2 = [0, 2, 4, 6, 8, 9].sliced!long;
     auto correct2 = 10.iota.slice;
-    const auto ret2 = prolongation!(long, 1)(a2, correct2.shape);
+    auto ret2 = prolongation!(long, 1)(a2, correct2.shape);
     assert(ret2 == correct2);
 
     auto a3 = [0, 2, 4, 6, 7].sliced!long;
     auto correct3 = 8.iota.slice;
-    const auto ret3 = prolongation!(long, 1)(a3, correct3.shape);
+    auto ret3 = prolongation!(long, 1)(a3, correct3.shape);
     assert(ret3 == correct3);
 
 }
@@ -360,12 +76,12 @@ unittest
     ].sliced(5, 5);
 
     auto correct = iota([9, 9]).slice;
-    const auto ret = prolongation!(double, 2)(arr, correct.shape);
+    auto ret = prolongation!(double, 2)(arr, correct.shape);
     assert(ret == correct);
     auto arr2 = [0., 2., 4., 6., 7., 16., 18., 20., 22., 23., 32., 34., 36.,
         38., 39., 48., 50., 52., 54., 55., 56., 58., 60., 62., 63.].sliced(5, 5);
     auto correct2 = iota([8, 8]).slice;
-    const auto ret2 = prolongation!(double, 2)(arr2, correct2.shape);
+    auto ret2 = prolongation!(double, 2)(arr2, correct2.shape);
     assert(ret2 == correct2);
 }
 
@@ -416,16 +132,16 @@ unittest
     auto ret6 = prolongation!(double, 2)(A, [6, 6]);
     auto ret7 = prolongation!(double, 2)(A, [7, 7]);
 
-    assert(ret6[0, 0 .. $].strided!(0)(2) == A[0, 0 .. $ - 1]);
-    assert(ret6[0 .. $, 0].strided!(0)(2) == A[0 .. $ - 1, 0]);
-    assert(ret6[$ - 2 .. $, 0 .. $].strided!(0, 1)(1, 2) == A[$ - 2 .. $, 0 .. $ - 1]);
-    assert(ret6[0 .. $, $ - 2 .. $].strided!(0)(2) == A[0 .. $ - 1, $ - 2 .. $]);
+    assert(ret6[0, 0 .. $].stride == A[0, 0 .. $ - 1]);
+    assert(ret6[0 .. $, 0].stride == A[0 .. $ - 1, 0]);
+    assert(ret6[$ - 2 .. $, 0 .. $].strided!1(2) == A[$ - 2 .. $, 0 .. $ - 1]);
+    assert(ret6[0 .. $, $ - 2 .. $].strided!0(2) == A[0 .. $ - 1, $ - 2 .. $]);
     assert(ret6[$ - 2 .. $, $ - 2 .. $] == A[$ - 2 .. $, $ - 2 .. $]);
 
-    assert(ret7[0, 0 .. $].strided!(0)(2) == A[0, 0 .. $]);
-    assert(ret7[0 .. $, 0].strided!(0)(2) == A[0 .. $, 0]);
-    assert(ret7[$ - 1 .. $, 0 .. $].strided!(1)(2) == A[$ - 1 .. $, 0 .. $]);
-    assert(ret7[0 .. $, $ - 1 .. $].strided!(0)(2) == A[0 .. $, $ - 1 .. $]);
+    assert(ret7[0, 0 .. $].stride == A[0, 0 .. $]);
+    assert(ret7[0 .. $, 0].stride == A[0 .. $, 0]);
+    assert(ret7[$ - 1 .. $, 0 .. $].strided!1(2) == A[$ - 1 .. $, 0 .. $]);
+    assert(ret7[0 .. $, $ - 1 .. $].strided!0(2) == A[0 .. $, $ - 1 .. $]);
 }
 
 unittest
@@ -451,8 +167,8 @@ unittest
         322., 324., 326., 328.,
         336., 338., 340., 342.
     ].sliced(4, 4, 4);
-    const auto correct = iota([7, 7, 7]).slice;
-    const auto B = prolongation!(double, 3)(A, [7, 7, 7]);
+    auto correct = iota([7, 7, 7]).slice;
+    auto B = prolongation!(double, 3)(A, [7, 7, 7]);
     assert(correct == B);
 }
 
@@ -479,7 +195,7 @@ unittest
         204., 206., 208., 209.,
         210., 212., 214., 215.
     ].sliced(4, 4, 4);
-    const auto correct = iota([6, 6, 6]).slice;
-    const auto B = prolongation!(double, 3)(A, [6, 6, 6]);
+    auto correct = iota([6, 6, 6]).slice;
+    auto B = prolongation!(double, 3)(A, [6, 6, 6]);
     assert(correct == B);
 }
diff --git a/D/source/multid/multigrid/restriction.d b/D/source/multid/multigrid/restriction.d
index a54b623..c01df34 100644
--- a/D/source/multid/multigrid/restriction.d
+++ b/D/source/multid/multigrid/restriction.d
@@ -1,207 +1,106 @@
 module multid.multigrid.restriction;
 
-import mir.ndslice : Slice, slice, sliced, strided, iota, as, fuse;
-import std.exception : enforce;
+import mir.algorithm.iteration: each;
+import mir.exception : enforce;
+import mir.functional: recurseTemplatePipe;
+import mir.math: fastmath;
+import mir.ndslice;
 import numir : approxEqual;
+import std.traits: isNumeric;
 
 /++
-This is the implementation of a restriction for 1D
+This is the implementation of a restriction for 1D, 2D, 3D
 +/
-Slice!(T*, Dim) restriction(T, size_t Dim : 1)(in Slice!(T*, Dim) A)
-{
-    auto N = A.shape[0] / 2 + 1;
-    auto ret = slice!T([N]);
-    const auto end = N - (A.shape[0] + 1) % 2;
-    auto AF = A.field;
+@fastmath
 
-    foreach (i; 0 .. end)
-    {
-        // get every second element in AF
-        ret.field[i] = AF[i * 2];
-    }
-    // special case: outer corner
-    ret.field[$ - 1] = AF[$ - 1];
+// TODO: move to mir.algorithm.iteration
 
-    return ret;
-}
-
-/++
-This is the implementation of a restriction for 2D
-works only for square grids
-+/
-Slice!(T*, Dim) restriction(T, size_t Dim : 2)(in Slice!(T*, Dim) A)
+template restriction(alias fun = "a = b")
 {
-    enforce(A.shape[0] == A.shape[1], "not all dimensions have the same length");
-    const auto M = A.shape[0];
-    const auto N = M / 2 + 1;
-    auto ret = slice!T([N, N]);
-    const auto end = N - (A.shape[0] + 1) % 2;
-    auto AF = A.field;
-
-    foreach (i; 0 .. end)
+    import mir.functional: naryFun;
+
+    static if (__traits(isSame, naryFun!fun, fun))
+    Slice!(T*, Dim) restriction(T, size_t Dim)(Slice!(const(T)*, Dim) A)
     {
-        foreach (j; 0 .. end)
-        {
-            // get every second element in AF
-            auto flattindexret = i * N + j;
-            auto flattindexA = i * M * 2 + j * 2;
-            ret.field[flattindexret] = AF[flattindexA];
-        }
+        size_t[Dim] shape = A.length / 2 + 1;
+        auto ret = shape.slice!T;
+        .restriction!fun(ret, A);
+        return ret;
     }
-    // special case: borders
-    foreach (i; 0 .. end)
+
+    static if (__traits(isSame, naryFun!fun, fun))
+    @nogc @fastmath
+    void restriction(IteratorA, IteratorB, size_t N, SliceKind aKind, SliceKind bKind)(Slice!(IteratorA, N, aKind) a, Slice!(IteratorB, N, bKind) b)
     {
-        auto indexrowR = (N - 1) * N + i;
-        auto indexrowA = (M - 1) * M + 2 * i;
-        auto indexcolR = N * i + N - 1;
-        auto indexcolA = M * 2 * i + M - 1;
-        ret.field[indexrowR] = AF[indexrowA];
-        ret.field[indexcolR] = AF[indexcolA];
+        import std.traits: Select;
+        alias f = Select!(N == 1, fun, .restriction!fun);
+        each!f(a[0 .. $ - 1].byDim!0, b[0 .. $ - 1].byDim!0.stride);
+        f(a.back, b.back);
     }
-    // special case: outer corner
-    ret.field[$ - 1] = AF[$ - 1];
-
-    return ret;
+    else
+        alias restriction = .restriction!(naryFun!fun);
 }
 
-/++
-This is the implementation of a restriction for 3D
-works only if all dimensions have the same length
-+/
-Slice!(T*, Dim) restriction(T, size_t Dim : 3)(in Slice!(T*, Dim) A)
-{
-    enforce(A.shape[0] == A.shape[1] && A.shape[1] == A.shape[2], "not all dimensions have the same length");
-    const auto M = A.shape[0];
-    const auto N = M / 2 + 1;
-    auto ret = slice!T([N, N, N]);
-    const auto end = N - (A.shape[0] + 1) % 2;
-    auto AF = A.field;
-
-    foreach (k; 0 .. end)
-    {
-        foreach (i; 0 .. end)
-        {
-            foreach (j; 0 .. end)
-            {
-                // get every second element in AF
-                auto flattindexret = k * (N * N) + i * N + j;
-                auto flattindexA = k * (M * M) * 2 + i * M * 2 + j * 2;
-                ret.field[flattindexret] = AF[flattindexA];
-            }
-        }
-    }
-    // special case: inner borders
-    ret[0 .. end, 0 .. end, $ - 1] = A[0 .. $, 0 .. $, $ - 1].strided!(0, 1)(2, 2);
-    ret[$ - 1, 0 .. end, 0 .. end] = A[$ - 1, 0 .. $, 0 .. $].strided!(0, 1)(2, 2);
-    ret[0 .. end, $ - 1, 0 .. end] = A[0 .. $, $ - 1, 0 .. $].strided!(0, 1)(2, 2);
-    // special case: outer borders
-    ret[0 .. end, $ - 1, $ - 1] = A[0 .. $, $ - 1, $ - 1].strided!(0)(2);
-    ret[$ - 1, 0 .. end, $ - 1] = A[$ - 1, 0 .. $, $ - 1].strided!(0)(2);
-    ret[$ - 1, $ - 1, 0 .. end] = A[$ - 1, $ - 1, 0 .. $].strided!(0)(2);
-    // special case: outer corner
-    ret.field[$ - 1] = AF[$ - 1];
 
-    return ret;
-}
 
 /++
-This is the implementation of a weighted_restriction 1D
+This is the implementation of a restriction for 1D, 2D, 3D
 +/
-Slice!(T*, Dim) weighted_restriction(T, size_t Dim : 1)(in Slice!(T*, Dim) A)
+@fastmath
+Slice!(T*, Dim) weighted_restriction(T, size_t Dim)(Slice!(const(T)*, Dim) A)
 {
-    const auto M = A.shape[0];
-    const auto N = M / 2 + 1;
-    auto ret = restriction!(T, Dim)(A);
-    auto AF = A.field;
-    foreach (i; 1u .. N - 1u)
-    {
-        ret.field[i] = ret.field[i] / 2 + (AF[i * 2 - 1u] + AF[i * 2 + 1u]) / cast(T)(4);
-    }
+    size_t[Dim] shape = A.length / 2 + 1;
+    auto ret = shape.slice!T;
+    weighted_restriction(ret, A);
     return ret;
 }
 
-/++
-This is the implementation of a weighted_restriction 2D
-+/
-Slice!(T*, Dim) weighted_restriction(T, size_t Dim : 2)(in Slice!(T*, Dim) A)
+@nogc @fastmath
+void weighted_restriction(T, size_t N)(Slice!(T*, N) r, Slice!(const(T)*, N) A)
 {
-    enforce(A.shape[0] == A.shape[1], "not all dimensions have the same length");
-    const auto M = A.shape[0];
-    const auto N = M / 2 + 1;
-    auto ret = restriction!(T, Dim)(A);
-    auto AF = A.field;
+    weighted_restriction_borders(r, A);
 
-    foreach (i; 1u .. N - 1u)
-    {
-        foreach (j; 1u .. N - 1u)
-        {
-            auto indexR = i * N + j;
-            auto indexA = i * M * 2 + j * 2;
-            ret.field[indexR] = ret.field[indexR] / cast(T)(4) +
-                (
-                        AF[indexA - 1] + AF[indexA + 1] +
-                        AF[indexA - M] + AF[indexA + M]) / cast(T)(8) +
-                (
-                        AF[indexA - 1 - M] + AF[indexA - 1 + M] +
-                        AF[indexA + 1 - M] + AF[indexA + 1 + M]) / cast(T)(16);
-        }
-    }
-    return ret;
+    auto rc = r.dropBorders;
+    auto ac = A.canonical;
+    ac.popFrontAll;
+
+    enum factor = 2 ^^ (N * 2);
+    static if (__traits(isIntegral, T))
+        alias scaled = a => cast(T)(a / factor);
+    else
+        alias scaled = a => a * (T(1) / factor);
+
+    rc[] = ac.slide!(3, "a + 2 * b + c").map!scaled.strided(2);
 }
 
-/++
-This is the implementation of a weighted_restriction 3D
-+/
-Slice!(T*, Dim) weighted_restriction(T, size_t Dim : 3)(in Slice!(T*, Dim) A)
+
+@nogc @fastmath
+void weighted_restriction_borders(T, size_t N)(Slice!(T*, N) r, Slice!(const(T)*, N) A)
 {
-    enforce(A.shape[0] == A.shape[1] && A.shape[1] == A.shape[2], "not all dimensions have the same length");
-    const auto M = A.shape[0];
-    const auto N = M / 2 + 1;
-    auto ret = restriction!(T, Dim)(A);
-    auto AF = A.field;
-    foreach (k; 1u .. N - 1u)
+    import std.traits: Select;
+    static if (N == 1)
     {
-        foreach (i; 1u .. N - 1u)
-        {
-            foreach (j; 1u .. N - 1u)
-            {
-                auto indexR = k * (N * N) + i * N + j;
-                auto indexA = k * (M * M) * 2 + i * M * 2 + j * 2;
-                ret.field[indexR] = (
-                        ret.field[indexR] * cast(T)(8) +
-                        (
-                            AF[indexA - 1] + AF[indexA + 1] +
-                            AF[indexA - M] + AF[indexA + M] +
-                            AF[indexA - M * M] + AF[indexA + M * M]) * cast(T)(4) +
-                        (
-                            AF[indexA - 1 - M] + AF[indexA + 1 + M] +
-                            AF[indexA - 1 + M] + AF[indexA + 1 - M] +
-                            AF[indexA - 1 - M * M] + AF[indexA + 1 + M * M] +
-                            AF[indexA - 1 + M * M] + AF[indexA + 1 - M * M] +
-                            AF[indexA - M - M * M] + AF[indexA + M + M * M] +
-                            AF[indexA - M + M * M] + AF[indexA + M - M * M]) * cast(
-                            T)(2) +
-                        (AF[indexA - 1 - M - M * M] + AF[indexA + 1 - M - M * M] +
-                            AF[indexA - 1 + M - M * M] + AF[indexA + 1 + M - M * M] +
-                            AF[indexA - 1 - M + M * M] + AF[indexA + 1 - M + M * M] +
-                            AF[indexA - 1 + M + M * M] + AF[indexA + 1 + M + M * M])) /
-                    cast(T)(64);
-            }
-        }
+        r.front = A.front;
+        r.back = A.back;
+    }
+    else
+    {
+        restriction(r.front, A.front);
+        each!weighted_restriction_borders(r[1 .. $ - 1].byDim!0, A[2 .. $ - 1].byDim!0.stride);
+        restriction(r.back, A.back);
     }
-    return ret;
 }
 
 // Test restriction 1D
 unittest
 {
     auto arr = iota(10).slice;
-    auto ret = restriction!(long, 1)(arr);
+    auto ret = restriction!"a = b"(arr);
     auto correct = [0, 2, 4, 6, 8, 9];
     assert(ret == correct);
 
     arr = iota(11).slice;
-    ret = restriction!(long, 1)(arr);
+    ret = restriction(arr);
     correct = [0, 2, 4, 6, 8, 10];
     assert(ret == correct);
 }
@@ -209,13 +108,13 @@ unittest
 // Test restriction 2D
 unittest
 {
-    auto arr = iota([5, 5]).slice;
-    auto ret = restriction!(long, 2)(arr);
+    auto arr = [5, 5].iota.slice;
+    auto ret = restriction(arr);
     auto correct = [[0, 2, 4], [10, 12, 14], [20, 22, 24]];
     assert(ret == correct);
 
-    arr = iota([6, 6]).slice;
-    ret = restriction!(long, 2)(arr);
+    arr = [6, 6].iota.slice;
+    ret = restriction(arr);
     correct = [[0, 2, 4, 5], [12, 14, 16, 17], [24, 26, 28, 29], [30, 32, 34, 35]];
     assert(ret == correct);
 }
@@ -223,8 +122,8 @@ unittest
 // Test restrtiction 3D
 unittest
 {
-    auto arr = iota([5, 5, 5]).slice;
-    auto ret = restriction!(long, 3)(arr);
+    auto arr = [5, 5, 5].iota.slice;
+    auto ret = restriction(arr);
     auto correct = [[[0., 2., 4.],
             [10., 12., 14.],
             [20., 22., 24.]],
@@ -238,8 +137,8 @@ unittest
             [120., 122., 124.]]];
     assert(ret == correct);
 
-    arr = iota([6, 6, 6]).slice;
-    ret = restriction!(long, 3)(arr);
+    arr = [6, 6, 6].iota.slice;
+    ret = restriction(arr);
     correct = [[[0., 2., 4., 5.],
             [12., 14., 16., 17.],
             [24., 26., 28., 29.],
@@ -266,12 +165,12 @@ unittest
 unittest
 {
     auto arr = iota(10).slice;
-    auto ret = weighted_restriction!(long, 1)(arr);
+    auto ret = weighted_restriction(arr);
     auto correct = [0, 2, 4, 6, 8, 9];
     assert(ret == correct);
 
     arr = iota(11).slice;
-    ret = weighted_restriction!(long, 1)(arr);
+    ret = weighted_restriction(arr);
     correct = [0, 2, 4, 6, 8, 10];
     assert(ret == correct);
 }
@@ -279,26 +178,27 @@ unittest
 unittest
 {
     auto arr2 = [1.0, 2.0, 3.0, 2.0, 1.0].sliced;
-    auto ret2 = weighted_restriction!(double, 1)(arr2);
+    auto ret2 = weighted_restriction(arr2);
     auto correct2 = [1.0, 2.5, 1.0];
     assert(ret2 == correct2);
 
     arr2 = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0].sliced;
-    ret2 = weighted_restriction!(double, 1)(arr2);
+    ret2 = weighted_restriction(arr2);
     correct2 = [1.0, 2.75, 2.0, 1.0];
     assert(ret2 == correct2);
 }
 
+
 // Test weighted_restriction 2D long
 unittest
 {
-    auto arr = iota([5, 5]).slice;
-    auto ret = weighted_restriction!(long, 2)(arr);
+    auto arr = [5, 5].iota.slice;
+    auto ret = weighted_restriction(arr);
     auto correct = [[0, 2, 4], [10, 12, 14], [20, 22, 24]];
     assert(ret == correct);
 
-    arr = iota([6, 6]).slice;
-    ret = restriction!(long, 2)(arr);
+    arr = [6, 6].iota.slice;
+    ret = restriction(arr);
     correct = [[0, 2, 4, 5], [12, 14, 16, 17], [24, 26, 28, 29], [30, 32, 34, 35]];
     assert(ret == correct);
 }
@@ -310,10 +210,11 @@ unittest
         3.0, 4.0, 5.0, 4.0, 3.0,
         4.0, 5.0, 6.0, 5.0, 4.0,
         5.0, 6.0, 7.0, 6.0, 5.0].sliced(5, 5);
-    auto ret2 = weighted_restriction!(double, 2)(arr2);
+    auto ret2 = weighted_restriction(arr2);
     auto correct2 = [[1.0, 3.0, 1.0],
         [3.0, 4.5, 3.0],
         [5.0, 7.0, 5.0]];
+
     assert(ret2 == correct2);
 
     arr2 = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0,
@@ -322,7 +223,7 @@ unittest
         4.0, 5.0, 6.0, 6.0, 5.0, 4.0,
         5.0, 6.0, 7.0, 7.0, 6.0, 5.0,
         6.0, 7.0, 8.0, 8.0, 7.0, 6.0].sliced(6, 6);
-    ret2 = weighted_restriction!(double, 2)(arr2);
+    ret2 = weighted_restriction(arr2);
     correct2 = [[1.0, 3.0, 2.0, 1.0],
         [3.0, 4.75, 4.0, 3.0],
         [5.0, 6.75, 6.0, 5.0],
@@ -333,8 +234,8 @@ unittest
 // Test weighted_restriction 3D double
 unittest
 {
-    auto arr = iota([5, 5, 5]).as!double.slice;
-    auto ret = weighted_restriction!(double, 3)(arr);
+    auto arr = [5, 5, 5].iota.as!double.slice;
+    auto ret = weighted_restriction(arr);
     auto correct = [[[0., 2., 4.],
             [10., 12., 14.],
             [20., 22., 24.]],
@@ -348,8 +249,8 @@ unittest
             [120., 122., 124.]]];
     assert(ret == correct);
 
-    arr = iota([6, 6, 6]).as!double.slice;
-    ret = restriction!(double, 3)(arr);
+    arr = [6, 6, 6].iota.as!double.slice;
+    ret = restriction(arr);
     correct = [[[0., 2., 4., 5.],
             [12., 14., 16., 17.],
             [24., 26., 28., 29.],
@@ -392,7 +293,7 @@ unittest
         0.9038084, 0.45367844, 0.41827524, 0.95954425,
         0.30096364, 0.37174358, 0.45047108, 0.47731472,
         0.98000574, 0.49313159, 0.44181032, 0.97419118].sliced(4, 4);
-    auto ret = weighted_restriction!(double, 2)(arr);
+    auto ret = weighted_restriction(arr);
 
     assert(approxEqual(ret, correct, 1e-2, 1e-8));
 }
@@ -420,7 +321,7 @@ unittest
         [0.99019009, 0.54380879, 0.5277031, 0.6169349, 0.14346233],
         [0.24181791, 0.44420891, 0.44207825, 0.45405526, 0.51607495],
         [0.68527047, 0.13484427, 0.23751941, 0.20323849, 0.59139025]];
-    auto ret = weighted_restriction!(double, 2)(arr.fuse);
+    auto ret = weighted_restriction(arr.fuse);
 
     assert(approxEqual(ret, correct.fuse, 1e-8, 1e-8));
 }
diff --git a/D/source/multid/tools/apply_poisson.d b/D/source/multid/tools/apply_poisson.d
index 4608de8..5a605a4 100644
--- a/D/source/multid/tools/apply_poisson.d
+++ b/D/source/multid/tools/apply_poisson.d
@@ -1,5 +1,6 @@
 module multid.tools.apply_poisson;
 
+import mir.math: fastmath;
 import mir.ndslice;
 
 /++
@@ -10,109 +11,54 @@ import mir.ndslice;
         h = distance between grid points
     Returns: x = A*U
 +/
-Slice!(T*, Dim) apply_poisson(T, size_t Dim)(in Slice!(T*, Dim) U, in T h)
+Slice!(T*, Dim) apply_poisson(T, size_t Dim)(Slice!(const(T)*, Dim) U, const T h)
 {
-    auto x = slice!(T)(U.shape);
-    const T h2 = h * h;
-    auto UF = U.field;
-
-    static if (Dim == 1)
-    {
-        x.field[0] = UF[0];
-        x.field[$ - 1] = UF[$ - 1];
-        foreach (i; 1 .. U.shape[0] - 1)
-        {
-            x.field[i] = (-2.0 * UF[i] + UF[i - 1] + UF[i + 1]) / h2;
-        }
-
-    }
-    else static if (Dim == 2)
-    {
-        immutable m = U.shape[0];
-        immutable n = U.shape[1];
-
-        x.field[0 .. m] = UF[0 .. m];
-        x.field[$ - m .. $] = UF[$ - m .. $];
-
-        x[1 .. $ - 1, 0] = U[1 .. $ - 1, 0];
-        x[1 .. $ - 1, $ - 1] = U[1 .. $ - 1, $ - 1];
-
-        foreach (i; 1 .. m - 1)
-        {
-            foreach (j; 1 .. n - 1)
-            {
-                auto flatindex = i * m + j;
-                x.field[flatindex] = (
-                        -4.0 * UF[flatindex] +
-                        UF[flatindex - m] +
-                        UF[flatindex + m] +
-                        UF[flatindex - 1] +
-                        UF[flatindex + 1]) / h2;
-            }
-        }
-    }
-    else static if (Dim == 3)
-    {
-
-        const auto m = U.shape[0];
-        const auto n = U.shape[1];
-        const auto l = U.shape[2];
-
-        x[0 .. $, 0 .. $, 0] = U[0 .. $, 0 .. $, 0];
-        x[0 .. $, 0, 0 .. $] = U[0 .. $, 0, 0 .. $];
-        x[0, 0 .. $, 0 .. $] = U[0, 0 .. $, 0 .. $];
-
-        x[0 .. $, 0 .. $, $ - 1] = U[0 .. $, 0 .. $, $ - 1];
-        x[0 .. $, $ - 1, 0 .. $] = U[0 .. $, $ - 1, 0 .. $];
-        x[$ - 1, 0 .. $, 0 .. $] = U[$ - 1, 0 .. $, 0 .. $];
-
-        for (size_t i = 1; i < m - 1; i++)
-        {
-            for (size_t j = 1; j < n - 1; j++)
-            {
-                const auto flatindex2d = i * (n * l) + j * l;
-                for (size_t k = 1; k < l - 1; k++)
-                {
-                    const flatindex = flatindex2d + k;
-                    x.field[flatindex] = (
-                            -6.0 *
-                            UF[flatindex] +
-                            UF[flatindex - n * l] +
-                            UF[flatindex + n * l] +
-                            UF[flatindex - l] +
-                            UF[flatindex + l] +
-                            UF[flatindex - 1] +
-                            UF[flatindex + 1]) / h2;
-                }
-            }
-        }
-    }
-    else
-    {
-        static assert(false, Dim.stringof ~ " is not a supported dimension!");
-    }
-
+    auto x = U.shape.slice!T;
+    apply_poisson(x, U, h);
     return x;
 }
 
+@nogc @fastmath
+void apply_poisson(T, size_t Dim)(Slice!(T*, Dim) x, Slice!(const(T)*, Dim) U, const T h)
+{
+    assumeSameShape(x, U);
+    eachOnBorder!"a = b"(x, U);
+    x.dropBorders[] = (1 / (h * h)) * U.withNeighboursSum.map!((u, sum) => sum - 2 * Dim * u);
+}
+
 /++
     Computes F - AU were A is the poisson matrix
 +/
-Slice!(T*, Dim) compute_residual(T, size_t Dim)(in Slice!(T*, Dim) F, in Slice!(T*, Dim) U, in T current_h)
+@nogc @fastmath
+void compute_residual(T, size_t Dim)(Slice!(T*, Dim) R, Slice!(const(T)*, Dim) F, Slice!(const(T)*, Dim) U, const T current_h)
 {
-    auto AU = apply_poisson!(T, Dim)(U, current_h);
-    AU.field[] = F.field[] - AU.field[];
+    assumeSameShape(U, R, F);
+    // performs
+    // apply_poisson(R, U, current_h);
+    // R[] = F - R
+    // in a single memory access
+    R.dropBorders[] = ((1 / current_h ^^ 2) * U.withNeighboursSum.map!((u, sum) => sum - 2 * Dim * u)).zip!true(F.dropBorders).map!"b - a";
+    eachOnBorder!"a = b - c"(R, F, U);
+}
+
+Slice!(T*, Dim) compute_residual(T, size_t Dim)(Slice!(const(T)*, Dim) F, Slice!(const(T)*, Dim) U, const T current_h)
+{
+    auto AU = U.shape.slice!T;
+    assert(AU.shape == F.shape);
+    compute_residual(AU, F, U, current_h);
     return AU;
 }
 
 unittest
 {
+    import mir.algorithm.iteration: all;
+    import mir.math.common: approxEqual;
     import multid.tools.util : randomMatrix;
 
     const size_t N = 100;
     immutable auto h = 1.0 / double(N);
 
-    auto U = randomMatrix!(double, 1)(N);
+    auto U = N.randomMatrix!(double, 1);
 
     auto x = U.dup;
     for (size_t i = 1; i < U.shape[0] - 1; i++)
@@ -120,19 +66,20 @@ unittest
         x[i] = (-2.0 * U[i] + U[i - 1] + U[i + 1]) / (h * h);
     }
 
-    const auto x1 = apply_poisson!(double, 1)(U, h);
-    assert(x == x1);
+    auto x1 = apply_poisson(U, h);
+    assert(all!approxEqual(x, x1));
 }
 
 unittest
 {
-
+    import mir.algorithm.iteration: all;
+    import mir.math.common: approxEqual;
     import multid.tools.util : randomMatrix;
 
     const size_t N = 100;
     immutable auto h = 1.0 / double(N);
 
-    auto U = randomMatrix!(double, 2)(N);
+    auto U = N.randomMatrix!(double, 2);
 
     immutable m = U.shape[0];
     immutable n = U.shape[1];
@@ -142,23 +89,29 @@ unittest
     {
         for (size_t j = 1; j < n - 1; j++)
         {
-            x[i, j] = (-4.0 * U[i, j] + U[i - 1, j] + U[i + 1, j] + U[i, j - 1]
-                    + U[i, j + 1]) / (h * h);
+            x[i, j] = (-4.0 * U[i, j]
+                + U[i - 1, j]
+                + U[i + 1, j]
+                + U[i, j - 1]
+                + U[i, j + 1]) / (h * h);
         }
     }
-    const auto x1 = apply_poisson!(double, 2)(U, h);
-    assert(x == x1);
+
+    auto x1 = apply_poisson(U, h);
+    assert(all!approxEqual(x, x1));
 }
 
 
 unittest
 {
+    import mir.algorithm.iteration: all;
+    import mir.math.common: approxEqual;
     import multid.tools.util : randomMatrix;
 
     const size_t N = 100;
     immutable auto h = 1.0 / double(N);
 
-    auto U = randomMatrix!(double, 3)(N);
+    auto U = N.randomMatrix!(double, 3);
 
     auto x = U.dup;
     for (size_t i = 1; i < U.shape[0] - 1; i++)
@@ -179,7 +132,6 @@ unittest
         }
     }
 
-    const auto x1 = apply_poisson!(double, 3)(U, h);
-
-    assert(x == x1);
+    auto x1 = apply_poisson(U, h);
+    assert(all!approxEqual(x, x1));
 }
diff --git a/D/source/multid/tools/norm.d b/D/source/multid/tools/norm.d
index 018707d..237abd6 100644
--- a/D/source/multid/tools/norm.d
+++ b/D/source/multid/tools/norm.d
@@ -1,11 +1,12 @@
 module multid.tools.norm;
 
-import std.math : sqrt;
+import mir.math : sqrt, fastmath;
 import mir.ndslice : Slice, sliced;
 
 /++
     Computes the L2 norm
 +/
+@fastmath
 T nrmL2(T, size_t Dim)(Slice!(T*, Dim) v)
 {
     T s = 0.0;
diff --git a/D/source/multid/tools/util.d b/D/source/multid/tools/util.d
index d051921..4a2c855 100644
--- a/D/source/multid/tools/util.d
+++ b/D/source/multid/tools/util.d
@@ -1,8 +1,6 @@
 module multid.tools.util;
-import mir.ndslice : Slice, slice;
-import std.range : generate;
-import std.random : uniform;
-import std.algorithm : fill;
+
+import mir.ndslice.slice: Slice;
 
 /++ Timer Template +/
 template Timer()
@@ -28,8 +26,9 @@ template Timer()
 /++ Generator for random matrix with dimension Dim and dimension size N +/
 Slice!(T*, Dim) randomMatrix(T, size_t Dim)(size_t N)
 {
+    import mir.random.algorithm: randomSlice;
+    import mir.random.variable: uniformVar;
+
     size_t[Dim] shape = N;
-    auto ret = slice!T(shape);
-    ret.field.fill(generate!(() => uniform(0.0, 1.0)));
-    return ret;
+    return uniformVar!T(0, 1).randomSlice(shape);
 }
diff --git a/D/source/scripts.d b/D/source/scripts.d
index 3d54a2a..7663746 100644
--- a/D/source/scripts.d
+++ b/D/source/scripts.d
@@ -1,18 +1,13 @@
-import std.stdio;
-import std.range : generate;
-import std.random : uniform;
-import std.array;
-import std.algorithm;
+import loadproblem;
 import mir.ndslice;
-import pretty_array;
-import std.datetime.stopwatch : StopWatch;
-import std.conv : to;
-
 import multid.gaussseidel.redblack;
-import multid.multigrid.restriction;
 import multid.multigrid.cycle;
 import multid.multigrid.multigrid;
-import loadproblem;
+import multid.multigrid.restriction;
+import multid.tools.util;
+// import pretty_array;
+import std.datetime.stopwatch : StopWatch;
+import std.stdio;
 
 /++
 This performs a GS_RB run for 3D
@@ -21,9 +16,7 @@ void test3D()
 {
 
     immutable size_t N = 50;
-    auto U = slice!double(N, N, N);
-    auto fun = generate!(() => uniform(0.0, 1.0));
-    U.field.fill(fun);
+    auto U = N.randomMatrix!(double, 3);
     U[0, 0 .. $, 0 .. $] = 1.0;
     U[0 .. $, 0, 0 .. $] = 1.0;
     U[0 .. $, 0 .. $, 0] = 1.0;
@@ -34,8 +27,8 @@ void test3D()
     auto F = slice!double([N, N, N], 0.0);
     const double h = 1.0 / double(N);
 
-    GS_RB!(double, 3)(F, U, h);
-    U.prettyArr.writeln;
+    GS_RB(F, U, h);
+    // U.prettyArr.writeln;
 
 }
 
@@ -46,9 +39,7 @@ void test2D()
 {
 
     immutable size_t N = 200;
-    auto U = slice!double(N, N);
-    auto fun = generate!(() => uniform(0.0, 1.0));
-    U.field.fill(fun);
+    auto U = N.randomMatrix!(double, 2);
     U[0][0 .. $] = 1.0;
     U[1 .. $, 0] = 1.0;
     U[$ - 1][1 .. $] = 0.0;
@@ -57,8 +48,8 @@ void test2D()
     auto F = slice!double([N, N], 0.0);
     const double h = 1.0 / double(N);
 
-    GS_RB!(double, 2)(F, U, h);
-    U.prettyArr.writeln;
+    GS_RB(F, U, h);
+    // U.prettyArr.writeln;
 
 }
 
@@ -69,17 +60,15 @@ void test1D()
 {
 
     immutable size_t N = 1000;
-    auto U = slice!double(N);
-    auto fun = generate!(() => uniform(0.0, 1.0));
-    U.field.fill(fun);
+    auto U = N.randomMatrix!(double, 1);
     U[0] = 1.0;
     U[$ - 1] = 0.0;
 
     auto F = slice!double([N], 0.0);
     const double h = 1.0 / double(N);
 
-    GS_RB!(double, 1, 30_000)(F, U, h);
-    U.prettyArr.writeln;
+    GS_RB(F, U, h, 30_000);
+    // U.prettyArr.writeln;
 
 }
 
@@ -89,9 +78,7 @@ This performs multigrid for 2D
 void testMG2D()
 {
     immutable size_t N = 1000;
-    auto U = slice!double(N, N);
-    auto fun = generate!(() => uniform(0.0, 1.0));
-    U.field.fill(fun);
+    auto U = N.randomMatrix!(double, 2);
     U[0][0 .. $] = 1.0;
     U[1 .. $, 0] = 1.0;
     U[$ - 1][1 .. $] = 0.0;
@@ -103,7 +90,7 @@ void testMG2D()
     F[$ - 1][1 .. $] = 0.0;
     F[1 .. $, $ - 1] = 0.0;
 
-    U = poisson_multigrid!(double, 2, 2, 2)(F, U, 0, 2, 100);
+    U = poisson_multigrid(F, U, 0, 2, 2, 2, 100);
 
     //U.prettyArr.writeln;
 }
diff --git a/D/source/startup.d b/D/source/startup.d
index 651244e..c0c3efb 100644
--- a/D/source/startup.d
+++ b/D/source/startup.d
@@ -3,19 +3,18 @@ module startup;
 
 template init()
 {
-
     import core.thread : Thread;
+    import mir.conv : to;
     import std.datetime.stopwatch : StopWatch, msecs;
     import std.experimental.logger : infof, globalLogLevel, LogLevel;
     import std.getopt : getopt;
-    import std.conv : to;
 
     StopWatch sw;
 
     uint delay = 500;
     bool verbose = false;
     string path = default_path;
-    string sweep = "field";
+    string sweep = "ndslice";
     immutable string default_path = "../problems/problem_2D_100.npy";
 
     void start()