From c48d0f62d5a6a53445cb4479c125dc0f772c2844 Mon Sep 17 00:00:00 2001
From: Angel Ezquerra <angel.ezquerra@gmail.com>
Date: Sat, 15 Jun 2024 15:59:01 +0200
Subject: [PATCH 01/16] Fix issue #17 (compilation issue when importing scinim)

---
 scinim.nim | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scinim.nim b/scinim.nim
index c462df0..4111ad4 100644
--- a/scinim.nim
+++ b/scinim.nim
@@ -1,4 +1,4 @@
-import ./scinim/signals/signals
+import ./scinim/signals
 export signals
 
 import ./scinim/numpyarrays

From 9ab2b287659e3b2803c2ebc659f578eadbbb69d0 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 17:56:15 +0200
Subject: [PATCH 02/16] add CI

This one is based on the `impulse` CI (recently adjusted to use newer
action to pull Nim)
---
 .github/workflows/ci.yml | 103 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..64d9de1
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,103 @@
+name: scinim CI
+on:
+  push:
+    paths:
+      - 'tests/**'
+      - '**'
+      - 'scinim.nimble'
+      - '.github/workflows/ci.yml'
+  pull_request:
+    paths:
+      - 'tests/**'
+      - '**'
+      - 'scinim.nimble'
+      - '.github/workflows/ci.yml'
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        nim:
+          - '1.6.x'
+          - '2.0.x'
+          - 'stable'
+        os:
+          - ubuntu-latest
+          - windows-latest
+          - macOS-latest
+    name: '${{ matrix.nim }} (${{ matrix.os }})'
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          path: scinim
+
+      - name: Setup nim
+        uses: jiro4989/setup-nim-action@v1
+        with:
+          nim-version: ${{ matrix.nim }}
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Setup MSYS2 (Windows)
+        if: ${{matrix.target == 'windows'}}
+        uses: msys2/setup-msys2@v2
+        with:
+          path-type: inherit
+          update: true
+          install: base-devel git mingw-w64-x86_64-toolchain
+
+      - name: Install dependencies (Windows)
+        if: ${{matrix.target == 'windows'}}
+        shell: msys2 {0}
+        run: |
+          pacman -Syu --noconfirm
+          pacman -S --needed --noconfirm mingw-w64-x86_64-lapack
+
+      - name: Setup nimble & deps
+        shell: bash
+        run: |
+          cd scinim
+          nimble refresh -y
+          nimble install -y
+
+      - name: Run tests (Linux & OSX)
+        if: ${{matrix.target != 'windows'}}
+        shell: bash
+        run: |
+          cd scinim
+          nimble -y test
+
+      - name: Run tests (Windows)
+        if: ${{matrix.target == 'windows'}}
+        shell: msys2 {0}
+        run: |
+          cd scinim
+          nimble -y test
+
+      - name: Build docs
+        if: >
+          github.event_name == 'push' && github.ref == 'refs/heads/master' &&
+          matrix.target == 'linux' && matrix.branch == 'devel'
+        shell: bash
+        run: |
+          cd scinim
+          branch=${{ github.ref }}
+          branch=${branch##*/}
+          nimble doc --project --outdir:docs \
+            '--git.url:https://github.com/${{ github.repository }}' \
+            '--git.commit:${{ github.sha }}' \
+            "--git.devel:$branch" \
+            scinim.nim
+          # Ignore failures for older Nim
+          cp docs/{the,}index.html || true
+
+      - name: Publish docs
+        if: >
+          github.event_name == 'push' && github.ref == 'refs/heads/master' &&
+          matrix.target == 'linux' && matrix.branch == 'devel'
+        uses: crazy-max/ghaction-github-pages@v1
+        with:
+          build_dir: scinim/docs
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From b0166717b4b81b93593d522c747eca0c448f4bbc Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 18:00:53 +0200
Subject: [PATCH 03/16] update CI depnedencies, add nimble task for tests

---
 .github/workflows/ci.yml | 12 ++++++++++++
 scinim.nimble            |  6 ++++++
 2 files changed, 18 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 64d9de1..bf7b652 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,12 +47,24 @@ jobs:
           update: true
           install: base-devel git mingw-w64-x86_64-toolchain
 
+      - name: Install dependencies (Ubuntu)
+        if: ${{matrix.target == 'linux'}}
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y python-numpy python3-numpy
+
+      - name: Install dependencies (OSX)
+        if: ${{matrix.target == 'macos'}}
+        run: |
+          brew install numpy
+
       - name: Install dependencies (Windows)
         if: ${{matrix.target == 'windows'}}
         shell: msys2 {0}
         run: |
           pacman -Syu --noconfirm
           pacman -S --needed --noconfirm mingw-w64-x86_64-lapack
+          pacman -S --needed --noconfirm mingw-w64-x86_64-python-numpy
 
       - name: Setup nimble & deps
         shell: bash
diff --git a/scinim.nimble b/scinim.nimble
index 59eb9ff..a48c130 100644
--- a/scinim.nimble
+++ b/scinim.nimble
@@ -13,3 +13,9 @@ requires "threading"
 requires "arraymancer >= 0.7.3"
 requires "polynumeric >= 0.2.0"
 requires "nimpy >= 0.2.0"
+
+task test, "Run all tests":
+  exec "nim c -r tests/tnumpyarrays.nim"
+  exec "nim c -r --gc:orc tests/tnumpyarrays.nim"
+  exec "nim cpp -r tests/tnumpyarrays.nim"
+  exec "nim cpp -r --gc:orc tests/tnumpyarrays.nim"

From 4ee1c0befd79ee2849d5c26ba6ca8ac9caf11595 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 23 Jan 2022 14:02:37 +0100
Subject: [PATCH 04/16] add PoC for an experimental math sugar module

---
 scinim/experimental/sugar.nim | 103 ++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 scinim/experimental/sugar.nim

diff --git a/scinim/experimental/sugar.nim b/scinim/experimental/sugar.nim
new file mode 100644
index 0000000..dd46832
--- /dev/null
+++ b/scinim/experimental/sugar.nim
@@ -0,0 +1,103 @@
+import math
+proc Σ*[T](s: openArray[T]): T = s.sum
+proc Π*[T](s: openArray[T]): T = s.prod
+
+let s = @[1,2,3]
+echo Σ s
+echo Π s
+
+template Σ_i*(frm, to: int, body: untyped): untyped =
+  var res: int
+  for i {.inject.} in frm ..< to:
+    res += body
+  res
+
+#template Σ_i(to: int, body: untyped): untyped =
+#  #var res: typeof(s[0])
+#  var res: int
+#  for i {.inject.} in 0 ..< to:
+#    res += body
+#  res
+
+import macros
+proc getTypeReplaceI(arg: NimNode): NimNode =
+  if arg.len > 0:
+    result = newTree(arg.kind)
+    for ch in arg:
+      result.add getTypeReplaceI(ch)
+  else:
+    case arg.kind
+    of nnkIdent, nnkSym:
+      if arg.strVal == "i": return newLit(0)
+      else: return arg
+    else: return arg
+
+
+macro Σ_i*(col, body: untyped): untyped =
+  let typ = getTypeReplaceI(body)
+  let iId = ident"i"
+  result = quote do:
+    var res: typeof(`typ`)
+    for `iId` in 0 ..< `col`.len:
+      res += `body`
+    res
+  echo result.repr
+
+type
+  Foo = object
+    x: int
+let f = @[Foo(x: 1), Foo(x: 4)]
+
+echo Σ_i(0, f.len, f[i].x)
+let r = Σ_i(f): f[i].x
+echo r
+
+macro λ*(arg, body: untyped): untyped =
+  let a = arg[1]
+  let typ = arg[2]
+  echo body.repr
+  result = quote do:
+    let fn = proc(`a`: `typ`): auto = `body`
+    fn
+
+let fn = λ(x -> int): x*x
+echo fn(2)
+
+proc sliceTypes(n: NimNode, sl: Slice[int]): NimNode =
+  result = nnkFormalParams.newTree(ident"auto")
+  var args = nnkIdentDefs.newTree()
+  for i in sl.a .. sl.b:
+    args.add n[i]
+  args.add ident"T"
+  args.add newEmptyNode()
+  result.add args
+
+proc generateFunc(arg: NimNode): NimNode =
+  expectKind(arg, nnkAsgn)
+  let lhs = arg[0]
+  let rhs = arg[1]
+  let fnName = lhs[0]
+  let fnArgs = sliceTypes(lhs, 1 ..< lhs.len)
+  echo fnArgs.treerepr
+  result = newProc(name = fnName, body = rhs)
+  result[2] = nnkGenericParams.newTree(
+    nnkIdentDefs.newTree(
+      ident"T", newEmptyNode(), newEmptyNode()
+    )
+  )
+  result[3] = fnArgs
+
+macro mathScope*(args: untyped): untyped =
+  echo args.treerepr
+  expectKind(args, nnkStmtList)
+  result = newStmtList()
+  for arg in args:
+    result.add generateFunc(arg)
+  echo result.repr
+
+mathScope:
+  g(x) = exp(-x)
+  h(x, μ, σ) = 1.0/sqrt(2*Pi) * exp(-pow(x - μ, 2) / (2 * σ*σ))
+
+echo g(1.5)
+echo h(1.0, 0.5, 1.1)

From add082ed5c548111d5552115134ef8bcf29025c5 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sat, 29 Jan 2022 13:19:15 +0100
Subject: [PATCH 05/16] make `mathScope` functions multi generics

To allow for multiple types in the arguments (e.g. different units)
---
 scinim/experimental/sugar.nim | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/scinim/experimental/sugar.nim b/scinim/experimental/sugar.nim
index dd46832..7ac6e0e 100644
--- a/scinim/experimental/sugar.nim
+++ b/scinim/experimental/sugar.nim
@@ -63,29 +63,31 @@ macro λ*(arg, body: untyped): untyped =
 let fn = λ(x -> int): x*x
 echo fn(2)
 
-proc sliceTypes(n: NimNode, sl: Slice[int]): NimNode =
-  result = nnkFormalParams.newTree(ident"auto")
-  var args = nnkIdentDefs.newTree()
+proc sliceTypes(n: NimNode, sl: Slice[int]): tuple[args, genTyps: NimNode] =
+  var args = nnkFormalParams.newTree(ident"auto")
+  var genTyps = nnkIdentDefs.newTree()
   for i in sl.a .. sl.b:
-    args.add n[i]
-  args.add ident"T"
-  args.add newEmptyNode()
-  result.add args
+    let typ = ident($char('A'.ord + i - 1))
+    args.add nnkIdentDefs.newTree(n[i],
+                                  typ,
+                                  newEmptyNode())
+    genTyps.add typ
+  genTyps.add newEmptyNode()
+  genTyps.add newEmptyNode()
+  genTyps = nnkGenericParams.newTree(genTyps)
+  result = (args: args, genTyps: genTyps)
 
 proc generateFunc(arg: NimNode): NimNode =
   expectKind(arg, nnkAsgn)
   let lhs = arg[0]
   let rhs = arg[1]
   let fnName = lhs[0]
-  let fnArgs = sliceTypes(lhs, 1 ..< lhs.len)
+  let (fnArgs, genTyps) = sliceTypes(lhs, 1 ..< lhs.len)
   echo fnArgs.treerepr
   result = newProc(name = fnName, body = rhs)
-  result[2] = nnkGenericParams.newTree(
-    nnkIdentDefs.newTree(
-      ident"T", newEmptyNode(), newEmptyNode()
-    )
-  )
+  result[2] = genTyps
   result[3] = fnArgs
+  echo result.repr
 
 macro mathScope*(args: untyped): untyped =
   echo args.treerepr

From 83fbae936081a5cf6ed7aba07419989f1a8bbb08 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 13 Jun 2022 12:45:22 +0200
Subject: [PATCH 06/16] =?UTF-8?q?add=20sqrt=20via=20=E2=88=9A=20sugar?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scinim/experimental/sugar.nim | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scinim/experimental/sugar.nim b/scinim/experimental/sugar.nim
index 7ac6e0e..0e97914 100644
--- a/scinim/experimental/sugar.nim
+++ b/scinim/experimental/sugar.nim
@@ -2,6 +2,9 @@ import math
 proc Σ*[T](s: openArray[T]): T = s.sum
 proc Π*[T](s: openArray[T]): T = s.prod
 
+proc √*[T](x: T): T = sqrt(x)
+proc √*[T](x: openArray[T]): T = sqrt(x)
+
 let s = @[1,2,3]
 echo Σ s
 echo Π s

From 361e9048d5780f3c1e21c0263ad87cdcb64c1042 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 18:36:29 +0200
Subject: [PATCH 07/16] clean up `sugar.nim`

---
 scinim/experimental/sugar.nim | 71 +++++++++++------------------------
 1 file changed, 22 insertions(+), 49 deletions(-)

diff --git a/scinim/experimental/sugar.nim b/scinim/experimental/sugar.nim
index 0e97914..3c9496d 100644
--- a/scinim/experimental/sugar.nim
+++ b/scinim/experimental/sugar.nim
@@ -1,28 +1,11 @@
-import math
-proc Σ*[T](s: openArray[T]): T = s.sum
-proc Π*[T](s: openArray[T]): T = s.prod
+import std / [macros, math]
+export math
 
-proc √*[T](x: T): T = sqrt(x)
-proc √*[T](x: openArray[T]): T = sqrt(x)
-
-let s = @[1,2,3]
-echo Σ s
-echo Π s
-
-template Σ_i*(frm, to: int, body: untyped): untyped =
-  var res: int
-  for i {.inject.} in frm ..< to:
-    res += body
-  res
+## This module contains a whole bunch of fun little sugar procs / templates / macros
+## to (mostly) help with writing math code. The most likely use case might be for
+## illustrative / explanatory code that is close to math in nature already.
 
-#template Σ_i(to: int, body: untyped): untyped =
-#  #var res: typeof(s[0])
-#  var res: int
-#  for i {.inject.} in 0 ..< to:
-#    res += body
-#  res
 
-import macros
 proc getTypeReplaceI(arg: NimNode): NimNode =
   if arg.len > 0:
     result = newTree(arg.kind)
@@ -35,6 +18,17 @@ proc getTypeReplaceI(arg: NimNode): NimNode =
       else: return arg
     else: return arg
 
+proc Σ*[T](s: openArray[T]): T = s.sum
+proc Π*[T](s: openArray[T]): T = s.prod
+
+proc √*[T](x: T): T = sqrt(x)
+proc √*[T](x: openArray[T]): T = sqrt(x)
+
+template Σ_i*(frm, to: int, body: untyped): untyped =
+  var res: int
+  for i {.inject.} in frm ..< to:
+    res += body
+  res
 
 macro Σ_i*(col, body: untyped): untyped =
   let typ = getTypeReplaceI(body)
@@ -44,27 +38,17 @@ macro Σ_i*(col, body: untyped): untyped =
     for `iId` in 0 ..< `col`.len:
       res += `body`
     res
-  echo result.repr
-
-type
-  Foo = object
-    x: int
-let f = @[Foo(x: 1), Foo(x: 4)]
-
-echo Σ_i(0, f.len, f[i].x)
-let r = Σ_i(f): f[i].x
-echo r
 
 macro λ*(arg, body: untyped): untyped =
+  ##
+  # XXX: Support multiple arguments!
+  if arg.kind != nnkInfix or
+     (arg.kind == nnkInfix and arg[0].kind in {nnkIdent, nnkSym} and arg[0].strVal != "->"):
+    error("Unsupported operation in `λ`. The infix must be `->`, but is: " & arg[0].repr)
   let a = arg[1]
   let typ = arg[2]
-  echo body.repr
   result = quote do:
-    let fn = proc(`a`: `typ`): auto = `body`
-    fn
-
-let fn = λ(x -> int): x*x
-echo fn(2)
+    proc(`a`: `typ`): auto = `body`
 
 proc sliceTypes(n: NimNode, sl: Slice[int]): tuple[args, genTyps: NimNode] =
   var args = nnkFormalParams.newTree(ident"auto")
@@ -86,23 +70,12 @@ proc generateFunc(arg: NimNode): NimNode =
   let rhs = arg[1]
   let fnName = lhs[0]
   let (fnArgs, genTyps) = sliceTypes(lhs, 1 ..< lhs.len)
-  echo fnArgs.treerepr
   result = newProc(name = fnName, body = rhs)
   result[2] = genTyps
   result[3] = fnArgs
-  echo result.repr
 
 macro mathScope*(args: untyped): untyped =
-  echo args.treerepr
   expectKind(args, nnkStmtList)
   result = newStmtList()
   for arg in args:
     result.add generateFunc(arg)
-  echo result.repr
-
-mathScope:
-  g(x) = exp(-x)
-  h(x, μ, σ) = 1.0/sqrt(2*Pi) * exp(-pow(x - μ, 2) / (2 * σ*σ))
-
-echo g(1.5)
-echo h(1.0, 0.5, 1.1)

From 463ee26de6642f051c91f4aca1e2af9c9eeefcd8 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 18:36:35 +0200
Subject: [PATCH 08/16] [tests] add mini test case for experimental sugar
 module

---
 tests/tsugar.nim | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 tests/tsugar.nim

diff --git a/tests/tsugar.nim b/tests/tsugar.nim
new file mode 100644
index 0000000..896df6c
--- /dev/null
+++ b/tests/tsugar.nim
@@ -0,0 +1,57 @@
+import ../scinim/experimental/sugar
+import unittest
+
+# Note: these are all highly experimental and subject to change!
+
+suite "SciNim experimental sugar":
+  test "Σ and Π short for `s.sum`, `s.prod`":
+    let s1 = @[1,2,3]
+    let s2 = @[1.0, 2.0, 3.0]
+    check (Σ s1) == s1.sum
+    check (Σ s2) == s2.sum
+    check (Π s1) == s1.prod
+    check (Π s2) == s2.prod
+
+  test "Σ_i short for summing a collection with field access in a specific range":
+    type
+      Foo = object
+        x: int
+    let f = @[Foo(x: 1), Foo(x: 2), Foo(x: 3), Foo(x: 4)]
+
+    # Long form taking start, stop indices and the body
+    check Σ_i(0, f.len, f[i].x) == 10
+    check Σ_i(1, f.len, f[i].x) == 9
+    check Σ_i(2, f.len, f[i].x) == 7
+    check Σ_i(3, f.len, f[i].x) == 4
+    check Σ_i(2, f.len - 1, f[i].x) == 3
+
+    # short form (similar to `Σ`), but allowing field acces etc
+    # Injects the index `i`
+    let res = Σ_i(f, f[i].x)
+    check res == 10
+    let res2 = Σ_i(f): f[i].x
+    check res2 == 10
+    check (block: # we're are a bit limited in what conditions the syntax parses correctly
+             Σ_i(f): f[i].x
+    ) == 10
+    # i.e. this does not compile:
+    # check (Σ_i(f): f[i].x) == 10
+    check Σ_i(f, f[i].x) == 10
+
+  test "λ to define 'lambdas', i.e. anonymous functions / closures":
+    block:
+      let fn = λ(x -> int): x*x
+      check fn(2) == 4
+      check fn(3) == 9
+    block:
+      let fn = λ(x -> int): x+x
+      check fn(2) == 4
+      check fn(3) == 6
+
+  test "`mathScope` DSL that acts 'untyped'":
+    mathScope:
+      g(x) = exp(-x)
+      h(x, μ, σ) = 1.0/sqrt(2*Pi) * exp(-pow(x - μ, 2) / (2 * σ*σ))
+
+    check g(1.5) == exp(-1.5)
+    check h(1.0, 0.5, 1.1) == 1.0/sqrt(2*Pi) * exp(-pow(1.0 - 0.5, 2) / (2 * 1.1^2))

From 201ddcb224bd20d6e76d20eccdadbadaa1e0566e Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 18:36:46 +0200
Subject: [PATCH 09/16] =?UTF-8?q?[tests]=20disable=20one=20run=20of=20the?=
 =?UTF-8?q?=20numpy=20=E2=87=94=20nim=20interop=20test=20case?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/tnumpyarrays.nim | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/tnumpyarrays.nim b/tests/tnumpyarrays.nim
index 770f2e7..136c8e2 100644
--- a/tests/tnumpyarrays.nim
+++ b/tests/tnumpyarrays.nim
@@ -70,7 +70,9 @@ proc test(arg: tuple[s: string]) =
 
 
 when isMainModule:
-  test((s: "toTensor, toNdArray in main thread"))
+  #test((s: "toTensor, toNdArray in main thread"))
+  ## XXX: Running both is currently broken, due to the python interpreter
+  ## being in a bad state after one?
   var thr: Thread[tuple[s: string]]
   createThread(thr, test, (s: "toTensor, toNdArray in external thread"))
   joinThread(thr)

From 1106145bf0517e6c69766c8fcf32fcac249d4946 Mon Sep 17 00:00:00 2001
From: Angel Ezquerra <angel.ezquerra@gmail.com>
Date: Sat, 15 Jun 2024 15:56:13 +0200
Subject: [PATCH 10/16] Bump required nim version to 1.6

This is required to change the arraymancer dependency to its latest version.
---
 scinim.nimble | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scinim.nimble b/scinim.nimble
index a48c130..3ca2bac 100644
--- a/scinim.nimble
+++ b/scinim.nimble
@@ -8,7 +8,7 @@ license       = "MIT"
 backend       = "cpp"
 
 # Dependencies
-requires "nim >= 1.4.0"
+requires "nim >= 1.6.0"
 requires "threading"
 requires "arraymancer >= 0.7.3"
 requires "polynumeric >= 0.2.0"

From 972d84ac82491d7e79238acb33b0a2a2d3ae8cf6 Mon Sep 17 00:00:00 2001
From: Angel Ezquerra <angel.ezquerra@gmail.com>
Date: Sat, 15 Jun 2024 15:56:31 +0200
Subject: [PATCH 11/16] Bump arraymancer requirement to 0.7.31

---
 scinim.nimble | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scinim.nimble b/scinim.nimble
index 3ca2bac..de0fc18 100644
--- a/scinim.nimble
+++ b/scinim.nimble
@@ -10,7 +10,7 @@ backend       = "cpp"
 # Dependencies
 requires "nim >= 1.6.0"
 requires "threading"
-requires "arraymancer >= 0.7.3"
+requires "arraymancer >= 0.7.31"
 requires "polynumeric >= 0.2.0"
 requires "nimpy >= 0.2.0"
 

From 469c878f4a7634bd42b3870f14e629c35ad66035 Mon Sep 17 00:00:00 2001
From: Angel Ezquerra <angel.ezquerra@gmail.com>
Date: Sat, 15 Jun 2024 15:56:43 +0200
Subject: [PATCH 12/16] Add primes module

Prime numbers are an essential building block of many algorithms in diverse areas such as cryptography, digital communications and many others.

This module adds the following functions:
- primes: Procedure to generate lists of primes upto a certain value.
- factor: Procedure which returns a Tensor containing the prime factors of the input.
- isprime: Single element and element-wise procedures that return true when their inputs are prime numbers.
---
 scinim/primes.nim     | 201 ++++++++++++++++++++++++++++++++++++++++++
 tests/test_primes.nim | 133 ++++++++++++++++++++++++++++
 2 files changed, 334 insertions(+)
 create mode 100644 scinim/primes.nim
 create mode 100644 tests/test_primes.nim

diff --git a/scinim/primes.nim b/scinim/primes.nim
new file mode 100644
index 0000000..ff04979
--- /dev/null
+++ b/scinim/primes.nim
@@ -0,0 +1,201 @@
+## Module that implements several procedures related to prime numbers
+##
+## Prime numbers are an essential building block of many algorithms in diverse
+## areas such as cryptography, digital communications and many others.
+## This module adds a function to generate rank-1 tensors of primes upto a
+## certain value; as well as a function to calculate the prime factors of a
+## number.
+
+import arraymancer
+
+proc primes*[T: SomeInteger | SomeFloat](upto: T): Tensor[T] =
+  ## Generate a Tensor of prime numbers up to a certain value
+  ##
+  ## Return a Tensor of the prime numbers less than or equal to `upto`.
+  ## A prime number is one that has no factors other than 1 and itself.
+  ##
+  ## Input:
+  ##   - upto: Integer up to which primes will be generated
+  ##
+  ## Result:
+  ##   - Integer Tensor of prime values less than or equal to `upto`
+  ##
+  ## Note:
+  ##   - This function implements a "half" Sieve of Erathostenes algorithm
+  ##     which is a classical Sieve of Erathostenes in which only odd numbers
+  ##     are checked. Many examples of this algorithm can be found online.
+  ##     It also stops checking after sqrt(upto)
+  ##   - The memory required by this procedure is proportional to the input
+  ##     number.
+  when T is SomeFloat:
+    if upto != round(upto):
+      raise newException(ValueError,
+        "`upto` value (" & $upto & ") must be a whole number")
+
+  if upto < 11:
+    # Handle the primes below 11 to simplify the general code below
+    # (by removing the need to handle the few cases in which the index to
+    # `isprime`, calculated based on `factor` is negative)
+    # This is the minimum set of primes that we must handle, but we could
+    # extend this list to make the calculation faster for more of the
+    # smallest primes
+    let prime_candidates = [2.T, 3, 5, 7].toTensor()
+    return prime_candidates[prime_candidates <=. upto]
+
+  # General algorithm (valid for numbers higher than 10)
+  let prime_candidates = arange(3.T, T(upto + 1), 2.T)
+  var isprime = ones[bool]((upto.int - 1) div 2)
+  let max_possible_factor_idx = int(sqrt(upto.float)) div 2
+  for factor in prime_candidates[_ ..< max_possible_factor_idx]:
+    if isprime[(factor.int - 2) div 2]:
+      isprime[(factor.int * 3 - 2) div 2 .. _ | factor.int] = false
+
+  # Note that 2 is missing from the result, so it must be manually added to
+  # the front of the result tensor
+  return [2.T].toTensor().append(prime_candidates[isprime])
+
+# The maximum float64 that can be represented as an integer that is followed by a
+# another integer that is representable as a float64 as well
+const maximumConsecutiveFloat64Int = pow(2.0, 53) - 1.0
+
+proc factor*[T: SomeInteger | SomeFloat](n: T): Tensor[T] =
+  ## Return a Tensor containing the prime factors of the input
+  ##
+  ## Input:
+  ##   - n: A value that will be factorized.
+  ##        If its type is floating-point it must be a whole number. Otherwise
+  ##        a ValueError will be raised.
+  ## Result:
+  ##   - A sorted Tensor containing the prime factors of the input.
+  ##
+  ## Example:
+  ## ```nim
+  ## echo factor(60)
+  ## # Tensor[system.int] of shape "[4]" on backend "Cpu"
+  ## #     2     2     3     5
+  ## ```
+  if n < 0:
+    raise newException(ValueError,
+      "Negative values (" & $n & ") cannot be factorized")
+  when T is int64:
+    if n > T(maximumConsecutiveFloat64Int):
+      raise newException(ValueError,
+        "Value (" & $n & ") is too large to be factorized")
+  elif T is SomeFloat:
+    if floor(n) != n:
+      raise newException(ValueError,
+        "Non whole numbers (" & $n & ") cannot be factorized")
+
+  if n < 4:
+    return [n].toTensor
+
+  # The algorithm works by keeping track of the list of unique potential,
+  # candidate prime factors of the input, and iteratively adding those
+  # that are confirmed to be factors into a list of confirmed factors
+  # (which is stored in the `result` tensor variable).
+
+  # First we must initialize the `candidate_factor` Tensor
+  # The factors of the input can be found among the list of primes
+  # that are smaller than or equal to input. However we can significantly
+  # reduce the candidate list by taking into account the fact that only a
+  # single factor can be greater than the square root of the input.
+  # The algorithm is such that if that is the case we will add the input number
+  # at the very end of the loop below
+  var candidate_factors = primes(T(ceil(sqrt(float(n)))))
+
+  # This list of prime candidate_factors is refined by finding those of them
+  # that divide the input value (i.e. those whose `input mod prime` == 0).
+  # Those candiates that don't divide the input are known to not be valid
+  # factors and can be removed from the candidate_factors list. Those that do
+  # divide the input are confirmed as valid factors and as such are added to
+  # the result list. Then the input is divided by all of the remaining
+  # candidates (by dividing the input by the product of all the remaining
+  # candidates). The result is a number that is the product of all the factors
+  # that are still unknown (which must be among the remaining candidates!) and
+  # which we can call `unknown_factor_product`.
+  # Then we can simply repeat the same process over and over, replacing the
+  # original input with the remaining `unknown_factor_product` after each
+  # iteration, until the `unknown_factors_product` (which is reduced by each
+  # division at the end of each iteration) reaches 1. Alternatively, we might
+  # run out of candidates, which will only happen when there is only one factor
+  # left (which must be greater than the square root of the input) and is stored
+  # in the `unknown_factors_product`. In that case we add it to the confirmed
+  # factors (result) list and the process can stop.
+  var unknown_factor_product = n
+  while unknown_factor_product > 1:
+    # Find the primes that are divisors of the remaining unknown_factors_product
+    # Note that this tells us which of the remaining candidate_factors are
+    # factors of the input _at least once_ (but they could divide it more
+    # than once)
+    let is_factor = (unknown_factor_product mod candidate_factors) ==. 0
+    # Keep only the factors that divide the remainder and remove the rest
+    # from the list of candidates
+    candidate_factors = candidate_factors[is_factor]
+    # after this step, all the items incandidate_factors are _known_ to be
+    # factors of `unknown_factor_product` _at least once_!
+    if candidate_factors.len == 0:
+      # If there are no more prime candidates left, it means that the remainder
+      # is a prime (and that it must be greater than the sqrt of the input),
+      # and that we are done (after adding it to the result list)
+      result = result.append([unknown_factor_product].toTensor)
+      break
+    # If we didn't stop it means that there are still candidates which we
+    # _know_ are factors of the remainder, so we must add them to the result
+    result = result.append(candidate_factors)
+    # Now we can prepare for the next iteration by dividing the remainder,
+    # by the factors we just added to the result. This reminder is the product
+    # of the factors we don't know yet
+    unknown_factor_product = T(unknown_factor_product / product(candidate_factors))
+    # After this division the items in `candidate_factors` become candidates again
+    # and we can start a new iteration
+  result = sorted(result)
+
+proc isprimeImpl[T: SomeInteger | SomeFloat](n: T, candidate_factors: Tensor[int]): bool {.inline.} =
+  ## Actual implementation of the isprime check
+  # This function is optimized for speed in 2 ways:
+  # 1. By first rejecting all non-whole float numbers and then performing the
+  #    actual isprime check using integers (which is faster than using floats)
+  # 2. By receving a pre-calculated tensor of candidate_factors. This does not
+  #    speed up the check of a single value, but it does speed up the check of
+  #    a tensor of values. Note that because of #1 the candidate_factors must
+  #    be a tensor of ints (even if the number being checked is a float)
+  when T is SomeFloat:
+    if floor(n) != n:
+      return false
+    let n = int(n)
+  result = (n > 1) and all(n mod candidate_factors[candidate_factors <. n])
+
+proc isprime*[T: SomeInteger | SomeFloat](n: T): bool =
+  ## Check whether the input is a prime number
+  ##
+  ## Only positive values higher than 1 can be primes (i.e. we exclude 1 and -1
+  ## which are sometimes considered primes).
+  ##
+  ## Note that this function also works with floats, which are considered
+  ## non-prime when they are not whole numbers.
+  # Note that we do here some checks that are repeated later inside of
+  # `isprimeImpl`. This is done to avoid the unnecessary calculation of
+  # the `candidate_factors` tensor in those cases
+  if n <= 1:
+    return false
+  when T is int64:
+    if n > T(maximumConsecutiveFloat64Int):
+      raise newException(ValueError,
+        "Value (" & $n & ") is too large to be factorized")
+  elif T is SomeFloat:
+    if floor(n) != n:
+      return false
+  var candidate_factors = primes(int(ceil(sqrt(float(n)))))
+  isprimeImpl(n, candidate_factors)
+
+proc isprime*[T: SomeInteger | SomeFloat](t: Tensor[T]): Tensor[bool] =
+  ## Element-wise check if the input values are prime numbers
+  result = zeros[bool](t.len)
+  # Pre-calculate the list of primes that will be used for the element-wise
+  # isprime check and then call isprimeImpl on each element
+  # Note that the candidate_factors must be a tensor of ints (for performance
+  # reasons)
+  var candidate_factors = primes(int(ceil(sqrt(float(max(t.flatten()))))))
+  for idx, val in t.enumerate():
+    result[idx] = isprimeImpl(val, candidate_factors)
+  return result.reshape(t.shape)
diff --git a/tests/test_primes.nim b/tests/test_primes.nim
new file mode 100644
index 0000000..6c7f887
--- /dev/null
+++ b/tests/test_primes.nim
@@ -0,0 +1,133 @@
+import ../scinim/primes
+import arraymancer
+import std / [unittest, random]
+
+proc test_primes() =
+  ## Test the `primes` function
+  test "Prime number generation (integer values)":
+    check: primes(0).len == 0
+    check: primes(1).len == 0
+    check: primes(2) == [2].toTensor
+    check: primes(3) == [2, 3].toTensor
+    check: primes(4) == [2, 3].toTensor
+    check: primes(11) == [2, 3, 5, 7, 11].toTensor
+    check: primes(12) == [2, 3, 5, 7, 11].toTensor
+    check: primes(19) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor
+    check: primes(20) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor
+    check: primes(22) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor
+    check: primes(100000).len == 9592
+    check: primes(100003).len == 9593
+    check: primes(100000)[^1].item == 99991
+
+  test "Prime number generation (floating-point values)":
+    check: primes(100000.0).len == 9592
+    check: primes(100000.0)[^1].item == 99991.0
+
+    # An exception must be raised if the `upto` value is not a whole number
+    try:
+      discard primes(100.5)
+      check: false
+    except ValueError:
+      # This is what should happen!
+      discard
+
+proc generate_random_factor_tensor[T](
+    max_value: T, max_factors: int, prime_list: Tensor[T]): Tensor[T] =
+  ## Generate a tensor of random prime factors taken from a tensor of primes
+  ## The tensor length will not exceed the `max_factors` and the product of
+  ## the factors will not exceed `max_value` either.
+  ## This is not just a random list of values taken from the `prime_list`
+  ## Instead we artificially introduce a random level of repetition of the
+  ## chosen factors to emulate the fact that many numbers have repeated
+  ## prime factors
+  let max_value = rand(4 .. 2 ^ 53)
+  let max_factors = rand(1 .. 20)
+  result = newTensor[int](max_factors)
+  var value = 1
+  var factor = prime_list[rand(prime_list.len - 1)]
+  for idx in 0 ..< max_factors:
+    # Randomly repeat the previous factor
+    # Higher number of repetitions are less likely
+    let repeat_factor = rand(5) < 1
+    if not repeat_factor:
+      factor = prime_list[rand(prime_list.len - 1)]
+    let new_value = factor * value
+    if new_value >= max_value:
+      break
+    result[idx] = factor
+    value = new_value
+  result = sorted(result)
+  result = result[result >. 0]
+
+proc test_factor() =
+  test "Prime factorization of integer values (factor)":
+    check: factor(60) == [2, 2, 3, 5].toTensor
+    check: factor(100001) == [11, 9091].toTensor
+
+    # Check that the product of the factorization of a few random values
+    # equals the original numbers
+    for n in 0 ..< 10:
+      let value = rand(10000)
+      check: value == product(factor(value))
+
+    # Repeat the previous test in a more sophisticated manner
+    # Instead of generating random values and checking that the
+    # product of their factorization is the same as the original values
+    # (which could work for many incorrect implementations of factor),
+    # generate a few random factor tensors, multiply them to get
+    # the number that has them as prime factors, factorize those numbers
+    # and check that their factorizations matches the original tensors
+    let prime_list = primes(100)
+    for n in 0 ..< 10:
+      let max_value = rand(4 .. 2 ^ 53)
+      let max_factors = rand(1 .. 20)
+      var factors = generate_random_factor_tensor(
+        max_value, max_factors, prime_list)
+      let value = product(factors)
+      check: factor(value) == factors
+
+  test "Prime factorization of floating-point values (factor)":
+    # Floating-point
+    check: factor(60.0) == [2.0, 2, 3, 5].toTensor
+    check: factor(100001.0) == [11.0, 9091].toTensor
+
+    # Check that the product of the factorization of a few random values
+    # equals the original numbers
+    # Note that here we do not also do the reverse check (as we do for ints)
+    # in order to make the test faster
+    for n in 0 ..< 10:
+      let value = floor(rand(10000.0))
+      check: value == product(factor(value))
+
+    # An exception must be raised if we try to factorize a non-whole number
+    try:
+      discard factor(100.5)
+      check: false
+    except ValueError:
+      # This is what should happen!
+      discard
+
+proc test_isprime() =
+  test "isprime":
+    check: isprime(7) == true
+    check: isprime(7.0) == true
+    check: isprime(7.5) == false
+    check: isprime(1) == false
+    check: isprime(0) == false
+    check: isprime(-1) == false
+    let t = [
+      [-1, 0, 2,  4],
+      [ 5, 6, 7, 11]
+    ].toTensor
+    let expected = [
+      [false, false, true, false],
+      [ true, false, true,  true]
+    ].toTensor
+    check: isprime(t) == expected
+    check: isprime(t.asType(float)) == expected
+
+# Run the tests
+suite "Primes":
+  test_primes()
+  test_factor()
+  test_isprime()

From 1496ac0d27f1ba7e0e6085e7b1fed8e192722856 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 18:41:31 +0200
Subject: [PATCH 13/16] add `fuseLoops` macro to fuse loops in Nim

Fusing the loops in this context means the equivalent of OpenMP's
`collapse` statement, i.e. merging multiple nested loops into a single
loop.

One can use the `nofuse` argument to indicate a nested loop should
_not_ be fused.
---
 scinim/fuse_loops.nim | 189 ++++++++++++++++++++++++++++++++++++++++++
 tests/tFuseLoops.nim  |  54 ++++++++++++
 2 files changed, 243 insertions(+)
 create mode 100644 scinim/fuse_loops.nim
 create mode 100644 tests/tFuseLoops.nim

diff --git a/scinim/fuse_loops.nim b/scinim/fuse_loops.nim
new file mode 100644
index 0000000..8e44b4c
--- /dev/null
+++ b/scinim/fuse_loops.nim
@@ -0,0 +1,189 @@
+import std / [macros, options, algorithm]
+
+type
+  ForLoop = object
+    n: NimNode # the actual node
+    body: NimNode # body of the loop *WITHOUT* any inner loops!
+    idx: NimNode # the loop index
+    start: NimNode # start of the loop
+    stop: NimNode # stop of the loop
+
+template nofuse*(arg: untyped): untyped =
+  ## Just a dummy template, which can be easily used to disable fusing of
+  ## a nested loop
+  arg
+
+proc extractBody(n: NimNode): NimNode =
+  ## Returns the input tree without any possible nested for loops. Nested
+  ## loops are replaced by `nnkEmpty` nodes to be filled again later in `bodies`.
+  case n.kind
+  of nnkForStmt:
+    if n[1].kind == nnkInfix and n[1][0].strVal == "..<":
+      result = newEmptyNode() ## Flattened nested loop body will be inserted here
+    else:
+      result = n
+  else:
+    if n.len > 0:
+      result = newTree(n.kind)
+      for ch in n:
+        let bd = extractBody(ch)
+        if bd != nil:
+          result.add bd
+    else:
+      result = n
+
+proc toForLoop(n: NimNode): Option[ForLoop] =
+  ## Returns a `some(ForLoop)` if the given node is a fuse-able for loop
+  doAssert n.kind == nnkForStmt
+  if n[1].kind != nnkInfix: return
+  if n[1][0].strVal != "..<":
+    error("Unexpected iterator: " & $n[1].repr &
+      ". It must be of the form `0 ..< X`.")
+  if not (n[1][1].kind == nnkIntLit and n[1][1].intVal == 0):
+    error("Starting iteration index must be 0!")
+  result = some(ForLoop(n: n,
+                        body: extractBody(n[2]),
+                        idx: n[0],
+                        start: n[1][1],
+                        stop: n[1][2]))
+
+template addIf(s, opt): untyped =
+  if opt.isSome:
+    s.add opt.unsafeGet
+
+proc extractLoops(n: NimNode): seq[ForLoop] =
+  ## Extracts (fuse-able) loops from the given Nim node and errors if more than
+  ## one for loop found at the same level.
+  case n.kind
+  of nnkForStmt:
+    result.addIf toForLoop(n)
+    result.add extractLoops(n[2]) # go over body
+  else:
+    var foundLoops = 0 # counter for number of loops at current body
+    for ch in n:
+      let loops = extractLoops(ch)
+      if loops.len > 0:
+        result.add loops
+        inc foundLoops
+    if foundLoops > 1:
+      error("Found more than one loop (" & $foundLoops & ") at the level of node: " &
+        n.repr & ". Please wrap " & "these loops as `nofuse`, i.e. `nofuse(0 ..< X)`")
+
+proc genFusedLoop(idx: NimNode, stop: NimNode, ompStr = ""): NimNode =
+  ## Generate either regular or OpenMP for loop
+  let loopIter = if ompStr.len == 0:
+                   nnkInfix.newTree(ident"..<",
+                                    newLit 0,
+                                    stop)
+                 else:
+                   nnkCall.newTree(ident"||",
+                                   newLit 0,
+                                   stop,
+                                   newLit ompStr)
+  result = nnkForStmt.newTree(
+    idx,
+    loopIter
+  )
+
+proc calcStop(loops: seq[ForLoop]): NimNode =
+  ## Returns `N * T * U * ...` expression where the indices are
+  ## the stop indices of the loops to be fused.
+  case loops.len
+  of 0: doAssert false, "Must not happen"
+  of 1: result = loops[0].stop
+  else:
+    var ml = loops.reversed # want last elements first
+    let x = ml.pop
+    result = nnkInfix.newTree(ident"*", x.stop,
+                              calcStop(ml.reversed))
+
+proc modOrDiv(prefix, suffix: NimNode, isDiv: bool): NimNode =
+  if isDiv:
+    result = quote do:
+      `prefix` div `suffix`
+  else:
+    result = quote do:
+      `prefix` mod `suffix`
+
+proc asLet(v, val: NimNode): NimNode =
+  result = quote do:
+    let `v` = `val`
+
+proc genPrelude(idx: NimNode, loops: seq[ForLoop]): NimNode =
+  ## The basic algorithm for generating the correct index for fused loops is
+  ##
+  ## Notation:
+  ## `i` = Loop index of single remaining outer loop
+  ## `N_i` = Stopping index (-1) of the inner loop `i`
+  ## `n` = Total number of nested loops
+  ##
+  ## Whichever is easiest to read for you:
+  ##
+  ## `let i0 = i div (N_0 * N_1 ... N_n)`
+  ## `let i1 = (i mod (N_0 * N_1 ... N_n)) div (N_1 * N_2 * ... N_n)`
+  ## `let i2 = ((i mod (N_0 * N_1 ... N_n)) mod (N_1 * N_2 * ... N_n)) div (N_2 * ... * N_n)`
+  ## ...
+  ##
+  ## ... or
+  ##
+  ## `let i0 = i div Π_i=0^n N_i`
+  ## `let i1 = (i mod Π_i=0^n N_i) div Π_i=1^n N_i`
+  ## `let i2 = ((i mod Π_i=0^n N_i) mod Π_i=1^n N_i) div Π_i=2^n N_i`
+  ##
+  ## ...or
+  ##
+  ## `let i0 = Idx div [Product of remaining N-1 loops]`
+  ## `let i1 = (Idx mod [Product of remaining loops]) div [Product of remaining N-2 loops]`
+  ## `let i2 = (Idx mod [Product of remaining loops]) mod [Product of remaining N-2 loops]`
+  result = newStmtList()
+  var prefix = idx
+  var ml = loops.reversed
+  var lIdx = ml.pop # drop first element
+  var suffix = ml.calcStop()
+  while ml.len > 0:
+    result.add asLet(lIdx.idx, modOrDiv(prefix, suffix, isDiv = true))
+    lIdx = ml.pop # get next loop index & adjust remaining loops
+    # now adjust prefix and suffix
+    prefix = modOrDiv(prefix, suffix, isDiv = false)
+    if ml.len > 0: # adjust suffix
+      suffix = ml.calcStop()
+    else: # simply add last 'prefix'
+      result.add asLet(lIdx.idx, prefix)
+
+proc bodies(loops: seq[ForLoop]): NimNode =
+  ## Concatenates all loop bodies, by placing the next loop into the
+  ## `nnkEmpty` node of the current node
+  var ml = loops.reversed
+  #echo ml.repr
+  var cur = ml.pop
+  result = cur.body
+  for i in 0 ..< result.len:
+    let ch = result[i]
+    if ch.kind == nnkEmpty:
+      # insert next loop
+      result[i] = bodies(ml.reversed) # revert order again
+      break # there can only be a single `nnkEmpty` (multiple loops not allowed,
+            # yields CT error)
+
+proc fuseLoopImpl(ompStr: string, body: NimNode): NimNode =
+  # 1. extract all loops from the body
+  let loops = extractLoops(body)
+  # 2. generate identifier for the final loop
+  let idx = genSym(nskForVar, "idx")
+  # 3. generate the fused outer loop
+  result = genFusedLoop(idx, calcStop(loops), ompStr)
+  # 4. generate final loop body by...
+  var loopBody = newStmtList()
+  # 4a. generate prelude of loop variables of original loops
+  loopBody.add genPrelude(idx, loops) # gen code to produce the old loop variables
+  # 4b. insert old loop bodies into respective positions
+  loopBody.add bodies(loops)
+  result.add loopBody
+  when defined(DebugFuseLoop):
+    echo result.repr
+
+macro fuseLoops*(body: untyped): untyped =
+  result = fuseLoopImpl("", body)
+
+macro fuseLoops*(ompStr: untyped{lit}, body: untyped): untyped =
+  result = fuseLoopImpl(ompStr.strVal, body)
diff --git a/tests/tFuseLoops.nim b/tests/tFuseLoops.nim
new file mode 100644
index 0000000..474da11
--- /dev/null
+++ b/tests/tFuseLoops.nim
@@ -0,0 +1,54 @@
+import ../scinim/fuse_loops
+import std / unittest
+
+suite "fuseLoops":
+  test "Compiles test for different `fuseLoops` setups":
+    const N = 5
+    const T = 10
+    const X = 3
+
+    ## XXX: These should probably become proper tests. :)
+
+    fuseLoops:
+      for i in 0 ..< N:
+        let x = i * 2
+        for j in 0 ..< T:
+          let z = x * j
+          echo i, j, x, z
+        echo x
+
+    fuseLoops:
+      for i in 0 ..< N:
+        let x = i * 2
+        for j in 0 ..< T:
+          let z = x * j
+          echo i, j, x, z
+          for k in nofuse(0 ..< T):
+            echo k
+        echo x
+
+    fuseLoops("parallel for"):
+      for i in 0 ..< N:
+        let x = i * 2
+        for j in 0 ..< T:
+          let z = x * j
+          for k in 0 ..< X:
+            echo i, j, k, x, z
+        echo x
+
+    ## The following raises a CT error
+    when compiles((
+      fuseLoops:
+        for i in 0 ..< N:
+          let x = i * 2
+          var zsum = 0
+          for j in 0 ..< T:
+            let z = x * j
+            zsum += z
+            echo i, x, z
+          echo x
+          for j in 0 ..< 2 * T:
+            zsum += j
+          echo zsum
+    )):
+      doAssert false

From 19370f2a4357160f42681e4a77ebbb4f412ec877 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 18:51:44 +0200
Subject: [PATCH 14/16] export `fuseLoops` by default in `scinim`

---
 scinim.nim | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scinim.nim b/scinim.nim
index 4111ad4..4dbe9cd 100644
--- a/scinim.nim
+++ b/scinim.nim
@@ -3,3 +3,6 @@ export signals
 
 import ./scinim/numpyarrays
 export numpyarrays
+
+import ./scinim/fuse_loops
+export fuse_loops

From c7ccd4bb9b93319bd4358bc8a1df0da171ab982b Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 19:02:25 +0200
Subject: [PATCH 15/16] add minor docstring and add note about OpenMP
 compilation

---
 scinim/fuse_loops.nim | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/scinim/fuse_loops.nim b/scinim/fuse_loops.nim
index 8e44b4c..3355880 100644
--- a/scinim/fuse_loops.nim
+++ b/scinim/fuse_loops.nim
@@ -183,7 +183,24 @@ proc fuseLoopImpl(ompStr: string, body: NimNode): NimNode =
     echo result.repr
 
 macro fuseLoops*(body: untyped): untyped =
+  ## Fuses all loops inside the body of the macro, unless they are annotated with
+  ## `nofuse`.
   result = fuseLoopImpl("", body)
 
 macro fuseLoops*(ompStr: untyped{lit}, body: untyped): untyped =
+  ## Fuses all loops inside the body of the macro, unless they are annotated with
+  ## `nofuse`.
+  ##
+  ## This version supports handing a string to be passed to OpenMP, i.e.
+  ## `fuseLoops("parallelFor"): body`
+  ##
+  ## Note: To utilize OpenMP, you may have to compile with
+  ## `--passC:"-fopenmp" --passL:"-lgomp"`
+  ## (at least for GCC. For Clang the commands differ slightly I believe).
+  ##
+  ## There is also a chance you either have to compile with
+  ## `--exceptions:quirky`
+  ## or using the C++ backend, due to the C backend producing `goto` statements
+  ## inside the loops, which lead to C compiler errors when combined with
+  ## OpenMP.
   result = fuseLoopImpl(ompStr.strVal, body)

From 95e301cab5679f5ba78f06c538d25f46bf85e127 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Sun, 16 Jun 2024 19:08:01 +0200
Subject: [PATCH 16/16] update arraymancer dependency to 0.7.32 for SomeSets
 fix

---
 scinim.nimble | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scinim.nimble b/scinim.nimble
index de0fc18..f443502 100644
--- a/scinim.nimble
+++ b/scinim.nimble
@@ -10,7 +10,7 @@ backend       = "cpp"
 # Dependencies
 requires "nim >= 1.6.0"
 requires "threading"
-requires "arraymancer >= 0.7.31"
+requires "arraymancer >= 0.7.32"
 requires "polynumeric >= 0.2.0"
 requires "nimpy >= 0.2.0"