diff --git a/stint/private/as_signed_words.nim b/stint/private/as_signed_words.nim
deleted file mode 100644
index 3a225dc..0000000
--- a/stint/private/as_signed_words.nim
+++ /dev/null
@@ -1,137 +0,0 @@
-# Stint
-# Copyright 2018 Status Research & Development GmbH
-# Licensed under either of
-#
-#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
-#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
-#
-# at your option. This file may not be copied, modified, or distributed except according to those terms.
-
-import  ./datatypes, macros, as_words
-
-proc optimInt*(x: NimNode): NimNode =
-  let size = getSize(x)
-
-  if size > 64:
-    result = quote do:
-      # We represent as unsigned int. Signedness will be managed at a higher level.
-      array[`size` div 64, uint64]
-  elif size == 64:
-    result = quote do:
-      int64
-  elif size == 32:
-    result = quote do:
-      int32
-  elif size == 16:
-    result = quote do:
-      int16
-  elif size == 8:
-    result = quote do:
-      int8
-  else:
-    error "Unreachable path reached"
-
-proc isInt*(x: NimNode): static[bool] =
-  if   eqIdent(x, "uint64"): true
-  elif eqIdent(x, "int64"):  true
-  elif eqIdent(x, "int32"):  true
-  elif eqIdent(x, "int16"):  true
-  elif eqIdent(x, "int8"):   true
-  else: false
-
-macro most_significant_word*(x: IntImpl): untyped =
-
-  let optim_type = optimInt(x)
-  if optim_type.isInt:
-    result = quote do:
-      cast[`optim_type`](`x`)
-  else:
-    when system.cpuEndian == littleEndian:
-      let size = getSize(x)
-      let msw_pos = size div 64 - 1
-    else:
-      let msw_pos = 0
-    result = quote do:
-      # most significant word must be returned signed for addition/substraction
-      # overflow checking
-      cast[int](cast[`optim_type`](`x`)[`msw_pos`])
-
-macro asSignedWordsZip*[T](
-  x, y: IntImpl[T],
-  loopBody: untyped): untyped =
-  ## Iterates over x and y, as an array of words.
-  ## Input:
-  ##   - x, y: The multiprecision ints
-  ##   - loopBody: the operation you want to do.
-  ##               For the most significant word,
-  ##               the operation will be sign aware.
-  ##               for the next words it will ignore sign.
-  ## Iteration is always done from most significant to least significant
-  let
-    optim_type = optimInt(x)
-    idx = ident("idx_asSignedWordsRawZip")
-  var
-    first_x, first_y: NimNode
-    next_x, next_y: NimNode
-    to_replace = nnkBracket.newTree
-    replacing  = nnkBracket.newTree
-
-  to_replace.add x
-  to_replace.add y
-
-  # We directly cast the first x and y if the result fits in a word
-  # Otherwise we special case the most significant word
-  if optim_type.isInt:
-    first_x = quote do:
-      cast[`optim_type`](`x`)
-    first_y = quote do:
-      cast[`optim_type`](`y`)
-  else:
-    first_x = getAST(most_significant_word(x))
-    first_y = getAST(most_significant_word(y))
-
-  replacing.add first_x
-  replacing.add first_y
-
-  let firstReplacedAST = replaceNodes(loopBody, replacing, to_replace)
-
-  # Reset the replacement array
-  replacing = nnkBracket.newTree
-
-  # Setup the loop variables
-  next_x = ident("x_asSignedWordsRawZip")
-  next_y = ident("y_asSignedWordsRawZip")
-
-  # We replace the inner loop with the next_x[idx]
-  replacing.add quote do:
-    `next_x`[`idx`]
-  replacing.add quote do:
-    `next_y`[`idx`]
-
-  let nextReplacedAST = replaceNodes(loopBody, replacing, to_replace)
-
-  # Result:
-  result = newStmtList()
-  result.add firstReplacedAST
-
-  if not optim_type.isInt:
-    # if we have multiple iterations to do
-    if system.cpuEndian == bigEndian:
-      result = quote do:
-        {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-        let
-          `next_x`{.restrict.} = cast[ptr `optim_type`](`x`.unsafeaddr)
-          `next_y`{.restrict.} = cast[ptr `optim_type`](`y`.unsafeaddr)
-        for `idx` in 1 ..< `next_x`[].len:
-          # We start from the second word
-          `nextReplacedAST`
-    else:
-      # Little-Endian, iteration in reverse
-      result = quote do:
-        {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-        let
-          `next_x`{.restrict.} = cast[ptr `optim_type`](`x`.unsafeaddr)
-          `next_y`{.restrict.} = cast[ptr `optim_type`](`y`.unsafeaddr)
-        for `idx` in countdown(`next_x`[].len - 2, 0):
-          # We stop stop at the second to last word
-          `nextReplacedAST`
diff --git a/stint/private/as_words.nim b/stint/private/as_words.nim
index 2181fda..f2148f7 100644
--- a/stint/private/as_words.nim
+++ b/stint/private/as_words.nim
@@ -7,316 +7,120 @@
 #
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import  ./datatypes, macros
+import  ./datatypes
 
-proc optimUint(x: NimNode): NimNode =
-  let size = getSize(x)
-
-  if size > 64:
-    result = quote do:
-      array[`size` div 64, uint64]
-  elif size == 64:
-    result = quote do:
-      uint64
-  elif size == 32:
-    result = quote do:
-      uint32
-  elif size == 16:
-    result = quote do:
-      uint16
-  elif size == 8:
-    result = quote do:
-      uint8
+template least_significant_word*(x: AnyImpl): untyped =
+  when cpuEndian == littleEndian:
+    x.raw_data[0]
   else:
-    error "Unreachable path reached"
-
-proc isUint(x: NimNode): static[bool] =
-  if eqIdent(x, "uint64"):   true
-  elif eqIdent(x, "uint32"): true
-  elif eqIdent(x, "uint16"): true
-  elif eqIdent(x, "uint8"):  true
-  else: false
-
-proc replaceNodes*(ast: NimNode, replacing: NimNode, to_replace: NimNode): NimNode =
-  # Args:
-  #   - The full syntax tree
-  #   - an array of replacement value
-  #   - an array of identifiers to replace
-  proc inspect(node: NimNode): NimNode =
-    case node.kind:
-    of {nnkIdent, nnkSym}:
-      for i, c in to_replace:
-        if node.eqIdent($c):
-          return replacing[i]
-      return node
-    of nnkEmpty:
-      return node
-    of nnkLiterals:
-      return node
-    else:
-      var rTree = node.kind.newTree()
-      for child in node:
-        rTree.add inspect(child)
-      return rTree
-  result = inspect(ast)
-
-proc least_significant_two_words*(x: NimNode): tuple[lo, hi: NimNode] =
-  var node = x.getTypeInst
-  var result_lo = x
-
-  while node.kind == nnkBracketExpr:
-    assert eqIdent(node[0], "UintImpl") or eqIdent(node[0], "IntImpl"), (
-      "least_significant_word only supports primitive integers, Stint and Stuint")
-    result_lo = quote do: `result_lo`.lo
-    node = node[1]
-
-  var result_hi = result_lo.copyNimTree # ⚠ Aliasing: NimNodes are ref objects
-  result_hi[1] = newIdentNode("hi")     # replace the last lo by hi
-  result = (result_lo, result_hi)
+    x.raw_data[x.raw_data.high] # big-endian: last slot of the word array, not of the wrapper
 
-macro second_least_significant_word*(x: UintImpl or IntImpl): untyped =
-  result = least_significant_two_words(x).hi
+template second_least_significant_word*(x: AnyImpl): untyped =
+  static: assert x.raw_data.len >= 2 # a second word only exists with two or more words
+  when cpuEndian == littleEndian:
+    x.raw_data[1]
+  else:
+    x.raw_data[x.raw_data.high - 1] # big-endian: second slot from the end of the word array
 
-macro least_significant_word*(x: UintImpl or IntImpl): untyped =
-  result = least_significant_two_words(x).lo
+template most_significant_word*(x: AnyImpl): untyped =
+  when cpuEndian == littleEndian:
+    x.raw_data[x.raw_data.high] # little-endian: last slot of the word array
+  else:
+    x.raw_data[0]
 
-macro asWords*(n: UintImpl or IntImpl, ignoreEndianness: static[bool], loopBody: untyped): untyped =
-  ## Iterates over n, as an array of words.
+iterator asWords*[N, T](impl: AnyImpl[N, T], ignoreEndianness: static[bool]): T =
+  ## Iterates over n, returning words.
   ## Input:
-  ##   - n: The Multiprecision int
-  ##   - If endianness should be taken into account for iteratio order.
+  ##   - impl: The Multiprecision int
+  ##   - word: an identifier for the currently iterated over word
+  ##   - If endianness should be taken into account for iteration order.
   ##     If yes, iteration is done from most significant word to least significant.
   ##     Otherwise it is done in memory layout order.
-  ##   - loopBody: the operation you want to do on each word of n
-  let
-    optim_type = optimUint(n)
-  var
-    inner_n: NimNode
-    to_replace = nnkBracket.newTree
-    replacing  = nnkBracket.newTree
-
-  if optim_type.isUint:
-    # We directly cast n
-    inner_n = quote do:
-      cast[`optim_type`](`n`)
+  when ignoreEndianness or cpuEndian == bigEndian:
+    for word in impl.raw_data:
+      yield word
   else:
-    # If we have an array of words, inner_n is a loop intermediate variable
-    inner_n = ident("n_asWordsRaw")
-
-  to_replace.add n
-  replacing.add inner_n
+    for idx in countdown(impl.raw_data.high, 0): # reverse memory order on little-endian
+      yield impl.raw_data[idx]
 
-  let replacedAST = replaceNodes(loopBody, replacing, to_replace)
-
-  if optim_type.isUint:
-    result = replacedAST
-  else:
-    if ignoreEndianness or system.cpuEndian == bigEndian:
-      result = quote do:
-        for `inner_n` in cast[`optim_type`](`n`):
-          `replacedAST`
-    else:
-      assert false, "Not implemented"
-
-macro asWordsZip*(x, y: UintImpl or IntImpl, ignoreEndianness: static[bool], loopBody: untyped): untyped =
-  ## Iterates over x and y, as an array of words.
+iterator asWordsZip*[N, T](x, y: AnyImpl[N, T], ignoreEndianness: static[bool]): (T, T) =
+  ## Iterates over x and y, returning words.
   ## Input:
   ##   - x, y: The multiprecision ints
-  ##   - If endianness should be taken into account for iteratio order.
+  ##   - If endianness should be taken into account for iteration order.
   ##     If yes, iteration is done from most significant word to least significant.
   ##     Otherwise it is done in memory layout order.
-  ##   - loopBody: the operation you want to do on each word of n
-  let
-    optim_type = optimUint(x)
-    idx = ident("idx_asWordsRawZip")
-  var
-    inner_x, inner_y: NimNode
-    to_replace = nnkBracket.newTree
-    replacing  = nnkBracket.newTree
-
-  to_replace.add x
-  to_replace.add y
-
-  if optim_type.isUint:
-    # We directly castx and y
-    inner_x = quote do:
-      cast[`optim_type`](`x`)
-    inner_y = quote do:
-      cast[`optim_type`](`y`)
-
-    replacing.add inner_x
-    replacing.add inner_y
+  when ignoreEndianness or cpuEndian == bigEndian:
+    for idx in 0 ..< x.raw_data.len:
+      yield (x.raw_data[idx], y.raw_data[idx])
   else:
-    # If we have an array of words, inner_x and inner_y is are loop intermediate variable
-    inner_x = ident("x_asWordsRawZip")
-    inner_y = ident("y_asWordsRawZip")
+    for idx in countdown(x.raw_data.high, 0):
+      yield (x.raw_data[idx], y.raw_data[idx])
 
-    # We replace the inner loop with the inner_x[idx]
-    replacing.add quote do:
-      `inner_x`[`idx`]
-    replacing.add quote do:
-      `inner_y`[`idx`]
-
-  let replacedAST = replaceNodes(loopBody, replacing, to_replace)
-
-  if optim_type.isUint:
-    result = replacedAST
-  else:
-    if ignoreEndianness or system.cpuEndian == bigEndian:
-      result = quote do:
-        {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-        let
-          `inner_x`{.restrict.} = cast[ptr `optim_type`](`x`.unsafeaddr)
-          `inner_y`{.restrict.} = cast[ptr `optim_type`](`y`.unsafeaddr)
-        for `idx` in 0 ..< `inner_x`[].len:
-          `replacedAST`
+template asSignedWordsZip*(x, y: AnyImpl, wx, wy, body: untyped): untyped =
+  ## Walks x and y word by word, running ``body`` once per pair of words.
+  ## Use only for comparison operations.
+  ## Input:
+  ##   - x, y: The multiprecision ints
+  ##   - wx, wy: Nim idents injected for the current word of x and of y
+  ##   - body: the statements to run for each pair of words
+  ## Iteration is always done from most significant to least significant word
+  ## For the most significant, body is meant to be applied to a ``signed`` word
+  ## For the rest of the iteration, body is applied to unsigned words
+  ## Please refer to int4 two-complement representation:
+  ##   0b0000 =  0 | 0b0001 =  1 | 0b0010 =  2 | 0b0011 =  3
+  ##   0b0100 =  4 | 0b0101 =  5 | 0b0110 =  6 | 0b0111 =  7
+  ##   -----------------------------------------------------
+  ##   0b1100 = -4 | 0b1011 = -5 | 0b1010 = -6 | 0b1001 = -7
+  ##   0b1000 = -8 | 0b1111 = -1 | 0b1110 = -2 | 0b1101 = -3
+  block: # most significant word first
+    when cpuEndian == bigEndian:
+      let wx{.inject.} = x.raw_data[0] # NOTE(review): no signed cast is visible here - confirm
+      let wy{.inject.} = y.raw_data[0]
+    else:
+      let wx{.inject.} = x.raw_data[x.raw_data.high]
+      let wy{.inject.} = y.raw_data[y.raw_data.high]
+    body
+  block: # remaining words, still most significant to least significant
+    when cpuEndian == bigEndian: # this template has no ignoreEndianness parameter
+      for idx in 1 ..< x.raw_data.len:
+        let wx{.inject.} = x.raw_data[idx]
+        let wy{.inject.} = y.raw_data[idx]
+        body
     else:
-      # Little-Endian, iteration in reverse
-      result = quote do:
-        {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-        let
-          `inner_x`{.restrict.} = cast[ptr `optim_type`](`x`.unsafeaddr)
-          `inner_y`{.restrict.} = cast[ptr `optim_type`](`y`.unsafeaddr)
-        for `idx` in countdown(`inner_x`[].len - 1, 0):
-          `replacedAST`
+      for idx in countdown(x.raw_data.high - 1, 0):
+        let wx{.inject.} = x.raw_data[idx]
+        let wy{.inject.} = y.raw_data[idx]
+        body
 
-macro m_asWordsZip*[T: UintImpl or IntImpl](m: var T, x: T,
-  ignoreEndianness: static[bool], loopBody: untyped): untyped =
-  ## Iterates over a mutable int m and x as an array of words.
-  ## returning a !! Pointer !! of the proper type to m.
+iterator m_asWordsZip*[N, T](m: var AnyImpl[N, T], x: AnyImpl[N, T], ignoreEndianness: static[bool]): (var T, T) =
+  ## Iterates over a mutable int m and x, returning words.
   ## Input:
-  ##   - m: A mutable array
-  ##   - x: The multiprecision ints
-  ##   - If endianness should be taken into account for iteratio order.
+  ##   - m: A mutable multiprecision int
+  ##   - x: The multiprecision int
+  ##   - If endianness should be taken into account for iteration order.
   ##     If yes, iteration is done from most significant word to least significant.
   ##     Otherwise it is done in memory layout order.
-  ##   - loopBody: the operation you want to do on each word of n
-  let
-    optim_type = optimUint(x)
-    idx = ident("idx_asWordsRawZip")
-  var
-    inner_m, inner_x: NimNode
-    to_replace = nnkBracket.newTree
-    replacing  = nnkBracket.newTree
-
-  to_replace.add m
-  to_replace.add x
-
-  if optim_type.isUint:
-    # We directly cast m and x
-    inner_m = quote do:
-      cast[var `optim_type`](`m`.addr)
-    inner_x = quote do:
-      cast[`optim_type`](`x`)
-
-    replacing.add inner_m
-    replacing.add inner_x
-  else:
-    # If we have an array of words, inner_x and inner_y is are loop intermediate variable
-    inner_m = ident("m_asWordsRawZip")
-    inner_x = ident("x_asWordsRawZip")
-
-    # We replace the inner loop with the inner_x[idx]
-    replacing.add quote do:
-      `inner_m`[`idx`]
-    replacing.add quote do:
-      `inner_x`[`idx`]
-
-  let replacedAST = replaceNodes(loopBody, replacing, to_replace)
-
-  if optim_type.isUint:
-    result = replacedAST
+  when ignoreEndianness or cpuEndian == bigEndian:
+    for idx in 0 ..< x.raw_data.len:
+      yield (m.raw_data[idx], x.raw_data[idx])
   else:
-    if ignoreEndianness or system.cpuEndian == bigEndian:
-      result = quote do:
-        {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-        let
-          `inner_m`{.restrict.} = cast[ptr `optim_type`](`m`.addr)
-          `inner_x`{.restrict.} = cast[ptr `optim_type`](`x`.unsafeaddr)
-        for `idx` in 0 ..< `inner_x`[].len:
-          `replacedAST`
-    else:
-      # Little-Endian, iteration in reverse
-      result = quote do:
-        {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-        let
-          `inner_m`{.restrict.} = cast[ptr `optim_type`](`m`.addr)
-          `inner_x`{.restrict.} = cast[ptr `optim_type`](`x`.unsafeaddr)
-        for `idx` in countdown(`inner_x`[].len - 1, 0):
-          `replacedAST`
+    for idx in countdown(x.raw_data.high, 0):
+      yield (m.raw_data[idx], x.raw_data[idx])
 
-
-macro m_asWordsZip*[T: UintImpl or IntImpl](m: var T, x, y: T,
-  ignoreEndianness: static[bool], loopBody: untyped): untyped =
+iterator m_asWordsZip*[N, T](m: var AnyImpl[N, T], x, y: AnyImpl[N,T], ignoreEndianness: static[bool]): (var T, T, T) =
   ## Iterates over a mutable int m and x as an array of words.
   ## returning a !! Pointer !! of the proper type to m.
   ## Input:
-  ##   - m: A mutable array
-  ##   - x: The multiprecision ints
+  ##   - m: A mutable multiprecision int
+  ##   - x, y: The multiprecision int
   ##   - If endianness should be taken into account for iteratio order.
   ##     If yes, iteration is done from most significant word to least significant.
   ##     Otherwise it is done in memory layout order.
   ##   - loopBody: the operation you want to do on each word of n
-  let
-    optim_type = optimUint(x)
-    idx = ident("idx_asWordsRawZip")
-  var
-    inner_m, inner_x, inner_y: NimNode
-    to_replace = nnkBracket.newTree
-    replacing  = nnkBracket.newTree
-
-  to_replace.add m
-  to_replace.add x
-  to_replace.add y
-
-  if optim_type.isUint:
-    # We directly cast m, x and y
-    inner_m = quote do:
-      cast[var `optim_type`](`m`.addr)
-    inner_x = quote do:
-      cast[`optim_type`](`x`)
-    inner_y = quote do:
-      cast[`optim_type`](`y`)
-
-    replacing.add inner_m
-    replacing.add inner_x
-    replacing.add inner_y
-  else:
-    # If we have an array of words, inner_x and inner_y is are loop intermediate variable
-    inner_m = ident("m_asWordsRawZip")
-    inner_x = ident("x_asWordsRawZip")
-    inner_y = ident("y_asWordsRawZip")
-
-    # We replace the inner loop with the inner_x[idx]
-    replacing.add quote do:
-      `inner_m`[`idx`]
-    replacing.add quote do:
-      `inner_x`[`idx`]
-    replacing.add quote do:
-      `inner_y`[`idx`]
-
-  let replacedAST = replaceNodes(loopBody, replacing, to_replace)
-
-  # Arrays are in the form (`[]`, array, type)
-  if optim_type.isUint:
-    result = replacedAST
+  when ignoreEndianness or cpuEndian == bigEndian:
+    for idx in 0 ..< x.raw_data.len:
+      yield (m.raw_data[idx], x.raw_data[idx], y.raw_data[idx])
   else:
-    if ignoreEndianness or system.cpuEndian == bigEndian:
-      result = quote do:
-        {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-        let
-          `inner_m`{.restrict.} = cast[ptr `optim_type`](`m`.addr)
-          `inner_x`{.restrict.} = cast[ptr `optim_type`](`x`.unsafeaddr)
-          `inner_y`{.restrict.} = cast[ptr `optim_type`](`y`.unsafeaddr)
-        for `idx` in 0 ..< `inner_x`[].len:
-          `replacedAST`
-    else:
-      # Little-Endian, iteration in reverse
-      result = quote do:
-        {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-        let
-          `inner_m`{.restrict.} = cast[ptr `optim_type`](`m`.addr)
-          `inner_x`{.restrict.} = cast[ptr `optim_type`](`x`.unsafeaddr)
-          `inner_y`{.restrict.} = cast[ptr `optim_type`](`y`.unsafeaddr)
-        for `idx` in countdown(`inner_x`[].len - 1, 0):
-          `replacedAST`
+    for idx in countdown(x.raw_data.high, 0):
+      yield (m.raw_data[idx], x.raw_data[idx], y.raw_data[idx])
diff --git a/stint/private/bithacks.nim b/stint/private/bithacks.nim
index 11694db..bacb536 100644
--- a/stint/private/bithacks.nim
+++ b/stint/private/bithacks.nim
@@ -7,8 +7,7 @@
 #
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import  ./datatypes, stdlib_bitops, as_signed_words
-export stdlib_bitops
+import  ./datatypes, ./stdlib_bitops, ./as_words
 
 # We reuse bitops from Nim standard lib, and expand it for multi-precision int.
 # MpInt rely on no undefined behaviour as often we scan 0. (if 1 is stored in a uint128 for example)
diff --git a/stint/private/conversion.nim b/stint/private/conversion.nim
index af14a7d..0ae4d13 100644
--- a/stint/private/conversion.nim
+++ b/stint/private/conversion.nim
@@ -9,39 +9,42 @@
 
 import  ./datatypes
 
-func toSubtype*[T: SomeInteger](b: bool, _: typedesc[T]): T {.inline.}=
-  b.T
-
-func toSubtype*[T: UintImpl](b: bool, _: typedesc[T]): T {.inline.}=
-  type SubTy = type result.lo
-  result.lo = toSubtype(b, SubTy)
-
-func toUint*(n: UintImpl): auto {.inline.}=
-  ## Casts an unsigned integer to an uint of the same size
-
-  # TODO: uint128 support
-  when n.sizeof > 8:
-    raise newException("Unreachable. You are trying to cast a StUint with more than 64-bit of precision")
-  elif n.sizeof == 8:
-    cast[uint64](n)
-  elif n.sizeof == 4:
-    cast[uint32](n)
-  elif n.sizeof == 2:
-    cast[uint16](n)
-  else:
-    raise newException("Unreachable. StUint must be 16-bit minimum and a power of 2")
-
-func toUint*(n: SomeUnsignedInt): SomeUnsignedInt {.inline.}=
-  ## No-op overload of multi-precision int casting
-  n
-
-func asDoubleUint*(n: BaseUint): auto {.inline.} =
-  ## Convert an integer or StUint to an uint with double the size
-
-  type Double = (
-    when n.sizeof == 4: uint64
-    elif n.sizeof == 2: uint32
-    else: uint16
-  )
-
-  n.toUint.Double
+# func toSubtype*[Impl: SomeInteger](b: bool, _: typedesc[Impl]): Impl {.inline.}=
+#   b.T
+
+# func toSubtype*[Impl: UintImpl](b: bool, _: typedesc[Impl]): Impl {.inline.}=
+#   when Impl.N == 1:      # N refers to the number of words
+#     type SubTy = Impl.T  # T refers to the type of words
+#   else:
+#     type SubTy = type result.lo
+#     result.lo = toSubtype(b, SubTy)
+
+# func toUint*(n: UintImpl): auto {.inline.}=
+#   ## Casts an unsigned integer to an uint of the same size
+
+#   # TODO: uint128 support
+#   when n.sizeof > 8:
+#     raise newException("Unreachable. You are trying to cast a StUint with more than 64-bit of precision")
+#   elif n.sizeof == 8:
+#     cast[uint64](n)
+#   elif n.sizeof == 4:
+#     cast[uint32](n)
+#   elif n.sizeof == 2:
+#     cast[uint16](n)
+#   else:
+#     raise newException("Unreachable. StUint must be 16-bit minimum and a power of 2")
+
+# func toUint*(n: SomeUnsignedInt): SomeUnsignedInt {.inline.}=
+#   ## No-op overload of multi-precision int casting
+#   n
+
+# func asDoubleUint*(n: BaseUint): auto {.inline.} =
+#   ## Convert an integer or StUint to an uint with double the size
+
+#   type Double = (
+#     when n.sizeof == 4: uint64
+#     elif n.sizeof == 2: uint32
+#     else: uint16
+#   )
+
+#   n.toUint.Double
diff --git a/stint/private/datatypes.nim b/stint/private/datatypes.nim
index e6fe5bd..dd6359a 100644
--- a/stint/private/datatypes.nim
+++ b/stint/private/datatypes.nim
@@ -12,136 +12,144 @@
 import macros
 
 # The macro uintImpl must be exported
-
 when defined(mpint_test):
-  macro uintImpl*(bits: static[int]): untyped =
+  macro stintImpl*(bits: static[int], typeImpl: untyped): untyped =
     # Test version, StUint[64] = 2 uint32. Test the logic of the library
     assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
     assert bits >= 16, "The number of bits in a should be greater or equal to 16"
 
-    if bits >= 128:
-      let inner = getAST(uintImpl(bits div 2))
-      result = newTree(nnkBracketExpr, ident("UintImpl"), inner)
-    elif bits == 64:
-      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint32"))
-    elif bits == 32:
-      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint16"))
+    if bits >= 32:
+      let nb_words = bits div 32
+      result = quote do: `typeImpl`[`nb_words`, uint32]
     elif bits == 16:
-      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint8"))
-    else:
-      error "Fatal: unreachable"
-
-  macro intImpl*(bits: static[int]): untyped =
-    # Test version, StInt[64] = 2 uint32. Test the logic of the library
-    # Note that ints are implemented in terms of unsigned ints
-    # Signed operatiosn will be built on top of that.
-    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
-    assert bits >= 16, "The number of bits in a should be greater or equal to 16"
-
-    if bits >= 128:
-      let inner = getAST(uintImpl(bits div 2)) # IntImpl is built on top of UintImpl
-      result = newTree(nnkBracketExpr, ident("IntImpl"), inner)
-    elif bits == 64:
-      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint32"))
-    elif bits == 32:
-      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint16"))
-    elif bits == 16:
-      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint8"))
+      result = quote do: `typeImpl`[1, uint16]
+    elif bits == 8:
+      result = quote do: `typeImpl`[1, uint8]
     else:
       error "Fatal: unreachable"
 
 else:
-  macro uintImpl*(bits: static[int]): untyped =
+  macro stintImpl*(bits: static[int], typeImpl: untyped): untyped =
     # Release version, StUint[64] = uint64.
     assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
     assert bits >= 8, "The number of bits in a should be greater or equal to 8"
 
-    if bits >= 128:
-      let inner = getAST(uintImpl(bits div 2))
-      result = newTree(nnkBracketExpr, ident("UintImpl"), inner)
-    elif bits == 64:
-      result = ident("uint64")
-    elif bits == 32:
-      result = ident("uint32")
-    elif bits == 16:
-      result = ident("uint16")
-    elif bits == 8:
-      result = ident("uint8")
-    else:
-      error "Fatal: unreachable"
-
-  macro intImpl*(bits: static[int]): untyped =
-    # Release version, StInt[64] = int64.
-    # Note that int of size 128+ are implemented in terms of unsigned ints
-    # Signed operations will be built on top of that.
-
-    if bits >= 128:
-      let inner = getAST(uintImpl(bits div 2))
-      result = newTree(nnkBracketExpr, ident("IntImpl"), inner)
-    elif bits == 64:
-      result = ident("int64")
+    if bits >= 64:
+      let nb_words = bits div 64
+      result = quote do: `typeImpl`[`nb_words`, uint64]
     elif bits == 32:
-      result = ident("int32")
+      result = quote do: `typeImpl`[1, uint32]
     elif bits == 16:
-      result = ident("int16")
+      result = quote do: `typeImpl`[1, uint16]
     elif bits == 8:
-      result = ident("int8")
+      result = quote do: `typeImpl`[1, uint8]
     else:
       error "Fatal: unreachable"
 
-proc getSize*(x: NimNode): static[int] =
+type
+  # ### Private ### #
+  UintImpl*[N: static[int], T: SomeUnsignedInt] = object
+    raw_data*: array[N, T]
 
-  # Size of doesn't always work at compile-time, pending PR https://github.com/nim-lang/Nim/pull/5664
+  IntImpl*[N: static[int], T: SomeUnsignedInt] = object
+    raw_data*: array[N, T]
+  # ### Private ### #
 
-  var multiplier = 1
-  var node = x.getTypeInst
+  StUint*[bits: static[int]] = object
+    data*: stintImpl(bits, UintImpl)
 
-  while node.kind == nnkBracketExpr:
-    assert eqIdent(node[0], "UintImpl") or eqIdent(node[0], "IntImpl"), (
-      "getSize only supports primitive integers, Stint and Stuint")
-    multiplier *= 2
-    node = node[1]
+  StInt*[bits: static[int]] = object
+    data*: stintImpl(bits, IntImpl)
 
-  # node[1] has the type
-  # size(node[1]) * multiplier is the size in byte
+  AnyImpl*[N: static[int], T: SomeUnsignedInt] = UintImpl[N, T] or IntImpl[N, T]
 
-  # For optimization we cast to the biggest possible uint
-  result =  if eqIdent(node, "uint64") or eqIdent(node, "int64"): multiplier * 64
-            elif eqIdent(node, "uint32") or eqIdent(node, "int32"): multiplier * 32
-            elif eqIdent(node, "uint16") or eqIdent(node, "int16"): multiplier * 16
-            elif eqIdent(node, "uint8") or eqIdent(node, "int8"): multiplier * 8
-            elif eqIdent(node, "int") or eqIdent(node, "uint"):
-              multiplier * 8 * sizeof(int)
-            else:
-              assert false, "Error when computing the size. Found: " & $node
-              0
+# #################################
 
-macro getSize*(x: typed): untyped =
-  let size = getSize(x)
-  result = quote do:
-    `size`
+func getSize*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): static[int] =
+  ## Get size of int or uint implementation in bits
+  N * T.sizeof * 8
 
-type
-  # ### Private ### #
-  BaseUint* = UintImpl or SomeUnsignedInt
+# ###### lo and hi accessors ######
+# macro used as workaround because template crashes - https://github.com/nim-lang/Nim/issues/8052
 
-  UintImpl*[Baseuint] = object
-    when system.cpuEndian == littleEndian:
-      lo*, hi*: BaseUint
-    else:
-      hi*, lo*: BaseUint
+macro loImpl(dst, src: untyped, N: static[int]): untyped =
+  assert N >= 2
 
-  IntImpl*[Baseuint] = object
-    # Ints are implemented in terms of uints
-    when system.cpuEndian == littleEndian:
-      lo*, hi*: BaseUint
+  result = quote do:
+    const halfSize = `N` div 2
+    when cpuEndian == littleEndian:
+      for i in 0 ..< halfSize:
+        {.unroll.}
+        `dst`.raw_data[i] = `src`.raw_data[i]
     else:
-      hi*, lo*: BaseUint
-
-  # ### Private ### #
+      for i in 0 ..< halfSize:
+        {.unroll.} # big-endian: low half sits at [halfSize, N) of the N-word operand only
+        `dst`.raw_data[i + halfSize * ord(`dst`.raw_data.len == `N`)] = `src`.raw_data[i + halfSize * ord(`src`.raw_data.len == `N`)]
 
-  StUint*[bits: static[int]] = object
-    data*: uintImpl(bits)
+macro hiImpl(dst, src: untyped, N: static[int]): untyped =
+  assert N >= 2
 
-  StInt*[bits: static[int]] = object
-    data*: intImpl(bits)
+  result = quote do:
+    const halfSize = `N` div 2
+    # Start of the high half inside the N-word operand (0 on big-endian)
+    const hiOff = when cpuEndian == littleEndian: halfSize else: 0
+    # Only the N-word operand is offset; the half-width one is indexed from 0
+    const dstOff = hiOff * ord(`dst`.raw_data.len == `N`)
+    const srcOff = hiOff * ord(`src`.raw_data.len == `N`)
+    for i in 0 ..< halfSize:
+      {.unroll.}
+      `dst`.raw_data[i + dstOff] = `src`.raw_data[i + srcOff]
+
+proc loProc[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N, T]): AnyImpl[N div 2, T] {.inline.}=
+  loImpl(result, x, N)
+
+proc hiProc[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N, T]): AnyImpl[N div 2, T] {.inline.}=
+  hiImpl(result, x, N)
+
+proc loProc[N: static[int], T: SomeUnsignedInt](x: var AnyImpl[N, T]): var AnyImpl[N div 2, T] {.inline.}=
+  loImpl(result, x, N)
+
+proc hiProc[N: static[int], T: SomeUnsignedInt](x: var AnyImpl[N, T]): var AnyImpl[N div 2, T] {.inline.}=
+  hiImpl(result, x, N)
+
+macro lo*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): untyped=
+  ## Get the low part of an unsigned integer
+  # TODO: as this is called extensively and it is not a field
+  #       we should make sure repeated calls are optimized away by the compiler
+  if N == 1:
+    result = quote do: `x`.raw_data[0]
+  elif N > 1:
+    let loProc = bindSym"loProc"
+    result = quote do: `loProc`(`x`)
+  else:
+    result = quote do: {.fatal: "Unreachable".}
+
+macro `lo=`*[N: static[int], T: SomeUnsignedInt](dst: var AnyImpl[N,T], src: AnyImpl[N div 2, T]): untyped =
+  ## Set the low part of ``dst`` from ``src``
+  if N == 1:
+    result = quote do: `dst`.raw_data[0] = `src`.raw_data[0]
+  elif N > 1:
+    result = getAST(loImpl(dst, src, N)) # expand the word-copy loop at the call site
+  else:
+    result = quote do: {.fatal: "Unreachable".}
+
+macro hi*[N: static[int], T: SomeUnsignedInt](x: AnyImpl[N,T]): untyped =
+  ## Get the high part of an unsigned integer
+  # TODO: as this is called extensively and it is not a field
+  #       we should make sure repeated calls are optimized away by the compiler
+  if N == 1:
+    result = quote do: `x`.raw_data[0]
+  elif N > 1:
+    let hiProc = bindSym"hiProc"
+    result = quote do: `hiProc`(`x`)
+  else:
+    result = quote do: {.fatal: "Unreachable".}
+
+macro `hi=`*[N: static[int], T: SomeUnsignedInt](dst: var AnyImpl[N,T], src: AnyImpl[N div 2, T]): untyped =
+  ## Set the high part of ``dst`` from ``src``
+  if N == 1:
+    result = quote do: `dst`.raw_data[0] = `src`.raw_data[0]
+  elif N > 1:
+    result = getAST(hiImpl(dst, src, N)) # expand the word-copy loop at the call site
+  else:
+    result = quote do: {.fatal: "Unreachable".}
diff --git a/stint/private/initialization.nim b/stint/private/initialization.nim
index d7d59e3..6b89aae 100644
--- a/stint/private/initialization.nim
+++ b/stint/private/initialization.nim
@@ -7,12 +7,15 @@
 #
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import ./datatypes, ./as_words, typetraits
+import ./datatypes, ./as_words
 
-func zero*[T: BaseUint](_: typedesc[T]): T {.inline.}=
+func zero*(T: typedesc[AnyImpl]): T {.inline.} =
   discard
 
-func one*(T: typedesc[UintImpl or IntImpl]): T {.inline.} =
+func zero*(T: typedesc[SomeInteger]): T {.inline.} =
+  0
+
+func one*(T: typedesc[AnyImpl]): T {.inline.} =
   least_significant_word(result) = 1
 
 func one*(T: typedesc[SomeInteger]): T {.inline.} =
diff --git a/stint/private/uint_addsub.nim b/stint/private/uint_addsub.nim
index 6e94a5c..ad8c191 100644
--- a/stint/private/uint_addsub.nim
+++ b/stint/private/uint_addsub.nim
@@ -14,28 +14,35 @@ import  ./bithacks, ./conversion, ./initialization,
 
 # ############ Addition & Substraction ############ #
 
-proc `+=`*(x: var UintImpl, y: UintImpl) {.noSideEffect, inline.}=
+func inc*(x: var UintImpl){.inline.}=
+  x += one(type x)
+
+func dec*(x: var UintImpl){.inline.}=
+  x -= one(type x)
+
+func `+=`*(x: var UintImpl, y: UintImpl) {.inline.}=
   ## In-place addition for multi-precision unsigned int
 
-  type SubTy = type x.lo
   x.lo += y.lo
-  x.hi += (x.lo < y.lo).toSubtype(SubTy) + y.hi
+  if x.lo < y.lo:
+    # TODO: check if it triggers efficient add with carry
+    inc x.hi
+  x.hi += y.hi
 
-proc `+`*(x, y: UintImpl): UintImpl {.noSideEffect, inline.}=
+func `+`*(x, y: UintImpl): UintImpl {.inline.}=
   # Addition for multi-precision unsigned int
   result = x
   result += y
 
-proc `-`*(x, y: UintImpl): UintImpl {.noSideEffect, inline.}=
+func `-`*(x, y: UintImpl): UintImpl {.inline.}=
   # Substraction for multi-precision unsigned int
 
   type SubTy = type x.lo
   result.lo = x.lo - y.lo
-  result.hi = x.hi - y.hi - (x.lo < y.lo).toSubtype(SubTy)
+  result.hi = x.hi - y.hi
+  if x.lo < y.lo: # the low half wrapped around: borrow one from the high half
+    dec result.hi
 
-proc `-=`*(x: var UintImpl, y: UintImpl) {.noSideEffect, inline.}=
+func `-=`*(x: var UintImpl, y: UintImpl) {.inline.}=
   ## In-place substraction for multi-precision unsigned int
   x = x - y
-
-func inc*(x: var UintImpl){.inline.}=
-  x += one(type x)
diff --git a/stint/private/uint_comparison.nim b/stint/private/uint_comparison.nim
index 6bb5d69..2f3da89 100644
--- a/stint/private/uint_comparison.nim
+++ b/stint/private/uint_comparison.nim
@@ -13,30 +13,30 @@ func isZero*(n: SomeUnsignedInt): bool {.inline.} =
   n == 0
 
 func isZero*(n: UintImpl): bool {.inline.} =
-  asWords(n, ignoreEndianness = true):
-    if n != 0:
+  for word in asWords(n, ignoreEndianness = true):
+    if word != 0:
       return false
   return true
 
 func `<`*(x, y: UintImpl): bool {.inline.}=
   # Lower comparison for multi-precision integers
-  asWordsZip(x, y, ignoreEndianness = false):
-    if x != y:
-      return x < y
+  for wx, wy in asWordsZip(x, y, ignoreEndianness = false):
+    if wx != wy:
+      return wx < wy
   return false # they're equal
 
 func `==`*(x, y: UintImpl): bool {.inline.}=
   # Equal comparison for multi-precision integers
-  asWordsZip(x, y, ignoreEndianness = true):
-    if x != y:
+  for wx, wy in asWordsZip(x, y, ignoreEndianness = true):
+    if wx != wy:
       return false
   return true # they're equal
 
 func `<=`*(x, y: UintImpl): bool {.inline.}=
   # Lower or equal comparison for multi-precision integers
-  asWordsZip(x, y, ignoreEndianness = false):
-    if x != y:
-      return x < y
+  for wx, wy in asWordsZip(x, y, ignoreEndianness = false):
+    if wx != wy:
+      return wx < wy
   return true # they're equal
 
 func isOdd*(x: UintImpl): bool {.inline.}=