From 1ddc2ed8f46dfcba3c608fbdb8477b048db066e8 Mon Sep 17 00:00:00 2001
From: jon-edward <townsend.jonathan.e@gmail.com>
Date: Fri, 28 Jun 2024 22:37:04 -0400
Subject: [PATCH 1/7] Add tests for --define:"noidentnormalize"

---
 tests/tnoidentnormalize.h    |  2 ++
 tests/tnoidentnormalize.nim  | 26 ++++++++++++++++++++++++++
 tests/tnoidentnormalize.nims |  2 ++
 3 files changed, 30 insertions(+)
 create mode 100644 tests/tnoidentnormalize.h
 create mode 100644 tests/tnoidentnormalize.nim
 create mode 100644 tests/tnoidentnormalize.nims

diff --git a/tests/tnoidentnormalize.h b/tests/tnoidentnormalize.h
new file mode 100644
index 0000000..bbe8ee2
--- /dev/null
+++ b/tests/tnoidentnormalize.h
@@ -0,0 +1,2 @@
+const int doNot_normalize = 1;
+const int will__normalize = 2;
\ No newline at end of file
diff --git a/tests/tnoidentnormalize.nim b/tests/tnoidentnormalize.nim
new file mode 100644
index 0000000..f091953
--- /dev/null
+++ b/tests/tnoidentnormalize.nim
@@ -0,0 +1,26 @@
+import os, strutils
+
+import ../src/futhark
+
+const outputPath = currentSourcePath.parentDir / "tnoidentnormalize_out.nim"
+
+importc:
+  path "."
+  outputPath outputPath
+  "tnoidentnormalize.h"
+
+
+let fObject = open(outputPath, FileMode.fmRead)
+let outputText = fObject.readAll()
+fObject.close()
+
+# Because of Nim's case/underscore insensitivity, the output of `importc`
+# has to be inspected as raw text rather than by referencing the identifiers.
+
+# The existence of the exported identifiers `doNot_normalize`, `willnormalize`, 
+# and the absence of an exported `donotnormalize` should be sufficient to determine
+# whether the test has passed.
+
+doAssert " doNot_normalize*" in outputText
+doAssert " willnormalize*" in outputText
+doAssert (not (" donotnormalize*" in outputText))
diff --git a/tests/tnoidentnormalize.nims b/tests/tnoidentnormalize.nims
new file mode 100644
index 0000000..1f9f789
--- /dev/null
+++ b/tests/tnoidentnormalize.nims
@@ -0,0 +1,2 @@
+--define:noidentnormalize
+--define:nodeclguards
\ No newline at end of file

From a5e96cb2516e7a6ad47c7e8098aa54f84238a72b Mon Sep 17 00:00:00 2001
From: jon-edward <townsend.jonathan.e@gmail.com>
Date: Fri, 28 Jun 2024 22:37:32 -0400
Subject: [PATCH 2/7] Add compilation flag "noidentnormalize"

---
 src/futhark.nim | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/futhark.nim b/src/futhark.nim
index bee61b5..815a7c5 100644
--- a/src/futhark.nim
+++ b/src/futhark.nim
@@ -34,6 +34,7 @@ const
   preAnsiFuncDecl = defined(preAnsiFuncDecl)
   echoForwards = defined(echoForwards)
   generateInline = defined(generateInline)
+  noIdentNormalize = defined(noIdentNormalize)
   VERSION = block:
     # source style, go up one dir
     var nimblePath = currentSourcePath().parentDir().parentDir() / "futhark.nimble"
@@ -186,6 +187,39 @@ proc isUnsignedNumber(x: string): bool =
   except ValueError:
     result = false
 
+const identStartChars = {'a'..'z', 'A'..'Z', char(0x80)..char(0xff)}
+const identChars = identStartChars + {'0'..'9', '_'}
+
+iterator span(s: string, startIndex: int, endIndex: int = 0): char = 
+  # Span of characters in string `s`, starting at startIndex (inclusive)
+  # and ending at endIndex (exclusive). If endIndex is 0, then iterate to 
+  # end of string.
+
+  var endIndex = if endIndex == 0:
+    s.len
+  else:
+    endIndex  
+  for i in startIndex..<endIndex: 
+    yield s[i]
+
+proc isValidIdent(name: string): bool = 
+  #  Check for https://nim-lang.org/docs/manual.html#lexical-analysis-identifiers-amp-keywords
+  
+  if name.len == 0:
+    return false
+  let firstChar = name[0]
+  let startCondition = firstChar in identStartChars
+  if not startCondition or name.len == 1:
+    return startCondition
+  var lastChar = firstChar
+  for c in name.span(1):
+    if (lastChar == c) and (c == '_'):
+      return false
+    if not (c in identChars):
+      return false
+    lastChar = c
+  true
+
 proc sanitizeName(usedNames: var HashSet[string], origName: string, kind: string, renameCallback: RenameCallback, partof = ""): string {.compileTime.} =
   result = origName
   if not renameCallback.isNil:
@@ -195,7 +229,8 @@ proc sanitizeName(usedNames: var HashSet[string], origName: string, kind: string
       result = "compiler_" & result[2..^1]
     else:
       result = "internal_" & result[1..^1]
-  result = result.nimIdentNormalize()
+  if (not noIdentNormalize) or not result.isValidIdent:
+    result = result.nimIdentNormalize()
   var renamed = false
   if usedNames.contains(result) or result in builtins:
     result.add kind

From caeab3d2d3cb99780cc622d0a9930d5c8236c49a Mon Sep 17 00:00:00 2001
From: jon-edward <townsend.jonathan.e@gmail.com>
Date: Fri, 28 Jun 2024 22:59:26 -0400
Subject: [PATCH 3/7] Update README with explanation for -d:noidentnormalize

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index 012f3dd..6904479 100644
--- a/README.md
+++ b/README.md
@@ -211,6 +211,14 @@ functions. However if you are compiling your code directly against some C code
 these might be useful to you. In this case you can pass `-d:generateInline` to
 generate function definitions for inline functions.
 
+## Preventing identifier normalization
+By default, Futhark generates identifiers that are normalized per 
+[`strutils.nimIdentNormalize`](https://nim-lang.org/docs/strutils.html#nimIdentNormalize%2Cstring).
+You might prefer keeping the case convention from your source library consistent 
+with your wrappe and in cases when the source name is a valid Nim identifier you can can use 
+`-d:noIdentNormalize`. For the cases when a source name is not a valid Nim identifier 
+this flag is ignored.
+
 ## Pre-ANSI C function declarations
 Also known as K&R style functions. By definition C code like
 ```c

From 326d97e3b4805e378a557776c4dbf8cf8c2b5d42 Mon Sep 17 00:00:00 2001
From: jon-edward <townsend.jonathan.e@gmail.com>
Date: Fri, 28 Jun 2024 22:59:49 -0400
Subject: [PATCH 4/7] Remove -d:nodeclguards

---
 tests/tnoidentnormalize.nims | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/tnoidentnormalize.nims b/tests/tnoidentnormalize.nims
index 1f9f789..2cb3c6a 100644
--- a/tests/tnoidentnormalize.nims
+++ b/tests/tnoidentnormalize.nims
@@ -1,2 +1 @@
---define:noidentnormalize
---define:nodeclguards
\ No newline at end of file
+--define:noidentnormalize
\ No newline at end of file

From 26a2f8695597e4eb91b292437bb190db17efaa62 Mon Sep 17 00:00:00 2001
From: jon-edward <townsend.jonathan.e@gmail.com>
Date: Fri, 28 Jun 2024 23:03:43 -0400
Subject: [PATCH 5/7] Fix typo, move readme section

---
 README.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 6904479..cf6f9fc 100644
--- a/README.md
+++ b/README.md
@@ -205,20 +205,20 @@ will be exported as well and documentation will be readable. This is mostly
 useful if you want to export documentation but can't use `nodeclguards` (which
 makes even more readable documentation).
 
-## Inline functions
-When using Futhark with dynamic libraries it doesn't make sense to wrap inline
-functions. However if you are compiling your code directly against some C code
-these might be useful to you. In this case you can pass `-d:generateInline` to
-generate function definitions for inline functions.
-
-## Preventing identifier normalization
+### Preventing identifier normalization
 By default, Futhark generates identifiers that are normalized per 
 [`strutils.nimIdentNormalize`](https://nim-lang.org/docs/strutils.html#nimIdentNormalize%2Cstring).
 You might prefer keeping the case convention from your source library consistent 
-with your wrappe and in cases when the source name is a valid Nim identifier you can can use 
+with your wrapper, and in cases when the source name is a valid Nim identifier you can use
 `-d:noIdentNormalize`. For the cases when a source name is not a valid Nim identifier 
 this flag is ignored.
 
+## Inline functions
+When using Futhark with dynamic libraries it doesn't make sense to wrap inline
+functions. However if you are compiling your code directly against some C code
+these might be useful to you. In this case you can pass `-d:generateInline` to
+generate function definitions for inline functions.
+
 ## Pre-ANSI C function declarations
 Also known as K&R style functions. By definition C code like
 ```c

From e551b697390ddc02a01f11307b0c403f563eeab0 Mon Sep 17 00:00:00 2001
From: jon-edward <townsend.jonathan.e@gmail.com>
Date: Sat, 29 Jun 2024 13:09:44 -0400
Subject: [PATCH 6/7] Add test for -d:noidentnormalize with name collisions

---
 tests/tnoidentnormalizecollision.h    |  7 ++++++
 tests/tnoidentnormalizecollision.nim  | 36 +++++++++++++++++++++++++++
 tests/tnoidentnormalizecollision.nims |  1 +
 3 files changed, 44 insertions(+)
 create mode 100644 tests/tnoidentnormalizecollision.h
 create mode 100644 tests/tnoidentnormalizecollision.nim
 create mode 100644 tests/tnoidentnormalizecollision.nims

diff --git a/tests/tnoidentnormalizecollision.h b/tests/tnoidentnormalizecollision.h
new file mode 100644
index 0000000..7590e2f
--- /dev/null
+++ b/tests/tnoidentnormalizecollision.h
@@ -0,0 +1,7 @@
+#define my_var 1
+#define myVar 2
+#define myvar 3
+#define MYVAR 4
+#define MY_VAR 5
+#define MyVar 6
+#define My_Var 7
diff --git a/tests/tnoidentnormalizecollision.nim b/tests/tnoidentnormalizecollision.nim
new file mode 100644
index 0000000..7297405
--- /dev/null
+++ b/tests/tnoidentnormalizecollision.nim
@@ -0,0 +1,36 @@
+import os, strutils
+
+import ../src/futhark
+
+const outputPath = currentSourcePath.parentDir / "tnoidentnormalizecollision_out.nim"
+
+importc:
+  path "."
+  outputPath outputPath
+  "tnoidentnormalizecollision.h"
+
+
+let fObject = open(outputPath, FileMode.fmRead)
+let outputText = fObject.readAll()
+fObject.close()
+
+# Test default behavior per tnormalize.nim
+doAssert(my_var == 1)
+doAssert(myVarconst == 2) # Renamed as 1st definition collides
+doAssert(myvarconstC690172C == 3) # Renamed as both 1st and 2nd definition collides
+doAssert(MYVAR == 4)
+doAssert(MY_VARconst == 5) # Renamed as 4th definition collides
+doAssert(MyVarconst2E4AA817 == 6) # Renamed as 4th and 5th definition collides
+doAssert(My_Varconst038D4D97 == 7) # Renamed as 4th and 5th definition collides
+
+# Manually test identifier case of output
+doAssert " my_var*" in outputText
+doAssert " myVarconst*" in outputText
+doAssert " myvarconstC690172C*" in outputText
+doAssert " MYVAR*" in outputText
+doAssert " MyVarconst2E4AA817*" in outputText
+doAssert " My_Varconst038D4D97*" in outputText
+
+# `myvar` was renamed in the second name collision, so we can check its 
+# nonexistence in the output file.
+doAssert (not (" myvar*" in outputText))
\ No newline at end of file
diff --git a/tests/tnoidentnormalizecollision.nims b/tests/tnoidentnormalizecollision.nims
new file mode 100644
index 0000000..2cb3c6a
--- /dev/null
+++ b/tests/tnoidentnormalizecollision.nims
@@ -0,0 +1 @@
+--define:noidentnormalize
\ No newline at end of file

From 7410c6c6d1d7929af734b077f969954a2e044372 Mon Sep 17 00:00:00 2001
From: jon-edward <townsend.jonathan.e@gmail.com>
Date: Sat, 29 Jun 2024 13:11:14 -0400
Subject: [PATCH 7/7] Fix name collision resolution, fix trailing underscore
 case for isValidIdent

---
 src/futhark.nim | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/futhark.nim b/src/futhark.nim
index 815a7c5..0bc06e7 100644
--- a/src/futhark.nim
+++ b/src/futhark.nim
@@ -218,7 +218,7 @@ proc isValidIdent(name: string): bool =
     if not (c in identChars):
       return false
     lastChar = c
-  true
+  lastChar != '_'
 
 proc sanitizeName(usedNames: var HashSet[string], origName: string, kind: string, renameCallback: RenameCallback, partof = ""): string {.compileTime.} =
   result = origName
@@ -229,18 +229,22 @@ proc sanitizeName(usedNames: var HashSet[string], origName: string, kind: string
       result = "compiler_" & result[2..^1]
     else:
       result = "internal_" & result[1..^1]
+  var normalizedName = result.nimIdentNormalize()
   if (not noIdentNormalize) or not result.isValidIdent:
-    result = result.nimIdentNormalize()
+    result = normalizedName
   var renamed = false
-  if usedNames.contains(result) or result in builtins:
+  if usedNames.contains(normalizedName) or result in builtins:
+    normalizedName.add kind
     result.add kind
     renamed = true
-  if usedNames.contains(result) or result in builtins:
-    result.add hash(origName).uint32.toHex
+  if usedNames.contains(normalizedName) or result in builtins:
+    let uniqueTail = hash(origName).uint32.toHex
+    result.add uniqueTail
+    normalizedName.add uniqueTail
     renamed = true
   if renamed:
     hint "Renaming \"" & origName & "\" to \"" & result & "\"" & (if partof.len != 0: " in " & partof else: "")
-  usedNames.incl result
+  usedNames.incl normalizedName
 
 proc sanitizeName(state: var State, origName: string, kind: string): string {.compileTime.} =
   if not state.renamed.hasKey(origName):