From de1ed11a3c33686b09eeb6d62cdb966b8cd16cd2 Mon Sep 17 00:00:00 2001
From: Christopher Blanchard <git@nf3.co>
Date: Wed, 24 Feb 2021 05:40:17 +0000
Subject: [PATCH] feat(fix): Fix common errors

`fix` attempts to correct and clean up a postcode, without validating it, by replacing commonly confused characters (e.g. mixing up `0` and `"O"`, `1` and `"I"`). It also uppercases the input and fixes spacing. The original input is returned if it cannot be reliably fixed.
---
 README.md               | 31 ++++++++++++++++--
 lib/index.ts            | 70 +++++++++++++++++++++++++++++++++++++++++
 test/exhaustive_unit.ts |  9 +++++-
 test/fix.unit.ts        | 64 +++++++++++++++++++++++++++++++++++++
 4 files changed, 171 insertions(+), 3 deletions(-)
 create mode 100644 test/fix.unit.ts

diff --git a/README.md b/README.md
index 451a2d1..c15d02a 100644
--- a/README.md
+++ b/README.md
@@ -60,7 +60,7 @@ isValid("AA1 1AB"); // => true
 
 Pass a string to `parse`. This will return a valid or invalid postcode instance which can be easily destructured.
 
-#### Valid Postcode 
+#### Valid Postcode
 
 `ValidPostcode` type definition
 
@@ -80,7 +80,7 @@ const {
 } = parse("Sw1A     2aa");
 ```
 
-#### Invalid Postcode 
+#### Invalid Postcode
 
 `InvalidPostcode` type definition
 
@@ -163,6 +163,33 @@ toSector("Sw1A 2aa");      // => "SW1A 2"
 toUnit("Sw1A 2aa");        // => "AA"
 ```
 
+#### Fix
+
+`fix` attempts to correct and clean up a postcode, without validating it, by replacing commonly confused characters (e.g. mixing up `0` and `"O"`, `1` and `"I"`). It also uppercases the input and fixes spacing. The original input is returned if it cannot be reliably fixed.
+
+```javascript
+fix("SWIA 2AA") => "SW1A 2AA" // Corrects I to 1
+fix("SW1A 21A") => "SW1A 2IA" // Corrects 1 to I
+fix("SW1A OAA") => "SW1A 0AA" // Corrects O to 0
+fix("SW1A 20A") => "SW1A 2OA" // Corrects 0 to O
+
+// Other effects
+fix(" SW1A  2AO") => "SW1A 2AO" // Properly spaces
+fix("SW1A 2A0") => "SW1A 2AO" // 0 is coerced into "O"
+```
+
+`fix` is intended to be used in conjunction with `parse` to make postcode entry more forgiving:
+
+```javascript
+const { inward } = parse(fix("SW1A 2A0")); // inward = "2AO"
+```
+
+If the input is not deemed fixable, the original string is returned unchanged:
+
+```javascript
+fix("12a") => "12a"
+```
+
 #### Extract & Replace
 
 `match`. Retrieve valid postcodes in a body of text
diff --git a/lib/index.ts b/lib/index.ts
index 663eb3e..7a1caad 100644
--- a/lib/index.ts
+++ b/lib/index.ts
@@ -357,3 +357,73 @@ export const replace = (corpus: string, replaceWith = ""): ReplaceResult => ({
   match: match(corpus),
   result: corpus.replace(POSTCODE_CORPUS_REGEX, replaceWith),
 });
+// Loose shape of a fixable postcode: letter slots also admit 0/1, digit slots also admit O/I
+export const FIXABLE_REGEX = /^\s*[a-z01]{1,2}[0-9oi][a-z\d]?\s*[0-9oi][a-z01]{2}\s*$/i;
+
+/**
+ * Attempts to fix and clean a postcode without validating it. Specifically:
+ * - Swaps commonly confused characters into the type their position requires (O <=> 0, I <=> 1)
+ * - Upper cases and trims the string
+ * - Puts a single space between the outward and inward codes
+ *
+ * @param s - Postcode candidate to clean up
+ * @returns The fixed postcode, or `s` unchanged when it does not match `FIXABLE_REGEX`
+ *
+ * @example
+ * ```javascript
+ * fix(" SW1A  2AO") => "SW1A 2AO" // Properly spaces
+ * fix("SW1A 2A0") => "SW1A 2AO" // 0 is coerced into "O"
+ * ```
+ *
+ * @example Use with `parse` to make postcode entry more forgiving
+ * ```javascript
+ * const { inward } = parse(fix("SW1A 2A0")); // inward = "2AO"
+ * ```
+ */
+export const fix = (s: string): string => {
+  // `test` rather than a local `match`, which would shadow this module's `match` function
+  if (!FIXABLE_REGEX.test(s)) return s;
+  // Removing every whitespace run also trims; the inward code is always the last 3 characters
+  const compact = s.toUpperCase().replace(/\s+/g, "");
+  const outward = compact.slice(0, -3);
+  const inward = compact.slice(-3);
+  return `${coerceOutcode(outward)} ${coerce("NLL", inward)}`;
+};
+
+/**
+ * Digits commonly typed where the look-alike letter was meant (`L` slots)
+ */
+const toLetter: Record<string, string> = { "0": "O", "1": "I" };
+
+/**
+ * Letters commonly typed where the look-alike digit was meant (`N` slots)
+ */
+const toNumber: Record<string, string> = { O: "0", I: "1" };
+
+const coerceOutcode = (i: string): string => {
+  // Outward code shape keyed by length; `?` marks slots too ambiguous to coerce
+  const shapes: Record<number, string> = { 2: "LN", 3: "L??", 4: "LLN?" };
+  const shape = shapes[i.length];
+  return shape === undefined ? i : coerce(shape, i);
+};
+
+/**
+ * Coerces each character of `input` to the type named at the same position
+ * of `pattern`: `L` turns look-alike digits into letters, `N` turns look-alike
+ * letters into digits and `?` keeps the character exactly as it is.
+ * Characters at positions where `pattern` has no `L`, `N` or `?` are dropped.
+ *
+ * @hidden
+ * @example coerce("LLN", "0O8") => "OO8"
+ */
+const coerce = (pattern: string, input: string): string => {
+  let output = "";
+  for (let idx = 0; idx < input.length; idx += 1) {
+    const char = input.charAt(idx);
+    const slot = pattern.charAt(idx);
+    if (slot === "N") output += toNumber[char] || char;
+    else if (slot === "L") output += toLetter[char] || char;
+    else if (slot === "?") output += char;
+  }
+  return output;
+};
diff --git a/test/exhaustive_unit.ts b/test/exhaustive_unit.ts
index e1eecf6..f216558 100644
--- a/test/exhaustive_unit.ts
+++ b/test/exhaustive_unit.ts
@@ -5,7 +5,7 @@ import axios from "axios";
 
 const TIMEOUT = 60000;
 
-import { parse } from "../lib/index";
+import { parse, fix } from "../lib/index";
 
 const url = "https://data.ideal-postcodes.co.uk/postcodes.csv";
 
@@ -24,6 +24,13 @@ describe("Exhaustive postcode test", () => {
       .filter((p: string) => p !== "GIR 0AA");
   });
 
+  describe("fix", () => {
+    it("never corrects a valid postcode", function () {
+      this.timeout(TIMEOUT);
+      postcodes.forEach((valid) => assert.equal(fix(valid), valid));
+    });
+  });
+
   describe(".valid", () => {
     it("should all be valid", function () {
       this.timeout(TIMEOUT);
diff --git a/test/fix.unit.ts b/test/fix.unit.ts
new file mode 100644
index 0000000..98aa258
--- /dev/null
+++ b/test/fix.unit.ts
@@ -0,0 +1,64 @@
+import { assert } from "chai";
+import { fix } from "../lib/index";
+
+describe("fix", () => {
+  it("trims postcode", () => {
+    assert.equal(fix(" SW1A 2AA "), "SW1A 2AA");
+  });
+
+  it("upper cases string", () => {
+    assert.equal(fix(" Sw1A 2aa "), "SW1A 2AA");
+  });
+
+  it("fixes spacing", () => {
+    assert.equal(fix(" Sw1A2aa "), "SW1A 2AA");
+    assert.equal(fix(" Sw1A  2aa "), "SW1A 2AA");
+  });
+
+  it("returns original string if not fixable", () => {
+    assert.equal(fix(" 1A2aa "), " 1A2aa ");
+  });
+
+  describe("outward code", () => {
+    it("fixes LN format", () => {
+      assert.equal(fix("01 OAA"), "O1 0AA");
+      assert.equal(fix("SO OAA"), "S0 0AA");
+    });
+
+    it("fixes L?? format", () => {
+      assert.equal(fix("0W1 OAA"), "OW1 0AA");
+
+      // Too ambiguous: 2nd and 3rd characters may each be a letter or a digit, so are left as-is
+      assert.equal(fix("S01 OAA"), "S01 0AA");
+      assert.equal(fix("SO1 OAA"), "SO1 0AA");
+      assert.equal(fix("SWO OAA"), "SWO 0AA");
+      assert.equal(fix("SW0 OAA"), "SW0 0AA");
+    });
+
+    it("fixes LLN? format", () => {
+      assert.equal(fix("0W1A OAA"), "OW1A 0AA");
+      assert.equal(fix("S01A OAA"), "SO1A 0AA");
+      assert.equal(fix("SWOA OAA"), "SW0A 0AA");
+      // Ambiguous: the last character may be a letter or a digit, so it is left as-is
+      assert.equal(fix("SW10 OAA"), "SW10 0AA");
+      assert.equal(fix("SW1O OAA"), "SW1O 0AA");
+    });
+  });
+
+  describe("inward code", () => {
+    it("coerces first character", () => {
+      assert.equal(fix(" SW1A OAA"), "SW1A 0AA");
+    });
+    it("coerces second character", () => {
+      assert.equal(fix("SW1A 20A"), "SW1A 2OA");
+    });
+    it("coerces last character", () => {
+      assert.equal(fix("SW1A 2A0"), "SW1A 2AO");
+    });
+  });
+
+  it("fixes 1 <=> I", () => {
+    assert.equal(fix("SWIA 2AA"), "SW1A 2AA");
+    assert.equal(fix("1W1A 2AA"), "IW1A 2AA");
+  });
+});