From de1ed11a3c33686b09eeb6d62cdb966b8cd16cd2 Mon Sep 17 00:00:00 2001 From: Christopher Blanchard Date: Wed, 24 Feb 2021 05:40:17 +0000 Subject: [PATCH] feat(fix): Fix common errors `fix` Attempts to correct and clean up a postcode without validating by replacing commonly misplaced characters (e.g. mixing up `0` and `"O"`, `1` and `"I"`). This method will also uppercase and fix spacing. The original input is returned if it cannot be reliably fixed. --- README.md | 31 ++++++++++++++++-- lib/index.ts | 70 +++++++++++++++++++++++++++++++++++++++++ test/exhaustive_unit.ts | 9 +++++- test/fix.unit.ts | 64 +++++++++++++++++++++++++++++++++++++ 4 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 test/fix.unit.ts diff --git a/README.md b/README.md index 451a2d1..c15d02a 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ isValid("AA1 1AB"); // => true Pass a string to `parse`. This will return a valid or invalid postcode instance which can be easily destructured. -#### Valid Postcode +#### Valid Postcode `ValidPostcode` type definition @@ -80,7 +80,7 @@ const { } = parse("Sw1A 2aa"); ``` -#### Invalid Postcode +#### Invalid Postcode `InvalidPostcode` type definition @@ -163,6 +163,33 @@ toSector("Sw1A 2aa"); // => "SW1A 2" toUnit("Sw1A 2aa"); // => "AA" ``` +#### Fix + +`fix` Attempts to correct and clean up a postcode without validating by replacing commonly misplaced characters (e.g. mixing up `0` and `"O"`, `1` and `"I"`). This method will also uppercase and fix spacing. The original input is returned if it cannot be reliably fixed. 
+ +```javascript +fix("SWIA 2AA") => "SW1A 2AA" // Corrects I to 1 +fix("SW1A 21A") => "SW1A 2IA" // Corrects 1 to I +fix("SW1A OAA") => "SW1A 0AA" // Corrects O to 0 +fix("SW1A 20A") => "SW1A 2OA" // Corrects 0 to O + +// Other effects +fix(" SW1A 2AO") => "SW1A 2AO" // Properly spaces +fix("SW1A 2A0") => "SW1A 2AO" // 0 is coerced into "O" +``` + +Aims to be used in conjunction with parse to make postcode entry more forgiving: + +```javascript +const { inward } = parse(fix("SW1A 2A0")); // inward = "2AO" +``` + +If the input is not deemed fixable, the original string will be returned. + +```javascript +fix("12a") => "12a" +``` + #### Extract & Replace `match`. Retrieve valid postcodes in a body of text diff --git a/lib/index.ts b/lib/index.ts index 663eb3e..7a1caad 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -357,3 +357,73 @@ export const replace = (corpus: string, replaceWith = ""): ReplaceResult => ({ match: match(corpus), result: corpus.replace(POSTCODE_CORPUS_REGEX, replaceWith), }); + +export const FIXABLE_REGEX = /^\s*[a-z01]{1,2}[0-9oi][a-z\d]?\s*[0-9oi][a-z01]{2}\s*$/i; + +/** + * Attempts to fix and clean a postcode. Specifically: + * - Performs character conversion on obviously wrong and commonly mixed up letters (e.g. 
O => 0 and vice versa) + * - Trims string + * - Properly adds space between outward and inward codes + * + * If the postcode cannot be coerced into a valid format, the original string is returned + * + * @example + * ```javascript + * fix(" SW1A 2AO") => "SW1A 2AO" // Properly spaces + * fix("SW1A 2A0") => "SW1A 2AO" // 0 is coerced into "O" + * ``` + * + * Aims to be used in conjunction with parse to make postcode entry more forgiving: + * + * @example + * ```javascript + * const { inward } = parse(fix("SW1A 2A0")); // inward = "2AO" + * ``` + */ +export const fix = (s: string): string => { + const match = s.match(FIXABLE_REGEX); + if (match === null) return s; + s = s.toUpperCase().trim().replace(/\s+/gi, ""); + const l = s.length; + const inward = s.slice(l - 3, l); // inward code is always the final three characters + return `${coerceOutcode(s.slice(0, l - 3))} ${coerce("NLL", inward)}`; +}; + +const toLetter: Record<string, string> = { // digit => look-alike letter + "0": "O", + "1": "I", +}; + +const toNumber: Record<string, string> = { // letter => look-alike digit + O: "0", + I: "1", +}; + +const coerceOutcode = (i: string): string => { + if (i.length === 2) return coerce("LN", i); + if (i.length === 3) return coerce("L??", i); // only the first character is coerced + if (i.length === 4) return coerce("LLN?", i); + return i; +}; + +/** + * Given a pattern of letters, numbers and unknowns represented as a sequence + * of Ls, Ns and ?s 
respectively; coerce them into the correct type given a + * mapping of potentially confused letters + * + * @hidden + * + * @example coerce("LLN", "0O8") => "OO8" + */ +const coerce = (pattern: string, input: string): string => + input + .split("") + .reduce<string[]>((acc, c, i) => { + const target = pattern.charAt(i); + if (target === "N") acc.push(toNumber[c] || c); + if (target === "L") acc.push(toLetter[c] || c); + if (target === "?") acc.push(c); + return acc; + }, []) + .join(""); diff --git a/test/exhaustive_unit.ts b/test/exhaustive_unit.ts index e1eecf6..f216558 100644 --- a/test/exhaustive_unit.ts +++ b/test/exhaustive_unit.ts @@ -5,7 +5,7 @@ import axios from "axios"; const TIMEOUT = 60000; -import { parse } from "../lib/index"; +import { parse, fix } from "../lib/index"; const url = "https://data.ideal-postcodes.co.uk/postcodes.csv"; @@ -24,6 +24,13 @@ describe("Exhaustive postcode test", () => { .filter((p: string) => p !== "GIR 0AA"); }); + describe("fix", () => { + it("never corrects a valid postcode", function () { + this.timeout(TIMEOUT); + postcodes.forEach((p) => assert.equal(fix(p), p)); + }); + }); + describe(".valid", () => { it("should all be valid", function () { this.timeout(TIMEOUT); diff --git a/test/fix.unit.ts b/test/fix.unit.ts new file mode 100644 index 0000000..98aa258 --- /dev/null +++ b/test/fix.unit.ts @@ -0,0 +1,64 @@ +import { assert } from "chai"; +import { fix } from "../lib/index"; + +describe("fix", () => { + it("trims postcode", () => { + assert.equal(fix(" SW1A 2AA "), "SW1A 2AA"); + }); + + it("upper cases string", () => { + assert.equal(fix(" Sw1A 2aa "), "SW1A 2AA"); + }); + + it("fixes spacing", () => { + assert.equal(fix(" Sw1A2aa "), "SW1A 2AA"); + assert.equal(fix(" Sw1A 2aa "), "SW1A 2AA"); + }); + + it("returns original string if not fixable", () => { + assert.equal(fix(" 1A2aa "), " 1A2aa "); + }); + + describe("outward code", () => { + it("fixes LN format", () => { + assert.equal(fix("01 OAA"), "O1 0AA"); + 
assert.equal(fix("SO OAA"), "S0 0AA"); + }); + + it("fixes L?? format", () => { + assert.equal(fix("0W1 OAA"), "OW1 0AA"); + + // Too ambiguous + assert.equal(fix("S01 OAA"), "S01 0AA"); + assert.equal(fix("SO1 OAA"), "SO1 0AA"); + assert.equal(fix("SWO OAA"), "SWO 0AA"); + assert.equal(fix("SW0 OAA"), "SW0 0AA"); + }); + + it("fixes LLN? format", () => { + assert.equal(fix("0W1A OAA"), "OW1A 0AA"); + assert.equal(fix("S01A OAA"), "SO1A 0AA"); + assert.equal(fix("SWOA OAA"), "SW0A 0AA"); + // Ambiguous + assert.equal(fix("SW10 OAA"), "SW10 0AA"); + assert.equal(fix("SW1O OAA"), "SW1O 0AA"); + }); + }); + + describe("inward code", () => { + it("coerces first character", () => { + assert.equal(fix(" SW1A OAA"), "SW1A 0AA"); + }); + it("coerces second character", () => { + assert.equal(fix("SW1A 20A"), "SW1A 2OA"); + }); + it("coerces last character", () => { + assert.equal(fix("SW1A 2A0"), "SW1A 2AO"); + }); + }); + + it("fixes 1 <=> I", () => { + assert.equal(fix("SWIA 2AA"), "SW1A 2AA"); + assert.equal(fix("1W1A 2AA"), "IW1A 2AA"); + }); +});