From 9d6f03b076321f8f04fb19d44981af7d90ff1b3e Mon Sep 17 00:00:00 2001 From: zhongliang02 Date: Wed, 15 Jan 2025 07:12:01 +0800 Subject: [PATCH] feat: add allowedCharactersInPath whitelist --- packages/validators/package.json | 2 +- packages/validators/src/__tests__/url.test.ts | 131 +++++++++++++++++- packages/validators/src/url/index.ts | 3 +- packages/validators/src/url/options.ts | 17 ++- packages/validators/src/url/schema.ts | 56 +++++++- packages/validators/src/url/utils.ts | 72 +++++++--- 6 files changed, 245 insertions(+), 36 deletions(-) diff --git a/packages/validators/package.json b/packages/validators/package.json index e0a40d5..a595aef 100644 --- a/packages/validators/package.json +++ b/packages/validators/package.json @@ -1,6 +1,6 @@ { "name": "@opengovsg/starter-kitty-validators", - "version": "1.2.11", + "version": "1.2.12", "main": "./dist/index.js", "types": "./dist/index.d.ts", "exports": { diff --git a/packages/validators/src/__tests__/url.test.ts b/packages/validators/src/__tests__/url.test.ts index fb855fe..18cd89d 100644 --- a/packages/validators/src/__tests__/url.test.ts +++ b/packages/validators/src/__tests__/url.test.ts @@ -1,8 +1,10 @@ import { describe, expect, it } from 'vitest' +import { ZodError } from 'zod' import { OptionsError } from '@/common/errors' import { createUrlSchema, RelUrlValidator, UrlValidator } from '@/index' import { UrlValidationError } from '@/url/errors' +import { defaultAllowedChars } from '@/url/options' describe('UrlValidator with default options', () => { const validator = new UrlValidator() @@ -12,6 +14,11 @@ describe('UrlValidator with default options', () => { expect(url).toBeInstanceOf(URL) }) + it('should parse a valid URL', () => { + const url = validator.parse('https://example.com/path?query=1#hash') + expect(url).toBeInstanceOf(URL) + }) + it('should throw an error when the protocol is not http or https', () => { expect(() => validator.parse('ftp://example.com')).toThrow(UrlValidationError) }) @@ -30,10 +37,11 @@ describe('UrlValidator with default options', () => { expect(() => validator.parse('https://example.com/[[...slug]]')).toThrow(UrlValidationError) expect(() => validator.parse('https://example.com/[[slug]]')).toThrow(UrlValidationError) expect(() => validator.parse('https://example.com/[x]?x=1')).toThrow(UrlValidationError) + expect(() => validator.parse('https://example.com/path/(.)part')).toThrow(UrlValidationError) }) it('should prevent XSS attacks', () => { - expect(() => validator.parse('javascript&colonalert(/xss/)').protocol).toThrow(UrlValidationError) + expect(() => validator.parse('javascript&colonalert(/xss/)')).toThrow(UrlValidationError) expect(() => validator.parse('javascript:alert(/xss/)')).toThrow(UrlValidationError) }) @@ -47,19 +55,20 @@ describe('UrlValidator with default options', () => { describe('UrlValidator with custom protocol whitelist', () => { const validator = new UrlValidator({ whitelist: { - protocols: ['http', 'https', 'mailto'], + protocols: ['http', 'https'], + allowedCharactersInPath: '', // blank to allow all characters for tests }, }) it('should not throw an error when the protocol on the whitelist', () => { expect(() => validator.parse('https://example.com')).not.toThrow() expect(() => validator.parse('http://example.com')).not.toThrow() - expect(() => validator.parse('mailto:contact@example.com')).not.toThrow() }) it('should throw an error when the protocol is not on the whitelist', () => { expect(() => validator.parse('ftp://example.com')).toThrow(UrlValidationError) expect(() => validator.parse('javascript:alert()')).toThrow(UrlValidationError) + expect(() => validator.parse('mailto:contact@example.com')).toThrow(UrlValidationError) }) }) @@ -68,6 +77,7 @@ describe('UrlValidator with custom host whitelist', () => { whitelist: { protocols: ['http', 'https'], hosts: ['example.com'], + allowedCharactersInPath: '', // blank to allow all characters }, }) @@ -85,6 +95,7 @@ describe('UrlValidator with disallowHostnames', () => { whitelist: { protocols: ['http', 'https'], disallowHostnames: true, + allowedCharactersInPath: '', // blank to allow all characters for tests }, }) @@ -92,6 +103,11 @@ describe('UrlValidator with disallowHostnames', () => { expect(() => validator.parse('https://example.com')).not.toThrow() }) + it('should not throw an error with a proper domain', () => { + const url = validator.parse('https://example.com/path?query=1#hash') + expect(url).toBeInstanceOf(URL) + }) + it('should throw an error with a hostname', () => { expect(() => validator.parse('https://tld')).toThrow(UrlValidationError) expect(() => validator.parse('https://.tld')).toThrow(UrlValidationError) @@ -110,6 +126,7 @@ describe('UrlValidator with both hosts and disallowHostnames', () => { protocols: ['http', 'https'], hosts: ['example.com', 'localhost'], disallowHostnames: true, + allowedCharactersInPath: '', // blank to allow all characters for tests }, }) @@ -117,6 +134,11 @@ describe('UrlValidator with both hosts and disallowHostnames', () => { expect(() => validator.parse('https://example.com')).not.toThrow() }) + it('should not throw an error when the host is on the whitelist', () => { + const url = validator.parse('https://example.com/path?query=1#hash') + expect(url).toBeInstanceOf(URL) + }) + it('should ignore the disallowHostnames option', () => { expect(() => validator.parse('https://localhost')).not.toThrow() }) @@ -125,6 +147,10 @@ describe('UrlValidator with both hosts and disallowHostnames', () => { describe('UrlValidator with base URL', () => { const validator = new UrlValidator({ baseOrigin: 'https://example.com', + whitelist: { + protocols: ['http', 'https'], // default + allowedCharactersInPath: '', // blank to allow all characters for tests + }, }) it('should parse a valid relative URL', () => { @@ -161,6 +187,82 @@ describe('UrlValidator with base URL', () => { UrlValidationError, ) }) + + it('should not allow Next.js dynamic routes', () => { + expect(() => validator.parse('/[[x]]http:example.com/(.)[y]/?x&y')).toThrow(UrlValidationError) + }) +}) + +describe('UrlValidator with a whitelist of allowed characters in the path', () => { + const validator = new UrlValidator({ + whitelist: { + protocols: ['http', 'https'], + allowedCharactersInPath: 'abc123/', + }, + }) + it('should parse a valid URL', () => { + const url = validator.parse('https://example.com/abc123') + expect(url).toBeInstanceOf(URL) + }) + + it('should parse a valid URL with query string and hash', () => { + const url = validator.parse('https://example.com/abc123?q=1#hash') + expect(url).toBeInstanceOf(URL) + }) + + it('should throw an error when the path contains disallowed characters', () => { + expect(() => validator.parse('https://example.com/abc1234')).toThrow(UrlValidationError) + }) +}) + +describe('UrlValidator with the default whitelist', () => { + const validator = new UrlValidator({}) + + it('should parse a valid URL', () => { + const url = validator.parse('https://example.com') + expect(url).toBeInstanceOf(URL) + }) + + it('should parse a valid URL', () => { + const url = validator.parse('https://example.com/path?query=1#hash') + expect(url).toBeInstanceOf(URL) + }) + + it('should throw an error when the path contains disallowed characters', () => { + expect(() => validator.parse('https://example.com/1@23')).toThrow(UrlValidationError) + }) + + it('should throw an error when the protocol is not http or https', () => { + expect(() => validator.parse('ftp://example.com')).toThrow(UrlValidationError) + }) + + it('should allow any host when no host whitelist is provided', () => { + expect(() => validator.parse('https://open.gov.sg')).not.toThrow() + }) + + // https://url.spec.whatwg.org/#missing-scheme-non-relative-url + it('should throw an error when missing a scheme and no base URL or base URL', () => { + expect(() => validator.parse('example.com')).toThrow(UrlValidationError) + expect(() => validator.parse('/path')).toThrow(UrlValidationError) + }) + + it('should not allow Next.js dynamic routes', () => { + expect(() => validator.parse('https://example.com/[[...slug]]')).toThrow(UrlValidationError) + expect(() => validator.parse('https://example.com/[[slug]]')).toThrow(UrlValidationError) + expect(() => validator.parse('https://example.com/[x]?x=1')).toThrow(UrlValidationError) + expect(() => validator.parse('https://example.com/path/(.)part')).toThrow(UrlValidationError) + }) + + it('should prevent XSS attacks', () => { + expect(() => validator.parse('javascript&colonalert(/xss/)')).toThrow(UrlValidationError) + expect(() => validator.parse('javascript:alert(/xss/)')).toThrow(UrlValidationError) + }) + + it('should throw an error when given an invalid type', () => { + expect(() => validator.parse(123)).toThrow(UrlValidationError) + expect(() => validator.parse(undefined)).toThrow(UrlValidationError) + expect(() => validator.parse(['1', '2'])).toThrow(UrlValidationError) + }) }) describe('UrlValidator with invalid options', () => { @@ -185,6 +287,11 @@ describe('RelUrlValidator with string origin', () => { expect(url).toBeInstanceOf(URL) }) + it('should parse a valid absolute URL', () => { + const url = validator.parse('https://a.com/path?query=1#hash') + expect(url).toBeInstanceOf(URL) + }) + it('should throw an error on invalid URL', () => { expect(() => validator.parse('https://b.com/hello')).toThrow(UrlValidationError) }) @@ -251,6 +358,7 @@ describe('UrlValidatorOptions.parsePathname', () => { it('should throw an error when the path is a NextJS dynamic path', () => { expect(() => validator.parsePathname('https://a.com/hello/[id]?id=3')).toThrow(UrlValidationError) + expect(() => validator.parsePathname('https://a.com/hello/(.)bye')).toThrow(UrlValidationError) }) it('should fallback to fallbackUrl if it is provided', () => { @@ -271,7 +379,17 @@ describe('createUrlSchema', () => { protocols: ['http', 'https', 'mailto'], }, }) - expect(() => schema.parse('mailto:test@test.test')).not.toThrow() + expect(() => schema.parse('https://example.com')).not.toThrow() + }) + + it('should create a schema with custom options', () => { + const schema = createUrlSchema({ + whitelist: { + protocols: ['http', 'https', 'mailto'], + allowedCharactersInPath: defaultAllowedChars + '@', + }, + }) + expect(() => schema.parse('mailto:contact@example.com')).not.toThrow() }) it('should throw an error when the options are invalid', () => { @@ -287,13 +405,14 @@ describe('createUrlSchema', () => { protocols: ['http', 'https'], hosts: ['example.com'], disallowHostnames: true, + allowedCharactersInPath: defaultAllowedChars, }, }), ).not.toThrow() }) - it('should reject relaative URLs when the base URL is not provided', () => { + it('should reject relative URLs when the base URL is not provided', () => { const schema = createUrlSchema() - expect(() => schema.parse('/path')).toThrow(UrlValidationError) + expect(() => schema.parse('/path')).toThrow(ZodError) }) }) diff --git a/packages/validators/src/url/index.ts b/packages/validators/src/url/index.ts index f219d09..088b1ce 100644 --- a/packages/validators/src/url/index.ts +++ b/packages/validators/src/url/index.ts @@ -3,7 +3,7 @@ import { fromError } from 'zod-validation-error' import { OptionsError } from '@/common/errors' import { UrlValidationError } from '@/url/errors' -import { defaultOptions, optionsSchema, UrlValidatorOptions } from '@/url/options' +import { defaultAllowedChars, defaultOptions, optionsSchema, UrlValidatorOptions } from '@/url/options' import { toSchema } from '@/url/schema' /** @@ -128,6 +128,7 @@ export class RelUrlValidator extends UrlValidator { whitelist: { protocols: ['http', 'https'], hosts: [urlObject.host], + allowedCharactersInPath: defaultAllowedChars, }, }) } diff --git a/packages/validators/src/url/options.ts b/packages/validators/src/url/options.ts index e44961d..f863f5a 100644 --- a/packages/validators/src/url/options.ts +++ b/packages/validators/src/url/options.ts @@ -1,9 +1,16 @@ import { z } from 'zod' +export const lowercase = 'abcdefghijklmnopqrstuvwxyz' +export const uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +export const digits = '0123456789' +export const alnum = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' +export const defaultAllowedChars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_/' + export const defaultOptions = { whitelist: { protocols: ['http', 'https'], disallowHostnames: false, + allowedCharactersInPath: defaultAllowedChars, }, } @@ -28,6 +35,14 @@ export const whitelistSchema = z.object({ * @defaultValue false */ disallowHostnames: z.boolean().optional(), + /** + * Which characters are allowed in the URL path. + * Use empty string to allow all characters. + * If your protocol is mailto, you will need to include "\@" here. + * + * @defaultValue 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_/' + */ + allowedCharactersInPath: z.string().optional().default(defaultAllowedChars), }) /** @@ -61,7 +76,7 @@ export interface UrlValidatorOptions { * * @public */ -export type UrlValidatorWhitelist = z.infer +export type UrlValidatorWhitelist = z.input export const optionsSchema = z.object({ baseOrigin: z diff --git a/packages/validators/src/url/schema.ts b/packages/validators/src/url/schema.ts index 5804d18..0aadaf2 100644 --- a/packages/validators/src/url/schema.ts +++ b/packages/validators/src/url/schema.ts @@ -1,13 +1,61 @@ import { z } from 'zod' import { ParsedUrlValidatorOptions } from '@/url/options' -import { isSafeUrl, resolveRelativeUrl } from '@/url/utils' +import { createAllowedCharsSchema, getErrorMessage, IS_NOT_HOSTNAME_REGEX, resolveRelativeUrl } from '@/url/utils' + +import { isDynamicRoute } from './nextjs-dynamic-route' export const toSchema = (options: ParsedUrlValidatorOptions) => { + const { whitelist } = options + + // create and cache this zod schema beforehand + const zAllowedCharsString = createAllowedCharsSchema(whitelist.allowedCharactersInPath) + return z .string() - .transform(url => resolveRelativeUrl(url, options.baseOrigin)) - .refine(url => isSafeUrl(url, options.whitelist), { - message: 'Unsafe URL', + .transform((url, ctx) => { + try { + return resolveRelativeUrl(url, options.baseOrigin) + } catch (error) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: getErrorMessage(error), + }) + return z.NEVER + } }) + .refine( + url => { + // only allow whitelisted protocols + if (!whitelist.protocols.some(protocol => url.protocol === `${protocol}:`)) { + return false + } + // only allow whitelisted characters in the path + const onlyHasAllowedChars = zAllowedCharsString.safeParse(url.pathname).success + if (!onlyHasAllowedChars) { + return false + } + + if (whitelist.hosts) { + // only allow whitelisted hosts + if (!whitelist.hosts.some(host => url.host === host)) { + return false + } + } else { + // no hosts provided + if (whitelist.disallowHostnames && !url.host.match(IS_NOT_HOSTNAME_REGEX)) { + return false + } + } + + // don't allow dynamic routes + if (isDynamicRoute(url)) { + return false + } + return true + }, + { + message: 'Unsafe URL', + }, + ) } diff --git a/packages/validators/src/url/utils.ts b/packages/validators/src/url/utils.ts index 1b85a29..aab6c7d 100644 --- a/packages/validators/src/url/utils.ts +++ b/packages/validators/src/url/utils.ts @@ -1,8 +1,45 @@ +import { z } from 'zod' + import { UrlValidationError } from '@/url/errors' -import { isDynamicRoute } from '@/url/nextjs-dynamic-route' -import { UrlValidatorWhitelist } from '@/url/options' -const IS_NOT_HOSTNAME_REGEX = /[^.]+\.[^.]+/g +/** + * Creates a Zod schema that validates a string to ensure all characters are within the allowed characters set. + * + * @param allowedChars - A string containing all allowed characters. If this is blank, the schema will always pass. + * @returns A Zod schema that validates a string to ensure all characters are within the allowed characters set. + * + * @example + * ```typescript + * const schema = createAllowedCharsSchema('abc123'); + * schema.parse('abc'); // Valid + * schema.parse('a1b2c3'); // Valid + * schema.parse('abcd'); // Throws an error + * ``` + */ +export const createAllowedCharsSchema = (allowedChars: string): z.ZodType => { + if (!allowedChars) { + return z.string() + } + const allowed = new Set(Array.from(allowedChars)) + + const schema = z.string().refine( + (str: string) => { + for (const char of str) { + if (!allowed.has(char)) { + return false + } + } + return true + }, + { + message: `Every character must be in ${allowedChars}`, + }, + ) + + return schema +} + +export const IS_NOT_HOSTNAME_REGEX = /[^.]+\.[^.]+/g export const resolveRelativeUrl = (url: string, baseOrigin?: URL): URL => { if (!baseOrigin) { @@ -26,26 +63,15 @@ export const resolveRelativeUrl = (url: string, baseOrigin?: URL): URL => { return normalizedUrl } -export const isSafeUrl = (url: URL, whitelist: UrlValidatorWhitelist) => { - // only allow whitelisted protocols - if (!whitelist.protocols.some(protocol => url.protocol === `${protocol}:`)) { - return false - } - if (whitelist.hosts) { - // only allow whitelisted hosts - if (!whitelist.hosts.some(host => url.host === host)) { - return false - } - } else { - // no hosts provided - if (whitelist.disallowHostnames && !url.host.match(IS_NOT_HOSTNAME_REGEX)) { - return false +export const getErrorMessage = (err: unknown): string => { + if (err instanceof Error) return err.message + if (typeof err === 'string') return err + if (err) { + try { + return JSON.stringify(err) + } catch { + return String(err) } } - - // don't allow dynamic routes - if (isDynamicRoute(url)) { - return false - } - return true + return 'Unknown error' }