Skip to content

Commit

Permalink
feat: add allowedCharactersInPath whitelist
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongliang02 committed Jan 14, 2025
1 parent 56f5d3a commit 9d6f03b
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 36 deletions.
2 changes: 1 addition & 1 deletion packages/validators/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@opengovsg/starter-kitty-validators",
"version": "1.2.11",
"version": "1.2.12",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"exports": {
Expand Down
131 changes: 125 additions & 6 deletions packages/validators/src/__tests__/url.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import { describe, expect, it } from 'vitest'
import { ZodError } from 'zod'

import { OptionsError } from '@/common/errors'
import { createUrlSchema, RelUrlValidator, UrlValidator } from '@/index'
import { UrlValidationError } from '@/url/errors'
import { defaultAllowedChars } from '@/url/options'

describe('UrlValidator with default options', () => {
const validator = new UrlValidator()
Expand All @@ -12,6 +14,11 @@ describe('UrlValidator with default options', () => {
expect(url).toBeInstanceOf(URL)
})

it('should parse a valid URL', () => {
const url = validator.parse('https://example.com/path?query=1#hash')
expect(url).toBeInstanceOf(URL)
})

it('should throw an error when the protocol is not http or https', () => {
expect(() => validator.parse('ftp://example.com')).toThrow(UrlValidationError)
})
Expand All @@ -30,10 +37,11 @@ describe('UrlValidator with default options', () => {
expect(() => validator.parse('https://example.com/[[...slug]]')).toThrow(UrlValidationError)
expect(() => validator.parse('https://example.com/[[slug]]')).toThrow(UrlValidationError)
expect(() => validator.parse('https://example.com/[x]?x=1')).toThrow(UrlValidationError)
expect(() => validator.parse('https://example.com/path/(.)part')).toThrow(UrlValidationError)
})

it('should prevent XSS attacks', () => {
expect(() => validator.parse('javascript&colonalert(/xss/)').protocol).toThrow(UrlValidationError)
expect(() => validator.parse('javascript&colonalert(/xss/)')).toThrow(UrlValidationError)
expect(() => validator.parse('javascript:alert(/xss/)')).toThrow(UrlValidationError)
})

Expand All @@ -47,19 +55,20 @@ describe('UrlValidator with default options', () => {
describe('UrlValidator with custom protocol whitelist', () => {
const validator = new UrlValidator({
whitelist: {
protocols: ['http', 'https', 'mailto'],
protocols: ['http', 'https'],
allowedCharactersInPath: '', // blank to allow all characters for tests
},
})

it('should not throw an error when the protocol on the whitelist', () => {
expect(() => validator.parse('https://example.com')).not.toThrow()
expect(() => validator.parse('http://example.com')).not.toThrow()
expect(() => validator.parse('mailto:contact@example.com')).not.toThrow()
})

it('should throw an error when the protocol is not on the whitelist', () => {
expect(() => validator.parse('ftp://example.com')).toThrow(UrlValidationError)
expect(() => validator.parse('javascript:alert()')).toThrow(UrlValidationError)
expect(() => validator.parse('mailto:contact@example.com')).toThrow(UrlValidationError)
})
})

Expand All @@ -68,6 +77,7 @@ describe('UrlValidator with custom host whitelist', () => {
whitelist: {
protocols: ['http', 'https'],
hosts: ['example.com'],
allowedCharactersInPath: '', // blank to allow all characters
},
})

Expand All @@ -85,13 +95,19 @@ describe('UrlValidator with disallowHostnames', () => {
whitelist: {
protocols: ['http', 'https'],
disallowHostnames: true,
allowedCharactersInPath: '', // blank to allow all characters for tests
},
})

it('should not throw an error with a proper domain', () => {
expect(() => validator.parse('https://example.com')).not.toThrow()
})

it('should not throw an error with a proper domain', () => {
const url = validator.parse('https://example.com/path?query=1#hash')
expect(url).toBeInstanceOf(URL)
})

it('should throw an error with a hostname', () => {
expect(() => validator.parse('https://tld')).toThrow(UrlValidationError)
expect(() => validator.parse('https://.tld')).toThrow(UrlValidationError)
Expand All @@ -110,13 +126,19 @@ describe('UrlValidator with both hosts and disallowHostnames', () => {
protocols: ['http', 'https'],
hosts: ['example.com', 'localhost'],
disallowHostnames: true,
allowedCharactersInPath: '', // blank to allow all characters for tests
},
})

it('should not throw an error when the host is on the whitelist', () => {
expect(() => validator.parse('https://example.com')).not.toThrow()
})

it('should not throw an error when the host is on the whitelist', () => {
const url = validator.parse('https://example.com/path?query=1#hash')
expect(url).toBeInstanceOf(URL)
})

it('should ignore the disallowHostnames option', () => {
expect(() => validator.parse('https://localhost')).not.toThrow()
})
Expand All @@ -125,6 +147,10 @@ describe('UrlValidator with both hosts and disallowHostnames', () => {
describe('UrlValidator with base URL', () => {
const validator = new UrlValidator({
baseOrigin: 'https://example.com',
whitelist: {
protocols: ['http', 'https'], // default
allowedCharactersInPath: '', // blank to allow all characters for tests
},
})

it('should parse a valid relative URL', () => {
Expand Down Expand Up @@ -161,6 +187,82 @@ describe('UrlValidator with base URL', () => {
UrlValidationError,
)
})

it('should not allow Next.js dynamic routes', () => {
expect(() => validator.parse('/[[x]]http:example.com/(.)[y]/?x&y')).toThrow(UrlValidationError)
})
})

describe('UrlValidator with a whitelist of allowed characters in the path', () => {
const validator = new UrlValidator({
whitelist: {
protocols: ['http', 'https'],
allowedCharactersInPath: 'abc123/',
},
})
it('should parse a valid URL', () => {
const url = validator.parse('https://example.com/abc123')
expect(url).toBeInstanceOf(URL)
})

it('should parse a valid URL with query string and hash', () => {
const url = validator.parse('https://example.com/abc123?q=1#hash')
expect(url).toBeInstanceOf(URL)
})

it('should throw an error when the path contains disallowed characters', () => {
expect(() => validator.parse('https://example.com/abc1234')).toThrow(UrlValidationError)
})
})

describe('UrlValidator with the default whitelist', () => {
const validator = new UrlValidator({})

it('should parse a valid URL', () => {
const url = validator.parse('https://example.com')
expect(url).toBeInstanceOf(URL)
})

it('should parse a valid URL', () => {
const url = validator.parse('https://example.com/path?query=1#hash')
expect(url).toBeInstanceOf(URL)
})

it('should throw an error when the path contains disallowed characters', () => {
expect(() => validator.parse('https://example.com/1@23')).toThrow(UrlValidationError)
})

it('should throw an error when the protocol is not http or https', () => {
expect(() => validator.parse('ftp://example.com')).toThrow(UrlValidationError)
})

it('should allow any host when no host whitelist is provided', () => {
expect(() => validator.parse('https://open.gov.sg')).not.toThrow()
})

// https://url.spec.whatwg.org/#missing-scheme-non-relative-url
it('should throw an error when missing a scheme and no base URL or base URL', () => {
expect(() => validator.parse('example.com')).toThrow(UrlValidationError)
expect(() => validator.parse('/path')).toThrow(UrlValidationError)
})

it('should not allow Next.js dynamic routes', () => {
expect(() => validator.parse('https://example.com/[[...slug]]')).toThrow(UrlValidationError)
expect(() => validator.parse('https://example.com/[[slug]]')).toThrow(UrlValidationError)
expect(() => validator.parse('https://example.com/[x]?x=1')).toThrow(UrlValidationError)
expect(() => validator.parse('https://example.com/path/(.)part')).toThrow(UrlValidationError)
})

it('should prevent XSS attacks', () => {
expect(() => validator.parse('javascript&colonalert(/xss/)')).toThrow(UrlValidationError)
expect(() => validator.parse('javascript:alert(/xss/)')).toThrow(UrlValidationError)
})

it('should throw an error when given an invalid type', () => {
expect(() => validator.parse(123)).toThrow(UrlValidationError)
expect(() => validator.parse(undefined)).toThrow(UrlValidationError)
expect(() => validator.parse(['1', '2'])).toThrow(UrlValidationError)
})
})

describe('UrlValidator with invalid options', () => {
Expand All @@ -185,6 +287,11 @@ describe('RelUrlValidator with string origin', () => {
expect(url).toBeInstanceOf(URL)
})

it('should parse a valid absolute URL', () => {
const url = validator.parse('https://a.com/path?query=1#hash')
expect(url).toBeInstanceOf(URL)
})

it('should throw an error on invalid URL', () => {
expect(() => validator.parse('https://b.com/hello')).toThrow(UrlValidationError)
})
Expand Down Expand Up @@ -251,6 +358,7 @@ describe('UrlValidatorOptions.parsePathname', () => {

it('should throw an error when the path is a NextJS dynamic path', () => {
expect(() => validator.parsePathname('https://a.com/hello/[id]?id=3')).toThrow(UrlValidationError)
expect(() => validator.parsePathname('https://a.com/hello/(.)bye')).toThrow(UrlValidationError)
})

it('should fallback to fallbackUrl if it is provided', () => {
Expand All @@ -271,7 +379,17 @@ describe('createUrlSchema', () => {
protocols: ['http', 'https', 'mailto'],
},
})
expect(() => schema.parse('mailto:test@test.test')).not.toThrow()
expect(() => schema.parse('https://example.com')).not.toThrow()
})

it('should create a schema with custom options', () => {
const schema = createUrlSchema({
whitelist: {
protocols: ['http', 'https', 'mailto'],
allowedCharactersInPath: defaultAllowedChars + '@',
},
})
expect(() => schema.parse('mailto:contact@example.com')).not.toThrow()
})

it('should throw an error when the options are invalid', () => {
Expand All @@ -287,13 +405,14 @@ describe('createUrlSchema', () => {
protocols: ['http', 'https'],
hosts: ['example.com'],
disallowHostnames: true,
allowedCharactersInPath: defaultAllowedChars,
},
}),
).not.toThrow()
})

it('should reject relaative URLs when the base URL is not provided', () => {
it('should reject relative URLs when the base URL is not provided', () => {
const schema = createUrlSchema()
expect(() => schema.parse('/path')).toThrow(UrlValidationError)
expect(() => schema.parse('/path')).toThrow(ZodError)
})
})
3 changes: 2 additions & 1 deletion packages/validators/src/url/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { fromError } from 'zod-validation-error'

import { OptionsError } from '@/common/errors'
import { UrlValidationError } from '@/url/errors'
import { defaultOptions, optionsSchema, UrlValidatorOptions } from '@/url/options'
import { defaultAllowedChars, defaultOptions, optionsSchema, UrlValidatorOptions } from '@/url/options'
import { toSchema } from '@/url/schema'

/**
Expand Down Expand Up @@ -128,6 +128,7 @@ export class RelUrlValidator extends UrlValidator {
whitelist: {
protocols: ['http', 'https'],
hosts: [urlObject.host],
allowedCharactersInPath: defaultAllowedChars,
},
})
}
Expand Down
17 changes: 16 additions & 1 deletion packages/validators/src/url/options.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import { z } from 'zod'

export const lowercase = 'abcdefghijklmnopqrstuvwxyz'
export const uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
export const digits = '0123456789'
export const alnum = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
export const defaultAllowedChars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_/'

export const defaultOptions = {
whitelist: {
protocols: ['http', 'https'],
disallowHostnames: false,
allowedCharactersInPath: defaultAllowedChars,
},
}

Expand All @@ -28,6 +35,14 @@ export const whitelistSchema = z.object({
* @defaultValue false
*/
disallowHostnames: z.boolean().optional(),
/**
* Which characters are allowed in the URL path.
* Use empty string to allow all characters.
* If your protocol is mailto, you will need to include "\@" here.
*
* @defaultValue 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_/'
*/
allowedCharactersInPath: z.string().optional().default(defaultAllowedChars),
})

/**
Expand Down Expand Up @@ -61,7 +76,7 @@ export interface UrlValidatorOptions {
*
* @public
*/
export type UrlValidatorWhitelist = z.infer<typeof whitelistSchema>
export type UrlValidatorWhitelist = z.input<typeof whitelistSchema>

export const optionsSchema = z.object({
baseOrigin: z
Expand Down
56 changes: 52 additions & 4 deletions packages/validators/src/url/schema.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,61 @@
import { z } from 'zod'

import { ParsedUrlValidatorOptions } from '@/url/options'
import { isSafeUrl, resolveRelativeUrl } from '@/url/utils'
import { createAllowedCharsSchema, getErrorMessage, IS_NOT_HOSTNAME_REGEX, resolveRelativeUrl } from '@/url/utils'

import { isDynamicRoute } from './nextjs-dynamic-route'

export const toSchema = (options: ParsedUrlValidatorOptions) => {
const { whitelist } = options

// create and cache this zod schema beforehand
const zAllowedCharsString = createAllowedCharsSchema(whitelist.allowedCharactersInPath)

return z
.string()
.transform(url => resolveRelativeUrl(url, options.baseOrigin))
.refine(url => isSafeUrl(url, options.whitelist), {
message: 'Unsafe URL',
.transform((url, ctx) => {
try {
return resolveRelativeUrl(url, options.baseOrigin)
} catch (error) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: getErrorMessage(error),
})
return z.NEVER
}
})
.refine(
url => {
// only allow whitelisted protocols
if (!whitelist.protocols.some(protocol => url.protocol === `${protocol}:`)) {
return false
}
// only allow whitelisted characters in the path
const onlyHasAllowedChars = zAllowedCharsString.safeParse(url.pathname).success
if (!onlyHasAllowedChars) {
return false
}

if (whitelist.hosts) {
// only allow whitelisted hosts
if (!whitelist.hosts.some(host => url.host === host)) {
return false
}
} else {
// no hosts provided
if (whitelist.disallowHostnames && !url.host.match(IS_NOT_HOSTNAME_REGEX)) {
return false
}
}

// don't allow dynamic routes
if (isDynamicRoute(url)) {
return false
}
return true
},
{
message: 'Unsafe URL',
},
)
}
Loading

0 comments on commit 9d6f03b

Please sign in to comment.