diff --git a/index-sources.js b/index-sources.js index cc85ec7..52e5854 100644 --- a/index-sources.js +++ b/index-sources.js @@ -188,6 +188,7 @@ export default { 'https://fastify.dev', 'https://nestjs.com', 'https://docs.drone.io', + 'https://www.jenkins.io', ], 'magazines': [ diff --git a/package.json b/package.json index 2e72a09..2299854 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "type": "module", "scripts": { "test": "node --test", - "cover": "c8 --all node --test", + "cover": "c8 --reporter lcov --all node --test", "crawl:auto": "node -r dotenv/config src/crawl-auto.js", "crawl:roots": "node -r dotenv/config src/crawl-roots.js", "crawl:roots:all": "node -r dotenv/config src/crawl-roots.js --all", diff --git a/src/helpers/__tests__/urlHelpers.test.js b/src/helpers/__tests__/urlHelpers.test.js index badedb7..3b4cab2 100644 --- a/src/helpers/__tests__/urlHelpers.test.js +++ b/src/helpers/__tests__/urlHelpers.test.js @@ -1,7 +1,7 @@ import { describe, it } from 'node:test'; import assert from 'node:assert'; -import { hrefSeemsUseful } from '../urlHelpers.js'; +import { hrefSeemsUseful, isForbidden } from '../urlHelpers.js'; describe('hrefSeemsUseful', () => { it('should return true for relative links', () => { @@ -48,3 +48,40 @@ describe('hrefSeemsUseful', () => { assert.strictEqual(hrefSeemsUseful('data:foo/bar'), false); }); }); + +describe('isForbidden', () => { + describe('curl.se', () => { + it('should allow home page', () => { + assert.strictEqual(isForbidden('https://curl.se/'), false); + assert.strictEqual(isForbidden('https://curl.se'), false); + }); + it('should allow docs', () => { + assert.strictEqual(isForbidden('https://curl.se/docs'), false); + assert.strictEqual(isForbidden('https://curl.se/docs/'), false); + assert.strictEqual(isForbidden('https://curl.se/docs/foo'), false); + }); + + it('should deny else', () => { + assert.strictEqual(isForbidden('https://curl.se/foo'), true); + assert.strictEqual(isForbidden('https://curl.se/foo/bar'), true); + }); + }); + + describe('jenkins.io', () => { + it('should allow /doc', () => { + assert.strictEqual(isForbidden('https://www.jenkins.io/doc'), false); + assert.strictEqual(isForbidden('https://www.jenkins.io/doc/'), false); + assert.strictEqual(isForbidden('https://www.jenkins.io/doc/foo'), false); + }); + it('should allow /security', () => { + assert.strictEqual(isForbidden('https://www.jenkins.io/security'), false); + assert.strictEqual(isForbidden('https://www.jenkins.io/security/'), false); + assert.strictEqual(isForbidden('https://www.jenkins.io/security/foo'), false); + }); + it('should deny /projects', () => { + assert.strictEqual(isForbidden('https://www.jenkins.io/projects'), true); + assert.strictEqual(isForbidden('https://www.jenkins.io/projects/'), true); + assert.strictEqual(isForbidden('https://www.jenkins.io/projects/foo'), true); + }); + }); +}); diff --git a/src/helpers/filters/__tests__/genericFilter.test.js b/src/helpers/filters/__tests__/genericFilter.test.js new file mode 100644 index 0000000..627abde --- /dev/null +++ b/src/helpers/filters/__tests__/genericFilter.test.js @@ -0,0 +1,37 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert'; +import { makeAllowList } from '../genericFilter.js'; + +describe('makeAllowList', () => { + describe('example.com', () => { + let tester; + it('should make a function', () => { + tester = makeAllowList('example.com', [ + '/docs', + '/blog', + ]); + assert.strictEqual(typeof tester, 'function'); + }); + + it('should abstain when different host', () => { + const result = tester('https://example.org'); + assert.strictEqual(result, false); + }); + + it('should deny when different pathname', () => { + const result = tester('https://example.com/about'); + assert.strictEqual(result, true); + }); + + it('should allow when allowed pathname', () => { + const result = tester('https://example.com/blog'); + assert.strictEqual(result, false); + }); + + it('should allow when pathname is long', () => { + const result = tester('https://example.com/blog/2021/12/31'); + assert.strictEqual(result, false); + }); + + }); +}); diff --git a/src/helpers/filters/curlFilter.js b/src/helpers/filters/curlFilter.js deleted file mode 100644 index c7b77aa..0000000 --- a/src/helpers/filters/curlFilter.js +++ /dev/null @@ -1,8 +0,0 @@ -export const curlFilter = (url) => { - if (url.startsWith('https://curl.se/') === false) return false; - - if (url === 'https://curl.se/' || url === 'https://curl.se') return false; - if (url.includes('/docs')) return false; - - return true; -}; diff --git a/src/helpers/filters/genericFilter.js b/src/helpers/filters/genericFilter.js index 65a5d20..ace749a 100644 --- a/src/helpers/filters/genericFilter.js +++ b/src/helpers/filters/genericFilter.js @@ -39,3 +39,18 @@ export const hrefSeemsUseful = (href) => { return true; }; + +export const makeAllowList = (hostname, allowedPathsInclude) => (url) => { + // Not from the same hostname + if (url.startsWith(`https://${hostname}`) === false) return false; + + // Allow home page + if (url === `https://${hostname}/` || url === `https://${hostname}`) return false; + + for (const path of allowedPathsInclude) { + if (url.includes(path)) return false; + } + + // Deny + return true; +}; diff --git a/src/helpers/filters/mongoFilter.js b/src/helpers/filters/mongoFilter.js deleted file mode 100644 index 8e2a695..0000000 --- a/src/helpers/filters/mongoFilter.js +++ /dev/null @@ -1,8 +0,0 @@ -export const mongoFilter = (url) => { - if (url.startsWith('https://www.mongodb.com/') === false) return false; - - if (url === 'https://www.mongodb.com/' || url === 'https://www.mongodb.com') return false; - if (url.startsWith('https://www.mongodb.com/docs')) return false; - - return true; -}; diff --git a/src/helpers/filters/postgresFilter.js b/src/helpers/filters/postgresFilter.js deleted file mode 100644 index da86736..0000000 --- a/src/helpers/filters/postgresFilter.js +++ /dev/null @@ -1,8 +0,0 @@ -export const postgresFilter = (url) => { - if (url.startsWith('https://www.postgresql.org') === false) return false; - - if (url === 'https://www.postgresql.org/' || url === 'https://www.postgresql.org') return false; - if (url.includes('docs')) return false; - - return true; -}; diff --git a/src/helpers/urlHelpers.js b/src/helpers/urlHelpers.js index db221c1..2df76c4 100644 --- a/src/helpers/urlHelpers.js +++ b/src/helpers/urlHelpers.js @@ -1,12 +1,26 @@ import { w3Filter } from './filters/w3filter.js'; import { githubFilter } from './filters/githubFilter.js'; import { phpFilter } from './filters/phpFilter.js'; -import { mongoFilter } from './filters/mongoFilter.js'; -import { postgresFilter } from './filters/postgresFilter.js'; -import { curlFilter } from './filters/curlFilter.js'; - +import { makeAllowList } from './filters/genericFilter.js'; export { hrefSeemsUseful } from './filters/genericFilter.js'; +const curlFilter = makeAllowList('curl.se', [ + '/docs', +]); + +const mongoFilter = makeAllowList('www.mongodb.com', [ + '/docs', +]); + +const postgresFilter = makeAllowList('www.postgresql.org', [ + '/docs', +]); + +const jenkinsFilter = makeAllowList('www.jenkins.io', [ + '/doc', + '/security' +]); + const domainFilters = [ w3Filter, githubFilter, @@ -14,6 +28,7 @@ const domainFilters = [ mongoFilter, postgresFilter, curlFilter, + jenkinsFilter, ]; export const isForbidden = (url) => {