From 8a24e9ddac564a7402178c9b5ed5baf9f1c66888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 2 Nov 2022 13:30:56 +0100 Subject: [PATCH 1/3] fix: concurrency option override order --- packages/basic-crawler/src/internals/basic-crawler.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index 5c9e49033fcb..2bd4d801aeec 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -533,8 +533,8 @@ export class BasicCrawler = { - minConcurrency, - maxConcurrency, + minConcurrency: minConcurrency ?? autoscaledPoolOptions?.minConcurrency, + maxConcurrency: maxConcurrency ?? autoscaledPoolOptions?.maxConcurrency, maxTasksPerMinute: maxRequestsPerMinute ?? autoscaledPoolOptions?.maxTasksPerMinute, runTaskFunction: this._runTaskFunction.bind(this), isTaskReadyFunction: async () => { From fd858e6f555e5038b4336001d6ed24d8d74a97ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 2 Nov 2022 20:30:13 +0100 Subject: [PATCH 2/3] feat: test for short/full length scale options --- test/core/crawlers/basic_crawler.test.ts | 56 ++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/test/core/crawlers/basic_crawler.test.ts b/test/core/crawlers/basic_crawler.test.ts index b44b4e5fa42c..8f0cc302b215 100644 --- a/test/core/crawlers/basic_crawler.test.ts +++ b/test/core/crawlers/basic_crawler.test.ts @@ -91,6 +91,62 @@ describe('BasicCrawler', () => { expect(await requestList.isEmpty()).toBe(true); }); + test('should correctly combine shorthand and full length options', async () => { + const shorthandOptions = { + options: { + minConcurrency: 123, + maxConcurrency: 456, + maxRequestsPerMinute: 789, + }, + compare: { + _minConcurrency: 123, + _maxConcurrency: 456, + maxTasksPerMinute: 789, + }, + }; + + const autoscaledPoolOptions = { + options: { + minConcurrency: 16, + maxConcurrency: 32, + maxTasksPerMinute: 64, + }, + compare: { + _minConcurrency: 16, + _maxConcurrency: 32, + maxTasksPerMinute: 64, + }, + }; + + const requestList = await RequestList.open(null, []); + const requestHandler = async () => {}; + + let crawler = new BasicCrawler({ + requestList, + requestHandler, + ...shorthandOptions.options, + }); + await crawler.run(); + expect(crawler.autoscaledPool).toMatchObject(shorthandOptions.compare); + + crawler = new BasicCrawler({ + requestList, + requestHandler, + autoscaledPoolOptions: autoscaledPoolOptions.options, + }); + await crawler.run(); + expect(crawler.autoscaledPool).toMatchObject(autoscaledPoolOptions.compare); + + crawler = new BasicCrawler({ + requestList, + requestHandler, + ...shorthandOptions.options, + autoscaledPoolOptions: autoscaledPoolOptions.options, + }); + await crawler.run(); + expect(crawler.autoscaledPool).toMatchObject(shorthandOptions.compare); + }); + test('auto-saved state object', async () => { const sources = [...Array(50).keys()].map((index) => ({ url: `https://example.com/${index}` })); const sourcesCopy = JSON.parse(JSON.stringify(sources)); From ac9b0bc8dfc42ecd81f562200bf66f0f967d1b16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Wed, 2 Nov 2022 20:37:48 +0100 Subject: [PATCH 3/3] feat: test with mocking AutoscaledPool constructor --- test/core/crawlers/basic_crawler.test.ts | 57 +++++++++++++----------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/test/core/crawlers/basic_crawler.test.ts b/test/core/crawlers/basic_crawler.test.ts index 8f0cc302b215..6e831957fe09 100644 --- a/test/core/crawlers/basic_crawler.test.ts +++ b/test/core/crawlers/basic_crawler.test.ts @@ -18,12 +18,24 @@ import { CriticalError, MissingRouteError, } from '@crawlee/basic'; +import { + AutoscaledPool, +} from '@crawlee/core'; import express from 'express'; import type { Dictionary } from '@crawlee/utils'; import { sleep } from '@crawlee/utils'; import { MemoryStorageEmulator } from 'test/shared/MemoryStorageEmulator'; import { startExpressAppPromise } from '../../shared/_helper'; +jest.mock('@crawlee/core', () => { + const originalModule = jest.requireActual('@crawlee/core'); + const AutoscaledPoolMockConstructor = jest.fn((...args) => new originalModule.AutoscaledPool(...args)); + return { + ...originalModule, + AutoscaledPool: AutoscaledPoolMockConstructor, + }; +}); + describe('BasicCrawler', () => { let logLevel: number; const localStorageEmulator = new MemoryStorageEmulator(); @@ -99,52 +111,45 @@ describe('BasicCrawler', () => { maxRequestsPerMinute: 789, }, compare: { - _minConcurrency: 123, - _maxConcurrency: 456, + minConcurrency: 123, + maxConcurrency: 456, maxTasksPerMinute: 789, }, }; const autoscaledPoolOptions = { - options: { - minConcurrency: 16, - maxConcurrency: 32, - maxTasksPerMinute: 64, - }, - compare: { - _minConcurrency: 16, - _maxConcurrency: 32, - maxTasksPerMinute: 64, - }, + minConcurrency: 16, + maxConcurrency: 32, + maxTasksPerMinute: 64, }; const requestList = await RequestList.open(null, []); const requestHandler = async () => {}; - let crawler = new BasicCrawler({ + await (new BasicCrawler({ requestList, requestHandler, ...shorthandOptions.options, - }); - await crawler.run(); - expect(crawler.autoscaledPool).toMatchObject(shorthandOptions.compare); + })).run(); - crawler = new BasicCrawler({ + expect((AutoscaledPool as any).mock.calls[0][0]).toMatchObject(shorthandOptions.compare); + + await (new BasicCrawler({ requestList, requestHandler, - autoscaledPoolOptions: autoscaledPoolOptions.options, - }); - await crawler.run(); - expect(crawler.autoscaledPool).toMatchObject(autoscaledPoolOptions.compare); + autoscaledPoolOptions, + })).run(); - crawler = new BasicCrawler({ + expect((AutoscaledPool as any).mock.calls[1][0]).toMatchObject(autoscaledPoolOptions); + + await (new BasicCrawler({ requestList, requestHandler, ...shorthandOptions.options, - autoscaledPoolOptions: autoscaledPoolOptions.options, - }); - await crawler.run(); - expect(crawler.autoscaledPool).toMatchObject(shorthandOptions.compare); + autoscaledPoolOptions, + })).run(); + + expect((AutoscaledPool as any).mock.calls[2][0]).toMatchObject(shorthandOptions.compare); }); test('auto-saved state object', async () => {