-
Notifications
You must be signed in to change notification settings - Fork 736
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: allow configuring crawler statistics #2213
Changes from 11 commits
6b9cd5a
99c1880
9f4e015
0adb7d6
54a752f
1ca2536
8f3a10e
8da3b0c
5f8c3b6
a4eca60
a611e07
161c185
fc0b44d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -39,6 +39,13 @@ const errorTrackerConfig = { | |||||||
showFullMessage: false, | ||||||||
}; | ||||||||
|
||||||||
/** | ||||||||
* Override persistence-related options provided in {@apilink StatisticsOptions} for a single method call | ||||||||
*/ | ||||||||
interface PersistenceOptionsOverrides { | ||||||||
enablePersistence?: boolean; | ||||||||
} | ||||||||
|
||||||||
/** | ||||||||
* The statistics class provides an interface to collecting and logging run | ||||||||
* statistics for requests. | ||||||||
|
@@ -92,6 +99,7 @@ export class Statistics { | |||||||
private instanceStart!: number; | ||||||||
private logInterval: unknown; | ||||||||
private events: EventManager; | ||||||||
private enablePersistence: boolean; | ||||||||
|
||||||||
/** | ||||||||
* @internal | ||||||||
|
@@ -102,13 +110,15 @@ export class Statistics { | |||||||
logMessage: ow.optional.string, | ||||||||
keyValueStore: ow.optional.object, | ||||||||
config: ow.optional.object, | ||||||||
enablePersistence: ow.optional.boolean, | ||||||||
})); | ||||||||
|
||||||||
const { | ||||||||
logIntervalSecs = 60, | ||||||||
logMessage = 'Statistics', | ||||||||
keyValueStore, | ||||||||
config = Configuration.getGlobalConfig(), | ||||||||
enablePersistence = true, | ||||||||
} = options; | ||||||||
|
||||||||
this.logIntervalMillis = logIntervalSecs * 1000; | ||||||||
|
@@ -117,6 +127,7 @@ export class Statistics { | |||||||
this.listener = this.persistState.bind(this); | ||||||||
this.events = config.getEventManager(); | ||||||||
this.config = config; | ||||||||
this.enablePersistence = enablePersistence; | ||||||||
|
||||||||
// initialize by "resetting" | ||||||||
this.reset(); | ||||||||
|
@@ -155,7 +166,10 @@ export class Statistics { | |||||||
this._teardown(); | ||||||||
} | ||||||||
|
||||||||
async resetStore() { | ||||||||
async resetStore(opts?: PersistenceOptionsOverrides) { | ||||||||
if (!this.enablePersistence && !opts?.enablePersistence) { | ||||||||
return; | ||||||||
} | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i like to keep one blank line after control statements like
Suggested change
|
||||||||
if (!this.keyValueStore) { | ||||||||
return; | ||||||||
} | ||||||||
|
@@ -247,13 +261,14 @@ export class Statistics { | |||||||
async startCapturing() { | ||||||||
this.keyValueStore ??= await KeyValueStore.open(null, { config: this.config }); | ||||||||
|
||||||||
await this._maybeLoadStatistics(); | ||||||||
|
||||||||
if (this.state.crawlerStartedAt === null) { | ||||||||
this.state.crawlerStartedAt = new Date(); | ||||||||
} | ||||||||
|
||||||||
this.events.on(EventType.PERSIST_STATE, this.listener); | ||||||||
if (this.enablePersistence) { | ||||||||
await this._maybeLoadStatistics(); | ||||||||
this.events.on(EventType.PERSIST_STATE, this.listener); | ||||||||
} | ||||||||
|
||||||||
this.logInterval = setInterval(() => { | ||||||||
this.log.info(this.logMessage, { | ||||||||
|
@@ -285,7 +300,10 @@ export class Statistics { | |||||||
/** | ||||||||
* Persist internal state to the key value store | ||||||||
*/ | ||||||||
async persistState() { | ||||||||
async persistState(opts?: PersistenceOptionsOverrides) { | ||||||||
if (!this.enablePersistence && !opts?.enablePersistence) { | ||||||||
return; | ||||||||
} | ||||||||
// this might be called before startCapturing was called without using await, should not crash | ||||||||
if (!this.keyValueStore) { | ||||||||
return; | ||||||||
|
@@ -382,11 +400,39 @@ export class Statistics { | |||||||
} | ||||||||
} | ||||||||
|
||||||||
interface StatisticsOptions { | ||||||||
/** | ||||||||
* Configuration for the {@apilink Statistics} instance used by the crawler | ||||||||
*/ | ||||||||
export interface StatisticsOptions { | ||||||||
/** | ||||||||
* Interval in seconds to log the current statistics | ||||||||
* @default 60 | ||||||||
*/ | ||||||||
logIntervalSecs?: number; | ||||||||
|
||||||||
/** | ||||||||
* Message to log with the current statistics | ||||||||
* @default 'Statistics' | ||||||||
*/ | ||||||||
logMessage?: string; | ||||||||
|
||||||||
/** | ||||||||
* Key value store instance to persist the statistics. | ||||||||
* If not provided, the default one will be used when capturing starts | ||||||||
*/ | ||||||||
keyValueStore?: KeyValueStore; | ||||||||
|
||||||||
/** | ||||||||
* Configuration instance to use | ||||||||
* @default Configuration.getGlobalConfig() | ||||||||
*/ | ||||||||
config?: Configuration; | ||||||||
|
||||||||
/** | ||||||||
* Use this flag to disable or enable periodic statistics persistence to key value store. | ||||||||
* @default true | ||||||||
*/ | ||||||||
enablePersistence?: boolean; | ||||||||
} | ||||||||
|
||||||||
/** | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,6 +58,19 @@ export interface SessionPoolOptions { | |
|
||
/** @internal */ | ||
log?: Log; | ||
|
||
/** | ||
* If set to true, `SessionPool` will periodically persist its state to the key-value store. | ||
* @default true | ||
*/ | ||
enablePersistence?: boolean; | ||
} | ||
|
||
/** | ||
* Override persistence-related options provided in {@apilink SessionPoolOptions} for a single method call | ||
*/ | ||
interface PersistenceOptionsOverrides { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to have a single interface for both since it's exactly the same. Let's export it and reuse it instead of creating two local implementations. I also don't like the name very much, will try to come up with something else, I don't have any ideas right now :] There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've restructured it a bit, so that the same interface is used in the constructor and I'm also not sure where to actually put this interface in the project hierarchy. I don't see a matching module for utilities/types shared by both statistics and session pool |
||
enablePersistence?: boolean; | ||
} | ||
|
||
/** | ||
|
@@ -138,6 +151,8 @@ export class SessionPool extends EventEmitter { | |
protected _listener!: () => Promise<void>; | ||
protected events: EventManager; | ||
protected readonly blockedStatusCodes: number[]; | ||
protected enablePersistence: boolean; | ||
protected isInitialized = false; | ||
|
||
/** | ||
* @internal | ||
|
@@ -153,6 +168,7 @@ export class SessionPool extends EventEmitter { | |
sessionOptions: ow.optional.object, | ||
blockedStatusCodes: ow.optional.array.ofType(ow.number), | ||
log: ow.optional.object, | ||
enablePersistence: ow.optional.boolean, | ||
})); | ||
|
||
const { | ||
|
@@ -163,12 +179,14 @@ export class SessionPool extends EventEmitter { | |
sessionOptions = {}, | ||
blockedStatusCodes = [401, 403, 429], | ||
log = defaultLog, | ||
enablePersistence = true, | ||
} = options; | ||
|
||
this.config = config; | ||
this.blockedStatusCodes = blockedStatusCodes; | ||
this.events = config.getEventManager(); | ||
this.log = log.child({ prefix: 'SessionPool' }); | ||
this.enablePersistence = enablePersistence; | ||
|
||
// Pool Configuration | ||
this.maxPoolSize = maxPoolSize; | ||
|
@@ -206,7 +224,12 @@ export class SessionPool extends EventEmitter { | |
* It is called automatically by the {@apilink SessionPool.open} function. | ||
*/ | ||
async initialize(): Promise<void> { | ||
if (this.isInitialized) return; | ||
this.keyValueStore = await KeyValueStore.open(this.persistStateKeyValueStoreId, { config: this.config }); | ||
if (!this.enablePersistence) { | ||
this.isInitialized = true; | ||
return; | ||
} | ||
|
||
if (!this.persistStateKeyValueStoreId) { | ||
// eslint-disable-next-line max-len | ||
|
@@ -219,6 +242,7 @@ export class SessionPool extends EventEmitter { | |
this._listener = this.persistState.bind(this); | ||
|
||
this.events.on(EventType.PERSIST_STATE, this._listener); | ||
this.isInitialized = true; | ||
} | ||
|
||
/** | ||
|
@@ -290,7 +314,10 @@ export class SessionPool extends EventEmitter { | |
return this._createSession(); | ||
} | ||
|
||
async resetStore() { | ||
async resetStore(opts?: PersistenceOptionsOverrides) { | ||
if (!this.enablePersistence && !opts?.enablePersistence) { | ||
return; | ||
} | ||
await this.keyValueStore?.setValue(this.persistStateKey, null); | ||
} | ||
|
||
|
@@ -310,7 +337,10 @@ export class SessionPool extends EventEmitter { | |
* Persists the current state of the `SessionPool` into the default {@apilink KeyValueStore}. | ||
* The state is persisted automatically in regular intervals. | ||
*/ | ||
async persistState(): Promise<void> { | ||
async persistState(opts?: PersistenceOptionsOverrides): Promise<void> { | ||
if (!this.enablePersistence && !opts?.enablePersistence) { | ||
return; | ||
} | ||
this.log.debug('Persisting state', { | ||
persistStateKeyValueStoreId: this.persistStateKeyValueStoreId, | ||
persistStateKey: this.persistStateKey, | ||
|
@@ -331,7 +361,7 @@ export class SessionPool extends EventEmitter { | |
* SessionPool should not work before initialization. | ||
*/ | ||
protected _throwIfNotInitialized() { | ||
if (!this._listener) throw new Error('SessionPool is not initialized.'); | ||
if (!this.isInitialized) throw new Error('SessionPool is not initialized.'); | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lets not use abbreviations unless needed (e.g. when there is a collision)