Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add exponential and server-mediated backoff on retries #56

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 28 additions & 5 deletions ts/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import {AuthenticationConfig, Common, ServiceConfig} from '../third_party/types/common-types';

const parseDuration: (str: string) => number = require('parse-duration');
const common: Common = require('@google-cloud/common');
const extend = require('extend');

Expand Down Expand Up @@ -77,9 +78,25 @@ export interface Config extends AuthenticationConfig {
// stack depth may increase overhead of profiling.
heapMaxStackDepth?: number;

// Time to wait before trying to create a profile again if profile creation
// fails.
backoffMillis?: number;
// On first error during profile creation, if the backoff is not specified
// by the server response, then profiler will between 0 and
// initialBackoffMillis before asking the server to create a profile again.
// After a successful profile creation, the backoff will be reset to
// initialExpBackoffMillis.
initialBackoffMillis?: number;

// If the backoff is not specified by the server response, then profiler will
// wait at most maxBackoffMillis before asking server to create a profile
// again.
maxBackoffMillis?: number;

// On each consecutive error in profile creation, the maximum backoff will
// increase by this factor. The backoff will be random value selected
// from a uniform distribution between 0 and the maximum backoff.
backoffMultiplier?: number;

// Server-specified backoffs will be capped at backoffLimitMillis.
backoffLimitMillis?: number;
}

// Interface for an initialized config.
Expand All @@ -94,7 +111,10 @@ export interface ProfilerConfig extends AuthenticationConfig {
timeIntervalMicros: number;
heapIntervalBytes: number;
heapMaxStackDepth: number;
backoffMillis: number;
initialBackoffMillis: number;
maxBackoffMillis: number;
backoffMultiplier: number;
backoffLimitMillis: number;
}

// Default values for configuration for a profiler.
Expand All @@ -106,5 +126,8 @@ export const defaultConfig = {
timeIntervalMicros: 1000,
heapIntervalBytes: 512 * 1024,
heapMaxStackDepth: 64,
backoffMillis: 5 * 60 * 1000
initialBackoffMillis: 1000,
maxBackoffMillis: parseDuration('1h'),
backoffMultiplier: 1.3,
backoffLimitMillis: parseDuration('7d'), // 7 days
};
124 changes: 95 additions & 29 deletions ts/src/profiler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

import * as http from 'http';
import * as path from 'path';
import * as pify from 'pify';
import * as zlib from 'zlib';
Expand Down Expand Up @@ -72,6 +73,21 @@ export interface RequestProfile {
labels?: {instance?: string};
}

interface ServerBackoffResponse {
statusMessage: string;
body: {details: {retryDelay: string}};
}

/**
* Returns true if response indicates a backoff.
*/
// tslint:disable-next-line: no-any
function isServerBackoffResponse(response: any):
response is ServerBackoffResponse {
return response.body && response.body.details &&
typeof response.body.details.retryDelay === 'string';
}

/**
* @return true if an deployment is a Deployment and false otherwise.
*/
Expand Down Expand Up @@ -102,7 +118,7 @@ function isRequestProfile(prof: any): prof is RequestProfile {
}

/**
* Returns true if response has statusCode.
* @return - true if response has statusCode and false otherwise.
*/
// tslint:disable-next-line: no-any
function hasHttpStatusCode(response: any):
Expand All @@ -125,6 +141,50 @@ async function profileBytes(p: perftools.profiles.IProfile): Promise<string> {
return gzBuf.toString('base64');
}

/**
* Error constructed from http server response which indicates backoff.
*/
class BackoffResponseError extends Error {
backoffMillis: number;
constructor(response: ServerBackoffResponse) {
super(response.statusMessage);
this.backoffMillis = parseDuration(response.body.details.retryDelay);
}
}

/**
* @return - true if error is a BackoffResponseError and false otherwise
*/
function isBackoffResponseError(err: Error): err is BackoffResponseError {
// tslint:disable-next-line: no-any
return typeof (err as any).backoffMillis === 'number';
}

/**
* Class which tracks how long to wait before the next retry and can be
* used to get this backoff.
*/
export class Retryer {
private nextMaxBackoffMillis: number;
constructor(
readonly initialBackoffMillis: number, readonly maxBackoffMillis: number,
readonly backoffMultiplier: number) {
this.nextMaxBackoffMillis = this.initialBackoffMillis;
}
getBackoff(): number {
const curBackoff = Math.random() * this.nextMaxBackoffMillis;
this.nextMaxBackoffMillis =
this.backoffMultiplier * this.nextMaxBackoffMillis;
if (this.nextMaxBackoffMillis > this.maxBackoffMillis) {
this.nextMaxBackoffMillis = this.maxBackoffMillis;
}
return curBackoff;
}
reset() {
this.nextMaxBackoffMillis = this.initialBackoffMillis;
}
}

/**
* Polls profiler server for instructions on behalf of a task and
* collects and uploads profiles as requested
Expand All @@ -135,6 +195,7 @@ export class Profiler extends common.ServiceObject {
private profileLabels: {instance?: string};
private deployment: Deployment;
private profileTypes: string[];
private retryer: Retryer;

// Public for testing.
timeProfiler: TimeProfiler|undefined;
Expand Down Expand Up @@ -184,6 +245,10 @@ export class Profiler extends common.ServiceObject {
this.heapProfiler = new HeapProfiler(
this.config.heapIntervalBytes, this.config.heapMaxStackDepth);
}

this.retryer = new Retryer(
this.config.initialBackoffMillis, this.config.maxBackoffMillis,
this.config.backoffMultiplier);
}

/**
Expand All @@ -206,8 +271,6 @@ export class Profiler extends common.ServiceObject {
*/
async runLoop() {
const delayMillis = await this.collectProfile();

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.


// Schedule the next profile.
setTimeout(this.runLoop.bind(this), delayMillis).unref();
}

Expand All @@ -216,21 +279,20 @@ export class Profiler extends common.ServiceObject {
* a profile and uploads it.
*
* @return - time, in ms, to wait before asking profiler server again about
* collecting another profile.
*
* TODO: implement backoff and retry. When error encountered in
* createProfile() should be retried when response indicates this request
* should be retried or with exponential backoff (up to one hour) if the
* response does not indicate when to retry this request.
* collecting another profile, or retryer specifying exponential backoff.
*/
async collectProfile(): Promise<number> {
let prof: RequestProfile;
try {
prof = await this.createProfile();
} catch (err) {
this.logger.error(`Failed to create profile: ${err}`);
return this.config.backoffMillis;
if (isBackoffResponseError(err)) {
return Math.min(err.backoffMillis, this.config.backoffLimitMillis);
}
return this.retryer.getBackoff();
}
this.retryer.reset();
try {
await this.profileAndUpload(prof);
} catch (err) {
Expand Down Expand Up @@ -268,25 +330,29 @@ export class Profiler extends common.ServiceObject {
body: reqBody,
json: true,
};

this.logger.debug(`Attempting to create profile.`);
const [prof, response] = await this.request(options);
if (!hasHttpStatusCode(response)) {
throw new Error('Server response missing status information.');
}
if (isErrorResponseStatusCode(response.statusCode)) {
let message: number|string = response.statusCode;
// tslint:disable-next-line: no-any
if ((response as any).statusMessage) {
message = response.statusMessage;
}
throw new Error(message.toString());
}
if (!isRequestProfile(prof)) {
throw new Error(`Profile not valid: ${JSON.stringify(prof)}.`);
}
this.logger.debug(`Successfully created profile ${prof.profileType}.`);
return prof;
return new Promise<RequestProfile>((resolve, reject) => {
this.request(
options,
(err: Error, prof: object, response: http.ServerResponse) => {
if (response && isErrorResponseStatusCode(response.statusCode)) {
if (isServerBackoffResponse(response)) {
throw new BackoffResponseError(response);
}
throw new Error(response.statusMessage);
}
if (err) {
reject(err);
return;
}
if (isRequestProfile(prof)) {
this.logger.debug(
`Successfully created profile ${prof.profileType}.`);
resolve(prof);
return;
}
throw new Error(`Profile not valid: ${JSON.stringify(prof)}.`);
});
});
}

/**
Expand Down
Loading