-
Notifications
You must be signed in to change notification settings - Fork 150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: base transaction retries on error codes #953
Changes from 3 commits
5c77e0c
9d170ce
04bd55a
42b3ac8
5ea2f30
58a3dc9
8d13959
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ import {GoogleError, Status} from 'google-gax'; | |
|
||
import * as proto from '../protos/firestore_v1_proto_api'; | ||
|
||
import {ExponentialBackoff} from './backoff'; | ||
import {DocumentSnapshot, Precondition} from './document'; | ||
import {Firestore, WriteBatch} from './index'; | ||
import {logger} from './logger'; | ||
|
@@ -64,6 +65,7 @@ const READ_AFTER_WRITE_ERROR_MSG = | |
export class Transaction { | ||
private _firestore: Firestore; | ||
private _writeBatch: WriteBatch; | ||
private _backoff: ExponentialBackoff; | ||
private _requestTag: string; | ||
private _transactionId?: Uint8Array; | ||
|
||
|
@@ -78,6 +80,7 @@ export class Transaction { | |
this._firestore = firestore; | ||
this._writeBatch = firestore.batch(); | ||
this._requestTag = requestTag; | ||
this._backoff = new ExponentialBackoff(); | ||
} | ||
|
||
/** | ||
|
@@ -407,7 +410,7 @@ export class Transaction { | |
maxAttempts: number | ||
): Promise<T> { | ||
let result: T; | ||
let lastError: Error | undefined = undefined; | ||
let lastError: GoogleError | undefined = undefined; | ||
|
||
for (let attempt = 0; attempt < maxAttempts; ++attempt) { | ||
if (lastError) { | ||
|
@@ -419,6 +422,9 @@ export class Transaction { | |
); | ||
} | ||
|
||
this._writeBatch._reset(); | ||
await this.maybeBackoff(lastError); | ||
|
||
await this.begin(); | ||
|
||
try { | ||
|
@@ -429,6 +435,8 @@ export class Transaction { | |
); | ||
} | ||
result = await promise; | ||
await this.commit(); | ||
return result; | ||
} catch (err) { | ||
logger( | ||
'Firestore.runTransaction', | ||
|
@@ -441,19 +449,10 @@ export class Transaction { | |
|
||
if (isRetryableTransactionError(err)) { | ||
lastError = err; | ||
continue; // Retry full transaction | ||
} else { | ||
return Promise.reject(err); // Callback failed w/ non-retryable error | ||
} | ||
} | ||
|
||
try { | ||
await this.commit(); | ||
return result; // Success | ||
} catch (err) { | ||
lastError = err; | ||
this._writeBatch._reset(); | ||
} | ||
} | ||
|
||
logger( | ||
|
@@ -464,6 +463,25 @@ export class Transaction { | |
); | ||
return Promise.reject(lastError); | ||
} | ||
|
||
/** | ||
* Delays further operations based on the provided error. | ||
* | ||
* @private | ||
* @return A Promise that resolves after the delay expired. | ||
*/ | ||
private async maybeBackoff(error?: GoogleError) { | ||
if (error) { | ||
if (error.code === Status.RESOURCE_EXHAUSTED) { | ||
this._backoff.resetToMax(); | ||
} else if (error.code === Status.ABORTED) { | ||
// We don't backoff for ABORTED to avoid starvation | ||
this._backoff.reset(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This contradicts comments in go/transaction-retry-matrix. Do we have experimental data to support that starvation is a problem? Given multiple clients contending on a write, multiple clients failing to back off (with randomness) means they'll never make progress. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I checked with the Firestore team and removed the special-casing on ABORTED, which simplifies the code a bit since we now always back off. This also allowed me to simplify the test suite. |
||
} | ||
} | ||
|
||
await this._backoff.backoffAndWait(); | ||
} | ||
} | ||
|
||
/** | ||
|
@@ -562,13 +580,22 @@ function validateReadOptions( | |
} | ||
} | ||
|
||
function isRetryableTransactionError(error: Error): boolean { | ||
if (error instanceof GoogleError || 'code' in error) { | ||
// In transactions, the backend returns code ABORTED for reads that fail | ||
// with contention. These errors should be retried for both GoogleError | ||
// and GoogleError-alike errors (in case the prototype hierarchy gets | ||
// stripped somewhere). | ||
return error.code === Status.ABORTED; | ||
function isRetryableTransactionError(error: GoogleError): boolean { | ||
if (error.code !== undefined) { | ||
// This list is based on https://github.com/firebase/firebase-js-sdk/blob/master/packages/firestore/src/core/transaction_runner.ts#L112 | ||
switch (error.code) { | ||
case Status.ABORTED: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it worth distinguishing retry-the-rpc vs retry-the-whole-transaction errors? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The per-RPC retry is actually driven by GAX. The one downside of this approach is that if, say, a RunQuery RPC fails with DEADLINE_EXCEEDED, the RPC will be retried by GAX first. If GAX retries don't resolve the issue, then we will restart the transaction. It's probably possible to turn off GAX retries, but it is a pretty invasive change. |
||
case Status.CANCELLED: | ||
case Status.UNKNOWN: | ||
case Status.DEADLINE_EXCEEDED: | ||
case Status.INTERNAL: | ||
case Status.UNAVAILABLE: | ||
case Status.UNAUTHENTICATED: | ||
case Status.RESOURCE_EXHAUSTED: | ||
return true; | ||
default: | ||
return false; | ||
} | ||
} | ||
return false; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
async/await
is amazing. ❤️