-
Notifications
You must be signed in to change notification settings - Fork 252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(logging): enable log rotation and set retry on full log store sync #3699
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,9 +32,10 @@ import 'package:meta/meta.dart'; | |
const int _maxNumberOfLogEventsInBatch = 10000; | ||
const int _maxLogEventsBatchSize = 1048576; | ||
const int _baseBufferSize = 26; | ||
const int _maxLogEventsTimeSpanInBatch = Duration.millisecondsPerDay; | ||
const int _maxLogEventSize = 256000; | ||
final int _maxLogEventsTimeSpanInBatch = | ||
const Duration(hours: 24).inMilliseconds; | ||
const Duration _minusMaxLogEventTimeInFuture = Duration(hours: -2); | ||
const Duration _baseRetryInterval = Duration(seconds: 10); | ||
|
||
typedef _LogBatch = (List<QueuedItem> logQueues, List<InputLogEvent> logEvents); | ||
|
||
|
@@ -123,7 +124,8 @@ class CloudWatchLoggerPlugin extends AWSLoggerPlugin | |
bool _enabled; | ||
StoppableTimer? _timer; | ||
RemoteLoggingConstraintProvider? _remoteLoggingConstraintProvider; | ||
|
||
int _retryCount = 0; | ||
DateTime? _retryTime; | ||
set remoteLoggingConstraintProvider( | ||
RemoteLoggingConstraintProvider remoteProvider, | ||
) { | ||
|
@@ -139,32 +141,89 @@ class CloudWatchLoggerPlugin extends AWSLoggerPlugin | |
Future<void> startSyncing() async { | ||
final batchStream = _getLogBatchesToSync(); | ||
await for (final (logs, events) in batchStream) { | ||
final response = await _sendToCloudWatch(events); | ||
// TODO(nikahsn): handle tooOldLogEventEndIndex | ||
// and expiredLogEventEndIndex. | ||
if (response.rejectedLogEventsInfo?.tooNewLogEventStartIndex != null) { | ||
// TODO(nikahsn): throw an exception to enable log rotation if the | ||
// log store is full. | ||
break; | ||
_TooNewLogEventException? tooNewException; | ||
while (logs.isNotEmpty && events.isNotEmpty) { | ||
final rejectedLogEventsInfo = | ||
(await _sendToCloudWatch(events)).rejectedLogEventsInfo; | ||
if (rejectedLogEventsInfo == null) { | ||
await _logStore.deleteItems(logs); | ||
break; | ||
} | ||
|
||
final (tooOldEndIndex, tooNewStartIndex) = | ||
rejectedLogEventsInfo.parse(events.length); | ||
|
||
if (_isValidIndex(tooNewStartIndex, events.length)) { | ||
tooNewException = _TooNewLogEventException( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not throw it here? The control flow would be easier to follow. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We want to finish syncing the current batch after sanitizing its events, and only then throw to stop syncing the next batches. |
||
events[tooNewStartIndex!].timestamp.toInt(), | ||
); | ||
// set logs to end before the index. | ||
logs.removeRange(tooNewStartIndex, events.length); | ||
// set events to end before the index. | ||
events.removeRange(tooNewStartIndex, events.length); | ||
} | ||
if (_isValidIndex(tooOldEndIndex, events.length)) { | ||
// remove old logs from log store. | ||
await _logStore.deleteItems(logs.sublist(0, tooOldEndIndex! + 1)); | ||
// set logs to start after the index. | ||
logs.removeRange(0, tooOldEndIndex + 1); | ||
// set events to start after the index. | ||
events.removeRange(0, tooOldEndIndex + 1); | ||
} | ||
} | ||
// after sending each batch to CloudWatch check if the batch has | ||
// `tooNewException` and throw to stop syncing next batches. | ||
if (tooNewException != null) { | ||
throw tooNewException; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this would be cleaner if you just returned the retry time instead of throwing. |
||
} | ||
await _logStore.deleteItems(logs); | ||
} | ||
} | ||
|
||
if (!_syncing) { | ||
// TODO(nikahsn): disable log rotation. | ||
_syncing = true; | ||
DateTime? nextRetry; | ||
try { | ||
await startSyncing(); | ||
} on _TooNewLogEventException catch (e) { | ||
nextRetry = | ||
DateTime.fromMillisecondsSinceEpoch(e.timeInMillisecondsSinceEpoch) | ||
.add(_minusMaxLogEventTimeInFuture); | ||
} on Exception catch (e) { | ||
logger.error('Failed to sync logs to CloudWatch.', e); | ||
// TODO(nikahsn): enable log rotation if the log store is full | ||
} finally { | ||
_handleFullLogStoreAfterSync( | ||
retryTime: nextRetry, | ||
); | ||
_syncing = false; | ||
} | ||
} | ||
} | ||
|
||
void _handleFullLogStoreAfterSync({ | ||
DateTime? retryTime, | ||
}) { | ||
final isLogStoreFull = | ||
_logStore.isFull(_pluginConfig.localStoreMaxSizeInMB); | ||
if (!isLogStoreFull) { | ||
_retryCount = 0; | ||
_retryTime = null; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why would you not respect `retryTime` here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It resets the retry state if the log store is not full. It retries syncing only if the log store is full; otherwise it can wait until the next sync. |
||
return; | ||
} | ||
if (retryTime != null && retryTime.isAfter(DateTime.timestamp())) { | ||
_retryTime = retryTime; | ||
return; | ||
} | ||
_retryCount += 1; | ||
_retryTime = DateTime.timestamp().add((_baseRetryInterval * _retryCount)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems like a long time for a basic backoff. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
|
||
/// Whether a sync may be attempted now while the log store is full.
///
/// Returns `true` when no retry time is recorded, or when the recorded retry
/// time is not after the current UTC timestamp.
bool _shouldSyncOnFullLogStore() {
  // Copy the field to a local so it promotes to non-nullable without `!`.
  final retryTime = _retryTime;
  return retryTime == null || !retryTime.isAfter(DateTime.timestamp());
}
|
||
/// Logs [e] to the plugin logger as a failed CloudWatch sync.
///
/// NOTE(review): presumably registered as the sync timer's error callback —
/// confirm at the call site.
void _onTimerError(Object e) =>
    logger.error('Failed to sync logs to CloudWatch.', e);
|
@@ -235,11 +294,17 @@ class CloudWatchLoggerPlugin extends AWSLoggerPlugin | |
return; | ||
} | ||
final item = logEntry.toQueuedItem(); | ||
final isLogStoreFull = | ||
_logStore.isFull(_pluginConfig.localStoreMaxSizeInMB); | ||
final shouldEnableQueueRotation = isLogStoreFull && _retryTime != null; | ||
|
||
await _logStore.addItem( | ||
item.value, | ||
item.timestamp, | ||
enableQueueRotation: shouldEnableQueueRotation, | ||
); | ||
if (await _logStore.isFull(_pluginConfig.localStoreMaxSizeInMB)) { | ||
|
||
if (isLogStoreFull && _shouldSyncOnFullLogStore()) { | ||
await _startSyncingIfNotInProgress(); | ||
} | ||
} | ||
|
@@ -263,6 +328,8 @@ class CloudWatchLoggerPlugin extends AWSLoggerPlugin | |
_enabled = false; | ||
_timer?.stop(); | ||
await _logStore.clear(); | ||
_retryCount = 0; | ||
_retryTime = null; | ||
} | ||
|
||
/// Sends logs on-demand to CloudWatch. | ||
|
@@ -295,3 +362,34 @@ extension on LogEntry { | |
); | ||
} | ||
} | ||
|
||
extension on RejectedLogEventsInfo { | ||
(int? pastEndIndex, int? futureStartIndex) parse(int length) { | ||
int? pastEndIndex; | ||
int? futureStartIndex; | ||
|
||
if (_isValidIndex(tooOldLogEventEndIndex, length)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this allowed to be false? I would use |
||
pastEndIndex = tooOldLogEventEndIndex; | ||
} | ||
if (_isValidIndex(expiredLogEventEndIndex, length)) { | ||
pastEndIndex = pastEndIndex == null | ||
? expiredLogEventEndIndex | ||
: max(pastEndIndex, expiredLogEventEndIndex!); | ||
} | ||
if (_isValidIndex(tooNewLogEventStartIndex, length)) { | ||
futureStartIndex = tooNewLogEventStartIndex; | ||
} | ||
return (pastEndIndex, futureStartIndex); | ||
} | ||
} | ||
|
||
/// Thrown by the sync loop when a batch contained log events that were
/// rejected as too new.
///
/// The exception is raised only after the current batch has been processed,
/// so that syncing of subsequent batches stops. The catch site derives the
/// next retry time from [timeInMillisecondsSinceEpoch].
class _TooNewLogEventException implements Exception {
  /// Creates an exception for the first rejected event, whose timestamp is
  /// [timeInMillisecondsSinceEpoch].
  const _TooNewLogEventException(this.timeInMillisecondsSinceEpoch);

  /// Timestamp of the first too-new log event, in milliseconds since the
  /// Unix epoch.
  final int timeInMillisecondsSinceEpoch;

  // Without this override the exception would print as
  // "Instance of '_TooNewLogEventException'" if it ever reached a logger.
  @override
  String toString() =>
      '_TooNewLogEventException: log event with timestamp '
      '$timeInMillisecondsSinceEpoch (ms since epoch) is too new.';
}
|
||
/// Whether [index] is a usable position in a list of [length] elements.
///
/// Returns `false` for a `null` or negative [index], and for any [index]
/// greater than `length - 1`.
bool _isValidIndex(int? index, int length) {
  if (index == null || index < 0) {
    return false;
  }
  final lastIndex = length - 1;
  return index <= lastIndex;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`QueuedItemStore.isFull()`
is a sync method. However, it was implemented as async because the web implementation calls `checkIsIndexedDBSupported`
to decide whether to use IndexedDB or `InMemoryQueuedItemStore`. Because `checkIsIndexedDBSupported`
was async, all the web APIs had to be async. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But why would this be guaranteed to throw in the same way as before?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the docs: "If the operation is successful, the value of result is a connection to the database. If the request failed and the result is not available, an InvalidStateError exception is thrown."