From 1043682d4a73db018d129c3abab90064f1a2664d Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Fri, 2 Jun 2023 18:26:11 -0400 Subject: [PATCH] Rollback() on data model builder should reset the error on the builder. (#26982) * Rollback() on data model builder should reset the error on the builder. Without this, it's too easy to forget to ResetError() when rolling back. That's exactly what happened in ClusterStateCache:OnUpdateDataVersionFilterList, and that could lead to inability to ever re-subscribe after a subscription drop. We had the following Rollback() calls not immediately followed by ResetError(): * In SendFailureStatus in ember-compatibility-functions.cpp. This is never reached when there is a failure on aAttributeReports, so doing the ResetError here is not a problem. * AttributeValueEncoder::EncodeListItem. This was relying on the InteractionModelEngine to ResetError after the rollback. * ClusterStateCache:OnUpdateDataVersionFilterList. When CreateDataVersionFilter() failed, this could leave the DataVersionFilterIBs::Builder in an error state, which would lead to the subscribe request (incorrectly) failing completely. * WriteHandler::ProcessAttributeDataIBs. This codepath was apparently not getting hit in practice, since it would fail. It was also checkpointing/restoring on the wrong builder. * Engine::BuildSingleReportDataAttributeReportIBs. This was buggy: If we hit an error other than "out of memory" while encoding the value, we could end up with the builder in an error state and then fail to encode our status response. * ReadClient::SendReadRequest. This is never reached with an error on the builder. * ReadClient::SendSubscribeRequestImpl. This is never reached with an error on the builder. * Address review comment. --- src/app/CommandHandler.cpp | 1 - src/app/MessageDef/Builder.h | 8 ++- src/app/WriteClient.cpp | 1 - src/app/WriteClient.h | 1 - src/app/WriteHandler.cpp | 4 +- src/app/reporting/Engine.cpp | 33 ++++++------ src/app/tests/TestClusterStateCache.cpp | 70 +++++++++++++++++++++++++ 7 files changed, 95 insertions(+), 23 deletions(-) diff --git a/src/app/CommandHandler.cpp b/src/app/CommandHandler.cpp index a0a46f1a02e74a..29abc1f321a9bd 100644 --- a/src/app/CommandHandler.cpp +++ b/src/app/CommandHandler.cpp @@ -559,7 +559,6 @@ CHIP_ERROR CommandHandler::RollbackResponse() { VerifyOrReturnError(mState == State::Preparing || mState == State::AddingCommand, CHIP_ERROR_INCORRECT_STATE); mInvokeResponseBuilder.Rollback(mBackupWriter); - mInvokeResponseBuilder.ResetError(); // Note: We only support one command per request, so we reset the state to Idle here, need to review the states when adding // supports of having multiple requests in the same transaction. MoveToState(State::Idle); diff --git a/src/app/MessageDef/Builder.h b/src/app/MessageDef/Builder.h index 5585bb8deadb62..f3609953c3a5ab 100644 --- a/src/app/MessageDef/Builder.h +++ b/src/app/MessageDef/Builder.h @@ -81,9 +81,13 @@ class Builder /** * Rollback the request state to the checkpointed TLVWriter * - * @param[in] aPoint A that captured the state via Checkpoint() at some point in the past + * @param[in] aPoint A writer that captured the state via Checkpoint() at some point in the past */ - void Rollback(const chip::TLV::TLVWriter & aPoint) { *mpWriter = aPoint; } + void Rollback(const chip::TLV::TLVWriter & aPoint) + { + *mpWriter = aPoint; + ResetError(); + } void EndOfContainer(); diff --git a/src/app/WriteClient.cpp b/src/app/WriteClient.cpp index b8ddf5400fcb9e..da24d058f1c40a 100644 --- a/src/app/WriteClient.cpp +++ b/src/app/WriteClient.cpp @@ -240,7 +240,6 @@ CHIP_ERROR WriteClient::PutSinglePreencodedAttributeWritePayload(const chip::app { // If it failed with no memory, then we create a new chunk for it. mWriteRequestBuilder.GetWriteRequests().Rollback(backupWriter); - mWriteRequestBuilder.GetWriteRequests().ResetError(); ReturnErrorOnFailure(StartNewMessage()); err = TryPutSinglePreencodedAttributeWritePayload(attributePath, data); // Since we have created a new chunk for this element, the encode is expected to succeed. diff --git a/src/app/WriteClient.h b/src/app/WriteClient.h index af89a50f90b796..99c8682605c569 100644 --- a/src/app/WriteClient.h +++ b/src/app/WriteClient.h @@ -297,7 +297,6 @@ class WriteClient : public Messaging::ExchangeDelegate { // If it failed with no memory, then we create a new chunk for it. mWriteRequestBuilder.GetWriteRequests().Rollback(backupWriter); - mWriteRequestBuilder.GetWriteRequests().ResetError(); ReturnErrorOnFailure(StartNewMessage()); ReturnErrorOnFailure(TryEncodeSingleAttributeDataIB(attributePath, value)); } diff --git a/src/app/WriteHandler.cpp b/src/app/WriteHandler.cpp index d262dd522d5525..c0bc52b1f2ae71 100644 --- a/src/app/WriteHandler.cpp +++ b/src/app/WriteHandler.cpp @@ -330,7 +330,7 @@ CHIP_ERROR WriteHandler::ProcessAttributeDataIBs(TLV::TLVReader & aAttributeData MatterPreAttributeWriteCallback(dataAttributePath); TLV::TLVWriter backup; DataVersion version = 0; - mWriteResponseBuilder.Checkpoint(backup); + mWriteResponseBuilder.GetWriteResponses().Checkpoint(backup); err = element.GetDataVersion(&version); if (CHIP_NO_ERROR == err) { @@ -344,7 +344,7 @@ CHIP_ERROR WriteHandler::ProcessAttributeDataIBs(TLV::TLVReader & aAttributeData err = WriteSingleClusterData(subjectDescriptor, dataAttributePath, dataReader, this); if (err != CHIP_NO_ERROR) { - mWriteResponseBuilder.Rollback(backup); + mWriteResponseBuilder.GetWriteResponses().Rollback(backup); err = AddStatus(dataAttributePath, StatusIB(err)); } MatterPostAttributeWriteCallback(dataAttributePath); diff --git a/src/app/reporting/Engine.cpp b/src/app/reporting/Engine.cpp index 909d3a8022445b..b5ccf4566a7351 100644 --- a/src/app/reporting/Engine.cpp +++ b/src/app/reporting/Engine.cpp @@ -84,6 +84,11 @@ Engine::RetrieveClusterData(const SubjectDescriptor & aSubjectDescriptor, bool a return CHIP_NO_ERROR; } +static bool IsOutOfWriterSpaceError(CHIP_ERROR err) +{ + return err == CHIP_ERROR_NO_MEMORY || err == CHIP_ERROR_BUFFER_TOO_SMALL; +} + CHIP_ERROR Engine::BuildSingleReportDataAttributeReportIBs(ReportDataMessage::Builder & aReportDataBuilder, ReadHandler * apReadHandler, bool * apHasMoreChunks, bool * apHasEncodedData) @@ -185,13 +190,17 @@ CHIP_ERROR Engine::BuildSingleReportDataAttributeReportIBs(ReportDataMessage::Bu "Error retrieving data from clusterId: " ChipLogFormatMEI ", err = %" CHIP_ERROR_FORMAT, ChipLogValueMEI(pathForRetrieval.mClusterId), err.Format()); - // If error is not CHIP_ERROR_BUFFER_TOO_SMALL and is not CHIP_ERROR_NO_MEMORY, rollback and encode status. + // If error is not an "out of writer space" error, rollback and encode status. // Otherwise, if partial data allowed, save the encode state. // Otherwise roll back. If we have already encoded some chunks, we are done; otherwise encode status. - if (encodeState.AllowPartialData() && ((err == CHIP_ERROR_BUFFER_TOO_SMALL) || (err == CHIP_ERROR_NO_MEMORY))) + if (encodeState.AllowPartialData() && IsOutOfWriterSpaceError(err)) { // Encoding is aborted but partial data is allowed, then we don't rollback and save the state for next chunk. + // The expectation is that RetrieveClusterData has already reset attributeReportIBs to a good state (rolled + // back any partially-written AttributeReportIB instances, reset its error status). Since AllowPartialData() + // is true, we may not have encoded a complete attribute value, but we did, if we encoded anything, encode a + // set of complete AttributeReportIB instances that represent part of the attribute value. apReadHandler->SetAttributeEncodeState(encodeState); } else @@ -201,13 +210,14 @@ CHIP_ERROR Engine::BuildSingleReportDataAttributeReportIBs(ReportDataMessage::Bu attributeReportIBs.Rollback(attributeBackup); apReadHandler->SetAttributeEncodeState(AttributeValueEncoder::AttributeEncodeState()); - if (err != CHIP_ERROR_NO_MEMORY && err != CHIP_ERROR_BUFFER_TOO_SMALL) + if (!IsOutOfWriterSpaceError(err)) { // Try to encode our error as a status response. err = attributeReportIBs.EncodeAttributeStatus(pathForRetrieval, StatusIB(err)); if (err != CHIP_NO_ERROR) { - // OK, just roll back again and give up. + // OK, just roll back again and give up; if we still ran out of space we + // will send this status response in the next chunk. attributeReportIBs.Rollback(attributeBackup); } } @@ -241,15 +251,9 @@ CHIP_ERROR Engine::BuildSingleReportDataAttributeReportIBs(ReportDataMessage::Bu // These are are guaranteed to not fail since we've already reserved memory for the remaining 'close out' TLV operations in this // function and its callers. // - if ((err == CHIP_ERROR_BUFFER_TOO_SMALL) || (err == CHIP_ERROR_NO_MEMORY)) + if (IsOutOfWriterSpaceError(err)) { ChipLogDetail(DataManagement, " We cannot put more chunks into this report. Enable chunking."); - - // - // Reset the error tracked within the builder. Otherwise, any further attempts to write - // data through the builder will be blocked by that error. - // - attributeReportIBs.ResetError(); err = CHIP_NO_ERROR; } @@ -276,7 +280,6 @@ CHIP_ERROR Engine::BuildSingleReportDataAttributeReportIBs(ReportDataMessage::Bu if (!attributeDataWritten && err == CHIP_NO_ERROR) { aReportDataBuilder.Rollback(backup); - aReportDataBuilder.ResetError(); } // hasMoreChunks + no data encoded is a flag that we have encountered some trouble when processing the attribute. @@ -379,7 +382,7 @@ CHIP_ERROR Engine::BuildSingleReportDataEventReports(ReportDataMessage::Builder err = CHIP_NO_ERROR; hasMoreChunks = false; } - else if ((err == CHIP_ERROR_BUFFER_TOO_SMALL) || (err == CHIP_ERROR_NO_MEMORY)) + else if (IsOutOfWriterSpaceError(err)) { // when first cluster event is too big to fit in the packet, ignore that cluster event. // However, we may have encoded some attributes before, we don't skip it in that case. @@ -423,11 +426,9 @@ CHIP_ERROR Engine::BuildSingleReportDataEventReports(ReportDataMessage::Builder } // Maybe encoding the attributes has already used up all space. - if ((err == CHIP_NO_ERROR || err == CHIP_ERROR_NO_MEMORY || err == CHIP_ERROR_BUFFER_TOO_SMALL) && - !(hasEncodedStatus || (eventCount != 0))) + if ((err == CHIP_NO_ERROR || IsOutOfWriterSpaceError(err)) && !(hasEncodedStatus || (eventCount != 0))) { aReportDataBuilder.Rollback(backup); - aReportDataBuilder.ResetError(); err = CHIP_NO_ERROR; } diff --git a/src/app/tests/TestClusterStateCache.cpp b/src/app/tests/TestClusterStateCache.cpp index 5c05959d0c452c..b6f9af88ab1a2e 100644 --- a/src/app/tests/TestClusterStateCache.cpp +++ b/src/app/tests/TestClusterStateCache.cpp @@ -19,14 +19,17 @@ #include "app-common/zap-generated/ids/Attributes.h" #include "app-common/zap-generated/ids/Clusters.h" #include "lib/core/TLVTags.h" +#include "lib/core/TLVWriter.h" #include "protocols/interaction_model/Constants.h" #include "system/SystemPacketBuffer.h" #include "system/TLVPacketBufferBackingStore.h" #include #include +#include #include #include #include +#include #include #include #include @@ -540,8 +543,75 @@ void RunAndValidateSequence(AttributeInstructionListType list) ForwardedDataCallbackValidator dataCallbackValidator; CacheValidator client(list, dataCallbackValidator); ClusterStateCache cache(client); + + // In order for the cache to track our data versions, we need to claim to it + // that we are dealing with a wildcard path. And we need to do that before + // it has seen any reports. + AttributePathParams wildcardPath; + const Span pathSpan(&wildcardPath, 1); + { + // Just need a buffer big enough that we can start the list. We don't + // care about the actual data versions here. + uint8_t buf[20]; + TLV::TLVWriter writer; + writer.Init(buf); + DataVersionFilterIBs::Builder builder; + CHIP_ERROR err = builder.Init(&writer); + NL_TEST_ASSERT(gSuite, err == CHIP_NO_ERROR); + bool encodedDataVersionList = false; + err = cache.GetBufferedCallback().OnUpdateDataVersionFilterList(builder, pathSpan, encodedDataVersionList); + + // We had nothing to encode so far. + NL_TEST_ASSERT(gSuite, err == CHIP_NO_ERROR); + NL_TEST_ASSERT(gSuite, !encodedDataVersionList); + } + DataSeriesGenerator generator(&cache.GetBufferedCallback(), list); generator.Generate(dataCallbackValidator); + + // Now verify that we would do the right thing when encoding our data + // versions. + + size_t bufferSize = 1; + do + { + Platform::ScopedMemoryBuffer buf; + if (!buf.Calloc(bufferSize)) + { + NL_TEST_ASSERT(gSuite, false); + break; + } + + TLV::TLVWriter writer; + writer.Init(buf.Get(), bufferSize); + + DataVersionFilterIBs::Builder builder; + CHIP_ERROR err = builder.Init(&writer); + NL_TEST_ASSERT(gSuite, err == CHIP_NO_ERROR || err == CHIP_ERROR_BUFFER_TOO_SMALL); + if (err == CHIP_NO_ERROR) + { + // We had enough space to start the list. Now try encoding the data + // version filters. + bool encodedDataVersionList = false; + err = cache.GetBufferedCallback().OnUpdateDataVersionFilterList(builder, pathSpan, encodedDataVersionList); + + // We should be rolling back properly if we run out of space. + NL_TEST_ASSERT(gSuite, err == CHIP_NO_ERROR); + NL_TEST_ASSERT(gSuite, builder.GetError() == CHIP_NO_ERROR); + + if (writer.GetRemainingFreeLength() > 40) + { + // We have lots of empty space left, so we did not end up + // needing to roll back; no point testing larger buffer sizes. + // + // Note: we may still have encodedDataVersionList false here, if + // there were no non-status attribute values cached. + break; + } + } + + ++bufferSize; + } while (true); } /*