Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability for OperationalSessionSetup to retry a few times automatically. #24808

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/app/CASESessionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@ CHIP_ERROR CASESessionManager::Init(chip::System::Layer * systemLayer, const CAS
}

void CASESessionManager::FindOrEstablishSession(const ScopedNodeId & peerId, Callback::Callback<OnDeviceConnected> * onConnection,
Callback::Callback<OnDeviceConnectionFailure> * onFailure)
Callback::Callback<OnDeviceConnectionFailure> * onFailure
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
,
uint8_t attemptCount
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
)
{
ChipLogDetail(CASESessionManager, "FindOrEstablishSession: PeerId = [%d:" ChipLogFormatX64 "]", peerId.GetFabricIndex(),
ChipLogValueX64(peerId.GetNodeId()));
Expand All @@ -53,6 +58,10 @@ void CASESessionManager::FindOrEstablishSession(const ScopedNodeId & peerId, Cal
}
}

#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
session->UpdateAttemptCount(attemptCount);
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

session->Connect(onConnection, onFailure);
}

Expand Down
10 changes: 9 additions & 1 deletion src/app/CASESessionManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,17 @@ class CASESessionManager : public OperationalSessionReleaseDelegate, public Sess
*
* The `onFailure` callback may be called before the FindOrEstablishSession
* call returns, for error cases that are detected synchronously.
*
* attemptCount can be used to automatically retry multiple times if session
* setup is not successful.
*/
void FindOrEstablishSession(const ScopedNodeId & peerId, Callback::Callback<OnDeviceConnected> * onConnection,
Callback::Callback<OnDeviceConnectionFailure> * onFailure);
Callback::Callback<OnDeviceConnectionFailure> * onFailure
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
,
uint8_t attemptCount = 1
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
);

void ReleaseSessionsForFabric(FabricIndex fabricIndex);

Expand Down
119 changes: 113 additions & 6 deletions src/app/OperationalSessionSetup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <lib/dnssd/Resolver.h>
#include <lib/support/CodeUtils.h>
#include <lib/support/logging/CHIPLogging.h>
#include <system/SystemClock.h>
#include <system/SystemLayer.h>

using namespace chip::Callback;
Expand All @@ -49,7 +50,7 @@ void OperationalSessionSetup::MoveToState(State aTargetState)
{
if (mState != aTargetState)
{
ChipLogDetail(Controller, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d",
ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d",
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState),
to_underlying(aTargetState));
mState = aTargetState;
Expand All @@ -70,7 +71,7 @@ bool OperationalSessionSetup::AttachToExistingSecureSession()
if (!sessionHandle.HasValue())
return false;

ChipLogProgress(Controller, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(),
ChipLogProgress(Discovery, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(),
ChipLogValueX64(mPeerId.GetNodeId()));

mDeviceAddress = sessionHandle.Value()->AsSecureSession()->GetPeerAddress();
Expand Down Expand Up @@ -214,7 +215,7 @@ void OperationalSessionSetup::UpdateDeviceData(const Transport::PeerAddress & ad
return;
}

ChipLogError(Controller, "Received UpdateDeviceData in incorrect state");
ChipLogError(Discovery, "Received UpdateDeviceData in incorrect state");
DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}
Expand Down Expand Up @@ -304,7 +305,7 @@ void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error)
void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error)
{
VerifyOrReturn(mState != State::Uninitialized && mState != State::NeedsAddress,
ChipLogError(Controller, "HandleCASEConnectionFailure was called while the device was not initialized"));
ChipLogError(Discovery, "HandleCASEConnectionFailure was called while the device was not initialized"));

if (CHIP_ERROR_TIMEOUT == error)
{
Expand All @@ -313,6 +314,17 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error)
MoveToState(State::ResolvingAddress);
return;
}

#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (mRemainingAttempts > 0)
{
CHIP_ERROR err = ScheduleSessionSetupReattempt();
if (err == CHIP_NO_ERROR)
{
return;
}
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
}

DequeueConnectionCallbacks(error);
Expand All @@ -322,7 +334,7 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error)
void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
{
VerifyOrReturn(mState != State::Uninitialized,
ChipLogError(Controller, "HandleCASEConnected was called while the device was not initialized"));
ChipLogError(Discovery, "HandleCASEConnected was called while the device was not initialized"));

if (!mSecureSession.Grab(session))
return; // Got an invalid session, do not change any state
Expand Down Expand Up @@ -377,6 +389,17 @@ OperationalSessionSetup::~OperationalSessionSetup()

CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
{
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (mRemainingAttempts > 0)
{
--mRemainingAttempts;
}
if (mAttemptsDone < UINT8_MAX)
{
++mAttemptsDone;
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

// NOTE: This is public API that can be used to update our stored peer
// address even when we are in State::Connected, so we do not make any
// MoveToState calls in this method.
Expand Down Expand Up @@ -418,7 +441,7 @@ void OperationalSessionSetup::PerformAddressUpdate()
CHIP_ERROR err = LookupPeerAddress();
if (err != CHIP_NO_ERROR)
{
ChipLogError(Controller, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format());
ChipLogError(Discovery, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format());
DequeueConnectionCallbacks(err);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
return;
Expand All @@ -435,9 +458,93 @@ void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerI
ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT,
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format());

// Does it make sense to ScheduleSessionSetupReattempt() here? DNS-SD
// resolution has its own retry/backoff mechanisms, so if it's failed we
// have already done a lot of that.

// No need to modify any variables in `this` since call below releases `this`.
DequeueConnectionCallbacks(reason);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}

#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount)
{
if (attemptCount == 0)
{
// Nothing to do.
return;
}

if (mState != State::NeedsAddress)
{
// We're in the middle of an attempt already, so decrement attemptCount
// by 1 to account for that.
--attemptCount;
}

if (attemptCount > mRemainingAttempts)
{
mRemainingAttempts = attemptCount;
}
}

CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt()
{
VerifyOrDie(mRemainingAttempts > 0);
// Try again, but not if things are in shutdown such that we can't get
// to a system layer, and not if we've run out of attempts.
if (!mInitParams.exchangeMgr->GetSessionManager() || !mInitParams.exchangeMgr->GetSessionManager()->SystemLayer())
{
return CHIP_ERROR_INCORRECT_STATE;
}

MoveToState(State::NeedsAddress);
System::Clock::Seconds16 timerDelay;
// Stop exponential backoff before our delays get too large.
//
// Note that mAttemptsDone is always > 0 here, because we have
// just finished one attempt.
VerifyOrDie(mAttemptsDone > 0);
static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
(1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
"Our backoff calculation will overflow.");
timerDelay = System::Clock::Seconds16(
static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
<< min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(timerDelay, TrySetupAgain, this);
// The cast on count() is needed because the type count() returns might not
// actually be uint16_t; on some platforms it's int.
ChipLogProgress(Discovery,
"OperationalSessionSetup:attempts done: %u, attempts left: %u, retry delay %us, status %" CHIP_ERROR_FORMAT,
mAttemptsDone, mRemainingAttempts, static_cast<unsigned>(timerDelay.count()), err.Format());
return err;
}

void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state)
{
auto * self = static_cast<OperationalSessionSetup *>(state);

CHIP_ERROR err = CHIP_NO_ERROR;

if (self->mState != State::NeedsAddress)
{
err = CHIP_ERROR_INCORRECT_STATE;
}
else
{
self->MoveToState(State::ResolvingAddress);
err = self->LookupPeerAddress();
if (err == CHIP_NO_ERROR)
{
return;
}
}

// Give up; we're either in a bad state or could not start a lookup.
self->DequeueConnectionCallbacks(err);
// Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

} // namespace chip
25 changes: 23 additions & 2 deletions src/app/OperationalSessionSetup.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <messaging/ExchangeDelegate.h>
#include <messaging/ExchangeMgr.h>
#include <messaging/Flags.h>
#include <platform/CHIPDeviceConfig.h>
#include <protocols/secure_channel/CASESession.h>
#include <system/SystemLayer.h>
#include <transport/SessionManager.h>
Expand Down Expand Up @@ -158,7 +159,6 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
}

mClientPool = clientPool;
mSystemLayer = params.exchangeMgr->GetSessionManager()->SystemLayer();
mPeerId = peerId;
mReleaseDelegate = releaseDelegate;
mState = State::NeedsAddress;
Expand Down Expand Up @@ -224,6 +224,11 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
void OnNodeAddressResolved(const PeerId & peerId, const AddressResolve::ResolveResult & result) override;
void OnNodeAddressResolutionFailed(const PeerId & peerId, CHIP_ERROR reason) override;

#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Update our remaining attempt count to be at least the given value.
void UpdateAttemptCount(uint8_t attemptCount);
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

private:
enum class State : uint8_t
{
Expand All @@ -237,7 +242,6 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,

CASEClientInitParams mInitParams;
CASEClientPoolDelegate * mClientPool = nullptr;
System::Layer * mSystemLayer;

// mCASEClient is only non-null if we are in State::Connecting or just
// allocated it as part of an attempt to enter State::Connecting.
Expand All @@ -261,6 +265,11 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,

bool mPerformingAddressUpdate = false;

#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
uint8_t mRemainingAttempts = 0;
uint8_t mAttemptsDone = 0;
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

void MoveToState(State aTargetState);

CHIP_ERROR EstablishConnection(const ReliableMessageProtocolConfig & config);
Expand Down Expand Up @@ -301,6 +310,18 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
* This function will set new IP address, port and MRP retransmission intervals of the device.
*/
void UpdateDeviceData(const Transport::PeerAddress & addr, const ReliableMessageProtocolConfig & config);

#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
/**
* Schedule a setup reattempt, if possible.
*/
CHIP_ERROR ScheduleSessionSetupReattempt();

/**
* Helper for our backoff retry timer.
*/
static void TrySetupAgain(System::Layer * systemLayer, void * state);
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
};

} // namespace chip
7 changes: 6 additions & 1 deletion src/controller/CHIPDeviceController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2441,7 +2441,12 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio
// If there is an error, CommissioningStageComplete will be called from OnDeviceConnectionFailureFn.
auto scopedPeerId = GetPeerScopedId(proxy->GetDeviceId());
mSystemState->CASESessionMgr()->FindOrEstablishSession(scopedPeerId, &mOnDeviceConnectedCallback,
&mOnDeviceConnectionFailureCallback);
&mOnDeviceConnectionFailureCallback
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
,
/* attemptCount = */ 3
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
);
}
break;
case CommissioningStage::kSendComplete: {
Expand Down
32 changes: 32 additions & 0 deletions src/include/platform/CHIPDeviceConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,38 @@
#define CHIP_DEVICE_CONFIG_PAIRING_SECONDARY_INSTRUCTION ""
#endif

/**
* CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
*
* If 1, enable support for automatic CASE establishment retries.
*/
#ifndef CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
#define CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES 1
#endif

#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

/**
* CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
*
* The initial retry delay, in seconds, for our automatic CASE retries.
*/
#ifndef CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
#define CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS 1
#endif

/**
* CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF
*
* The maximum number of times we back off, by a factor of 2 each time, from our
* initial CASE retry interval before we plateau.
*/
#ifndef CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF
#define CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF 5
#endif

#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

// -------------------- App Platform Configuration --------------------

/**
Expand Down