diff --git a/task/host-sp-comms/src/main.rs b/task/host-sp-comms/src/main.rs index 75a64e363..0a9b000a7 100644 --- a/task/host-sp-comms/src/main.rs +++ b/task/host-sp-comms/src/main.rs @@ -258,6 +258,13 @@ struct ServerImpl { reboot_state: Option, host_kv_storage: HostKeyValueStorage, hf_mux_state: Option, + + /// Set when we receive a Boot Failed message from the host. This flag + /// changes the behavior in response to the next Request Reboot message. It + /// is cleared at Request Reboot, *or* at any of the messages sent during + /// host boot, to try and keep our state from getting out of sync for + /// whatever reason. + host_boot_just_failed: bool, } impl ServerImpl { @@ -290,6 +297,8 @@ impl ServerImpl { reboot_state: None, host_kv_storage: HostKeyValueStorage::claim_static_resources(), hf_mux_state: None, + + host_boot_just_failed: false, } } @@ -701,13 +710,24 @@ impl ServerImpl { // We defer any actions until after we've serialized our response to // avoid borrow checker issues with calling methods on `self`. + // + // NOTE: setting any action will also clear the `host_boot_just_failed` + // flag, to avoid keeping incorrect state around. Since we have no + // explicit way of tracking the host boot software's lifecycle, this is + // a compromise. let mut action = None; + let response = match request { HostToSp::_Unused => { Some(SpToHost::DecodeFailure(DecodeFailureReason::Deserialize)) } HostToSp::RequestReboot => { - action = Some(Action::RebootHost); + if core::mem::take(&mut self.host_boot_just_failed) { + // Do _not_ reboot. Reinterpret this as a poweroff request. + action = Some(Action::PowerOffHost); + } else { + action = Some(Action::RebootHost); + } None } HostToSp::RequestPowerOff => { @@ -777,6 +797,11 @@ impl ServerImpl { for b in &mut self.host_kv_storage.last_boot_fail[n..] { *b = 0; } + + // Record that this failure happened, which will change the + // behavior of the reboot request we expect to follow shortly. + self.host_boot_just_failed = true; + Some(SpToHost::Ack) } HostToSp::HostPanic { .. } => { @@ -793,6 +818,10 @@ impl ServerImpl { for b in &mut self.host_kv_storage.last_panic[n..] { *b = 0; } + + // Neither set nor clear the host_boot_just_failed flag, for + // this message often immediately follows a boot failure. + Some(SpToHost::Ack) } HostToSp::GetStatus => { @@ -888,6 +917,9 @@ impl ServerImpl { // Now that all buffer borrowing is done, we can borrow `self` mutably // again to perform any necessary action. if let Some(action) = action { + // Protectively clear this flag to avoid keeping state. + self.host_boot_just_failed = false; + match action { Action::RebootHost => self.power_off_host(true), Action::PowerOffHost => self.power_off_host(false),