Skip to content

Commit

Permalink
Add support for ports disabled by Hubris (#113)
Browse files Browse the repository at this point in the history
This PR adds support for ports disabled by Hubris
(see oxidecomputer/hubris#1441)

- Adds a wider `ExtendedStatus` type and message, which includes a bit for
  `DISABLED_BY_SP` (and 23 spare bits, just in case)
- Uses the `ExtendedStatus` message in `xcvradm`
- Adds `HwError::DisabledBySp` and `HostRequest::ClearDisableLatch`
  • Loading branch information
mkeeter authored Jun 28, 2023
1 parent b6f4990 commit 84e28d1
Show file tree
Hide file tree
Showing 8 changed files with 331 additions and 39 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion controller/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "transceiver-controller"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
default-run = "xcvradm"

Expand Down
90 changes: 63 additions & 27 deletions controller/src/bin/xcvradm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use std::time::Duration;
use tokio::sync::mpsc;
use transceiver_controller::Config;
use transceiver_controller::Controller;
use transceiver_controller::ExtendedStatusResult;
use transceiver_controller::FailedModules;
use transceiver_controller::IdentifierResult;
use transceiver_controller::LedStateResult;
Expand All @@ -32,7 +33,6 @@ use transceiver_controller::PowerModeResult;
use transceiver_controller::PowerState;
use transceiver_controller::ReadResult;
use transceiver_controller::SpRequest;
use transceiver_controller::StatusResult;
use transceiver_controller::VendorInfoResult;
use transceiver_decode::Aux1Monitor;
use transceiver_decode::Aux2Monitor;
Expand All @@ -41,6 +41,7 @@ use transceiver_decode::ReceiverPower;
use transceiver_decode::VendorInfo;
use transceiver_messages::filter_module_data;
use transceiver_messages::mac::MacAddrs;
use transceiver_messages::message::ExtendedStatus;
use transceiver_messages::message::LedState;
use transceiver_messages::message::Status;
use transceiver_messages::mgmt::cmis;
Expand Down Expand Up @@ -222,7 +223,10 @@ enum StatusKind {
// Print the usual Display representation of each set of status bits.
Normal,
// Print the truth value of a set of status bits, from all modules.
Limited { with: Status, without: Status },
Limited {
with: ExtendedStatus,
without: ExtendedStatus,
},
// Print all bits from all modules.
All,
}
Expand All @@ -231,14 +235,14 @@ enum StatusKind {
struct StatusFlags {
/// Find modules with the provided flags set.
#[arg(short, value_parser = parse_status)]
with: Option<Status>,
with: Option<ExtendedStatus>,
/// Find modules without the provided flags set.
#[arg(short, value_parser = parse_status)]
without: Option<Status>,
without: Option<ExtendedStatus>,
}

fn parse_status(s: &str) -> Result<Status, String> {
s.parse::<Status>().map_err(|e| e.to_string())
fn parse_status(s: &str) -> Result<ExtendedStatus, String> {
s.parse::<ExtendedStatus>().map_err(|e| e.to_string())
}

#[derive(Subcommand)]
Expand All @@ -255,9 +259,9 @@ enum Cmd {
/// shell interpreting that as a shell-pipeline, the bits should be
/// quoted -- for example: "PRESENT | RESET".
#[arg(long, value_parser = parse_status, conflicts_with = "all")]
with: Option<Status>,
with: Option<ExtendedStatus>,
#[arg(long, value_parser = parse_status, conflicts_with = "all")]
without: Option<Status>,
without: Option<ExtendedStatus>,

/// Print all bits from all modules.
///
Expand Down Expand Up @@ -471,6 +475,13 @@ enum Cmd {
/// power supply to be enabled again.
ClearPowerFault,

/// Clears the "disabled" latch on a set of modules
///
/// The SP may make a policy decision to disable modules (e.g. if they
/// aren't reporting temperatures to the thermal loop). Clearing the latch
/// allows them to be powered on again.
ClearDisableLatch,

/// Return the state of the addressed modules' attention LEDs.
Leds,

Expand Down Expand Up @@ -600,8 +611,8 @@ async fn main() -> anyhow::Result<()> {
(None, None, false) => StatusKind::Normal,
(None, None, true) => StatusKind::All,
(maybe_with, maybe_without, false) => {
let with = maybe_with.unwrap_or_else(Status::empty);
let without = maybe_without.unwrap_or_else(Status::empty);
let with = maybe_with.unwrap_or_else(ExtendedStatus::empty);
let without = maybe_without.unwrap_or_else(ExtendedStatus::empty);
if with.is_empty() && without.is_empty() {
eprintln!(
"If specified, one of `--with` and `--without` \
Expand All @@ -612,10 +623,25 @@ async fn main() -> anyhow::Result<()> {
}
_ => unreachable!("clap didn't do its job"),
};
let status_result = controller
.status(modules)
.await
.context("Failed to retrieve module status")?;
let status_result = match controller.extended_status(modules).await {
Ok(v) => v,
Err(err) => {
slog::warn!(
&log,
"could not read extended status ({err}); reading status instead",
);
let r = controller
.status(modules)
.await
.context("Failed to retrieve module status")?;
ExtendedStatusResult {
modules: r.modules,
data: r.data.into_iter().map(Into::into).collect(),
failures: r.failures,
}
}
};

print_module_status(&status_result, kind);
if !args.ignore_errors {
print_failures(&status_result.failures);
Expand Down Expand Up @@ -886,6 +912,15 @@ async fn main() -> anyhow::Result<()> {
print_failures(&ack_result.failures);
}
}
Cmd::ClearDisableLatch => {
let ack_result = controller
.clear_disable_latch(modules)
.await
.context("Failed to clear disable latch for modules")?;
if !args.ignore_errors {
print_failures(&ack_result.failures);
}
}
Cmd::Leds => {
let state_result = controller
.leds(modules)
Expand Down Expand Up @@ -1013,7 +1048,7 @@ fn print_power_mode(mode_result: &PowerModeResult) {
}
}

fn print_module_status(status_result: &StatusResult, kind: StatusKind) {
fn print_module_status(status_result: &ExtendedStatusResult, kind: StatusKind) {
match kind {
StatusKind::Normal => {
println!("Port Status");
Expand Down Expand Up @@ -1047,27 +1082,28 @@ fn print_module_status(status_result: &StatusResult, kind: StatusKind) {
// useful to see how we print the table header itself.
#[rustfmt::skip]
fn print_all_status_header() {
println!(" +--------------------------------- Port");
println!(" | +----------------------------- {}", Status::PRESENT);
println!(" | | +------------------------- {}", Status::ENABLED);
println!(" | | | +--------------------- {}", Status::RESET);
println!(" | | | | +----------------- {}", Status::LOW_POWER_MODE);
println!(" | | | | | +------------- {}", Status::INTERRUPT);
println!(" | | | | | | +--------- {}", Status::POWER_GOOD);
println!(" | | | | | | | +----- {}", Status::FAULT_POWER_TIMEOUT);
println!(" | | | | | | | | +- {}", Status::FAULT_POWER_LOST);
println!(" v v v v v v v v v");
println!(" +------------------------------------- Port");
println!(" | +--------------------------------- {}", ExtendedStatus::PRESENT);
println!(" | | +----------------------------- {}", ExtendedStatus::ENABLED);
println!(" | | | +------------------------- {}", ExtendedStatus::RESET);
println!(" | | | | +--------------------- {}", ExtendedStatus::LOW_POWER_MODE);
println!(" | | | | | +----------------- {}", ExtendedStatus::INTERRUPT);
println!(" | | | | | | +------------- {}", ExtendedStatus::POWER_GOOD);
println!(" | | | | | | | +--------- {}", ExtendedStatus::FAULT_POWER_TIMEOUT);
println!(" | | | | | | | | +----- {}", ExtendedStatus::FAULT_POWER_LOST);
println!(" | | | | | | | | | +- {}", ExtendedStatus::DISABLED_BY_SP);
println!(" v v v v v v v v v v");
}

fn print_all_status(status_result: &StatusResult) {
fn print_all_status(status_result: &ExtendedStatusResult) {
print_all_status_header();
for (port, status) in status_result
.modules
.to_indices()
.zip(status_result.status().iter())
{
print!("{port:>2} ");
for bit in Status::all().iter() {
for bit in ExtendedStatus::all().iter() {
let word = if status.contains(bit) { "1" } else { "0" };
print!("{word:<WIDTH$}");
}
Expand Down
43 changes: 43 additions & 0 deletions controller/src/controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,14 @@ impl Controller {
.await
}

/// Clears the `disabled` flag for a set of transceiver modules
///
/// This allows them to be powered on again
pub async fn clear_disable_latch(&self, modules: ModuleId) -> Result<AckResult, Error> {
self.no_payload_request(HostRequest::ClearDisableLatch(modules))
.await
}

/// Assert physical lpmode pin for a set of transceiver modules. Note: The
/// effect this pin has on operation can change depending on if the software
/// override of power control is set.
Expand Down Expand Up @@ -950,6 +958,41 @@ impl Controller {
}
}

/// Report the status of a set of transceiver modules.
pub async fn extended_status(&self, modules: ModuleId) -> Result<ExtendedStatusResult, Error> {
let message = Message::from(HostRequest::ExtendedStatus(modules));
let request = HostRpcRequest {
header: self.next_header(MessageKind::HostRequest),
message,
data: None,
};
let response = self.rpc(request).await?;
match response.message.body {
MessageBody::SpResponse(
st @ SpResponse::ExtendedStatus {
modules,
failed_modules,
},
) => {
let data = response.data.expect("Existence of data checked earlier");
let (mut data, error_data) = data.split_at(st.expected_data_len().unwrap());
let mut status = vec![];
for _ in 0..modules.selected_transceiver_count() {
let (d, rest) = hubpack::deserialize(data).expect("data size checked earlier");
status.push(d);
data = rest;
}
let failures = Self::deserialize_hw_errors(failed_modules, error_data)?;
Ok(ExtendedStatusResult {
modules,
data: status,
failures,
})
}
other => Err(Error::UnexpectedMessage(other)),
}
}

/// Write the memory map of a set of transceiver modules.
///
/// `write` contains a description of which memory region to write to,
Expand Down
11 changes: 11 additions & 0 deletions controller/src/results.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use transceiver_decode::Monitors;
use transceiver_decode::PowerControl;
use transceiver_decode::VendorInfo;
use transceiver_messages::merge_module_data;
use transceiver_messages::message::ExtendedStatus;
use transceiver_messages::message::LedState;
use transceiver_messages::message::Status;
use transceiver_messages::remove_module_data;
Expand Down Expand Up @@ -111,6 +112,16 @@ impl StatusResult {
}
}

/// The result of accessing the extended status of a set of transceivers.
pub type ExtendedStatusResult = ModuleResult<ExtendedStatus>;

impl ExtendedStatusResult {
/// Return the status read from the modules.
pub fn status(&self) -> &[ExtendedStatus] {
&self.data
}
}

/// The result of reading the SFF-8024 identifiers of a set of transceivers.
pub type IdentifierResult = ModuleResult<Identifier>;

Expand Down
9 changes: 6 additions & 3 deletions dtrace/trace-packets.d
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

#define MESSAGE_KIND_SP_RESPONSE 2
#define SP_RESPONSE_STATUS 2
#define SP_RESPONSE_EXTENDED_STATUS 6
#define SP_RESPONSE_ACK 3

xcvr-ctl$target:::packet-sent
Expand Down Expand Up @@ -85,14 +86,16 @@ xcvr-ctl$target:::packet-received
message_body_variant = buf[MESSAGE_BODY_VARIANT_OFFSET];
if (message_body_variant == MESSAGE_KIND_SP_RESPONSE) {
sp_response_variant = buf[SP_RESPONSE_VARIANT_OFFSET];
if ((sp_response_variant == SP_RESPONSE_STATUS) || (sp_response_variant == SP_RESPONSE_ACK))
{
if ((sp_response_variant == SP_RESPONSE_STATUS) ||
(sp_response_variant == SP_RESPONSE_EXTENDED_STATUS) ||
(sp_response_variant == SP_RESPONSE_ACK))
{
printf(" module IDs:\n");
tracemem(buf + MODULE_ID_OFFSET, 16);
n_octets = n_bytes - STATUS_DATA_OFFSET;
data = (buf + STATUS_DATA_OFFSET);
printf(" data (up to %d octets): ", n_octets);
tracemem(data, 64, n_octets);
tracemem(data, 128, n_octets);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion messages/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "transceiver-messages"
version = "0.1.0"
version = "0.1.1"
edition = "2021"

[dependencies]
Expand Down
Loading

0 comments on commit 84e28d1

Please sign in to comment.