Skip to content

Commit

Permalink
net/mlx5: Add command failures data to debugfs
Browse files Browse the repository at this point in the history
Add new counters to command interface debugfs to count command failures.
The following counters are added:
failed - total number of times the command failed (any kind of failure).
failed_mbox_status - number of times command failed on bad status
returned by FW.

In addition, add data about last command failure to command interface
debugfs:
last_failed_errno - errno returned by the last failed command.
last_failed_mbox_status - last bad status returned by FW.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
  • Loading branch information
mosheshemesh2 authored and Saeed Mahameed committed Mar 9, 2022
1 parent 371c2b3 commit 34f46ae
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 9 deletions.
44 changes: 35 additions & 9 deletions drivers/net/ethernet/mellanox/mlx5/core/cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1877,16 +1877,38 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
return err;
}

/*
 * cmd_status_log - record a failed command in the per-opcode statistics.
 * @dev:    device whose dev->cmd.stats[] entry is updated
 * @opcode: command opcode; used directly as the index into dev->cmd.stats[]
 * @status: status value taken from the command's output mailbox
 * @err:    final result of the command; 0 means success and nothing is logged
 *
 * On any failure the 'failed' counter is bumped.  A negative @err is stored
 * (as a positive errno) in last_failed_errno.  When @err is -EREMOTEIO the
 * failure is additionally counted in failed_mbox_status and @status is saved
 * as last_failed_mbox_status.
 *
 * The stats lock is taken with irqsave/irqrestore: this helper is also
 * reached from the asynchronous command completion handler, so it must not
 * unconditionally re-enable interrupts on unlock the way spin_unlock_irq()
 * does.  NOTE(review): confirm the exact context of the async completion
 * path; irqsave is correct for every context.
 */
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int err)
{
	struct mlx5_cmd_stats *stats;
	unsigned long flags;

	if (!err)
		return;

	/*
	 * NOTE(review): opcode is not range-checked before indexing
	 * dev->cmd.stats[] — confirm callers guarantee it is within bounds.
	 */
	stats = &dev->cmd.stats[opcode];
	spin_lock_irqsave(&stats->lock, flags);
	stats->failed++;
	if (err < 0)
		stats->last_failed_errno = -err;
	if (err == -EREMOTEIO) {
		/* -EREMOTEIO marks a bad status reported in the mailbox */
		stats->failed_mbox_status++;
		stats->last_failed_mbox_status = status;
	}
	spin_unlock_irqrestore(&stats->lock, flags);
}

/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
static int cmd_status_err(int err, void *out)
static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
{
if (err) /* -EREMOTEIO is preserved */
return err == -EREMOTEIO ? -EIO : err;
u8 status = MLX5_GET(mbox_out, out, status);

if (MLX5_GET(mbox_out, out, status) != MLX5_CMD_STAT_OK)
return -EREMOTEIO;
if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
err = -EIO;

return 0;
if (!err && status != MLX5_CMD_STAT_OK)
err = -EREMOTEIO;

cmd_status_log(dev, opcode, status, err);
return err;
}

/**
Expand All @@ -1910,8 +1932,10 @@ static int cmd_status_err(int err, void *out)
int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size)
{
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
u16 opcode = MLX5_GET(mbox_in, in, opcode);

return cmd_status_err(err, out);
err = cmd_status_err(dev, err, opcode, out);
return err;
}
EXPORT_SYMBOL(mlx5_cmd_do);

Expand Down Expand Up @@ -1954,8 +1978,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
void *out, int out_size)
{
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
u16 opcode = MLX5_GET(mbox_in, in, opcode);

err = cmd_status_err(err, out);
err = cmd_status_err(dev, err, opcode, out);
return mlx5_cmd_check(dev, err, in, out);
}
EXPORT_SYMBOL(mlx5_cmd_exec_polling);
Expand Down Expand Up @@ -1991,7 +2016,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
struct mlx5_async_ctx *ctx;

ctx = work->ctx;
status = cmd_status_err(status, work->out);
status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
work->user_callback(status, work);
if (atomic_dec_and_test(&ctx->num_inflight))
wake_up(&ctx->wait);
Expand All @@ -2005,6 +2030,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,

work->ctx = ctx;
work->user_callback = callback;
work->opcode = MLX5_GET(mbox_in, in, opcode);
work->out = out;
if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
return -EIO;
Expand Down
7 changes: 7 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,13 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
debugfs_create_file("average", 0400, stats->root, stats,
&stats_fops);
debugfs_create_u64("n", 0400, stats->root, &stats->n);
debugfs_create_u64("failed", 0400, stats->root, &stats->failed);
debugfs_create_u64("failed_mbox_status", 0400, stats->root,
&stats->failed_mbox_status);
debugfs_create_u32("last_failed_errno", 0400, stats->root,
&stats->last_failed_errno);
debugfs_create_u8("last_failed_mbox_status", 0400, stats->root,
&stats->last_failed_mbox_status);
}
}
}
Expand Down
9 changes: 9 additions & 0 deletions include/linux/mlx5/driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,14 @@ enum {
struct mlx5_cmd_stats {
u64 sum;
u64 n;
/* number of times command failed */
u64 failed;
/* number of times command failed on bad status returned by FW */
u64 failed_mbox_status;
/* last command failed returned errno */
u32 last_failed_errno;
/* last bad status returned by FW */
u8 last_failed_mbox_status;
struct dentry *root;
/* protect command average calculations */
spinlock_t lock;
Expand Down Expand Up @@ -955,6 +963,7 @@ typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context);
/*
 * Per-request state for a command issued through mlx5_cmd_exec_cb().
 * The completion handler reads it back to post-process the command status
 * and to invoke the caller's callback.
 */
struct mlx5_async_work {
/* async context the request was issued on */
struct mlx5_async_ctx *ctx;
/* caller's completion callback; called with the final status and this work item */
mlx5_async_cbk_t user_callback;
u16 opcode; /* cmd opcode, read from the input mailbox at submit time; used when logging failures */
void *out; /* pointer to the cmd output buffer; its status field is checked on completion */
};

Expand Down

0 comments on commit 34f46ae

Please sign in to comment.