Skip to content

Commit

Permalink
Merge branch 'mlxsw-new-reset-flow'
Browse files Browse the repository at this point in the history
Petr Machata says:

====================
mlxsw: Add support for new reset flow

Ido Schimmel writes:

This patchset changes mlxsw to issue a PCI reset during probe and
devlink reload so that the PCI firmware could be upgraded without a
reboot.

Unlike the old version of this patchset [1], in this version the driver
no longer tries to issue a PCI reset by triggering a PCI link toggle on
its own, but instead calls the PCI core to issue the reset.

The PCI APIs require the device lock to be held which is why patches

Patches #7 adds reset method quirk for NVIDIA Spectrum devices.

Patch #8 adds a debug level print in PCI core so that device ready delay
will be printed even if it is shorter than one second.

Patches #9-#11 are straightforward preparations in mlxsw.

Patch #12 finally implements the new reset flow in mlxsw.

Patch #13 adds PCI reset handlers in mlxsw to avoid user space from
resetting the device from underneath an unaware driver. Instead, the
driver is gracefully de-initialized before the PCI reset and then
initialized again after it.

Patch #14 adds a PCI reset selftest to make sure this code path does not
regress.

[1] https://lore.kernel.org/netdev/cover.1679502371.git.petrm@nvidia.com/
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Nov 18, 2023
2 parents 4dce97b + af51d6b commit 72a813a
Show file tree
Hide file tree
Showing 14 changed files with 247 additions and 30 deletions.
4 changes: 2 additions & 2 deletions Documentation/netlink/specs/devlink.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1484,8 +1484,8 @@ operations:
dont-validate: [ strict ]
flags: [ admin-perm ]
do:
pre: devlink-nl-pre-doit
post: devlink-nl-post-doit
pre: devlink-nl-pre-doit-dev-lock
post: devlink-nl-post-doit-dev-lock
request:
attributes:
- bus-name
Expand Down
90 changes: 83 additions & 7 deletions drivers/net/ethernet/mellanox/mlxsw/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct mlxsw_pci {
const struct pci_device_id *id;
enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
u8 num_sdq_cqs; /* Number of CQs used for SDQs */
bool skip_reset;
};

static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
Expand Down Expand Up @@ -1476,11 +1477,47 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
return -EBUSY;
}

static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
const struct pci_device_id *id)
static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
char mrsr_pl[MLXSW_REG_MRSR_LEN];
int err;

mlxsw_reg_mrsr_pack(mrsr_pl,
MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
if (err)
return err;

device_lock_assert(&pdev->dev);

pci_cfg_access_lock(pdev);
pci_save_state(pdev);

err = __pci_reset_function_locked(pdev);
if (err)
pci_err(pdev, "PCI function reset failed with %d\n", err);

pci_restore_state(pdev);
pci_cfg_access_unlock(pdev);

return err;
}

static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci)
{
char mrsr_pl[MLXSW_REG_MRSR_LEN];

mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET);
return mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
}

static int
mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
char mcam_pl[MLXSW_REG_MCAM_LEN];
bool pci_reset_supported;
u32 sys_status;
int err;

Expand All @@ -1491,11 +1528,27 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
return err;
}

mlxsw_reg_mrsr_pack(mrsr_pl);
err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
/* PCI core already issued a PCI reset, do not issue another reset. */
if (mlxsw_pci->skip_reset)
return 0;

mlxsw_reg_mcam_pack(mcam_pl,
MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
if (err)
return err;

mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
&pci_reset_supported);

if (pci_reset_supported) {
pci_dbg(pdev, "Starting PCI reset flow\n");
err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
} else {
pci_dbg(pdev, "Starting software reset flow\n");
err = mlxsw_pci_reset_sw(mlxsw_pci);
}

err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
if (err) {
dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
Expand Down Expand Up @@ -1537,9 +1590,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (!mbox)
return -ENOMEM;

err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id);
err = mlxsw_pci_reset(mlxsw_pci, mlxsw_pci->id);
if (err)
goto err_sw_reset;
goto err_reset;

err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
if (err < 0) {
Expand Down Expand Up @@ -1672,7 +1725,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
err_query_fw:
mlxsw_pci_free_irq_vectors(mlxsw_pci);
err_alloc_irq:
err_sw_reset:
err_reset:
mbox_put:
mlxsw_cmd_mbox_free(mbox);
return err;
Expand Down Expand Up @@ -2059,11 +2112,34 @@ static void mlxsw_pci_remove(struct pci_dev *pdev)
kfree(mlxsw_pci);
}

static void mlxsw_pci_reset_prepare(struct pci_dev *pdev)
{
struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);

mlxsw_core_bus_device_unregister(mlxsw_pci->core, false);
}

static void mlxsw_pci_reset_done(struct pci_dev *pdev)
{
struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);

mlxsw_pci->skip_reset = true;
mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus,
mlxsw_pci, false, NULL, NULL);
mlxsw_pci->skip_reset = false;
}

static const struct pci_error_handlers mlxsw_pci_err_handler = {
.reset_prepare = mlxsw_pci_reset_prepare,
.reset_done = mlxsw_pci_reset_done,
};

int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
{
pci_driver->probe = mlxsw_pci_probe;
pci_driver->remove = mlxsw_pci_remove;
pci_driver->shutdown = mlxsw_pci_remove;
pci_driver->err_handler = &mlxsw_pci_err_handler;
return pci_register_driver(pci_driver);
}
EXPORT_SYMBOL(mlxsw_pci_driver_register);
Expand Down
16 changes: 14 additions & 2 deletions drivers/net/ethernet/mellanox/mlxsw/reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -10122,6 +10122,15 @@ mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid,

MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);

enum mlxsw_reg_mrsr_command {
/* Switch soft reset, does not reset PCI firmware. */
MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET = 1,
/* Reset will be done when PCI link will be disabled.
* This command will reset PCI firmware also.
*/
MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE = 6,
};

/* reg_mrsr_command
* Reset/shutdown command
* 0 - do nothing
Expand All @@ -10130,10 +10139,11 @@ MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);
*/
MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4);

static inline void mlxsw_reg_mrsr_pack(char *payload)
static inline void mlxsw_reg_mrsr_pack(char *payload,
enum mlxsw_reg_mrsr_command command)
{
MLXSW_REG_ZERO(mrsr, payload);
mlxsw_reg_mrsr_command_set(payload, 1);
mlxsw_reg_mrsr_command_set(payload, command);
}

/* MLCR - Management LED Control Register
Expand Down Expand Up @@ -10584,6 +10594,8 @@ MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8);
enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
/* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */
MLXSW_REG_MCAM_MCIA_128B = 34,
/* If set, MRSR.command=6 is supported. */
MLXSW_REG_MCAM_PCI_RESET = 48,
};

#define MLXSW_REG_BYTES_PER_DWORD 0x4
Expand Down
3 changes: 3 additions & 0 deletions drivers/pci/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,9 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
if (delay > PCI_RESET_WAIT)
pci_info(dev, "ready %dms after %s\n", delay - 1,
reset_type);
else
pci_dbg(dev, "ready %dms after %s\n", delay - 1,
reset_type);

return 0;
}
Expand Down
13 changes: 13 additions & 0 deletions drivers/pci/quirks.c
Original file line number Diff line number Diff line change
Expand Up @@ -3786,6 +3786,19 @@ static void quirk_no_pm_reset(struct pci_dev *dev)
DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset);

/*
* Spectrum-{1,2,3,4} devices report that a D3hot->D0 transition causes a reset
* (i.e., they advertise NoSoftRst-). However, this transition does not have
* any effect on the device: It continues to be operational and network ports
* remain up. Advertising this support makes it seem as if a PM reset is viable
* for these devices. Mark it as unavailable to skip it when testing reset
* methods.
*/
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcb84, quirk_no_pm_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf6c, quirk_no_pm_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf70, quirk_no_pm_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf80, quirk_no_pm_reset);

/*
* Thunderbolt controllers with broken MSI hotplug signaling:
* Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part
Expand Down
4 changes: 2 additions & 2 deletions net/devlink/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -503,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
* all devlink instances from this namespace into init_net.
*/
devlinks_xa_for_each_registered_get(net, index, devlink) {
devl_lock(devlink);
devl_dev_lock(devlink, true);
err = 0;
if (devl_is_registered(devlink))
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
&actions_performed, NULL);
devl_unlock(devlink);
devl_dev_unlock(devlink, true);
devlink_put(devlink);
if (err && err != -EOPNOTSUPP)
pr_warn("Failed to reload devlink instance into init_net\n");
Expand Down
8 changes: 8 additions & 0 deletions net/devlink/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/

#include <linux/device.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include "devl_internal.h"
Expand Down Expand Up @@ -433,6 +434,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
struct net *curr_net;
int err;

/* Make sure the reload operations are invoked with the device lock
* held to allow drivers to trigger functionality that expects it
* (e.g., PCI reset) and to close possible races between these
* operations and probe/remove.
*/
device_lock_assert(devlink->dev);

memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));

Expand Down
21 changes: 17 additions & 4 deletions net/devlink/devl_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/

#include <linux/device.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
Expand Down Expand Up @@ -96,6 +97,20 @@ static inline bool devl_is_registered(struct devlink *devlink)
return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
}

static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock)
{
if (dev_lock)
device_lock(devlink->dev);
devl_lock(devlink);
}

static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
{
devl_unlock(devlink);
if (dev_lock)
device_unlock(devlink->dev);
}

typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
u32 rel_index);
Expand All @@ -111,9 +126,6 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
bool *msg_updated);

/* Netlink */
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)

enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
};
Expand All @@ -140,7 +152,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
int flags);

struct devlink *
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
bool dev_lock);

int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
devlink_nl_dump_one_func_t *dump_one);
Expand Down
3 changes: 2 additions & 1 deletion net/devlink/health.c
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb)
struct nlattr **attrs = info->attrs;
struct devlink *devlink;

devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
false);
if (IS_ERR(devlink))
return NULL;

Expand Down
Loading

0 comments on commit 72a813a

Please sign in to comment.