Skip to content

Commit

Permalink
Merge branch 'mlxsw-Refactor-headroom-management'
Browse files Browse the repository at this point in the history
Ido Schimmel says:

====================
mlxsw: Refactor headroom management

Petr says:

On Spectrum, port buffers, also called port headroom, is where packets are
stored while they are parsed and the forwarding decision is being made. For
lossless traffic flows, in case shared buffer admission is not allowed,
headroom is also where to put the extra traffic received before the sent
PAUSE takes effect. Another aspect of the port headroom is the so called
internal buffer, which is used for egress mirroring.

Linux supports two DCB interfaces related to the headroom: dcbnl_setbuffer
for configuration, and dcbnl_getbuffer for inspection. In order to make it
possible to implement these interfaces, it is first necessary to clean up
headroom handling, which is currently strewn in several places in the
driver.

The end goal is an architecture whereby it is possible to take a copy of
the current configuration, adjust parameters, and then hand the proposed
configuration over to the system to implement it. When everything works,
the proposed configuration is accepted and saved. First, this centralizes
the reconfiguration handling to one function, which takes care of
coordinating buffer size changes and priority map changes to avoid
introducing drops. Second, the fact that the configuration is all in one
place makes it easy to keep a backup and handle error path rollbacks, which
were previously hard to understand.

Patch #1 introduces struct mlxsw_sp_hdroom, which will keep port headroom
configuration.

Patch #2 unifies handling of delay provision between PFC and PAUSE. From
now on, delay is to be measured in bytes of extra space, and will not
include MTU. PFC handler sets the delay directly from the parameter it gets
through the DCB interface. For PAUSE, MLXSW_SP_PAUSE_DELAY is converted to
have the same meaning.

In patches #3-#5, MTU, lossiness and priorities are gradually moved over to
struct mlxsw_sp_hdroom.

In patches #6-#11, handling of buffer resizing and priority maps is moved
from spectrum.c and spectrum_dcb.c to spectrum_buffers.c. The API is
gradually adapted so that struct mlxsw_sp_hdroom becomes the main interface
through which the various clients express how the headroom should be
configured.

Patch #12 is a small cleanup that the previous transformation made
possible.

In patch #13, the port init code becomes a boring client of the headroom
code, instead of rolling its own thing.

Patches #14 and #15 move handling of internal mirroring buffer to the new
headroom code as well. Previously, this code was in the SPAN module. This
patchset converts the SPAN module to another boring client of the headroom
code.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Sep 16, 2020
2 parents 045e42f + 22881ad commit 18e9a40
Show file tree
Hide file tree
Showing 7 changed files with 470 additions and 295 deletions.
136 changes: 18 additions & 118 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum.c
Original file line number Diff line number Diff line change
Expand Up @@ -610,138 +610,33 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p)
return 0;
}

static u16 mlxsw_sp_pg_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp,
int mtu)
{
return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu);
}

#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */

static u16 mlxsw_sp_pfc_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu,
u16 delay)
{
delay = mlxsw_sp_bytes_cells(mlxsw_sp, DIV_ROUND_UP(delay,
BITS_PER_BYTE));
return MLXSW_SP_CELL_FACTOR * delay + mlxsw_sp_bytes_cells(mlxsw_sp,
mtu);
}

/* Maximum delay buffer needed in case of PAUSE frames, in bytes.
* Assumes 100m cable and maximum MTU.
*/
#define MLXSW_SP_PAUSE_DELAY 58752

static u16 mlxsw_sp_pg_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu,
u16 delay, bool pfc, bool pause)
{
if (pfc)
return mlxsw_sp_pfc_delay_get(mlxsw_sp, mtu, delay);
else if (pause)
return mlxsw_sp_bytes_cells(mlxsw_sp, MLXSW_SP_PAUSE_DELAY);
else
return 0;
}

static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres,
bool lossy)
static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu)
{
if (lossy)
mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size);
else
mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size,
thres);
}
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp_hdroom orig_hdroom;
struct mlxsw_sp_hdroom hdroom;
int err;

int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
u8 *prio_tc, bool pause_en,
struct ieee_pfc *my_pfc)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0;
u16 delay = !!my_pfc ? my_pfc->delay : 0;
char pbmc_pl[MLXSW_REG_PBMC_LEN];
u32 taken_headroom_cells = 0;
u32 max_headroom_cells;
int i, j, err;
orig_hdroom = *mlxsw_sp_port->hdroom;

max_headroom_cells = mlxsw_sp_sb_max_headroom_cells(mlxsw_sp);
hdroom = orig_hdroom;
hdroom.mtu = mtu;
mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom);

mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
if (err)
err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom);
if (err) {
netdev_err(dev, "Failed to configure port's headroom\n");
return err;

for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
bool configure = false;
bool pfc = false;
u16 thres_cells;
u16 delay_cells;
u16 total_cells;
bool lossy;

for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) {
if (prio_tc[j] == i) {
pfc = pfc_en & BIT(j);
configure = true;
break;
}
}

if (!configure)
continue;

lossy = !(pfc || pause_en);
thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu);
thres_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, thres_cells);
delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay,
pfc, pause_en);
delay_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, delay_cells);
total_cells = thres_cells + delay_cells;

taken_headroom_cells += total_cells;
if (taken_headroom_cells > max_headroom_cells)
return -ENOBUFS;

mlxsw_sp_pg_buf_pack(pbmc_pl, i, total_cells,
thres_cells, lossy);
}

return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
}

int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port,
int mtu, bool pause_en)
{
u8 def_prio_tc[IEEE_8021QAZ_MAX_TCS] = {0};
bool dcb_en = !!mlxsw_sp_port->dcb.ets;
struct ieee_pfc *my_pfc;
u8 *prio_tc;

prio_tc = dcb_en ? mlxsw_sp_port->dcb.ets->prio_tc : def_prio_tc;
my_pfc = dcb_en ? mlxsw_sp_port->dcb.pfc : NULL;

return __mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, prio_tc,
pause_en, my_pfc);
}

static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
int err;

err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en);
if (err)
return err;
err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu);
if (err)
goto err_port_mtu_set;
dev->mtu = mtu;
return 0;

err_port_mtu_set:
mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en);
mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom);
return err;
}

Expand Down Expand Up @@ -1709,6 +1604,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false);
err_port_tc_mc_mode:
err_port_ets_init:
mlxsw_sp_port_buffers_fini(mlxsw_sp_port);
err_port_buffers_init:
err_port_admin_status_set:
err_port_mtu_set:
Expand Down Expand Up @@ -1745,6 +1641,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
mlxsw_sp_port_fids_fini(mlxsw_sp_port);
mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false);
mlxsw_sp_port_buffers_fini(mlxsw_sp_port);
mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
mlxsw_sp_port_module_unmap(mlxsw_sp_port);
free_percpu(mlxsw_sp_port->pcpu_stats);
Expand Down Expand Up @@ -2800,6 +2697,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask;
mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals;
mlxsw_sp->sb_ops = &mlxsw_sp1_sb_ops;
mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops;
mlxsw_sp->span_ops = &mlxsw_sp1_span_ops;
Expand Down Expand Up @@ -2828,6 +2726,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
mlxsw_sp->sb_ops = &mlxsw_sp2_sb_ops;
mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
mlxsw_sp->span_ops = &mlxsw_sp2_span_ops;
Expand All @@ -2854,6 +2753,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops;
mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
mlxsw_sp->span_ops = &mlxsw_sp3_span_ops;
Expand Down
71 changes: 54 additions & 17 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ struct mlxsw_sp_mr_tcam_ops;
struct mlxsw_sp_acl_rulei_ops;
struct mlxsw_sp_acl_tcam_ops;
struct mlxsw_sp_nve_ops;
struct mlxsw_sp_sb_ops;
struct mlxsw_sp_sb_vals;
struct mlxsw_sp_port_type_speed_ops;
struct mlxsw_sp_ptp_state;
Expand Down Expand Up @@ -171,6 +172,7 @@ struct mlxsw_sp {
const struct mlxsw_sp_nve_ops **nve_ops_arr;
const struct mlxsw_sp_rif_ops **rif_ops_arr;
const struct mlxsw_sp_sb_vals *sb_vals;
const struct mlxsw_sp_sb_ops *sb_ops;
const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
const struct mlxsw_sp_ptp_ops *ptp_ops;
const struct mlxsw_sp_span_ops *span_ops;
Expand Down Expand Up @@ -316,6 +318,7 @@ struct mlxsw_sp_port {
u8 split_base_local_port;
int max_mtu;
u32 max_speed;
struct mlxsw_sp_hdroom *hdroom;
};

struct mlxsw_sp_port_type_speed_ops {
Expand Down Expand Up @@ -412,34 +415,62 @@ mlxsw_sp_port_vlan_find_by_vid(const struct mlxsw_sp_port *mlxsw_sp_port,
return NULL;
}

static inline u32
mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port,
u32 size_cells)
{
/* Ports with eight lanes use two headroom buffers between which the
* configured headroom size is split. Therefore, multiply the calculated
* headroom size by two.
*/
return mlxsw_sp_port->mapping.width == 8 ? 2 * size_cells : size_cells;
}

enum mlxsw_sp_flood_type {
MLXSW_SP_FLOOD_TYPE_UC,
MLXSW_SP_FLOOD_TYPE_BC,
MLXSW_SP_FLOOD_TYPE_MC,
};

int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port,
int mtu, bool pause_en);
int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp,
int prio, char *ppcnt_pl);
int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port,
bool is_up);

/* spectrum_buffers.c */
struct mlxsw_sp_hdroom_prio {
/* Number of port buffer associated with this priority. This is the
* actually configured value.
*/
u8 buf_idx;
/* Value of buf_idx deduced from the DCB ETS configuration. */
u8 ets_buf_idx;
bool lossy;
};

struct mlxsw_sp_hdroom_buf {
u32 thres_cells;
u32 size_cells;
bool lossy;
};

#define MLXSW_SP_PB_COUNT 10

struct mlxsw_sp_hdroom {
struct {
struct mlxsw_sp_hdroom_prio prio[IEEE_8021Q_MAX_PRIORITIES];
} prios;
struct {
struct mlxsw_sp_hdroom_buf buf[MLXSW_SP_PB_COUNT];
} bufs;
struct {
/* Size actually configured for the internal buffer. Equal to
* reserve when internal buffer is enabled.
*/
u32 size_cells;
/* Space reserved in the headroom for the internal buffer. Port
* buffers are not allowed to grow into this space.
*/
u32 reserve_cells;
bool enable;
} int_buf;
int delay_bytes;
int mtu;
};

int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port);
void mlxsw_sp_port_buffers_fini(struct mlxsw_sp_port *mlxsw_sp_port);
int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index,
struct devlink_sb_pool_info *pool_info);
Expand Down Expand Up @@ -475,11 +506,20 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port,
u32 *p_cur, u32 *p_max);
u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells);
u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes);
u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_hdroom_prios_reset_buf_idx(struct mlxsw_sp_hdroom *hdroom);
void mlxsw_sp_hdroom_bufs_reset_lossiness(struct mlxsw_sp_hdroom *hdroom);
void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_hdroom *hdroom);
int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port,
const struct mlxsw_sp_hdroom *hdroom);

extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals;
extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals;

extern const struct mlxsw_sp_sb_ops mlxsw_sp1_sb_ops;
extern const struct mlxsw_sp_sb_ops mlxsw_sp2_sb_ops;
extern const struct mlxsw_sp_sb_ops mlxsw_sp3_sb_ops;

/* spectrum_switchdev.c */
int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp);
Expand Down Expand Up @@ -517,9 +557,6 @@ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
bool dwrr, u8 dwrr_weight);
int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
u8 switch_prio, u8 tclass);
int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
u8 *prio_tc, bool pause_en,
struct ieee_pfc *my_pfc);
int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index, u32 maxrate, u8 burst_size);
Expand Down
Loading

0 comments on commit 18e9a40

Please sign in to comment.