Skip to content

Commit

Permalink
Add support for DPU driven DPU scope HA attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
mukeshmv committed Jun 21, 2024
1 parent f4c0735 commit 5d7a167
Show file tree
Hide file tree
Showing 7 changed files with 232 additions and 9 deletions.
13 changes: 13 additions & 0 deletions dash-pipeline/SAI/specs/dash_eni.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,19 @@ sai_apis:
valid_only: null
is_vlan: false
deprecated: false
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_ENI_ATTR_IS_HA_FLOW_OWNER
description: Action parameter is HA flow owner
type: bool
attr_value_field: booldata
default: 'false'
isresourcetype: false
flags: CREATE_AND_SET
object_name: null
allow_null: false
valid_only: null
is_vlan: false
deprecated: false
stats:
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_ENI_STAT_RX_BYTES
Expand Down
78 changes: 78 additions & 0 deletions dash-pipeline/SAI/specs/dash_ha.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,19 @@ sai_apis:
valid_only: null
is_vlan: false
deprecated: false
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_HA_SET_ATTR_SWITCHOVER_NETWORK_CONVERGENCE_TIME_MS
description: Action parameter switchover network convergence time ms
type: sai_uint32_t
attr_value_field: u32
default: '0'
isresourcetype: false
flags: CREATE_AND_SET
object_name: null
allow_null: false
valid_only: null
is_vlan: false
deprecated: false
stats:
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_HA_SET_STAT_DP_PROBE_REQ_RX_BYTES
Expand Down Expand Up @@ -469,6 +482,71 @@ sai_apis:
valid_only: null
is_vlan: false
deprecated: false
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_HA_SCOPE_ATTR_VIP_V4
description: Action parameter VIP v4
type: sai_ip_address_t
attr_value_field: ipaddr
default: 0.0.0.0
isresourcetype: false
flags: CREATE_AND_SET
object_name: null
allow_null: false
valid_only: null
is_vlan: false
deprecated: false
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_HA_SCOPE_ATTR_VIP_V6
description: Action parameter VIP v6
type: sai_ip_address_t
attr_value_field: ipaddr
default: 0.0.0.0
isresourcetype: false
flags: CREATE_AND_SET
object_name: null
allow_null: false
valid_only: null
is_vlan: false
deprecated: false
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_HA_SCOPE_ATTR_ADMIN_STATE
description: Action parameter admin state
type: bool
attr_value_field: booldata
default: 'false'
isresourcetype: false
flags: CREATE_AND_SET
object_name: null
allow_null: false
valid_only: null
is_vlan: false
deprecated: false
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_HA_SCOPE_ATTR_ACTIVATE_ROLE
description: Action parameter activate role
type: bool
attr_value_field: booldata
default: 'false'
isresourcetype: false
flags: CREATE_AND_SET
object_name: null
allow_null: false
valid_only: null
is_vlan: false
deprecated: false
- !!python/object:utils.sai_spec.sai_attribute.SaiAttribute
name: SAI_HA_SCOPE_ATTR_DASH_HA_STATE
description: Action parameter DASH HA state
type: sai_dash_ha_state_t
attr_value_field: s32
default: null
isresourcetype: false
flags: READ_ONLY
object_name: null
allow_null: false
valid_only: null
is_vlan: false
deprecated: false
stats: []
p4_meta: !!python/object:utils.sai_spec.sai_api_p4_meta.SaiApiP4Meta
tables:
Expand Down
56 changes: 56 additions & 0 deletions dash-pipeline/SAI/specs/sai_spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,62 @@ enums:
name: NAT_PORT
description: ''
value: '16'
- !!python/object:utils.sai_spec.sai_enum.SaiEnum
name: sai_dash_ha_state_t
description: ''
members:
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: DEAD
description: ''
value: '0'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: CONNECTING
description: ''
value: '1'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: CONNECTED
description: ''
value: '2'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: INITIALIZING_TO_ACTIVE
description: ''
value: '3'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: INITIALIZING_TO_STANDBY
description: ''
value: '4'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: PENDING_STANDALONE_ACTIVATION
description: ''
value: '5'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: PENDING_ACTIVE_ACTIVATION
description: ''
value: '6'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: PENDING_STANDBY_ACTIVATION
description: ''
value: '7'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: STANDALONE
description: ''
value: '8'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: ACTIVE
description: ''
value: '9'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: STANDBY
description: ''
value: '10'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: DESTROYING
description: ''
value: '11'
- !!python/object:utils.sai_spec.sai_enum_member.SaiEnumMember
name: SWITCHING_TO_STANDALONE
description: ''
value: '12'
port_extenstion: !!python/object:utils.sai_spec.sai_api_extension.SaiApiExtension
attributes: []
stats:
Expand Down
31 changes: 30 additions & 1 deletion dash-pipeline/bmv2/dash_metadata.p4
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,35 @@ enum bit<8> dash_ha_role_t {
SWITCHING_TO_ACTIVE = 4
};

// HA states of the DPU-driven HA state machine.
// Values 0-12 are listed in the same order as the sai_dash_ha_state_t
// members in sai_spec.yaml.
enum bit<8> dash_ha_state_t {
DEAD = 0,
// trying to connect to HA pair
CONNECTING = 1,
// bulk sync in progress
// NOTE(review): this description matches the INITIALIZING_* states below;
// confirm what distinguishes CONNECTED from them.
CONNECTED = 2,
// connection successful, bulk sync in progress
INITIALIZING_TO_ACTIVE = 3,
// connection successful, bulk sync in progress
INITIALIZING_TO_STANDBY = 4,
// ready to be in STANDALONE state, waiting for activation of admin role
PENDING_STANDALONE_ACTIVATION = 5,
// ready to be in ACTIVE state, waiting for activation of admin role
PENDING_ACTIVE_ACTIVATION = 6,
// ready to be in STANDBY state, waiting for activation of admin role
PENDING_STANDBY_ACTIVATION = 7,
// activation done, forwarding traffic
STANDALONE = 8,
// activation done, forwarding traffic and syncing flows with HA pair
ACTIVE = 9,
// activation done, ready to forward traffic if pair fails
STANDBY = 10,
// going down for planned shutdown
DESTROYING = 11,
// gracefully transitioning from paired state to stand-alone
SWITCHING_TO_STANDALONE = 12
};

// Flow sync state
enum bit<8> dash_ha_flow_sync_state_t {
FLOW_MISS = 0, // Flow not created yet
Expand Down Expand Up @@ -190,7 +219,7 @@ struct metadata_t {

// Actions
bit<32> routing_actions;

// Action data
bool dropped;
// encap_data is for underlay
Expand Down
7 changes: 4 additions & 3 deletions dash-pipeline/bmv2/dash_pipeline.p4
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ control dash_ingress(
bit<1> disable_fast_path_icmp_flow_redirection,
bit<1> full_flow_resimulation_requested,
bit<64> max_resimulated_flow_per_second,
@SaiVal[type="sai_object_id_t"] bit<16> routing_group_id)
@SaiVal[type="sai_object_id_t"] bit<16> routing_group_id,
bit<1> is_ha_flow_owner)
{
meta.eni_data.cps = cps;
meta.eni_data.pps = pps;
Expand Down Expand Up @@ -335,7 +336,7 @@ control dash_ingress(
if (meta.eni_data.admin_state == 0) {
deny();
}

UPDATE_ENI_COUNTER(eni_rx);
if (meta.is_fast_path_icmp_flow_redirection_packet) {
UPDATE_ENI_COUNTER(eni_lb_fast_path_icmp_in);
Expand Down Expand Up @@ -372,7 +373,7 @@ control dash_ingress(
#endif // TARGET_BMV2_V1MODEL
#ifdef TARGET_DPDK_PNA
, istd
#endif // TARGET_DPDK_PNA
#endif // TARGET_DPDK_PNA
);

if (meta.eni_data.dscp_mode == dash_tunnel_dscp_mode_t.PIPE_MODEL) {
Expand Down
14 changes: 10 additions & 4 deletions dash-pipeline/bmv2/stages/ha.p4
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@ control ha_stage(inout headers_t hdr,
@SaiVal[type="sai_dash_ha_role_t"] dash_ha_role_t dash_ha_role,
@SaiVal[isreadonly="true"] bit<32> flow_version,
bit<1> flow_reconcile_requested,
@SaiVal[isreadonly="true"] bit<1> flow_reconcile_needed
@SaiVal[isreadonly="true"] bit<1> flow_reconcile_needed,
@SaiVal[type="sai_ip_address_t"] IPv4Address vip_v4,
IPv6Address vip_v6,
bit<1> admin_state,
bit<1> activate_role,
@SaiVal[isreadonly="true", type="sai_dash_ha_state_t"] dash_ha_state_t dash_ha_state
) {
meta.ha.ha_set_id = ha_set_id;
meta.ha.ha_role = dash_ha_role;
Expand Down Expand Up @@ -65,11 +70,12 @@ control ha_stage(inout headers_t hdr,
bit<16> dp_channel_max_src_port,
bit<32> dp_channel_probe_interval_ms,
bit<32> dp_channel_probe_fail_threshold,
@SaiVal[isreadonly="true"] bit<1> dp_channel_is_alive
@SaiVal[isreadonly="true"] bit<1> dp_channel_is_alive,
bit<32> switchover_network_convergence_time_ms
) {
meta.ha.peer_ip_is_v6 = peer_ip_is_v6;
meta.ha.peer_ip = peer_ip;

meta.ha.dp_channel_dst_port = dp_channel_dst_port;
meta.ha.dp_channel_src_port_min = dp_channel_min_src_port;
meta.ha.dp_channel_src_port_max = dp_channel_max_src_port;
Expand Down Expand Up @@ -97,7 +103,7 @@ control ha_stage(inout headers_t hdr,
return;
}
ha_set.apply();

// TODO: HA state machine handling.
}
}
Expand Down
42 changes: 41 additions & 1 deletion documentation/high-avail/ha-api-hld.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
| 0.4 | 04/01/2024 | Riff Jiang | Added capabilities for HA owner, simplified capabilities for HA topology. |
| 0.5 | 04/08/2024 | Riff Jiang | Added support for bulk sync. |
| 0.6 | 04/09/2024 | Riff Jiang | Added support for flow reconcile for planned and unplanned switchover. |
| 0.7 | 05/20/2024 | Mukesh MV | Added DPU scope DPU driven attributes. |

1. [1. Terminology](#1-terminology)
2. [2. Background](#2-background)
Expand Down Expand Up @@ -60,6 +61,8 @@

The DASH high availability APIs are a set of APIs to support flow HA feature for DASH. It follows the [SmartSwitch high availability design](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md) and is used to ensure the flow created on the active DPU can be correctly synchronized to the peered DPU.

It also supports the DPU-Scope-DPU-Driven setup in which the DPU internally owns the HA state machine.

For how the network topology is setup and how flow HA works, such as lifetime management, inline sync, bulk sync, and packet format, please refer to the [SmartSwitch high availability design](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md). In this doc, we will only focus on the design from SAI API perspective.

## 3. Overview
Expand All @@ -70,7 +73,7 @@ To support the [SmartSwitch HA workflows](https://github.com/sonic-net/SONiC/blo
- **HA scope**: It controls the failover scope, including the HA role (active, standby or standalone) and the expected flow version for new flows. Depending on the HA role of the ENI, the packet will be processed differently to get the flow synced.
- **Flow table**: It is the container of all flow entries. It can be attached to all ENIs in a DPU or to a single DPU, depending on the level at which we want to provide flow HA, i.e. the HA scope.
- **Flow**: It is used to represent a network connection, which contains match conditions and packet transformations. In HA, each flow will have its own HA-related states, such as flow version, flow sync state, etc.
- **ENI**: In ENI-level HA, each ENI will be connected to a HA scope.
- **ENI**: In ENI-level HA, each ENI will be connected to a different HA scope. In DPU scope HA, all ENIs or a group of ENIs will be associated with a HA scope.

The components are designed to be conceptually simple and reusable, hence we can use these components to support different HA setups. For example, to support the current ENI-level HA design, these components can be put together as below:

Expand Down Expand Up @@ -105,6 +108,7 @@ HA set is defined as a SAI object and contains the following SAI attributes:
| SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_INTERVAL_MS | `sai_uint32_t` | The interval of the data plane channel probe. |
| SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_FAIL_THRESHOLD | `sai_uint32_t` | The threshold of the data plane channel probe fail. |
| SAI_HA_SET_ATTR_DP_CHANNEL_IS_ALIVE | `bool` | (Read-only) Is data plane channel alive. |
| SAI_HA_SET_ATTR_SWITCHOVER_NETWORK_CONVERGENCE_TIME_MS | `sai_uint32_t` | Time to wait for the network to switchover during planned shutdown, used in the case of DPU driven state machine. |

### 4.2. HA Scope

Expand All @@ -117,6 +121,11 @@ HA scope is also defined as a SAI object and contains the following SAI attribut
| SAI_HA_SCOPE_ATTR_FLOW_VERSION | `sai_uint32_t` | The flow version for new flows. |
| SAI_HA_SCOPE_ATTR_FLOW_RECONCILE_REQUESTED | `bool` | When set to true, flow reconcile will be initiated. |
| SAI_HA_SCOPE_ATTR_FLOW_RECONCILE_NEEDED | `bool` | (Read-only) If true, flow reconcile is needed. |
| SAI_HA_SCOPE_ATTR_VIP_V4 | `sai_ip_address_t` | Dedicated IPv4 VIP for DPU HA scope. |
| SAI_HA_SCOPE_ATTR_VIP_V6 | `sai_ip_address_t` | Dedicated IPv6 VIP for DPU HA scope. |
| SAI_HA_SCOPE_ATTR_ADMIN_STATE | `bool` | Start or stop the DPU driven HA state machine. |
| SAI_HA_SCOPE_ATTR_DASH_HA_STATE | `sai_dash_ha_state_t` | (Read-only) HA state in case of DPU driven state machine. |
| SAI_HA_SCOPE_ATTR_ACTIVATE_ROLE | `bool` | Trigger DPU driven HA state machine to transition to steady state and enable BFD towards NPUs in order to start receiving traffic destined to VIP. |

The HA role is defined as below:

Expand All @@ -131,6 +140,27 @@ typedef enum _sai_dash_ha_role_t
} sai_dash_ha_role_t;
```

The read-only HA state for DPU driven HA state machine is defined as below:

```c
typedef enum _sai_dash_ha_state_t
{
SAI_DASH_HA_STATE_DEAD,
SAI_DASH_HA_STATE_CONNECTING,
SAI_DASH_HA_STATE_CONNECTED,
SAI_DASH_HA_STATE_INITIALIZING_TO_ACTIVE,
SAI_DASH_HA_STATE_INITIALIZING_TO_STANDBY,
SAI_DASH_HA_STATE_PENDING_STANDALONE_ACTIVATION,
SAI_DASH_HA_STATE_PENDING_ACTIVE_ACTIVATION,
SAI_DASH_HA_STATE_PENDING_STANDBY_ACTIVATION,
SAI_DASH_HA_STATE_STANDALONE,
SAI_DASH_HA_STATE_ACTIVE,
SAI_DASH_HA_STATE_STANDBY,
SAI_DASH_HA_STATE_DESTROYING,
SAI_DASH_HA_STATE_SWITCHING_TO_STANDALONE,
} sai_dash_ha_state_t;
```

### 4.3. Flow table

HA uses the DASH flow table to achieve the flow state manipulation. Since the flow table already provides the CRUD operations, we don't need any extra APIs from flow table.
Expand Down Expand Up @@ -192,6 +222,7 @@ To provide the ENI-level HA control, each ENI will have the following SAI attrib
| Attribute name | Type | Description |
| -------------- | ---- | ----------- |
| SAI_ENI_ATTR_HA_SCOPE_ID | `sai_object_id_t` | The HA scope ID of the ENI. |
| SAI_ENI_ATTR_IS_HA_FLOW_OWNER | `bool` | Determines which DPU in the pair creates flows belonging to this ENI in steady-state. Typically this is set to True for the ENIs on the Active DPU and False for the Standby DPU. |

### 4.6. Event notifications

Expand Down Expand Up @@ -283,6 +314,9 @@ typedef struct _sai_ha_scope_event_data_t
/** Flow version */
sai_uint32_t flow_version;
/** HA state */
sai_dash_ha_state_t ha_state;
} sai_ha_scope_event_data_t;
/**
Expand Down Expand Up @@ -675,3 +709,9 @@ sequenceDiagram
Note over S0N,S1N: hamgrd continue to drive HA<br>state machine and update<br>nexthop on all switches.
```

### 6.2. DPU scope DPU driven HA

In this mode, the DPU owns the HA state machine and drives it based on inputs from the SAI API. The workflows are defined in the DPU scope DPU driven document. Since the hamgrd / swss act as pass-through in this mode, all the triggers from the SDN controllers shown in the DPU scope DPU driven document translate to SAI API calls on the DPU.

The swss/hamgrd listens to the state change event notification and starts BFD based on that.

0 comments on commit 5d7a167

Please sign in to comment.