Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

share npu4 to npu2, 5, 6 #216

Merged
merged 1 commit into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 4 additions & 144 deletions src/driver/amdxdna/npu2_regs.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,159 +3,19 @@
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
*/

#include "drm_local/amdxdna_accel.h"
#include "aie2_pci.h"
#include "npu4_family.h"

/* Aperture base addresses; the registers below are grouped by aperture. */
#define MMNPU_APERTURE0_BASE		0x3000000
#define MMNPU_APERTURE1_BASE		0x3600000
#define MMNPU_APERTURE3_BASE		0x3810000
#define MMNPU_APERTURE4_BASE		0x3B10000

/*
 * Aperture 0: NPU Public Registers on MpNPUAxiXbar
 * (refer to Diag npu_registers.h)
 */
#define MPNPU_PUB_SEC_INTR		0x3010060
#define MPNPU_PUB_PWRMGMT_INTR		0x3010064
/* Scratch registers 0..15, one 32-bit word apart */
#define MPNPU_PUB_SCRATCH0		0x301006C
#define MPNPU_PUB_SCRATCH1		0x3010070
#define MPNPU_PUB_SCRATCH2		0x3010074
#define MPNPU_PUB_SCRATCH3		0x3010078
#define MPNPU_PUB_SCRATCH4		0x301007C
#define MPNPU_PUB_SCRATCH5		0x3010080
#define MPNPU_PUB_SCRATCH6		0x3010084
#define MPNPU_PUB_SCRATCH7		0x3010088
#define MPNPU_PUB_SCRATCH8		0x301008C
#define MPNPU_PUB_SCRATCH9		0x3010090
#define MPNPU_PUB_SCRATCH10		0x3010094
#define MPNPU_PUB_SCRATCH11		0x3010098
#define MPNPU_PUB_SCRATCH12		0x301009C
#define MPNPU_PUB_SCRATCH13		0x30100A0
#define MPNPU_PUB_SCRATCH14		0x30100A4
#define MPNPU_PUB_SCRATCH15		0x30100A8

/* Aperture 1: SRAM mailboxes */
#define MPNPU_SRAM_X2I_MAILBOX_0	0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15	0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31	0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31	0x363F000

/* Aperture 3: MP0 C2PMSG registers */
#define MP0_C2PMSG_73			0x3810A24
#define MP0_C2PMSG_123			0x3810AEC

/* Aperture 4: MP1 C2PMSG registers */
#define MP1_C2PMSG_0			0x3B10900
#define MP1_C2PMSG_60			0x3B109F0
#define MP1_C2PMSG_61			0x3B109F4

/*
 * PCIe BAR layout for NPU2. Each region is described by a pair:
 * its BAR index and the aperture base associated with that BAR.
 */
#define NPU2_REG_BAR_INDEX	0
#define NPU2_REG_BAR_BASE	MMNPU_APERTURE0_BASE

#define NPU2_MBOX_BAR_INDEX	0
#define NPU2_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE

#define NPU2_SRAM_BAR_INDEX	2
#define NPU2_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE

#define NPU2_PSP_BAR_INDEX	4
#define NPU2_PSP_BAR_BASE	MMNPU_APERTURE3_BASE

#define NPU2_SMU_BAR_INDEX	5
#define NPU2_SMU_BAR_BASE	MMNPU_APERTURE4_BASE

/* Runtime configuration types: the four gating controls first */
#define NPU2_RT_CFG_TYPE_CLK_GATING		1
#define NPU2_RT_CFG_TYPE_HCLK_GATING		2
#define NPU2_RT_CFG_TYPE_PWR_GATING		3
#define NPU2_RT_CFG_TYPE_L1IMU_GATING		4
/* ... then PDI load mode and debug BO size */
#define NPU2_RT_CFG_TYPE_PDI_LOAD		5
#define NPU2_RT_CFG_TYPE_DEBUG_BO		10

/* Values paired with the gating types */
#define NPU2_RT_CFG_VAL_CLK_GATING_ON		1
#define NPU2_RT_CFG_VAL_CLK_GATING_OFF		0

/* Values paired with NPU2_RT_CFG_TYPE_PDI_LOAD */
#define NPU2_RT_CFG_VAL_PDI_LOAD_APP		1
#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT		0

/* Values paired with NPU2_RT_CFG_TYPE_DEBUG_BO */
#define NPU2_RT_CFG_VAL_DEBUG_BO_LARGE		1
#define NPU2_RT_CFG_VAL_DEBUG_BO_DEFAULT	0

/* Clock ceilings. NOTE(review): units are presumably MHz -- confirm. */
#define NPU2_MPNPUCLK_FREQ_MAX			1267
#define NPU2_HCLK_FREQ_MAX			1800

/*
 * DPM clock table for NPU2. The array is declared with DPM_LEVEL_MAX
 * entries; eight are initialized here.
 * NOTE(review): each pair is presumably {npuclk, hclk} -- the last row equals
 * {NPU2_MPNPUCLK_FREQ_MAX, NPU2_HCLK_FREQ_MAX}; confirm against struct dpm_clk.
 */
const struct dpm_clk npu2_dpm_clk_table[DPM_LEVEL_MAX] = {
{396, 792},
{600, 1056},
{792, 1152},
{975, 1267},
{975, 1267}, /* NOTE(review): identical to the previous entry -- confirm intended */
{1056, 1408},
{1152, 1584},
{1267, 1800}
};

/*
 * Runtime configuration entries for NPU2, each a {type, value} pair:
 * PDI load set to APP and debug BO set to LARGE.
 */
const struct rt_config npu2_rt_cfg[] = {
{NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_APP},
{NPU2_RT_CFG_TYPE_DEBUG_BO, NPU2_RT_CFG_VAL_DEBUG_BO_LARGE},
};

/* Runtime configuration types that make up the NPU2 clock-gating group. */
const u32 npu2_clk_gating_types[] = {
NPU2_RT_CFG_TYPE_CLK_GATING,
NPU2_RT_CFG_TYPE_HCLK_GATING,
NPU2_RT_CFG_TYPE_PWR_GATING,
NPU2_RT_CFG_TYPE_L1IMU_GATING,
};
/*
 * NPU2 is the prototype of NPU4 and will become obsolete in the near future.
 *
 * Private device data for NPU2. Only fw_path and protocol_major/minor are
 * specific to this initializer.
 * NOTE(review): every other member designated below is designated again by
 * NPU4_COMMON_DEV_PRIV at the end of the initializer, so the explicit NPU2_*
 * entries look redundant -- confirm which set is meant to remain.
 */
const struct amdxdna_dev_priv npu2_dev_priv = {
.fw_path = "amdnpu/17f0_00/npu.sbin",
.protocol_major = 0x6,
.protocol_minor = 0x6,
.rt_config = npu2_rt_cfg,
.num_rt_cfg = ARRAY_SIZE(npu2_rt_cfg),
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU2_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
.sram_dev_addr = NPU2_SRAM_BAR_BASE,
.sram_offs = {
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
},
.psp_regs_off = {
DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU2_PSP, MP0_C2PMSG_123),
DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU2_REG, MPNPU_PUB_SCRATCH3),
DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU2_REG, MPNPU_PUB_SCRATCH4),
DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU2_REG, MPNPU_PUB_SCRATCH9),
DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73),
DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123),
DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3),
},
.smu_regs_off = {
DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0),
DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU2_SMU, MP1_C2PMSG_60),
DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE),
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60),
},
.clk_gating = {
.types = npu2_clk_gating_types,
.num_types = ARRAY_SIZE(npu2_clk_gating_types),
.value_enable = NPU2_RT_CFG_VAL_CLK_GATING_ON,
.value_disable = NPU2_RT_CFG_VAL_CLK_GATING_OFF,
},
.smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
.smu_hclk_freq_max = NPU2_HCLK_FREQ_MAX,
.smu_dpm_max = 7,
.smu_rev = SMU_REVISION_V1,
.smu_npu_dpm_clk_table = npu2_dpm_clk_table,
.smu_npu_dpm_levels = ARRAY_SIZE(npu2_dpm_clk_table),
#ifdef AMDXDNA_DEVEL
.priv_load_cfg = {NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_MGMT},
#endif
NPU4_COMMON_DEV_PRIV,
};

/*
 * Device info for NPU2. vbnv and dev_priv are the only members specific to
 * this initializer.
 * NOTE(review): the remaining members are designated again by
 * NPU4_COMMON_DEV_INFO at the end of the initializer, so the explicit NPU2_*
 * entries look redundant -- confirm which set is meant to remain.
 */
const struct amdxdna_dev_info dev_npu2_info = {
.reg_bar = NPU2_REG_BAR_INDEX,
.mbox_bar = NPU2_MBOX_BAR_INDEX,
.sram_bar = NPU2_SRAM_BAR_INDEX,
.psp_bar = NPU2_PSP_BAR_INDEX,
.smu_bar = NPU2_SMU_BAR_INDEX,
.first_col = 0,
.dev_mem_buf_shift = 15, /* 32 KiB aligned */
.dev_mem_base = AIE2_DEVM_BASE,
.dev_mem_size = AIE2_DEVM_SIZE,
.vbnv = "RyzenAI-npu2",
.device_type = AMDXDNA_DEV_TYPE_KMQ,
.dev_priv = &npu2_dev_priv,
.ops = &aie2_ops, /* NPU2 can share NPU1's callback */
NPU4_COMMON_DEV_INFO,
};
141 changes: 141 additions & 0 deletions src/driver/amdxdna/npu4_family.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2024, Advanced Micro Devices, Inc.
*/

#ifndef _NPU4_FAMILY_H_
#define _NPU4_FAMILY_H_

#include "drm_local/amdxdna_accel.h"
#include "aie2_pci.h"

/* Aperture base addresses; the registers below are grouped by aperture. */
#define MMNPU_APERTURE0_BASE		0x3000000
#define MMNPU_APERTURE1_BASE		0x3600000
#define MMNPU_APERTURE3_BASE		0x3810000
#define MMNPU_APERTURE4_BASE		0x3B10000

/*
 * Aperture 0: NPU Public Registers on MpNPUAxiXbar
 * (refer to Diag npu_registers.h)
 */
#define MPNPU_PUB_SEC_INTR		0x3010060
#define MPNPU_PUB_PWRMGMT_INTR		0x3010064
/* Scratch registers 0..15, one 32-bit word apart */
#define MPNPU_PUB_SCRATCH0		0x301006C
#define MPNPU_PUB_SCRATCH1		0x3010070
#define MPNPU_PUB_SCRATCH2		0x3010074
#define MPNPU_PUB_SCRATCH3		0x3010078
#define MPNPU_PUB_SCRATCH4		0x301007C
#define MPNPU_PUB_SCRATCH5		0x3010080
#define MPNPU_PUB_SCRATCH6		0x3010084
#define MPNPU_PUB_SCRATCH7		0x3010088
#define MPNPU_PUB_SCRATCH8		0x301008C
#define MPNPU_PUB_SCRATCH9		0x3010090
#define MPNPU_PUB_SCRATCH10		0x3010094
#define MPNPU_PUB_SCRATCH11		0x3010098
#define MPNPU_PUB_SCRATCH12		0x301009C
#define MPNPU_PUB_SCRATCH13		0x30100A0
#define MPNPU_PUB_SCRATCH14		0x30100A4
#define MPNPU_PUB_SCRATCH15		0x30100A8

/* Aperture 1: SRAM mailboxes */
#define MPNPU_SRAM_X2I_MAILBOX_0	0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15	0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31	0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31	0x363F000

/* Aperture 3: MP0 C2PMSG registers */
#define MP0_C2PMSG_73			0x3810A24
#define MP0_C2PMSG_123			0x3810AEC

/* Aperture 4: MP1 C2PMSG registers */
#define MP1_C2PMSG_0			0x3B10900
#define MP1_C2PMSG_60			0x3B109F0
#define MP1_C2PMSG_61			0x3B109F4

/*
 * PCIe BAR layout for NPU4. Each region is described by a pair:
 * its BAR index and the aperture base associated with that BAR.
 */
#define NPU4_REG_BAR_INDEX	0
#define NPU4_REG_BAR_BASE	MMNPU_APERTURE0_BASE

#define NPU4_MBOX_BAR_INDEX	0
#define NPU4_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE

#define NPU4_SRAM_BAR_INDEX	2
#define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE

#define NPU4_PSP_BAR_INDEX	4
#define NPU4_PSP_BAR_BASE	MMNPU_APERTURE3_BASE

#define NPU4_SMU_BAR_INDEX	5
#define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE

/* Runtime configuration types: the four gating controls first */
#define NPU4_RT_CFG_TYPE_CLK_GATING		1
#define NPU4_RT_CFG_TYPE_HCLK_GATING		2
#define NPU4_RT_CFG_TYPE_PWR_GATING		3
#define NPU4_RT_CFG_TYPE_L1IMU_GATING		4
/* ... then PDI load mode and debug BO size */
#define NPU4_RT_CFG_TYPE_PDI_LOAD		5
#define NPU4_RT_CFG_TYPE_DEBUG_BO		10

/* Values paired with the gating types */
#define NPU4_RT_CFG_VAL_CLK_GATING_ON		1
#define NPU4_RT_CFG_VAL_CLK_GATING_OFF		0

/* Values paired with NPU4_RT_CFG_TYPE_PDI_LOAD */
#define NPU4_RT_CFG_VAL_PDI_LOAD_APP		1
#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT		0

/* Values paired with NPU4_RT_CFG_TYPE_DEBUG_BO */
#define NPU4_RT_CFG_VAL_DEBUG_BO_LARGE		1
#define NPU4_RT_CFG_VAL_DEBUG_BO_DEFAULT	0

/* Clock ceilings. NOTE(review): units are presumably MHz -- confirm. */
#define NPU4_MPNPUCLK_FREQ_MAX			1267
#define NPU4_HCLK_FREQ_MAX			1800

/*
 * Element counts used to size the npu4_rt_cfg[] and npu4_clk_gating_types[]
 * declarations in this header.
 */
#define NPU4_INIT_RT_CFG_NUM			2
#define NPU4_CLK_GATING_CFG_NUM			4

/*
 * Tables shared by the NPU4 family. They are declared with explicit bounds
 * so that ARRAY_SIZE() in NPU4_COMMON_DEV_PRIV can be applied to them from
 * any file that includes this header.
 */
extern const struct rt_config	npu4_rt_cfg[NPU4_INIT_RT_CFG_NUM];
extern const u32		npu4_clk_gating_types[NPU4_CLK_GATING_CFG_NUM];
extern const struct dpm_clk	npu4_dpm_clk_table[DPM_LEVEL_MAX];

/*
 * priv_load_cfg initializer, emitted only when AMDXDNA_DEVEL is defined.
 *
 * The NPU2 initializer that this shared macro replaces set .priv_load_cfg
 * only under #ifdef AMDXDNA_DEVEL. A preprocessor conditional cannot appear
 * inside a macro body, so the condition lives in this helper, which expands
 * to nothing (not even a comma) in non-DEVEL builds.
 * NOTE(review): assumes struct amdxdna_dev_priv declares priv_load_cfg only
 * when AMDXDNA_DEVEL is defined -- confirm against aie2_pci.h.
 */
#ifdef AMDXDNA_DEVEL
#define NPU4_DEVEL_PRIV_LOAD_CFG \
	.priv_load_cfg = {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_MGMT},
#else
#define NPU4_DEVEL_PRIV_LOAD_CFG
#endif

/*
 * Designated initializers for struct amdxdna_dev_priv that are common to the
 * NPU4 family. Use it inside the initializer after the device-specific
 * members (fw_path, protocol_major/minor). The expansion has no trailing
 * comma, so write "NPU4_COMMON_DEV_PRIV,".
 */
#define NPU4_COMMON_DEV_PRIV \
	.rt_config = npu4_rt_cfg, \
	.num_rt_cfg = ARRAY_SIZE(npu4_rt_cfg), \
	NPU4_DEVEL_PRIV_LOAD_CFG \
	.col_align = COL_ALIGN_NATURE, \
	.mbox_dev_addr = NPU4_MBOX_BAR_BASE, \
	.mbox_size = 0, /* Use BAR size */ \
	.sram_dev_addr = NPU4_SRAM_BAR_BASE, \
	.sram_offs = { \
		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), \
		DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), \
	}, \
	.psp_regs_off = { \
		DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123), \
		DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), \
		DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4), \
		DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9), \
		DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73), \
		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123), \
		DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), \
	}, \
	.smu_regs_off = { \
		DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0), \
		DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60), \
		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE), \
		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61), \
		DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60), \
	}, \
	.clk_gating = { \
		.types = npu4_clk_gating_types, \
		.num_types = ARRAY_SIZE(npu4_clk_gating_types), \
		.value_enable = NPU4_RT_CFG_VAL_CLK_GATING_ON, \
		.value_disable = NPU4_RT_CFG_VAL_CLK_GATING_OFF, \
	}, \
	.smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX, \
	.smu_hclk_freq_max = NPU4_HCLK_FREQ_MAX, \
	.smu_dpm_max = 7, \
	.smu_rev = SMU_REVISION_V1, \
	.smu_npu_dpm_clk_table = npu4_dpm_clk_table, \
	.smu_npu_dpm_levels = ARRAY_SIZE(npu4_dpm_clk_table)

/*
 * Designated initializers for struct amdxdna_dev_info that are common to the
 * NPU4 family. Device-specific members (vbnv, dev_priv) are supplied by the
 * user of the macro. The expansion has no trailing comma, so write
 * "NPU4_COMMON_DEV_INFO,".
 */
#define NPU4_COMMON_DEV_INFO \
	.device_type = AMDXDNA_DEV_TYPE_KMQ, \
	.first_col = 0, \
	/* Device memory window; buffers are 32 KiB aligned (1 << 15) */ \
	.dev_mem_buf_shift = 15, \
	.dev_mem_base = AIE2_DEVM_BASE, \
	.dev_mem_size = AIE2_DEVM_SIZE, \
	/* PCIe BAR assignment */ \
	.reg_bar = NPU4_REG_BAR_INDEX, \
	.mbox_bar = NPU4_MBOX_BAR_INDEX, \
	.sram_bar = NPU4_SRAM_BAR_INDEX, \
	.psp_bar = NPU4_PSP_BAR_INDEX, \
	.smu_bar = NPU4_SMU_BAR_INDEX, \
	.ops = &aie2_ops

#endif /* _NPU4_FAMILY_H_ */
Loading