diff --git a/src/driver/amdxdna/aie2_message.c b/src/driver/amdxdna/aie2_message.c index fd83674..0efc68d 100644 --- a/src/driver/amdxdna/aie2_message.c +++ b/src/driver/amdxdna/aie2_message.c @@ -249,6 +249,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct { DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT); struct amdxdna_dev *xdna = ndev->xdna; + enum xdna_mailbox_channel_type type; struct xdna_mailbox_chann_res x2i; struct xdna_mailbox_chann_res i2x; struct cq_pair *cq_pair; @@ -287,8 +288,12 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct } intr_reg = i2x.mb_head_ptr_reg + 4; + if (aie2_pm_is_turbo(ndev)) + type = MB_CHANNEL_USER_POLL; + else + type = MB_CHANNEL_USER_NORMAL; hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, - intr_reg, ret); + intr_reg, ret, type); if (!hwctx->priv->mbox_chann) { XDNA_ERR(xdna, "not able to create channel"); ret = -EINVAL; diff --git a/src/driver/amdxdna/aie2_msg_priv.h b/src/driver/amdxdna/aie2_msg_priv.h index 663a608..2d18ef6 100644 --- a/src/driver/amdxdna/aie2_msg_priv.h +++ b/src/driver/amdxdna/aie2_msg_priv.h @@ -186,7 +186,6 @@ struct exec_dpu_req { u32 inst_prop_cnt; u32 cu_idx; u32 payload[35]; - } __packed; struct exec_dpu_preempt_req { diff --git a/src/driver/amdxdna/aie2_pci.c b/src/driver/amdxdna/aie2_pci.c index ae5c85e..3f95844 100644 --- a/src/driver/amdxdna/aie2_pci.c +++ b/src/driver/amdxdna/aie2_pci.c @@ -428,7 +428,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) &ndev->mgmt_x2i, &ndev->mgmt_i2x, xdna_mailbox_intr_reg, - mgmt_mb_irq); + mgmt_mb_irq, MB_CHANNEL_MGMT); if (!ndev->mgmt_chann) { XDNA_ERR(xdna, "failed to create management mailbox channel"); ret = -EINVAL; @@ -576,6 +576,7 @@ static int aie2_init(struct amdxdna_dev *xdna) aie2_smu_setup(ndev); ndev->pw_mode = POWER_MODE_DEFAULT; + ndev->clk_gate_enabled = true; ret = aie2_hw_start(xdna); if (ret) { XDNA_ERR(xdna, "start npu failed, ret 
%d", ret); @@ -986,9 +987,8 @@ static int aie2_set_power_mode(struct amdxdna_client *client, struct amdxdna_drm return -EFAULT; } - /* Interpret the given buf->power_mode into the correct power mode*/ power_mode = power_state.power_mode; - if (power_mode > POWER_MODE_HIGH) { + if (power_mode > POWER_MODE_TURBO) { XDNA_ERR(xdna, "Invalid power mode %d", power_mode); return -EINVAL; } diff --git a/src/driver/amdxdna/aie2_pci.h b/src/driver/amdxdna/aie2_pci.h index 6ee8055..f0bd4a5 100644 --- a/src/driver/amdxdna/aie2_pci.h +++ b/src/driver/amdxdna/aie2_pci.h @@ -223,6 +223,7 @@ struct amdxdna_dev_hdl { struct aie_metadata metadata; struct smu smu; enum amdxdna_power_mode_type pw_mode; + bool clk_gate_enabled; /* Mailbox and the management channel */ struct mailbox *mbox; @@ -368,6 +369,7 @@ void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map); /* aie2_pm.c */ int aie2_pm_start(struct amdxdna_dev_hdl *ndev); void aie2_pm_stop(struct amdxdna_dev_hdl *ndev); +bool aie2_pm_is_turbo(struct amdxdna_dev_hdl *ndev); int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); #endif /* _AIE2_PCI_H_ */ diff --git a/src/driver/amdxdna/aie2_pm.c b/src/driver/amdxdna/aie2_pm.c index 64468c7..0a71e5b 100644 --- a/src/driver/amdxdna/aie2_pm.c +++ b/src/driver/amdxdna/aie2_pm.c @@ -5,13 +5,19 @@ #include "aie2_pci.h" -static int aie2_pm_clock_gating(struct amdxdna_dev_hdl *ndev, bool enable) +static int aie2_pm_clock_gating(struct amdxdna_dev_hdl *ndev, + enum amdxdna_power_mode_type target) { const struct rt_config_clk_gating *config; + bool enable; u32 value; int ret; int i; + enable = (target != POWER_MODE_TURBO && target != POWER_MODE_HIGH); + if (enable == ndev->clk_gate_enabled) + return 0; + config = &ndev->priv->clk_gating; if (enable) value = config->value_enable; @@ -30,9 +36,40 @@ static int aie2_pm_clock_gating(struct amdxdna_dev_hdl *ndev, bool enable) } } + if (!ret) + ndev->clk_gate_enabled = enable; + return ret; } 
+bool aie2_pm_is_turbo(struct amdxdna_dev_hdl *ndev) +{ + return ndev->pw_mode == POWER_MODE_TURBO; +} + +static int aie2_pm_check_turbo(struct amdxdna_dev_hdl *ndev, + enum amdxdna_power_mode_type prev, + enum amdxdna_power_mode_type next) +{ + struct amdxdna_dev *xdna = ndev->xdna; + struct amdxdna_client *client; + + if (prev != POWER_MODE_TURBO && next != POWER_MODE_TURBO) + return 0; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + list_for_each_entry(client, &xdna->client_list, node) { + bool empty; + + mutex_lock(&client->hwctx_lock); + empty = idr_is_empty(&client->hwctx_idr); + mutex_unlock(&client->hwctx_lock); + if (!empty) + return -EBUSY; + } + return 0; +} + int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target) { struct amdxdna_dev *xdna = ndev->xdna; @@ -44,23 +81,29 @@ int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type if (target == POWER_MODE_LOW || target == POWER_MODE_MEDIUM) return -EOPNOTSUPP; - XDNA_DBG(xdna, "Changing power mode from %d to %d", ndev->pw_mode, target); - /* Set resource solver power property to the user choice */ + ret = aie2_pm_check_turbo(ndev, ndev->pw_mode, target); + if (ret) { + XDNA_WARN(xdna, "Change Turbo mode failed"); + return ret; + } - /* Set power level within the device */ + XDNA_DBG(xdna, "Changing power mode from %d to %d", ndev->pw_mode, target); - /* - * Other mode -> POWER_MODE_HIGH: Turn off clock gating - * POWER_MODE_HIGH -> Other mode: Turn on clock gating - * Otherwise, no change + /* TODO: + *switch (ndev->pw_mode) { + *case POWER_MODE_LOW: + * Set to low DPM level + *case POWER_MODE_MEDIUM: + * Set to medium DPM level + *case POWER_MODE_HIGH: + *case POWER_MODE_TURBO: + * Set to highest DPM level + *default: + * Let driver decides DPM level + *} */ - if (target == POWER_MODE_HIGH) { - XDNA_DBG(xdna, "Clock gating turning off"); - ret = aie2_pm_clock_gating(ndev, false); - } else if (ndev->pw_mode == POWER_MODE_HIGH) { - 
XDNA_DBG(xdna, "Clock gating turning on"); - ret = aie2_pm_clock_gating(ndev, true); - } + + ret = aie2_pm_clock_gating(ndev, target); if (ret) { XDNA_ERR(xdna, "Failed to config clock gating"); return ret; @@ -73,21 +116,10 @@ int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type int aie2_pm_start(struct amdxdna_dev_hdl *ndev) { - /* - * TODO: should only skip POWER_MODE_DEFAULT. - * Let's make it right after full DPM support is ready - */ - if (ndev->pw_mode != POWER_MODE_HIGH) - return 0; - - return aie2_pm_clock_gating(ndev, false); + return aie2_pm_clock_gating(ndev, ndev->pw_mode); } void aie2_pm_stop(struct amdxdna_dev_hdl *ndev) { - if (ndev->pw_mode != POWER_MODE_HIGH) - return; - - /* Clock gating must be turned ON before suspend firmware */ - aie2_pm_clock_gating(ndev, true); + aie2_pm_clock_gating(ndev, POWER_MODE_DEFAULT); } diff --git a/src/driver/amdxdna/amdxdna_mailbox.c b/src/driver/amdxdna/amdxdna_mailbox.c index f8cd8cc..409ab26 100644 --- a/src/driver/amdxdna/amdxdna_mailbox.c +++ b/src/driver/amdxdna/amdxdna_mailbox.c @@ -52,8 +52,11 @@ #ifdef AMDXDNA_DEVEL int mailbox_polling; -module_param(mailbox_polling, int, 0644); -MODULE_PARM_DESC(mailbox_polling, "0:interrupt(default); >0:poll interval in ms; <0: busy poll"); +module_param(mailbox_polling, int, 0444); +MODULE_PARM_DESC(mailbox_polling, "<=0:interrupt(default); >0:poll interval in ms; <0: busy poll"); +#define MB_DEFAULT_NO_POLL (mailbox_polling <= 0) +#define MB_PERIODIC_POLL (mailbox_polling > 0) +#define MB_FORCE_USER_POLL (mailbox_polling < 0) #define MB_TIMER_JIFF msecs_to_jiffies(mailbox_polling) #endif @@ -70,39 +73,39 @@ struct mailbox { /* protect channel list */ struct mutex mbox_lock; struct list_head chann_list; -#ifdef AMDXDNA_DEVEL + struct list_head poll_chann_list; struct task_struct *polld; struct wait_queue_head poll_wait; bool sent_msg; /* For polld */ -#endif - #if defined(CONFIG_DEBUG_FS) struct list_head res_records; #endif /* 
CONFIG_DEBUG_FS */ - }; #if defined(CONFIG_DEBUG_FS) struct mailbox_res_record { + enum xdna_mailbox_channel_type type; struct list_head re_entry; struct xdna_mailbox_chann_res re_x2i; struct xdna_mailbox_chann_res re_i2x; int re_irq; + int active; }; #endif /* CONFIG_DEBUG_FS */ struct mailbox_channel { struct mailbox *mb; #if defined(CONFIG_DEBUG_FS) - struct list_head chann_entry; struct mailbox_res_record *record; #endif + struct list_head chann_entry; struct xdna_mailbox_chann_res res[CHAN_RES_NUM]; int msix_irq; + u32 x2i_tail; u32 iohub_int_addr; + enum xdna_mailbox_channel_type type; struct idr chan_idr; spinlock_t chan_idr_lock; /* protect idr operations */ - u32 x2i_tail; /* Received msg related fields */ struct workqueue_struct *work_q; @@ -163,17 +166,23 @@ static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg) static int mailbox_tail_read_non_zero(struct mailbox_channel *mb_chann, u32 *val) { u32 mbox_reg = mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg; + u32 ringbuf_size = mb_chann->res[CHAN_RES_I2X].rb_size; struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; u64 ringbuf_addr = mb_res->mbox_base + mbox_reg; - int ret, value; + int ret, tail; - /* Poll till value is not zero */ - ret = readx_poll_timeout(ioread32, (void *)ringbuf_addr, value, - value, 1 /* us */, 100); + /* Poll till tail is not zero */ + ret = readx_poll_timeout(ioread32, (void *)ringbuf_addr, tail, + tail, 0 /* tight-loops */, 100 /* us timeout */); if (ret < 0) return ret; - *val = value; + if (unlikely(tail > ringbuf_size || !IS_ALIGNED(tail, 4))) { + MB_WARN_ONCE(mb_chann, "Invalid tail 0x%x", tail); + return -EINVAL; + } + + *val = tail; return 0; } @@ -350,6 +359,12 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade return ret; } +/* + * mailbox_get_msg() is the key function to get message from ring buffer. + * If it returns 0, means 1 message was consumed. + * If it returns -ENOENT, means ring buffer is empty.
+ * If it returns other value, means ERROR. + */ static inline int mailbox_get_msg(struct mailbox_channel *mb_chann) { struct xdna_msg_header header; @@ -360,19 +375,15 @@ static inline int mailbox_get_msg(struct mailbox_channel *mb_chann) u64 read_addr; int ret; - if (mailbox_tail_read_non_zero(mb_chann, &tail)) { + ret = mailbox_tail_read_non_zero(mb_chann, &tail); + if (ret) { MB_WARN_ONCE(mb_chann, "Zero tail too long"); - return -EINVAL; + return ret; } head = mb_chann->i2x_head; ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X); start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr; - if (unlikely(tail > ringbuf_size || !IS_ALIGNED(tail, 4))) { - MB_WARN_ONCE(mb_chann, "Invalid tail 0x%x", tail); - return -EINVAL; - } - /* ringbuf empty */ if (head == tail) return -ENOENT; @@ -390,9 +401,17 @@ static inline int mailbox_get_msg(struct mailbox_channel *mb_chann) head, tail); return -EINVAL; } - mailbox_set_headptr(mb_chann, 0); - ret = 0; - goto done; + + /* Read from beginning of ringbuf */ + head = 0; + ret = mailbox_tail_read_non_zero(mb_chann, &tail); + if (ret) { + MB_WARN_ONCE(mb_chann, "Hit tombstone, re-read tail failed"); + return -EINVAL; + } + /* Re-peek size of the message */ + read_addr = mb_chann->mb->res.ringbuf_base + start_addr; + header.total_size = ioread32((void *)read_addr); } if (unlikely(!header.total_size || !IS_ALIGNED(header.total_size, 4))) { @@ -418,8 +437,6 @@ static inline int mailbox_get_msg(struct mailbox_channel *mb_chann) /* After update head, it can equal to ringbuf_size. This is expected. 
*/ trace_mbox_set_head(MAILBOX_NAME, mb_chann->msix_irq, header.opcode, header.id); - -done: return ret; } @@ -462,6 +479,8 @@ static irqreturn_t mailbox_irq_handler(int irq, void *p) int i; trace_mbox_irq_handle(MAILBOX_NAME, irq); + if (mb_chann->type == MB_CHANNEL_USER_POLL) + return IRQ_HANDLED; /* Clear IOHUB register */ mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0); /* Schedule a rx_work to call the callback functions */ @@ -492,6 +511,7 @@ static void mailbox_timer(struct timer_list *t) mod_timer(&mb_chann->timer, jiffies + MB_TIMER_JIFF); } +#endif static void mailbox_polld_handle_chann(struct mailbox_channel *mb_chann) { @@ -545,7 +565,10 @@ static bool mailbox_polld_event(struct mailbox *mb) struct mailbox_channel *mb_chann; mutex_lock(&mb->mbox_lock); - list_for_each_entry(mb_chann, &mb->chann_list, chann_entry) { + list_for_each_entry(mb_chann, &mb->poll_chann_list, chann_entry) { + if (mb_chann->type == MB_CHANNEL_MGMT) + break; + if (mailbox_channel_no_msg(mb_chann)) continue; @@ -574,13 +597,11 @@ static int mailbox_polld(void *data) continue; mutex_lock(&mb->mbox_lock); - if (unlikely(list_empty(&mb->chann_list))) { - mutex_unlock(&mb->mbox_lock); - continue; - } - chann_all_empty = true; - list_for_each_entry(mb_chann, &mb->chann_list, chann_entry) { + list_for_each_entry(mb_chann, &mb->poll_chann_list, chann_entry) { + if (mb_chann->type == MB_CHANNEL_MGMT) + break; + if (mailbox_channel_no_msg(mb_chann)) continue; @@ -602,7 +623,6 @@ static int mailbox_polld(void *data) return 0; } -#endif int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann, const struct xdna_mailbox_msg *msg, u64 tx_timeout) @@ -669,10 +689,8 @@ int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann, goto release_id; } -#ifdef AMDXDNA_DEVEL - if (mb_chann->mb->polld) + if (mb_chann->type == MB_CHANNEL_USER_POLL) mailbox_polld_wakeup(mb_chann->mb); -#endif return 0; release_id: @@ -683,42 +701,75 @@ int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann, 
} #if defined(CONFIG_DEBUG_FS) +static struct mailbox_res_record * +xdna_mailbox_get_record(struct mailbox *mb, int mb_irq, + const struct xdna_mailbox_chann_res *x2i, + const struct xdna_mailbox_chann_res *i2x, + enum xdna_mailbox_channel_type type) +{ + struct mailbox_res_record *record; + int record_found = 0; + + mutex_lock(&mb->mbox_lock); + list_for_each_entry(record, &mb->res_records, re_entry) { + if (record->re_irq != mb_irq) + continue; + + record_found = 1; + break; + } + + if (record_found) { + record->type = type; + goto found; + } + + record = kzalloc(sizeof(*record), GFP_KERNEL); + if (!record) + goto out; + list_add_tail(&record->re_entry, &mb->res_records); + record->re_irq = mb_irq; + +found: + record->type = type; + memcpy(&record->re_x2i, x2i, sizeof(*x2i)); + memcpy(&record->re_i2x, i2x, sizeof(*i2x)); +out: + mutex_unlock(&mb->mbox_lock); + return record; +} + int xdna_mailbox_info_show(struct mailbox *mb, struct seq_file *m) { - static const char ring_fmt[] = "%4d %3s %5d 0x%08x 0x%04x "; + static const char ring_fmt[] = "%4d %3s %5d %4d 0x%08x 0x%04x "; static const char mbox_fmt[] = "0x%08x 0x%08x 0x%04x 0x%04x\n"; struct mailbox_res_record *record; - struct mailbox_channel *chann; /* If below two puts changed, make sure update fmt[] as well */ - seq_puts(m, "mbox dir alive ring addr size "); + seq_puts(m, "mbox dir alive type ring addr size "); seq_puts(m, "head ptr tail ptr head val tail val\n"); #define xdna_mbox_dump_queue(_dir, _act) \ - { \ - u32 head_ptr, tail_ptr, head_val, tail_val; \ - u32 rb_start, rb_size; \ - u32 mbox_irq; \ - mbox_irq = record->re_irq; \ - rb_start = record->re_##_dir.rb_start_addr; \ - rb_size = record->re_##_dir.rb_size; \ - head_ptr = record->re_##_dir.mb_head_ptr_reg; \ - tail_ptr = record->re_##_dir.mb_tail_ptr_reg; \ - head_val = ioread32((void *)(mb->res.mbox_base + head_ptr)); \ - tail_val = ioread32((void *)(mb->res.mbox_base + tail_ptr)); \ - seq_printf(m, ring_fmt, mbox_irq, #_dir, _act, rb_start, 
rb_size); \ - seq_printf(m, mbox_fmt, head_ptr, tail_ptr, head_val, tail_val); \ - } +{ \ + u32 head_ptr, tail_ptr, head_val, tail_val; \ + u32 rb_start, rb_size; \ + u32 mbox_irq; \ + u32 type; \ + type = record->type; \ + mbox_irq = record->re_irq; \ + rb_start = record->re_##_dir.rb_start_addr; \ + rb_size = record->re_##_dir.rb_size; \ + head_ptr = record->re_##_dir.mb_head_ptr_reg; \ + tail_ptr = record->re_##_dir.mb_tail_ptr_reg; \ + head_val = ioread32((void *)(mb->res.mbox_base + head_ptr)); \ + tail_val = ioread32((void *)(mb->res.mbox_base + tail_ptr)); \ + seq_printf(m, ring_fmt, mbox_irq, #_dir, _act, type, rb_start, rb_size); \ + seq_printf(m, mbox_fmt, head_ptr, tail_ptr, head_val, tail_val); \ +} mutex_lock(&mb->mbox_lock); list_for_each_entry(record, &mb->res_records, re_entry) { - int active = 0; - - list_for_each_entry(chann, &mb->chann_list, chann_entry) { - if (record->re_irq == chann->msix_irq) - active = 1; - } - xdna_mbox_dump_queue(x2i, active); - xdna_mbox_dump_queue(i2x, active); + xdna_mbox_dump_queue(x2i, record->active); + xdna_mbox_dump_queue(i2x, record->active); } mutex_unlock(&mb->mbox_lock); @@ -760,42 +811,17 @@ struct mailbox_channel * xdna_mailbox_create_channel(struct mailbox *mb, const struct xdna_mailbox_chann_res *x2i, const struct xdna_mailbox_chann_res *i2x, - u32 iohub_int_addr, - int mb_irq) + u32 iohub_int_addr, int mb_irq, + enum xdna_mailbox_channel_type type) { struct mailbox_channel *mb_chann; int ret; #if defined(CONFIG_DEBUG_FS) struct mailbox_res_record *record; - int record_found = 0; - - mutex_lock(&mb->mbox_lock); - list_for_each_entry(record, &mb->res_records, re_entry) { - if (record->re_irq != mb_irq) - continue; - - record_found = 1; - break; - } - - if (record_found) - goto skip_record; - - record = kzalloc(sizeof(*record), GFP_KERNEL); - if (!record) { - mutex_unlock(&mb->mbox_lock); - return NULL; - } - - memcpy(&record->re_x2i, x2i, sizeof(*x2i)); - memcpy(&record->re_i2x, i2x, sizeof(*i2x)); - 
record->re_irq = mb_irq; - /* Record will be released when mailbox device destroy*/ - list_add_tail(&record->re_entry, &mb->res_records); - -skip_record: - mutex_unlock(&mb->mbox_lock); + record = xdna_mailbox_get_record(mb, mb_irq, x2i, i2x, type); + if (!record) + return NULL; #endif /* CONFIG_DEBUG_FS */ if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) { @@ -808,6 +834,11 @@ xdna_mailbox_create_channel(struct mailbox *mb, return NULL; mb_chann->mb = mb; + mb_chann->type = type; +#ifdef AMDXDNA_DEVEL + if (type != MB_CHANNEL_MGMT && MB_FORCE_USER_POLL) + mb_chann->type = MB_CHANNEL_USER_POLL; +#endif mb_chann->msix_irq = mb_irq; mb_chann->iohub_int_addr = iohub_int_addr; memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i)); @@ -817,10 +848,7 @@ xdna_mailbox_create_channel(struct mailbox *mb, idr_init(&mb_chann->chan_idr); mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I); mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X); -#ifdef AMDXDNA_DEVEL - if (mb->polld) - goto skip_irq; -#endif + mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0); INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker); mb_chann->work_q = alloc_ordered_workqueue(MAILBOX_NAME, 0); @@ -830,7 +858,7 @@ xdna_mailbox_create_channel(struct mailbox *mb, } #ifdef AMDXDNA_DEVEL - if (mailbox_polling > 0) { + if (MB_PERIODIC_POLL) { /* Poll response every few ms. 
Good for bring up a new device */ timer_setup(&mb_chann->timer, mailbox_timer, 0); @@ -852,13 +880,18 @@ xdna_mailbox_create_channel(struct mailbox *mb, #endif mb_chann->bad_state = false; mutex_lock(&mb->mbox_lock); - list_add(&mb_chann->chann_entry, &mb->chann_list); - mutex_unlock(&mb->mbox_lock); - + if (mb_chann->type == MB_CHANNEL_USER_POLL) + list_add_tail(&mb_chann->chann_entry, &mb->poll_chann_list); + else + list_add_tail(&mb_chann->chann_entry, &mb->chann_list); #if defined(CONFIG_DEBUG_FS) mb_chann->record = record; + record->active = 1; #endif - MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq); + mutex_unlock(&mb->mbox_lock); + + MB_DBG(mb_chann, "Mailbox channel created type %d (irq: %d)", + mb_chann->type, mb_chann->msix_irq); return mb_chann; destroy_wq: @@ -875,13 +908,13 @@ int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann) mutex_lock(&mb_chann->mb->mbox_lock); list_del(&mb_chann->chann_entry); +#if defined(CONFIG_DEBUG_FS) + mb_chann->record->active = 0; +#endif mutex_unlock(&mb_chann->mb->mbox_lock); #ifdef AMDXDNA_DEVEL - if (mb_chann->mb->polld) - goto free_msg; - - if (mailbox_polling > 0) + if (MB_PERIODIC_POLL) goto destroy_wq; #endif free_irq(mb_chann->msix_irq, mb_chann); @@ -892,13 +925,11 @@ int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann) destroy_workqueue(mb_chann->work_q); /* We can clean up and release resources */ -#ifdef AMDXDNA_DEVEL -free_msg: -#endif idr_for_each(&mb_chann->chan_idr, mailbox_release_msg, mb_chann); idr_destroy(&mb_chann->chan_idr); - MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq); + MB_DBG(mb_chann, "Mailbox channel destroyed type %d irq: %d", + mb_chann->type, mb_chann->msix_irq); kfree(mb_chann); return 0; } @@ -909,10 +940,7 @@ void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann) return; #ifdef AMDXDNA_DEVEL - if (mb_chann->mb->polld) - return; - - if (mailbox_polling > 0) { + if (MB_PERIODIC_POLL) { 
timer_delete_sync(&mb_chann->timer); goto skip_irq; } @@ -943,11 +971,13 @@ struct mailbox *xdna_mailbox_create(struct device *dev, mutex_init(&mb->mbox_lock); INIT_LIST_HEAD(&mb->chann_list); -#ifdef AMDXDNA_DEVEL - if (mailbox_polling >= 0) - goto skip_polld; + INIT_LIST_HEAD(&mb->poll_chann_list); - /* Launch per device busy polling kthread */ + /* + * The polld kthread will only wake up and handle those + * MB_CHANNEL_USER_POLL channels. If there is nothing to do, polld should + * just sleep. It is a per device kthread. + */ mb->polld = kthread_run(mailbox_polld, mb, MAILBOX_NAME); if (IS_ERR(mb->polld)) { dev_err(mb->dev, "Failed to create polld ret %ld", PTR_ERR(mb->polld)); @@ -956,8 +986,6 @@ struct mailbox *xdna_mailbox_create(struct device *dev, } init_waitqueue_head(&mb->poll_wait); mb->sent_msg = false; -skip_polld: -#endif #if defined(CONFIG_DEBUG_FS) INIT_LIST_HEAD(&mb->res_records); @@ -981,18 +1009,11 @@ void xdna_mailbox_destroy(struct mailbox *mb) } done_release_record: #endif /* CONFIG_DEBUG_FS */ -#ifdef AMDXDNA_DEVEL - if (mailbox_polling >= 0) - goto skip_polld; - dev_dbg(mb->dev, "Stopping polld"); (void)kthread_stop(mb->polld); -skip_polld: -#endif mutex_lock(&mb->mbox_lock); - if (!list_empty(&mb->chann_list)) - WARN_ON("Channel not destroy"); + WARN_ONCE(!list_empty(&mb->chann_list), "Channel not destroy"); mutex_unlock(&mb->mbox_lock); mutex_destroy(&mb->mbox_lock); diff --git a/src/driver/amdxdna/amdxdna_mailbox.h b/src/driver/amdxdna/amdxdna_mailbox.h index 2e11464..8ac677d 100644 --- a/src/driver/amdxdna/amdxdna_mailbox.h +++ b/src/driver/amdxdna/amdxdna_mailbox.h @@ -80,6 +80,13 @@ struct mailbox *xdna_mailbox_create(struct device *dev, */ void xdna_mailbox_destroy(struct mailbox *mailbox); +enum xdna_mailbox_channel_type { + MB_CHANNEL_MGMT = 0, + MB_CHANNEL_USER_NORMAL, + MB_CHANNEL_USER_POLL, + MB_CHANNEL_MAX_TYPE, +}; + /* * xdna_mailbox_create_channel() -- Create a mailbox channel instance * @@ -88,6 +95,7 @@ void
xdna_mailbox_destroy(struct mailbox *mailbox); * @i2x: firmware to host mailbox resources * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt * @mb_irq: Linux IRQ number associated with mailbox MSI-X interrupt vector index + * @type: Type of channel * * Return: If success, return a handle of mailbox channel. Otherwise, return NULL. */ @@ -96,7 +104,7 @@ xdna_mailbox_create_channel(struct mailbox *mailbox, const struct xdna_mailbox_chann_res *x2i, const struct xdna_mailbox_chann_res *i2x, u32 xdna_mailbox_intr_reg, - int mb_irq); + int mb_irq, enum xdna_mailbox_channel_type type); /* * xdna_mailbox_destroy_channel() -- destroy mailbox channel diff --git a/src/include/uapi/drm_local/amdxdna_accel.h b/src/include/uapi/drm_local/amdxdna_accel.h index 134ef87..a9d0146 100644 --- a/src/include/uapi/drm_local/amdxdna_accel.h +++ b/src/include/uapi/drm_local/amdxdna_accel.h @@ -461,6 +461,7 @@ enum amdxdna_power_mode_type { POWER_MODE_LOW, /**< Set frequency to lowest DPM */ POWER_MODE_MEDIUM, /**< Set frequency to medium DPM */ POWER_MODE_HIGH, /**< Set frequency to highest DPM */ + POWER_MODE_TURBO, /**< More power, more performance */ }; /** diff --git a/src/shim/hwq.cpp b/src/shim/hwq.cpp index 2bda0db..9a4c6b3 100644 --- a/src/shim/hwq.cpp +++ b/src/shim/hwq.cpp @@ -99,10 +99,21 @@ submit_command(xrt_core::buffer_handle *cmd) } } +int +hw_q:: +poll_command(xrt_core::buffer_handle *cmd) const +{ + auto cmdpkt = reinterpret_cast(cmd->map(xrt_core::buffer_handle::map_type::write)); + return (cmdpkt->state >= ERT_CMD_STATE_COMPLETED) ? 
1 : 0; +} + int hw_q:: wait_command(xrt_core::buffer_handle *cmd, uint32_t timeout_ms) const { + if (poll_command(cmd)) + return 1; + auto pkt = get_chained_command_pkt(cmd); if (!m_pdev.is_force_unchained_command() || !pkt) return wait_cmd(m_pdev, m_hwctx, cmd, timeout_ms); diff --git a/src/shim/hwq.h b/src/shim/hwq.h index ce2c1c8..afb9ca9 100644 --- a/src/shim/hwq.h +++ b/src/shim/hwq.h @@ -20,6 +20,9 @@ class hw_q : public xrt_core::hwqueue_handle void submit_command(xrt_core::buffer_handle *) override; + int + poll_command(xrt_core::buffer_handle *) const override; + int wait_command(xrt_core::buffer_handle *, uint32_t timeout_ms) const override; diff --git a/test/shim_test/io_param.h b/test/shim_test/io_param.h index b86446a..452c107 100644 --- a/test/shim_test/io_param.h +++ b/test/shim_test/io_param.h @@ -14,6 +14,9 @@ struct io_test_parameter { #define IO_TEST_NOOP_RUN 1 #define IO_TEST_BAD_RUN 2 int type; +#define IO_TEST_IOCTL_WAIT 0 +#define IO_TEST_POLL_WAIT 1 + int wait; bool debug; }; diff --git a/test/shim_test/io_test.cpp b/test/shim_test/io_test.cpp index 8ff5e6d..74df034 100644 --- a/test/shim_test/io_test.cpp +++ b/test/shim_test/io_test.cpp @@ -19,10 +19,11 @@ namespace { io_test_parameter io_test_parameters; void -io_test_parameter_init(int perf, int type, bool debug = false) +io_test_parameter_init(int perf, int type, int wait, bool debug = false) { io_test_parameters.perf = perf; io_test_parameters.type = type; + io_test_parameters.wait = wait; io_test_parameters.debug = debug; } @@ -82,7 +83,15 @@ io_test_init_runlist_cmd(bo* cmd_bo, std::vector& cmd_bos) } } -#define IO_TEST_TIMEOUT 5000 /* millisecond */ +void io_test_cmd_wait(hwqueue_handle *hwq, std::shared_ptr bo) +{ + if (io_test_parameters.wait == IO_TEST_POLL_WAIT) { + while(!hwq->poll_command(bo->get())); + } else { + hwq->wait_command(bo->get(), 0); + } +} + void io_test_cmd_submit_and_wait_latency( hwqueue_handle *hwq, @@ -96,9 +105,10 @@ io_test_cmd_submit_and_wait_latency( 
while (completed < total_cmd_submission) { for (auto& cmd : cmdlist_bos) { hwq->submit_command(std::get<0>(cmd).get()->get()); - hwq->wait_command(std::get<0>(cmd).get()->get(), IO_TEST_TIMEOUT); + io_test_cmd_wait(hwq, std::get<0>(cmd)); if (std::get<1>(cmd)->state != ERT_CMD_STATE_COMPLETED) throw std::runtime_error("Command error"); + std::get<1>(cmd)->state = ERT_CMD_STATE_NEW; completed++; if (completed >= total_cmd_submission) break; @@ -125,9 +135,10 @@ io_test_cmd_submit_and_wait_thruput( } while (completed < issued) { - hwq->wait_command(std::get<0>(cmdlist_bos[wait_idx]).get()->get(), IO_TEST_TIMEOUT); + io_test_cmd_wait(hwq, std::get<0>(cmdlist_bos[wait_idx])); if (std::get<1>(cmdlist_bos[wait_idx])->state != ERT_CMD_STATE_COMPLETED) throw std::runtime_error("Command error"); + std::get<1>(cmdlist_bos[wait_idx])->state = ERT_CMD_STATE_NEW; completed++; if (issued < total_cmd_submission) { @@ -235,47 +246,63 @@ io_test(device::id_type id, device* dev, int total_hwq_submit, int num_cmdlist, void TEST_io(device::id_type id, std::shared_ptr sdev, arg_type& arg) { - io_test_parameter_init(IO_TEST_NO_PERF, static_cast(arg[0])); + unsigned int run_type = static_cast(arg[0]); + + io_test_parameter_init(IO_TEST_NO_PERF, run_type, IO_TEST_IOCTL_WAIT); io_test(id, sdev.get(), 1, 1, arg[1]); } void TEST_io_latency(device::id_type id, std::shared_ptr sdev, arg_type& arg) { - io_test_parameter_init(IO_TEST_LATENCY_PERF, static_cast(arg[0])); - io_test(id, sdev.get(), 1000, 1, 1); + unsigned int run_type = static_cast(arg[0]); + unsigned int wait_type = static_cast(arg[1]); + unsigned int total = static_cast(arg[2]); + + io_test_parameter_init(IO_TEST_LATENCY_PERF, run_type, wait_type); + io_test(id, sdev.get(), total, 1, 1); } void -TEST_io_runlist_latency(device::id_type id, std::shared_ptr sdev, arg_type& arg) +TEST_io_throughput(device::id_type id, std::shared_ptr sdev, arg_type& arg) { - io_test_parameter_init(IO_TEST_LATENCY_PERF, static_cast(arg[0])); - 
io_test(id, sdev.get(), 32000, 1, 1); - io_test(id, sdev.get(), 16000, 1, 2); - io_test(id, sdev.get(), 8000, 1, 4); - io_test(id, sdev.get(), 4000, 1, 8); - io_test(id, sdev.get(), 2000, 1, 16); - io_test(id, sdev.get(), 1333, 1, 24); + unsigned int run_type = static_cast(arg[0]); + unsigned int wait_type = static_cast(arg[1]); + unsigned int total = static_cast(arg[2]); + + io_test_parameter_init(IO_TEST_THRUPUT_PERF, run_type, wait_type); + io_test(id, sdev.get(), total, 8, 1); } void -TEST_io_e_throughput(device::id_type id, std::shared_ptr sdev, arg_type& arg) +TEST_io_runlist_latency(device::id_type id, std::shared_ptr sdev, arg_type& arg) { - io_test_parameter_init(IO_TEST_THRUPUT_PERF, static_cast(arg[0])); - io_test(id, sdev.get(), 32000, 8, 1); + unsigned int run_type = static_cast(arg[0]); + unsigned int wait_type = static_cast(arg[1]); + unsigned int total = static_cast(arg[2]); + const size_t max_cmd_per_list = 24; + + io_test_parameter_init(IO_TEST_LATENCY_PERF, run_type, wait_type); + for (int cmds_per_list = 1; cmds_per_list <=32; cmds_per_list *=2) { + if (cmds_per_list > max_cmd_per_list) + cmds_per_list = max_cmd_per_list; + int total_hwq_submit = total / cmds_per_list; + io_test(id, sdev.get(), total_hwq_submit, 1, cmds_per_list); + } } void -TEST_io_throughput(device::id_type id, std::shared_ptr sdev, arg_type& arg) +TEST_io_runlist_throughput(device::id_type id, std::shared_ptr sdev, arg_type& arg) { + unsigned int run_type = static_cast(arg[0]); + unsigned int wait_type = static_cast(arg[1]); + unsigned int total_commands = static_cast(arg[2]); int num_bo_set = 256; - int total_commands = 32000; const size_t max_cmd_per_list = 24; - io_test_parameter_init(IO_TEST_THRUPUT_PERF, static_cast(arg[0])); + io_test_parameter_init(IO_TEST_THRUPUT_PERF, run_type, wait_type); - int cmds_per_list; - for (cmds_per_list = 1; cmds_per_list <= 32; cmds_per_list *= 2) { + for (int cmds_per_list = 1; cmds_per_list <= 32; cmds_per_list *= 2) { if 
(cmds_per_list > max_cmd_per_list) cmds_per_list = max_cmd_per_list; int num_cmdlist = num_bo_set / cmds_per_list; diff --git a/test/shim_test/shim_test.cpp b/test/shim_test/shim_test.cpp index fa4887f..5ccbbdc 100644 --- a/test/shim_test/shim_test.cpp +++ b/test/shim_test/shim_test.cpp @@ -29,9 +29,9 @@ using arg_type = const std::vector; void TEST_export_import_bo(device::id_type, std::shared_ptr, arg_type&); void TEST_io(device::id_type, std::shared_ptr, arg_type&); void TEST_io_latency(device::id_type, std::shared_ptr, arg_type&); -void TEST_io_runlist_latency(device::id_type, std::shared_ptr, arg_type&); -void TEST_io_e_throughput(device::id_type, std::shared_ptr, arg_type&); void TEST_io_throughput(device::id_type, std::shared_ptr, arg_type&); +void TEST_io_runlist_latency(device::id_type, std::shared_ptr, arg_type&); +void TEST_io_runlist_throughput(device::id_type, std::shared_ptr, arg_type&); void TEST_noop_io_with_dup_bo(device::id_type, std::shared_ptr, arg_type&); void TEST_shim_umq_vadd(device::id_type, std::shared_ptr, arg_type&); void TEST_shim_umq_memtiles(device::id_type, std::shared_ptr, arg_type&); @@ -521,10 +521,10 @@ std::vector test_list { TEST_POSITIVE, dev_filter_is_aie2, TEST_io, { IO_TEST_NORMAL_RUN, 1 } }, test_case{ "measure no-op kernel latency", - TEST_POSITIVE, dev_filter_is_aie2, TEST_io_latency, { IO_TEST_NOOP_RUN } + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_latency, { IO_TEST_NOOP_RUN, IO_TEST_IOCTL_WAIT, 32000 } }, test_case{ "measure real kernel latency", - TEST_POSITIVE, dev_filter_is_aie2, TEST_io_latency, { IO_TEST_NORMAL_RUN } + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_latency, { IO_TEST_NORMAL_RUN, IO_TEST_IOCTL_WAIT, 32000 } }, test_case{ "create and free debug bo", TEST_POSITIVE, dev_filter_is_aie2, TEST_create_free_debug_bo, { 0x1000 } @@ -536,7 +536,7 @@ std::vector test_list { TEST_POSITIVE, dev_filter_is_aie2, TEST_io, { IO_TEST_NORMAL_RUN, 3 } }, test_case{ "measure no-op kernel throughput listed command", - 
TEST_POSITIVE, dev_filter_is_aie2, TEST_io_throughput, { IO_TEST_NOOP_RUN } + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_runlist_throughput, { IO_TEST_NOOP_RUN, IO_TEST_IOCTL_WAIT, 32000 } }, test_case{ "npu3 shim vadd", TEST_POSITIVE, dev_filter_is_aie4, TEST_shim_umq_vadd, {} @@ -565,11 +565,23 @@ std::vector test_list { test_case{ "io test no op with duplicated BOs", TEST_POSITIVE, dev_filter_is_aie2, TEST_noop_io_with_dup_bo, {} }, - test_case{ "io test no-op kernel latency listed command", - TEST_POSITIVE, dev_filter_is_aie2, TEST_io_runlist_latency, { IO_TEST_NOOP_RUN } + test_case{ "measure no-op kernel latency listed command", + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_runlist_latency, { IO_TEST_NOOP_RUN, IO_TEST_IOCTL_WAIT, 32000 } }, test_case{ "measure no-op kernel throuput", - TEST_POSITIVE, dev_filter_is_aie2, TEST_io_e_throughput, { IO_TEST_NOOP_RUN } + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_throughput, { IO_TEST_NOOP_RUN, IO_TEST_IOCTL_WAIT, 32000 } + }, + test_case{ "measure no-op kernel latency (polling)", + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_latency, { IO_TEST_NOOP_RUN, IO_TEST_POLL_WAIT, 32000 } + }, + test_case{ "measure no-op kernel throuput (polling)", + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_throughput, { IO_TEST_NOOP_RUN, IO_TEST_POLL_WAIT, 32000 } + }, + test_case{ "measure no-op kernel latency listed command (polling)", + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_runlist_latency, { IO_TEST_NOOP_RUN, IO_TEST_POLL_WAIT, 32000 } + }, + test_case{ "measure no-op kernel throughput listed command (polling)", + TEST_POSITIVE, dev_filter_is_aie2, TEST_io_runlist_throughput, { IO_TEST_NOOP_RUN, IO_TEST_POLL_WAIT, 32000 } }, }; diff --git a/test/shim_test/speed.h b/test/shim_test/speed.h index 89d41fa..7970281 100644 --- a/test/shim_test/speed.h +++ b/test/shim_test/speed.h @@ -7,6 +7,7 @@ #include using clk = std::chrono::high_resolution_clock; +using ms_t = std::chrono::milliseconds; using us_t = std::chrono::microseconds; using 
ns_t = std::chrono::nanoseconds; diff --git a/tools/info.json b/tools/info.json index f6f4617..3deb2f8 100644 --- a/tools/info.json +++ b/tools/info.json @@ -1,7 +1,7 @@ { "copyright": "Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved.", "xrt" : { - "version": "202420.2.18.101", + "version": "202420.2.18.134", "os_rel": "22.04" }, "firmwares": [ diff --git a/xrt b/xrt index 476f42f..64d03f5 160000 --- a/xrt +++ b/xrt @@ -1 +1 @@ -Subproject commit 476f42f419bbc5d1545aded3627f03c1c2f1336e +Subproject commit 64d03f567db628c9107b6fcf5d362668d1834567