Pr-test-user #233

Closed · wants to merge 46 commits

Commits (46):
ed3147d  fix bo import and support dup bo in submit list (#211) · houlz0507, Aug 21, 2024
62e2cba  somehow auto suspend not work then crash in remove() (#215) · mamin506, Aug 22, 2024
5771d8a  share npu4 to npu2, 5, 6 (#216) · mamin506, Aug 22, 2024
be2b8bf  Once signaled job->fence, drm will cleanup job->base. Thus tracepoint … · mamin506, Aug 22, 2024
9337513  fix shimtest import export test failure when NPU driver is loaded wit… · houlz0507, Aug 22, 2024
27457c9  npu6 and npu4 shared same firmware binary (#219) · mamin506, Aug 23, 2024
5228267  add noop kernel runlist latency test (#221) · mamin506, Aug 23, 2024
44dd97c  support new mgmt channel info struct format (#220) · mamin506, Aug 23, 2024
b09ec43  Fix no QoS parameters case (#225) · vengutta18, Aug 27, 2024
f4bed65  Update valid qos dpm params check (#228) · vengutta18, Aug 27, 2024
3404736  fix insert pages for imported bo (#229) · houlz0507, Aug 27, 2024
d1a2c8d  add exec buf throughput test (#230) · mamin506, Aug 27, 2024
4bb5966  iohub register race condition fix (#227) · mamin506, Aug 27, 2024
da67fd1  fault in import bo pages instead of insert pages directly (#231) · houlz0507, Aug 28, 2024
d2a430e  fix noop kernel was using wrong size issue (#232) · mamin506, Aug 28, 2024
822ca7e  Update README.md · amd-akshatah, Aug 28, 2024
e6038ab  user control autosuspend delay and disable autosuspend by default for… · mamin506, Aug 29, 2024
1c15f15  firmware repo location has moved (#236) · maxzhen, Aug 30, 2024
a484a29  timeout_in_sec should be uint (#237) · mamin506, Aug 30, 2024
b209ee9  Add performance scripts (#238) · maxzhen, Sep 3, 2024
4780456  update npu1 fw (#239) · maxzhen, Sep 3, 2024
305e7f0  remove eu, looks like Vitis AI python env doesn't like it (#240) · mamin506, Sep 4, 2024
4a17aa5  CR-1210833 hw_ctx_create_with_priority fail with DRM_IOCTL_AMDXDNA_CR… · maxzhen, Sep 5, 2024
03f1eb5  CR-1210981 System hang while using 4 threads to create BO at the same… · houlz0507, Sep 5, 2024
85fffa0  Add a document describing AMD NPU workings · sonals, Sep 5, 2024
55a7d50  Minor changes to incorporate feedback · sonals, Sep 6, 2024
6f1b341  Elaborate on error handling and other minor changes · sonals, Sep 6, 2024
b8a3a1b  Merge pull request #244 from sonals/amdnpu-doc · sonals, Sep 8, 2024
1be6466  improve npu_perf_analyze.sh (#246) · maxzhen, Sep 8, 2024
182163a  indirect dpu pkt handling with device firmware host_queue changes (#241) · xdavidz, Sep 9, 2024
79c1313  allow npu_perf_trace.sh to trace shim_test (#247) · maxzhen, Sep 10, 2024
2bac8eb  update BARs and Telemetry (#248) · mamin506, Sep 10, 2024
dfd9ad5  fix coverity issues (#249) · maxzhen, Sep 10, 2024
da7c4b4  update firmware for npu4/5/6 (#251) · maxzhen, Sep 11, 2024
eddd92c  Support turbo mode for better latency but burn more power on CPU & NP… · mamin506, Sep 11, 2024
fba0bd9  add missing trace event (#252) · maxzhen, Sep 12, 2024
b902461  fix turbo mode (#255) · maxzhen, Sep 13, 2024
b326815  remove forcibly unchaining runlist hack (#254) · maxzhen, Sep 13, 2024
f158f2e  Add aie2_control_flags and some bug fix (#253) · mamin506, Sep 13, 2024
136c6d5  support response with status not at first word (#256) · mamin506, Sep 14, 2024
ca8ecbf  should use DRM_IOCTL_AMDXDNA_EXEC_CMD IOCTL for submitting wait and s… · maxzhen, Sep 16, 2024
2ee0fd6  fix CID-468941 (#258) · mamin506, Sep 16, 2024
6de1490  add validate.xclbin for npu6 (#259) · mamin506, Sep 16, 2024
c4a5a6d  document fix (#260) · mamin506, Sep 18, 2024
867e2f9  ignore child process flush (#261) · mamin506, Sep 19, 2024
47fc7ad  Merge branch 'amd:main' into pr · amd-akshatah, Sep 19, 2024
README.md (3 changes: 2 additions & 1 deletion)

@@ -6,6 +6,7 @@ This repository is for the AMD XDNA™️ Driver (amdxdna.ko) for Linux®️ and
- [System Requirements](#system-requirements)
- [Linux compilation and installation](#linux-compilation-and-installation)
- [Clone](#clone)

- [Build](#build)
- [Test](#test)
- [Q&A](#qa)
@@ -88,7 +89,7 @@ cd <root-of-source-tree>/build
 cd xrt/build
 ./build.sh -noert -noalveo
 # To adapt according to your OS & version
-sudo apt reinstall ./Release/xrt_202410.2.17.0_23.10-amd64-xrt.deb ./Release/xrt_202410.2.17.0_23.10-amd64-xbflash2.deb
+sudo apt reinstall ./Release/xrt_202410.2.17.0_23.10-amd64-xrt.deb
 cd ../../build
 
 # Start XDNA driver release build
WHENCE (1 change: 1 addition & 0 deletions)

@@ -11,5 +11,6 @@ File:
 tools/bins/1502_00/validate.xclbin
 tools/bins/17f0_10/validate.xclbin
 tools/bins/17f0_11/validate.xclbin
+tools/bins/17f0_20/validate.xclbin
 
 Licence: Redistributable. See LICENSE.amdnpu for details.
src/driver/CMakeLists.txt (2 changes: 2 additions & 0 deletions)

@@ -92,6 +92,8 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/dkms.conf
 
 set(amdxdna_drv_tools
 	${CMAKE_CURRENT_SOURCE_DIR}/tools/dkms_driver.sh
+	${CMAKE_CURRENT_SOURCE_DIR}/tools/npu_perf_trace.sh
+	${CMAKE_CURRENT_SOURCE_DIR}/tools/npu_perf_analyze.sh
 )
 install(FILES ${amdxdna_drv_tools}
 	PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
src/driver/amdxdna/aie2_ctx.c (47 changes: 30 additions & 17 deletions)

@@ -47,21 +47,13 @@ aie2_hwctx_get_job(struct amdxdna_hwctx *hwctx, u64 seq)
 {
 	int idx;
 
-	/* Special sequence number for oldest fence if exist */
-	if (seq == AMDXDNA_INVALID_CMD_HANDLE) {
-		idx = get_job_idx(hwctx->submitted);
-		goto out;
-	}
-
 	if (seq >= hwctx->submitted)
 		return ERR_PTR(-EINVAL);
 
 	if (seq + HWCTX_MAX_CMDS < hwctx->submitted)
 		return NULL;
 
 	idx = get_job_idx(seq);
-
-out:
 	return hwctx->priv->pending[idx];
 }
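
Note: get_job_idx() is referenced above but not shown in this diff. For orientation, a minimal sketch of what such a ring-index helper could look like; the implementation, the mask form, and the HWCTX_MAX_CMDS value below are all assumptions, not taken from this PR:

/* Hypothetical sketch only: map a sequence number to a slot in a
 * fixed-size pending[] ring. Assumes HWCTX_MAX_CMDS is a power of two,
 * so the modulo reduces to a mask.
 */
#define HWCTX_MAX_CMDS 4	/* assumed value, for illustration */

static inline int get_job_idx(u64 seq)
{
	return (int)(seq & (HWCTX_MAX_CMDS - 1));
}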

@@ -230,8 +222,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
 	struct dma_fence *fence = job->fence;
 
 	job->hwctx->completed++;
+	trace_xdna_job(&job->base, job->hwctx->name, "signaling fence", job->seq);
 	dma_fence_signal(fence);
-	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
 	dma_fence_put(fence);
 	mmput(job->mm);
 	amdxdna_job_put(job);
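
Note: the reorder above follows the truncated rationale of commit be2b8bf: once dma_fence_signal() runs, the DRM scheduler's completion path may tear down job->base, so the tracepoint has to dereference it before the fence is signaled, not after.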
@@ -257,7 +249,7 @@ aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
 	}
 
 	status = *data;
-	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+	XDNA_DBG(job->hwctx->client->xdna, "Response status 0x%x", status);
 	if (status == AIE2_STATUS_SUCCESS)
 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
 	else
@@ -284,7 +276,7 @@ aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
 	}
 
 	status = *data;
-	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+	XDNA_DBG(job->hwctx->client->xdna, "Response status 0x%x", status);
 
 out:
 	aie2_sched_notify(job);
@@ -540,6 +532,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 	struct drm_gpu_scheduler *sched;
 	struct amdxdna_hwctx_priv *priv;
 	struct amdxdna_gem_obj *heap;
+	unsigned int wq_flags;
 	int i, ret;
 
 	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
@@ -587,12 +580,21 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)

 	sched = &priv->sched;
 	mutex_init(&priv->io_lock);
-	ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
+
+	wq_flags = __WQ_ORDERED;
+	if (!aie2_pm_is_turbo(xdna->dev_handle))
+		wq_flags |= WQ_UNBOUND;
+	priv->submit_wq = alloc_workqueue(hwctx->name, wq_flags, 1);
+	if (!priv->submit_wq) {
+		XDNA_ERR(xdna, "Failed to alloc submit wq");
+		goto free_cmd_bufs;
+	}
+	ret = drm_sched_init(sched, &sched_ops, priv->submit_wq, DRM_SCHED_PRIORITY_COUNT,
 			     HWCTX_MAX_CMDS, 0, MAX_SCHEDULE_TIMEOUT,
 			     NULL, NULL, hwctx->name, xdna->ddev.dev);
 	if (ret) {
 		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
-		goto free_cmd_bufs;
+		goto free_wq;
 	}
 
 	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
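
Note on the workqueue flags, based on general kernel workqueue semantics rather than anything this PR states: __WQ_ORDERED together with max_active = 1 makes the queue execute one work item at a time in submission order, while WQ_UNBOUND detaches work items from the submitting CPU so the scheduler can place and power-manage them freely. Turbo mode keeps the queue bound, trading CPU power for lower submit latency. A minimal standalone sketch of the same allocation pattern (all names hypothetical):

#include <linux/workqueue.h>

/* Sketch only: an ordered, single-slot submit queue that stays unbound
 * unless the caller asks for low latency.
 */
static struct workqueue_struct *example_wq;

static int example_wq_init(bool low_latency)
{
	unsigned int flags = __WQ_ORDERED;

	if (!low_latency)
		flags |= WQ_UNBOUND;
	example_wq = alloc_workqueue("example_wq", flags, 1);
	return example_wq ? 0 : -ENOMEM;
}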
@@ -645,6 +647,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 	drm_sched_entity_destroy(&priv->entity);
 free_sched:
 	drm_sched_fini(&priv->sched);
+free_wq:
+	destroy_workqueue(priv->submit_wq);
 free_cmd_bufs:
 	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
 		if (!priv->cmd_buf[i])
@@ -681,6 +685,7 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
 	aie2_hwctx_wait_for_idle(hwctx);
 	drm_sched_entity_destroy(&hwctx->priv->entity);
 	drm_sched_fini(&hwctx->priv->sched);
+	destroy_workqueue(hwctx->priv->submit_wq);
 
 	for (idx = 0; idx < HWCTX_MAX_CMDS; idx++) {
 		job = hwctx->priv->pending[idx];
@@ -928,7 +933,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
 
 retry:
-	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+	ret = amdxdna_lock_objects(job, &acquire_ctx);
 	if (ret) {
 		XDNA_WARN(xdna, "Failed to reserve fence, ret %d", ret);
 		goto put_fence;
@@ -937,7 +942,7 @@
 	for (i = 0; i < job->bo_cnt; i++) {
 		abo = to_xdna_obj(job->bos[i]);
 		if (abo->mem.map_invalid) {
-			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+			amdxdna_unlock_objects(job, &acquire_ctx);
 			if (!timeout) {
 				timeout = jiffies +
 					  msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
@@ -955,19 +960,27 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
 		if (ret) {
 			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
-			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+			amdxdna_unlock_objects(job, &acquire_ctx);
 			goto put_fence;
 		}
 	}
 
 	for (i = 0; i < job->bo_cnt; i++)
 		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
-	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+	amdxdna_unlock_objects(job, &acquire_ctx);
 
+again:
 	mutex_lock(&hwctx->priv->io_lock);
 	ret = aie2_hwctx_add_job(hwctx, job);
 	if (ret) {
 		mutex_unlock(&hwctx->priv->io_lock);
+
+		if (ret == -EAGAIN) {
+			/* Wait for the oldest pending cmd to complete, then retry. */
+			int res = aie2_cmd_wait(hwctx, hwctx->submitted - HWCTX_MAX_CMDS, 0);
+			if (!res)
+				goto again;
+		}
 		goto signal_fence;
 	}
 
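Note: the -EAGAIN branch added above is straightforward backpressure on a bounded ring: with HWCTX_MAX_CMDS commands in flight, the oldest outstanding sequence number is hwctx->submitted - HWCTX_MAX_CMDS, so the submitter waits for that one command to finish and then retries. A generic sketch of the pattern; ring_add, ring_wait, RING_CAP, and the struct names are hypothetical, not from this PR:

/* Hypothetical sketch of the bounded-ring submit pattern. */
static int submit_with_backpressure(struct example_ring *ring,
				    struct example_job *job)
{
	int ret;

	for (;;) {
		ret = ring_add(ring, job);	/* -EAGAIN when all slots are busy */
		if (ret != -EAGAIN)
			return ret;
		/* Oldest in-flight entry is (next sequence - ring capacity). */
		ret = ring_wait(ring, ring->submitted - RING_CAP, 0);
		if (ret)
			return ret;
	}
}
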
src/driver/amdxdna/aie2_debugfs.c (89 changes: 18 additions & 71 deletions)

@@ -77,71 +77,6 @@ static int aie2_dbgfs_entry_release(struct inode *inode, struct file *file)
 #define file_to_ndev_rw(file) \
 	(((struct seq_file *)(file)->private_data)->private)
 
-static ssize_t
-aie2_dbgfs_clock_write(struct amdxdna_dev_hdl *ndev, struct clock *clock,
-		       const char __user *ptr, size_t len, loff_t *off)
-{
-	u32 val;
-	int ret;
-
-	ret = kstrtouint_from_user(ptr, len, 10, &val);
-	if (ret) {
-		XDNA_ERR(ndev->xdna, "Invalid input value: %d", val);
-		return ret;
-	}
-
-	clock->dbg_freq_mhz = val;
-	if (!clock->dbg_freq_mhz) {
-		XDNA_INFO(ndev->xdna, "Auto %s", clock->name);
-		return 0;
-	}
-
-	ret = aie2_smu_set_clock_freq(ndev, clock, val);
-	if (ret) {
-		clock->dbg_freq_mhz = 0;
-		XDNA_ERR(ndev->xdna, "Set %s ret %d, use auto clock", clock->name, ret);
-		return ret;
-	}
-
-	return len;
-}
-
-static ssize_t aie2_dbgfs_mpnpu_clock_write(struct file *file, const char __user *ptr,
-					    size_t len, loff_t *off)
-{
-	struct amdxdna_dev_hdl *ndev = file_to_ndev_rw(file);
-
-	return aie2_dbgfs_clock_write(ndev, &ndev->smu.mp_npu_clock, ptr, len, off);
-}
-
-static int aie2_dbgfs_mpnpu_clock_show(struct seq_file *m, void *unused)
-{
-	struct amdxdna_dev_hdl *ndev = m->private;
-
-	seq_printf(m, "%d\n", aie2_smu_get_mpnpu_clock_freq(ndev));
-	return 0;
-}
-
-AIE2_DBGFS_FOPS(npuclock, aie2_dbgfs_mpnpu_clock_show, aie2_dbgfs_mpnpu_clock_write);
-
-static ssize_t aie2_dbgfs_hclock_write(struct file *file, const char __user *ptr,
-				       size_t len, loff_t *off)
-{
-	struct amdxdna_dev_hdl *ndev = file_to_ndev_rw(file);
-
-	return aie2_dbgfs_clock_write(ndev, &ndev->smu.h_clock, ptr, len, off);
-}
-
-static int aie2_dbgfs_hclock_show(struct seq_file *m, void *unused)
-{
-	struct amdxdna_dev_hdl *ndev = m->private;
-
-	seq_printf(m, "%d\n", aie2_smu_get_hclock_freq(ndev));
-	return 0;
-}
-
-AIE2_DBGFS_FOPS(hclock, aie2_dbgfs_hclock_show, aie2_dbgfs_hclock_write);
-
 static ssize_t aie2_pasid_write(struct file *file, const char __user *ptr,
 				size_t len, loff_t *off)
 {
@@ -291,7 +226,7 @@ static ssize_t aie2_dpm_level_set(struct file *file, const char __user *ptr,
 		return ret;
 	}
 
-	ret = aie2_smu_set_dpm_level(ndev, val, true);
+	ret = aie2_smu_set_dpm_level(ndev, val);
 	if (ret) {
 		XDNA_ERR(ndev->xdna, "Setting dpm_level:%d failed, ret: %d", val, ret);
 		return ret;
@@ -302,8 +237,24 @@
 static int aie2_dpm_level_get(struct seq_file *m, void *unused)
 {
 	struct amdxdna_dev_hdl *ndev = m->private;
+	const struct dpm_clk *dpm_table;
+	u32 num_dpm_levels;
+	int dpm_level;
+	int i;
 
-	seq_printf(m, "%d\n", aie2_smu_get_dpm_level(ndev));
+	dpm_table = SMU_DPM_TABLE_ENTRY(ndev, 0);
+	dpm_level = aie2_smu_get_dpm_level(ndev);
+	num_dpm_levels = SMU_DPM_MAX(ndev);
+	for (i = 0; i <= num_dpm_levels; i++) {
+		u32 npuclk = dpm_table[i].npuclk;
+		u32 hclk = dpm_table[i].hclk;
+
+		if (dpm_level == i)
+			seq_printf(m, " [%d,%d] ", npuclk, hclk);
+		else
+			seq_printf(m, " %d,%d ", npuclk, hclk);
+	}
+	seq_puts(m, "\n");
 	return 0;
 }
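
Note: with this change, reading the dpm_level debugfs node prints the whole DPM table as npuclk,hclk pairs and brackets the active level, instead of the bare level number the old code printed. For illustration only (clock values invented, not from this PR), a four-level table with level 2 active would read back as:

 400,800  600,1024  [800,1355]  1000,1600 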

@@ -511,8 +462,6 @@ seq_printf(m, "%ld:%s\n", _name, #_name)
 	drm_ioctl_id_seq_print(DRM_IOCTL_AMDXDNA_WAIT_CMD);
 	drm_ioctl_id_seq_print(DRM_IOCTL_AMDXDNA_GET_INFO);
 	drm_ioctl_id_seq_print(DRM_IOCTL_AMDXDNA_SET_STATE);
-	drm_ioctl_id_seq_print(DRM_IOCTL_AMDXDNA_SUBMIT_SIGNAL);
-	drm_ioctl_id_seq_print(DRM_IOCTL_AMDXDNA_SUBMIT_WAIT);
 
 	drm_ioctl_id_seq_print(DRM_IOCTL_GEM_CLOSE);
 	drm_ioctl_id_seq_print(DRM_IOCTL_PRIME_HANDLE_TO_FD);
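
Note: this removal pairs with commit ca8ecbf, whose truncated subject says wait and signal commands should be submitted through the DRM_IOCTL_AMDXDNA_EXEC_CMD ioctl, which makes the dedicated SUBMIT_SIGNAL and SUBMIT_WAIT entries obsolete.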
@@ -609,8 +558,6 @@ const struct {
 	umode_t mode;
 } aie2_dbgfs_files[] = {
 	AIE2_DBGFS_FILE(nputest, 0600),
-	AIE2_DBGFS_FILE(hclock, 0600),
-	AIE2_DBGFS_FILE(npuclock, 0600),
 	AIE2_DBGFS_FILE(pasid, 0600),
 	AIE2_DBGFS_FILE(state, 0600),
 	AIE2_DBGFS_FILE(powerstate, 0600),