Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix turbo mode #255

Merged
merged 1 commit into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions src/driver/amdxdna/aie2_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
struct drm_gpu_scheduler *sched;
struct amdxdna_hwctx_priv *priv;
struct amdxdna_gem_obj *heap;
unsigned int wq_flags;
int i, ret;

priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
Expand Down Expand Up @@ -587,12 +588,21 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)

sched = &priv->sched;
mutex_init(&priv->io_lock);
ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,

wq_flags = __WQ_ORDERED;
if (!aie2_pm_is_turbo(xdna->dev_handle))
wq_flags |= WQ_UNBOUND;
priv->submit_wq = alloc_workqueue(hwctx->name, wq_flags, 1);
if (!priv->submit_wq) {
XDNA_ERR(xdna, "Failed to alloc submit wq");
goto free_cmd_bufs;
}
ret = drm_sched_init(sched, &sched_ops, priv->submit_wq, DRM_SCHED_PRIORITY_COUNT,
HWCTX_MAX_CMDS, 0, MAX_SCHEDULE_TIMEOUT,
NULL, NULL, hwctx->name, xdna->ddev.dev);
if (ret) {
XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
goto free_cmd_bufs;
goto free_wq;
}

ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
Expand Down Expand Up @@ -645,6 +655,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
drm_sched_entity_destroy(&priv->entity);
free_sched:
drm_sched_fini(&priv->sched);
free_wq:
destroy_workqueue(priv->submit_wq);
free_cmd_bufs:
for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
if (!priv->cmd_buf[i])
Expand Down Expand Up @@ -681,6 +693,7 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
aie2_hwctx_wait_for_idle(hwctx);
drm_sched_entity_destroy(&hwctx->priv->entity);
drm_sched_fini(&hwctx->priv->sched);
destroy_workqueue(hwctx->priv->submit_wq);

for (idx = 0; idx < HWCTX_MAX_CMDS; idx++) {
job = hwctx->priv->pending[idx];
Expand Down
1 change: 1 addition & 0 deletions src/driver/amdxdna/aie2_pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ struct amdxdna_hwctx_priv {
u32 num_pending;

struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
struct workqueue_struct *submit_wq;
};

struct async_events;
Expand Down
10 changes: 7 additions & 3 deletions src/driver/tools/npu_perf_analyze.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ fi
echo "${event2_ts_num} events for: '${event2}'"

# Calculate time difference between two events
diffs_event1=()
diffs_event2=()
diffs=()
i1=0
i2=0
Expand All @@ -118,6 +120,8 @@ while [ 1 ]; do


(( i1-- ))
diffs_event1+=( $((event1_ts[i1])) )
diffs_event2+=( $((event2_ts[i2])) )
diffs+=( $((event2_ts[i2] - event1_ts[i1])) )
(( i1++ ))
(( i2++ ))
Expand Down Expand Up @@ -162,6 +166,6 @@ done

# Output result
total_events=$(( range_end - range_start ))
echo Average over ${total_events} events: $(( total / total_events ))us
echo Largest: ${largest}us@${largest_idx}
echo Smallest: ${smallest}us@${smallest_idx}
echo Average over ${total_events} events: $(( total / total_events ))ns
echo Largest: ${largest}ns@${largest_idx}: event1=${diffs_event1[largest_idx]}, event2=${diffs_event2[largest_idx]}
echo Smallest: ${smallest}ns@${smallest_idx}: event1=${diffs_event1[smallest_idx]}, event2=${diffs_event2[smallest_idx]}
2 changes: 1 addition & 1 deletion src/driver/tools/npu_perf_trace.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ eval $command
tmp_file=/tmp/perf.out
# convert timestamps from seconds to microseconds to avoid floating-point numbers
#perf script | awk '{ $4=$4*1000000; print }' > ${tmp_file}
perf script --reltime > ${tmp_file}
perf script --reltime --ns > ${tmp_file}
# replace IOCTL cmd numbers with names
sed "$ioctl_sed_expr" "${tmp_file}" > perf.converted.out
rm -rf ${tmp_file}
Expand Down