Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add performance scripts #238

Merged
merged 2 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/driver/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/dkms.conf

# Helper scripts from tools/ that are handed to the install(FILES ...) rule below
set(amdxdna_drv_tools
${CMAKE_CURRENT_SOURCE_DIR}/tools/dkms_driver.sh
${CMAKE_CURRENT_SOURCE_DIR}/tools/npu_perf_trace.sh
${CMAKE_CURRENT_SOURCE_DIR}/tools/npu_perf_analyze.sh
)
install(FILES ${amdxdna_drv_tools}
PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
Expand Down
6 changes: 3 additions & 3 deletions src/driver/amdxdna/aie2_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
struct dma_fence *fence = job->fence;

job->hwctx->completed++;
trace_xdna_job(&job->base, job->hwctx->name, "signale fence", job->seq);
trace_xdna_job(&job->base, job->hwctx->name, "signaling fence", job->seq);
dma_fence_signal(fence);
dma_fence_put(fence);
mmput(job->mm);
Expand All @@ -257,7 +257,7 @@ aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
}

status = *data;
XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
XDNA_DBG(job->hwctx->client->xdna, "Response status 0x%x", status);
if (status == AIE2_STATUS_SUCCESS)
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
else
Expand All @@ -284,7 +284,7 @@ aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
}

status = *data;
XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
XDNA_DBG(job->hwctx->client->xdna, "Response status 0x%x", status);

out:
aie2_sched_notify(job);
Expand Down
1 change: 1 addition & 0 deletions src/driver/amdxdna/amdxdna_mailbox.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
int ret;

mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);

if (READ_ONCE(mb_chann->bad_state)) {
MB_ERR(mb_chann, "Channel in bad state, work aborted");
Expand Down
5 changes: 5 additions & 0 deletions src/driver/amdxdna/amdxdna_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
TP_ARGS(name, irq)
);

DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
TP_PROTO(char *name, int irq),
TP_ARGS(name, irq)
);

DEFINE_EVENT(xdna_mbox_name_id, mbox_poll_handle,
TP_PROTO(char *name, int irq),
TP_ARGS(name, irq)
Expand Down
134 changes: 134 additions & 0 deletions src/driver/tools/npu_perf_analyze.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/bash

# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2024, Advanced Micro Devices, Inc.

perf_out_file="perf.converted.out"

# Print the command-line help: invocation syntax, what is measured and a
# sample event pattern.
usage()
{
    printf '%s\n' \
        "$0 [entry_index_begin:entry_index_end] event1_pattern event2_pattern" \
        "Calculate time from event1 to event2 within [entry_index_begin,entry_index_end)" \
        "event pattern examples:" \
        " sdt_xrt:ioctl_exit: \(.+\) arg1=DRM_IOCTL_AMDXDNA_WAIT_CMD"
}

# Print, on one space-separated line, the timestamp of every line of
# ${perf_out_file} that matches the extended regular expression $1.
#
# The timestamp is taken from the 4th whitespace-separated field; its '.' and
# ':' characters are dropped and the remaining digits are read as a base-10
# integer, so leading zeros are not mistaken for octal.
read_timestamps()
{
    local pattern=$1

    timestamps=()

    # Process substitution keeps one timestamp per line. An unquoted
    # here-string is word-split and re-joined into a single line by bash
    # releases older than 4.4, which breaks the arithmetic below.
    while IFS= read -r line; do
        if [ -n "$line" ]; then
            timestamps+=( $(( 10#${line} )) )
        fi
    done < <(grep -E -e "${pattern}" -- "${perf_out_file}" | awk '{print $4}' | tr -d '.:')
    echo "${timestamps[@]}"
}

# ---- Command line parsing ----
# Two arguments:   event1_pattern event2_pattern
# Three arguments: [begin]:[end] event1_pattern event2_pattern
# Either side of the range may be left empty, in which case it stays 0.
range_start=0
range_end=0
event1=""
event2=""
if [ "$#" -eq 2 ]; then
    event1=$1
    event2=$2
elif [ "$#" -eq 3 ]; then
    # Accept only "[digits]:[digits]"; anything else would end up as a syntax
    # error inside the arithmetic expansions below.
    if ! [[ "$1" =~ ^([0-9]*):([0-9]*)$ ]]; then
        usage
        exit 1
    fi
    st=${BASH_REMATCH[1]}
    end=${BASH_REMATCH[2]}
    # Force base 10 so that values such as "010" are not read as octal
    if [ -n "${st}" ]; then
        range_start=$(( 10#${st} ))
    fi
    if [ -n "${end}" ]; then
        range_end=$(( 10#${end} ))
    fi
    event1=$2
    event2=$3
else
    usage
    exit 1
fi

# Nothing can be analyzed without the perf output file
if [ ! -f "${perf_out_file}" ]; then
    echo "${perf_out_file} is not found"
    exit 1
else
    echo "Parsing ${perf_out_file}..."
fi

# Collect the timestamps of every line matching each pattern. The helper
# prints them space separated, so word splitting turns them into array items.
event1_ts=($(read_timestamps "${event1}"))
event1_ts_num=${#event1_ts[@]}
echo "${event1_ts_num} events for: '${event1}'"

event2_ts=($(read_timestamps "${event2}"))
event2_ts_num=${#event2_ts[@]}
echo "${event2_ts_num} events for: '${event2}'"

# Sanity check collected data. The patterns are quoted because they are
# regular expressions and would otherwise be word-split and glob-expanded.
if [ "${event1_ts_num}" -eq 0 ]; then
    echo "No events found for ${event1}"
    exit 1
fi
if [ "${event2_ts_num}" -eq 0 ]; then
    echo "No events found for ${event2}"
    exit 1
fi
# Find the first event2 entry that is not earlier than the first event1
event2_index_base=-1
for (( i=0; i<event2_ts_num; i++ )); do
    if [[ ${event2_ts[i]} -ge ${event1_ts[0]} ]]; then
        event2_index_base=${i}
        break
    fi
done
if [ "${event2_index_base}" -eq -1 ]; then
    # Quoted: the patterns are regular expressions and must be printed verbatim
    echo "No ${event2} is after ${event1}"
    exit 1
fi

# Calculate time difference between two events: the i-th event1 is paired with
# the (i + event2_index_base)-th event2 until event2 entries run out
diffs=()
for (( i=0; i<event1_ts_num; i++ )); do
    i2=$(( i + event2_index_base ))
    if [ "${i2}" -ge "${event2_ts_num}" ]; then
        break
    fi
    diffs+=( $(( event2_ts[i2] - event1_ts[i] )) )
done


# Data mining within specified range

# An end index of 0 selects everything up to the last measured pair
if [ "${range_end}" -eq 0 ]; then
    range_end=${#diffs[@]}
fi
if [ "${range_end}" -eq "${range_start}" ]; then
    echo Range start and end are the same
    exit 1
elif [ "${range_end}" -lt "${range_start}" ]; then
    echo Range start after end
    exit 1
fi
# Keep the range inside the measured data. Entries past the end of diffs are
# unset and would be counted as 0, skewing the average and the smallest value.
if [ "${range_start}" -ge "${#diffs[@]}" ]; then
    echo "Range start ${range_start} is beyond the ${#diffs[@]} measured event pairs"
    exit 1
fi
if [ "${range_end}" -gt "${#diffs[@]}" ]; then
    echo "Range end ${range_end} exceeds the ${#diffs[@]} measured event pairs, using ${#diffs[@]}" 1>&2
    range_end=${#diffs[@]}
fi

# Walk entries [range_start, range_end) of diffs once, summing them up and
# remembering the largest and smallest value together with their index
total=0
largest=${diffs[${range_start}]}
smallest=${diffs[${range_start}]}
largest_idx=${range_start}
smallest_idx=${range_start}
i=${range_start}
while (( i < range_end )); do
    total=$(( total + diffs[i] ))
    if (( diffs[i] > largest )); then
        largest=${diffs[i]}
        largest_idx=${i}
    fi
    if (( diffs[i] < smallest )); then
        smallest=${diffs[i]}
        smallest_idx=${i}
    fi
    i=$(( i + 1 ))
done

# Output result
total_events=$(( range_end - range_start ))
printf 'Average over %s events: %sus\n' "${total_events}" "$(( total / total_events ))"
printf 'Largest: %sus@%s\n' "${largest}" "${largest_idx}"
printf 'Smallest: %sus@%s\n' "${smallest}" "${smallest_idx}"
124 changes: 124 additions & 0 deletions src/driver/tools/npu_perf_trace.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#! /bin/bash --

# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2024, Advanced Micro Devices, Inc.

# Stop at the first failing command and on any use of an unset variable
set -eu

# Terminal control sequences used to decorate messages. tput exits non-zero
# when TERM is unset or the capability is unknown; fall back to an empty string
# in that case so that "set -e" does not end the script before it says anything.
bold=$(tput bold 2> /dev/null || true)
normal=$(tput sgr0 2> /dev/null || true)
red=$(tput setaf 1 2> /dev/null || true)
yellow=$(tput setaf 3 2> /dev/null || true)
blue=$(tput setaf 4 2> /dev/null || true)

# Print message $1 on stdout with an "[INFO]: " prefix. Backslash escapes in
# the message are interpreted (echo -e).
trace_info()
{
    local msg=$1

    echo -e "[INFO]: ${msg}"
}

# Print message $1 on stdout with a "[WARNING]: " prefix; the tag is wrapped
# in the ${yellow} and ${normal} terminal sequences. Backslash escapes in the
# message are interpreted (echo -e).
trace_warn()
{
    local msg=$1

    echo -e "[${yellow}WARNING${normal}]: ${msg}"
}

# Print message $1 on stderr with an "[ERROR]: " prefix (tag wrapped in the
# ${red} and ${normal} terminal sequences), then terminate the shell with
# exit status 1.
trace_error()
{
    local msg=$1

    echo -e "[${red}ERROR${normal}]: ${msg}" >&2
    exit 1
}

# Make the sdt_xrt events of the libraries listed in ${xrt_libs} available to
# perf: add the libraries to the build-id cache, then create the probes.
# Sets sdt_pre_enabled=1 when sdt_xrt events were already listed beforehand.
add_sdt_xrt()
{
    # Start from a clean state when sdt_xrt events are already listed
    perf list | grep sdt_xrt > /dev/null && sdt_pre_enabled=1
    if [[ $sdt_pre_enabled == 1 ]]; then
        remove_sdt_xrt
    fi

    # Add XRT SDT events
    perf buildid-cache --add "$xrt_libs"
    # Convert SDT events to trace points. The output of perf is discarded, so
    # a failure is reported explicitly here; otherwise "set -e" would end the
    # script without any message.
    if ! perf probe --add='sdt_xrt:*' &> /dev/null; then
        trace_error "Failed to add sdt_xrt probes"
    fi

    trace_info "XRT SDT is added"
}

# Undo add_sdt_xrt: delete the sdt_xrt probes and drop the libraries listed in
# ${xrt_libs} from the perf build-id cache.
remove_sdt_xrt()
{
    # Delete SDT trace points. Best effort: the script runs under "set -e", and
    # a failing deletion (presumably also the case when no sdt_xrt probe exists;
    # verify against perf-probe) must not skip the cache cleanup below.
    perf probe --del='sdt_xrt:*' &> /dev/null || true
    # Remove XRT SDT events
    perf buildid-cache --remove "$xrt_libs"
    trace_info "XRT SDT is removed"
}

## -------- trace flow start --------
# Refuse to run unless the effective user is root
(( EUID == 0 )) || trace_error "Please run as root"

# Global variables
sdt_pre_enabled=0
accel_debugfs="/sys/kernel/debug/accel"
xrt_lib_prefix="/opt/xilinx/xrt/lib"
xrt_libs="${xrt_lib_prefix}/libxrt_coreutil.so,${xrt_lib_prefix}/libxrt_driver_xdna.so"
# Event selectors handed to "perf record"
perf_record_args="-e amdxdna_trace:* -e sdt_xrt:* "

# Under "set -e" the script stops here when perf cannot be run
perf --version > /dev/null

# Argument parsing: everything on the command line is the application to trace
exec_cmd="$*"
[[ -n "$exec_cmd" ]] || trace_error "Please put execute application at the end"

# Scan the entries under ${accel_debugfs} for nodes owned by the amdxdna
# driver. For a matching node, remember the device taken from its "name" file
# and build a sed script from its "ioctl_id" file (one s/<field1>/<field2>/g
# command per "field1:field2" line). The last matching node wins.
dev=""
ioctl_sed_expr=""
for entry in $(ls $accel_debugfs); do
    node_path="${accel_debugfs}/${entry}"
    name_line=$(cat "${node_path}/name")
    driver=$(echo $name_line | awk '{print $1}')

    # Skip nodes whose driver name does not contain "amdxdna"
    [[ "$driver" =~ "amdxdna" ]] || continue

    [[ -f "${node_path}/ioctl_id" ]] ||
        trace_error "${accel_debugfs}/$entry/ioctl_id not exist. amdxdna driver too old?"

    # Third field of the name line when split on spaces and '='
    dev=$(echo $name_line | awk -F'[ =]' '{print $3}')
    ioctl_sed_expr=$(awk -F ':' '{print "s/"$1"/"$2"/g"}' "${node_path}/ioctl_id")
done

[[ -n "$dev" ]] || trace_error "No device found"

trace_info "Found NPU device $dev at ${accel_debugfs}"

add_sdt_xrt

# NOTE(review): the application command line is flattened into one string and
# re-parsed by eval, so quoting inside its arguments is not preserved.
command="perf record $perf_record_args -a $exec_cmd"
trace_info "$command"
eval $command

# Private scratch file. This script runs as root, so a fixed, predictable name
# under /tmp must not be used.
tmp_file=$(mktemp)
# Dump the recorded events with time stamps relative to the start of the trace
perf script --reltime > "${tmp_file}"
# replace IOCTL cmd number to name
sed "$ioctl_sed_expr" "${tmp_file}" > perf.converted.out
rm -f "${tmp_file}"

remove_sdt_xrt
## -------- trace flow end --------