Skip to content

Commit

Permalink
Add performance scripts (#238)
Browse files Browse the repository at this point in the history
Signed-off-by: Max Zhen <max.zhen@amd.com>
  • Loading branch information
maxzhen authored Sep 3, 2024
1 parent a484a29 commit b209ee9
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/driver/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/dkms.conf

set(amdxdna_drv_tools
${CMAKE_CURRENT_SOURCE_DIR}/tools/dkms_driver.sh
${CMAKE_CURRENT_SOURCE_DIR}/tools/npu_perf_trace.sh
${CMAKE_CURRENT_SOURCE_DIR}/tools/npu_perf_analyze.sh
)
install(FILES ${amdxdna_drv_tools}
PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
Expand Down
6 changes: 3 additions & 3 deletions src/driver/amdxdna/aie2_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
struct dma_fence *fence = job->fence;

job->hwctx->completed++;
trace_xdna_job(&job->base, job->hwctx->name, "signale fence", job->seq);
trace_xdna_job(&job->base, job->hwctx->name, "signaling fence", job->seq);
dma_fence_signal(fence);
dma_fence_put(fence);
mmput(job->mm);
Expand All @@ -257,7 +257,7 @@ aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
}

status = *data;
XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
XDNA_DBG(job->hwctx->client->xdna, "Response status 0x%x", status);
if (status == AIE2_STATUS_SUCCESS)
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
else
Expand All @@ -284,7 +284,7 @@ aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
}

status = *data;
XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
XDNA_DBG(job->hwctx->client->xdna, "Response status 0x%x", status);

out:
aie2_sched_notify(job);
Expand Down
1 change: 1 addition & 0 deletions src/driver/amdxdna/amdxdna_mailbox.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work)
int ret;

mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);
trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq);

if (READ_ONCE(mb_chann->bad_state)) {
MB_ERR(mb_chann, "Channel in bad state, work aborted");
Expand Down
5 changes: 5 additions & 0 deletions src/driver/amdxdna/amdxdna_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle,
TP_ARGS(name, irq)
);

DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker,
TP_PROTO(char *name, int irq),
TP_ARGS(name, irq)
);

DEFINE_EVENT(xdna_mbox_name_id, mbox_poll_handle,
TP_PROTO(char *name, int irq),
TP_ARGS(name, irq)
Expand Down
134 changes: 134 additions & 0 deletions src/driver/tools/npu_perf_analyze.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/bash

# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2024, Advanced Micro Devices, Inc.

perf_out_file="perf.converted.out"

usage()
{
echo "$0 [entry_index_begin:entry_index_end] event1_pattern event2_pattern"
echo "Calculate time from event1 to event2 within [entry_index_begin,entry_index_end)"
echo "event pattern examples:"
echo " sdt_xrt:ioctl_exit: \(.+\) arg1=DRM_IOCTL_AMDXDNA_WAIT_CMD"
}

read_timestamps()
{
timestamps=()

while IFS= read -r line; do
if [ "$line" != "" ]; then
timestamps+=($(("10#${line}")))
fi
done <<< `egrep "$1" ${perf_out_file} | awk '{print $4}' | tr -d '.' | tr -d ':'`
echo ${timestamps[@]}
}
range_start=0
range_end=0
event1=""
event2=""
if [ "$#" -eq 2 ]; then
event1=$1
event2=$2
elif [ "$#" -eq 3 ]; then
st=$(echo $1 | cut -d':' -f1)
end=$(echo $1 | cut -d':' -f2)
if [ "${st}" != "" ]; then
range_start=$(("10#${st}"))
fi
if [ "${end}" != "" ]; then
range_end=$(("10#${end}"))
fi
event1=$2
event2=$3
else
usage
exit 1
fi
if [ ! -f ${perf_out_file} ]; then
echo "${perf_out_file} is not found"
exit 1
else
echo "Parsing ${perf_out_file}..."
fi
event1_ts=($(read_timestamps "${event1}"))
event1_ts_num=${#event1_ts[@]}
echo "${event1_ts_num} events for: '${event1}'"
event2_ts=($(read_timestamps "${event2}"))
event2_ts_num=${#event2_ts[@]}
echo "${event2_ts_num} events for: '${event2}'"
# Sanity check collected data
if [ ${event1_ts_num} -eq 0 ]; then
echo No events found for ${event1}
exit 1
fi
if [ ${event2_ts_num} -eq 0 ]; then
echo No events found for ${event2}
exit 1
fi
# Find first event2 entry index which comes after first event1
event2_index_base=-1
for (( i=0; i<${event2_ts_num}; i++ )); do
if ! [[ ${event2_ts[i]} -lt ${event1_ts[0]} ]]; then
event2_index_base=${i}
break
fi
done
if [ ${event2_index_base} -eq -1 ]; then
echo No ${event2} is after ${event1}
exit 1
fi
# Caculate time difference between two events
diffs=()
for (( i=0; i<${event1_ts_num}; i++ )); do
i2=$(( i+${event2_index_base} ))
if ! [ ${i2} -lt ${event2_ts_num} ]; then
break
fi
diffs+=( $((event2_ts[i2] - event1_ts[i])) )
done
#echo ${diffs[@]}
# Data mining within specified range
if [ ${range_end} -eq 0 ]; then
range_end=${#diffs[@]}
fi
if [ ${range_end} -eq ${range_start} ]; then
echo Range start and end are the same
exit 1
elif [ ${range_end} -lt ${range_start} ]; then
echo Range start after end
exit 1
fi
total=0
largest=${diffs[${range_start}]}
largest_idx=${range_start}
smallest=${diffs[${range_start}]}
smallest_idx=${range_start}
for (( i=${range_start}; i<${range_end}; i++ )); do
total=$(( total + diffs[i] ))
if [[ ${largest} -lt ${diffs[i]} ]]; then
largest=${diffs[i]}
largest_idx=${i}
fi
if [[ ${smallest} -gt ${diffs[i]} ]]; then
smallest=${diffs[i]}
smallest_idx=${i}
fi
done
# Output result
total_events=$(( range_end - range_start ))
echo Average over ${total_events} events: $(( total / total_events ))us
echo Largest: ${largest}us@${largest_idx}
echo Smallest: ${smallest}us@${smallest_idx}
124 changes: 124 additions & 0 deletions src/driver/tools/npu_perf_trace.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#! /bin/bash --

# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2024, Advanced Micro Devices, Inc.

set -eu

bold=$(tput bold)
normal=$(tput sgr0)
red=$(tput setaf 1)
yellow=$(tput setaf 3)
blue=$(tput setaf 4)

trace_info()
{
what=$1
echo -e "[INFO]: $what"
}

trace_warn()
{
what=$1
echo -e "[${yellow}WARNING${normal}]: $what"
}

trace_error()
{
what=$1
echo -e "[${red}ERROR${normal}]: $what" 1>&2
exit 1
}

add_sdt_xrt()
{
perf list | grep sdt_xrt > /dev/null && sdt_pre_enabled=1
if [[ $sdt_pre_enabled == 1 ]]; then
remove_sdt_xrt
#trace_warn "XRT SDT had beed added. Skip..."
#return
fi

# Add XRT SDT events
perf buildid-cache --add $xrt_libs
# Convert SDT events to trace points
perf probe --add=sdt_xrt:* &> /dev/null

trace_info "XRT SDT is added"
}

remove_sdt_xrt()
{
#if [[ $sdt_pre_enabled == 1 ]]; then
# trace_warn "XRT SDT was pre added. Skip..."
# return
#fi

# Delete SDT trace points
perf probe --del=sdt_xrt:* &> /dev/null
# Remove XRT STD events
perf buildid-cache --remove $xrt_libs
trace_info "XRT SDT is removed"
}

## -------- trace flow start --------
if [ "$EUID" -ne 0 ]; then
trace_error "Please run as root"
fi

# Global variables
sdt_pre_enabled=0
xrt_lib_prefix="/opt/xilinx/xrt/lib"
accel_debugfs="/sys/kernel/debug/accel"
xrt_libs="${xrt_lib_prefix}/libxrt_coreutil.so,${xrt_lib_prefix}/libxrt_driver_xdna.so"
perf_record_args="-e amdxdna_trace:* "
perf_record_args+="-e sdt_xrt:* "
exec_cmd=""

perf --version > /dev/null

# Argument parsing
exec_cmd=$@
if [[ -z "$exec_cmd" ]]; then
trace_error "Please put execute application at the end"
fi

dev=""
ioctl_sed_expr=""
for dir in $(ls $accel_debugfs); do
accel_fs_name=$(cat ${accel_debugfs}/$dir/name)
driver_name=$(echo $accel_fs_name | awk '{print $1}')
if [[ ! "$driver_name" =~ "amdxdna" ]]; then
continue
fi

if [[ ! -f ${accel_debugfs}/$dir/ioctl_id ]]; then
trace_error "${accel_debugfs}/$dir/ioctl_id not exist. amdxdna driver too old?"
fi

dev=$(echo $accel_fs_name | awk -F'[ =]' '{print $3}')
ioctl_sed_expr=$(awk -F ':' '{print "s/"$1"/"$2"/g"}' ${accel_debugfs}/$dir/ioctl_id)
done

if [[ -z "$dev" ]]; then
trace_error "No device found"
fi

trace_info "Found NPU device $dev at ${accel_debugfs}"

add_sdt_xrt

command="perf record $perf_record_args -a $exec_cmd"
trace_info "$command"
eval $command

tmp_file=/tmp/perf.out
# convert timestamp from second to microsecond to avoid floating numbers
#perf script | awk '{ $4=$4*1000000; print }' > ${tmp_file}
perf script --reltime > ${tmp_file}
# replace IOCTL cmd number to name
sed "$ioctl_sed_expr" "${tmp_file}" > perf.converted.out
rm -rf ${tmp_file}

remove_sdt_xrt
## -------- trace flow end --------

0 comments on commit b209ee9

Please sign in to comment.