From 4ad27762154295fcc377639fd19746cdca87d9f3 Mon Sep 17 00:00:00 2001 From: Max Zhen Date: Fri, 30 Aug 2024 22:21:57 -0700 Subject: [PATCH 1/2] Add performance scripts Signed-off-by: Max Zhen --- src/driver/CMakeLists.txt | 2 + src/driver/amdxdna/aie2_ctx.c | 6 +- src/driver/amdxdna/amdxdna_mailbox.c | 1 + src/driver/amdxdna/amdxdna_trace.h | 5 ++ src/driver/tools/npu_perf_analyze.sh | 122 ++++++++++++++++++++++++++ src/driver/tools/npu_perf_trace.sh | 124 +++++++++++++++++++++++++++ 6 files changed, 257 insertions(+), 3 deletions(-) create mode 100755 src/driver/tools/npu_perf_analyze.sh create mode 100755 src/driver/tools/npu_perf_trace.sh diff --git a/src/driver/CMakeLists.txt b/src/driver/CMakeLists.txt index 1687d274..ad33fb76 100644 --- a/src/driver/CMakeLists.txt +++ b/src/driver/CMakeLists.txt @@ -92,6 +92,8 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/dkms.conf set(amdxdna_drv_tools ${CMAKE_CURRENT_SOURCE_DIR}/tools/dkms_driver.sh + ${CMAKE_CURRENT_SOURCE_DIR}/tools/npu_perf_trace.sh + ${CMAKE_CURRENT_SOURCE_DIR}/tools/npu_perf_analyze.sh ) install(FILES ${amdxdna_drv_tools} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE diff --git a/src/driver/amdxdna/aie2_ctx.c b/src/driver/amdxdna/aie2_ctx.c index 568ccb18..5bbc0514 100644 --- a/src/driver/amdxdna/aie2_ctx.c +++ b/src/driver/amdxdna/aie2_ctx.c @@ -230,7 +230,7 @@ aie2_sched_notify(struct amdxdna_sched_job *job) struct dma_fence *fence = job->fence; job->hwctx->completed++; - trace_xdna_job(&job->base, job->hwctx->name, "signale fence", job->seq); + trace_xdna_job(&job->base, job->hwctx->name, "signaling fence", job->seq); dma_fence_signal(fence); dma_fence_put(fence); mmput(job->mm); @@ -257,7 +257,7 @@ aie2_sched_resp_handler(void *handle, const u32 *data, size_t size) } status = *data; - XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); + XDNA_DBG(job->hwctx->client->xdna, "Response status 0x%x", status); if (status == AIE2_STATUS_SUCCESS) amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); else @@ -284,7 +284,7 @@ aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size) } status = *data; - XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); + XDNA_DBG(job->hwctx->client->xdna, "Response status 0x%x", status); out: aie2_sched_notify(job); diff --git a/src/driver/amdxdna/amdxdna_mailbox.c b/src/driver/amdxdna/amdxdna_mailbox.c index a27cbd17..8b152792 100644 --- a/src/driver/amdxdna/amdxdna_mailbox.c +++ b/src/driver/amdxdna/amdxdna_mailbox.c @@ -429,6 +429,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work) int ret; mb_chann = container_of(rx_work, struct mailbox_channel, rx_work); + trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq); if (READ_ONCE(mb_chann->bad_state)) { MB_ERR(mb_chann, "Channel in bad state, work aborted"); diff --git a/src/driver/amdxdna/amdxdna_trace.h b/src/driver/amdxdna/amdxdna_trace.h index 6d73c823..4620d2e4 100644 --- a/src/driver/amdxdna/amdxdna_trace.h +++ b/src/driver/amdxdna/amdxdna_trace.h @@ -130,6 +130,11 @@ DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle, TP_ARGS(name, irq) ); +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker, + TP_PROTO(char *name, int irq), + TP_ARGS(name, irq) +); + DEFINE_EVENT(xdna_mbox_name_id, mbox_poll_handle, TP_PROTO(char *name, int irq), TP_ARGS(name, irq) diff --git a/src/driver/tools/npu_perf_analyze.sh b/src/driver/tools/npu_perf_analyze.sh new file mode 100755 index 00000000..2d05f634 --- /dev/null +++ b/src/driver/tools/npu_perf_analyze.sh @@ -0,0 +1,122 @@ +#!/usr/bin/bash + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (C) 2024, Advanced Micro Devices, Inc. + +perf_out_file="perf.converted.out" + +usage() +{ + echo "$0 [entry_index_begin:entry_index_end] event1_pattern event2_pattern" + echo "Calculate time from event1 to event2 within [entry_index_begin,entry_index_end)" + echo "event pattern examples:" + echo " sdt_xrt:ioctl_exit: \(.+\) arg1=DRM_IOCTL_AMDXDNA_WAIT_CMD" +} + +read_timestamps() +{ + timestamps=() + + while IFS= read -r line; do + if [ "$line" != "" ]; then + timestamps+=($(("10#${line}"))) + fi + done <<< `egrep "$1" ${perf_out_file} | awk '{print $4}' | tr -d '.' | tr -d ':'` + echo ${timestamps[@]} +} + +range_start=0 +range_end=0 +event1="" +event2="" +if [ "$#" -eq 2 ]; then + event1=$1 + event2=$2 +elif [ "$#" -eq 3 ]; then + st=$(echo $1 | cut -d':' -f1) + end=$(echo $1 | cut -d':' -f2) + if [ "${st}" != "" ]; then + range_start=$(("10#${st}")) + fi + if [ "${end}" != "" ]; then + range_end=$(("10#${end}")) + fi + event1=$2 + event2=$3 +else + usage + exit 1 +fi + +if [ ! -f ${perf_out_file} ]; then + echo "${perf_out_file} is not found" + exit 1 +else + echo "Parsing ${perf_out_file}..." +fi + +event1_ts=($(read_timestamps "${event1}")) +event1_ts_num=${#event1_ts[@]} +echo "${event1_ts_num} events for: '${event1}'" + +event2_ts=($(read_timestamps "${event2}")) +event2_ts_num=${#event2_ts[@]} +echo "${event2_ts_num} events for: '${event2}'" + +# Sanity check collected data +if [ ${event1_ts_num} -ne ${event2_ts_num} ]; then + echo Two events do not show up in pairs + exit 1 +fi +if [ ${event1_ts_num} -eq 0 ]; then + echo No events found + exit 1 +fi +if [ ${event2_ts[0]} -lt ${event1_ts[0]} ]; then + echo Event2 shows up before event1 + exit 1 +fi + +# Caculate time difference between two events +diffs=() +for (( i=0; i<${event1_ts_num}; i++ )); do + diffs+=( $((event2_ts[i] - event1_ts[i])) ) +done +#echo ${diffs[@]} + + +# Data mining within specified range + +if [ ${range_end} -eq 0 ]; then + range_end=${event1_ts_num} +fi +if [ ${range_end} -eq ${range_start} ]; then + echo Range start and end are the same + exit 1 +elif [ ${range_end} -lt ${range_start} ]; then + echo Range start after end + exit 1 +fi + +total=0 +largest=${diffs[${range_start}]} +largest_idx=${range_start} +smallest=${diffs[${range_start}]} +smallest_idx=${range_start} +for (( i=${range_start}; i<${range_end}; i++ )); do + total=$(( total + diffs[i] )) + if [[ ${largest} -lt ${diffs[i]} ]]; then + largest=${diffs[i]} + largest_idx=${i} + fi + if [[ ${smallest} -gt ${diffs[i]} ]]; then + smallest=${diffs[i]} + smallest_idx=${i} + fi +done + +# Output result +total_events=$(( range_end - range_start )) +echo Average over ${total_events} events: $(( total / total_events ))us +echo Largest: ${largest}us@${largest_idx} +echo Smallest: ${smallest}us@${smallest_idx} diff --git a/src/driver/tools/npu_perf_trace.sh b/src/driver/tools/npu_perf_trace.sh new file mode 100755 index 00000000..d9a09717 --- /dev/null +++ b/src/driver/tools/npu_perf_trace.sh @@ -0,0 +1,124 @@ +#! /bin/bash -- + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (C) 2024, Advanced Micro Devices, Inc. + +set -eu + +bold=$(tput bold) +normal=$(tput sgr0) +red=$(tput setaf 1) +yellow=$(tput setaf 3) +blue=$(tput setaf 4) + +trace_info() +{ + what=$1 + echo -e "[INFO]: $what" +} + +trace_warn() +{ + what=$1 + echo -e "[${yellow}WARNING${normal}]: $what" +} + +trace_error() +{ + what=$1 + echo -e "[${red}ERROR${normal}]: $what" 1>&2 + exit 1 +} + +add_sdt_xrt() +{ + perf list | grep sdt_xrt > /dev/null && sdt_pre_enabled=1 + if [[ $sdt_pre_enabled == 1 ]]; then + remove_sdt_xrt + #trace_warn "XRT SDT had beed added. Skip..." + #return + fi + + # Add XRT SDT events + perf buildid-cache --add $xrt_libs + # Convert SDT events to trace points + perf probe --add=sdt_xrt:* &> /dev/null + + trace_info "XRT SDT is added" +} + +remove_sdt_xrt() +{ + #if [[ $sdt_pre_enabled == 1 ]]; then + # trace_warn "XRT SDT was pre added. Skip..." + # return + #fi + + # Delete SDT trace points + perf probe --del=sdt_xrt:* &> /dev/null + # Remove XRT STD events + perf buildid-cache --remove $xrt_libs + trace_info "XRT SDT is removed" +} + +## -------- trace flow start -------- +if [ "$EUID" -ne 0 ]; then + trace_error "Please run as root" +fi + +# Global variables +sdt_pre_enabled=0 +xrt_lib_prefix="/opt/xilinx/xrt/lib" +accel_debugfs="/sys/kernel/debug/accel" +xrt_libs="${xrt_lib_prefix}/libxrt_coreutil.so,${xrt_lib_prefix}/libxrt_driver_xdna.so" +perf_record_args="-e amdxdna_trace:* " +perf_record_args+="-e sdt_xrt:* " +exec_cmd="" + +perf --version > /dev/null + +# Argument parsing +exec_cmd=$@ +if [[ -z "$exec_cmd" ]]; then + trace_error "Please put execute application at the end" +fi + +dev="" +ioctl_sed_expr="" +for dir in $(ls $accel_debugfs); do + accel_fs_name=$(cat ${accel_debugfs}/$dir/name) + driver_name=$(echo $accel_fs_name | awk '{print $1}') + if [[ ! "$driver_name" =~ "amdxdna" ]]; then + continue + fi + + if [[ ! -f ${accel_debugfs}/$dir/ioctl_id ]]; then + trace_error "${accel_debugfs}/$dir/ioctl_id not exist. amdxdna driver too old?" + fi + + dev=$(echo $accel_fs_name | awk -F'[ =]' '{print $3}') + ioctl_sed_expr=$(awk -F ':' '{print "s/"$1"/"$2"/g"}' ${accel_debugfs}/$dir/ioctl_id) +done + +if [[ -z "$dev" ]]; then + trace_error "No device found" +fi + +trace_info "Found NPU device $dev at ${accel_debugfs}" + +add_sdt_xrt + +command="perf record $perf_record_args -a $exec_cmd" +trace_info "$command" +eval $command + +tmp_file=/tmp/perf.out +# convert timestamp from second to microsecond to avoid floating numbers +#perf script | awk '{ $4=$4*1000000; print }' > ${tmp_file} +perf script --reltime > ${tmp_file} +# replace IOCTL cmd number to name +sed "$ioctl_sed_expr" "${tmp_file}" > perf.converted.out +rm -rf ${tmp_file} + +remove_sdt_xrt +## -------- trace flow end -------- From b768015ec728c57f7bf115f5a0140bb2b06633f2 Mon Sep 17 00:00:00 2001 From: Max Zhen Date: Mon, 2 Sep 2024 22:36:37 -0700 Subject: [PATCH 2/2] first entry of event2 may be before first entry of event1 Signed-off-by: Max Zhen --- src/driver/tools/npu_perf_analyze.sh | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/driver/tools/npu_perf_analyze.sh b/src/driver/tools/npu_perf_analyze.sh index 2d05f634..514d1e10 100755 --- a/src/driver/tools/npu_perf_analyze.sh +++ b/src/driver/tools/npu_perf_analyze.sh @@ -64,23 +64,35 @@ event2_ts_num=${#event2_ts[@]} echo "${event2_ts_num} events for: '${event2}'" # Sanity check collected data -if [ ${event1_ts_num} -ne ${event2_ts_num} ]; then - echo Two events do not show up in pairs +if [ ${event1_ts_num} -eq 0 ]; then + echo No events found for ${event1} exit 1 fi -if [ ${event1_ts_num} -eq 0 ]; then - echo No events found +if [ ${event2_ts_num} -eq 0 ]; then + echo No events found for ${event2} exit 1 fi -if [ ${event2_ts[0]} -lt ${event1_ts[0]} ]; then - echo Event2 shows up before event1 +# Find first event2 entry index which comes after first event1 +event2_index_base=-1 +for (( i=0; i<${event2_ts_num}; i++ )); do + if ! [[ ${event2_ts[i]} -lt ${event1_ts[0]} ]]; then + event2_index_base=${i} + break + fi +done +if [ ${event2_index_base} -eq -1 ]; then + echo No ${event2} is after ${event1} exit 1 fi # Caculate time difference between two events diffs=() for (( i=0; i<${event1_ts_num}; i++ )); do - diffs+=( $((event2_ts[i] - event1_ts[i])) ) + i2=$(( i+${event2_index_base} )) + if ! [ ${i2} -lt ${event2_ts_num} ]; then + break + fi + diffs+=( $((event2_ts[i2] - event1_ts[i])) ) done #echo ${diffs[@]} @@ -88,7 +100,7 @@ done # Data mining within specified range if [ ${range_end} -eq 0 ]; then - range_end=${event1_ts_num} + range_end=${#diffs[@]} fi if [ ${range_end} -eq ${range_start} ]; then echo Range start and end are the same