diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml
index dad6a343..3062b89b 100644
--- a/.github/workflows/opensuse.yml
+++ b/.github/workflows/opensuse.yml
@@ -102,7 +102,7 @@ jobs:
 -DROCPROFSYS_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10;py3.11"
 -DROCPROFSYS_CI_MPI_RUN_AS_ROOT=ON
 -DROCPROFSYS_MAX_THREADS=64
- -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl"
+ -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode"
 -DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}

 - name: Install
diff --git a/.github/workflows/redhat.yml b/.github/workflows/redhat.yml
index d582f14d..06aeaa9d 100644
--- a/.github/workflows/redhat.yml
+++ b/.github/workflows/redhat.yml
@@ -83,10 +83,11 @@ jobs:
 ROCM_MAJOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $1}')
 ROCM_MINOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $2}')
 ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100) ))
- if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \
+ if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi
 dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir
 yum install -y https://repo.radeon.com/amdgpu-install/${{ matrix.rocm-version }}/rhel/${{ matrix.os-release }}/amdgpu-install-${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1${RPM_TAG}.noarch.rpm
- yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess
+ yum install -y rocm-dev rocdecode-devel
+ if [ "${OS_VERSION_MAJOR}" -gt 8 ]; then dnf install -y libavcodec-free-devel libavformat-free-devel; fi

 - name: Configure, Build, and Test
 timeout-minutes: 115
@@ -120,7 +121,7 @@ jobs:
 -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl"
 -DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
 --
- -LE "transpose|rccl"
+ -LE "transpose|rccl|videodecode"

 - name: Install
 timeout-minutes: 10
diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml
index 04eb1577..bdc482a8 100644
--- a/.github/workflows/ubuntu-focal.yml
+++ b/.github/workflows/ubuntu-focal.yml
@@ -157,7 +157,7 @@ jobs:
 -DROCPROFSYS_PYTHON_PREFIX=/opt/conda/envs
 -DROCPROFSYS_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10;py3.11"
 -DROCPROFSYS_MAX_THREADS=64
- -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl"
+ -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode"
 -DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
 -DMPI_HEADERS_ALLOW_MPICH=OFF

@@ -248,7 +248,7 @@ jobs:
 rocm-version: ['6.3']
 mpi-headers: ['OFF']
 build-jobs: ['3']
- ctest-exclude: ['-LE "transpose"']
+ ctest-exclude: ['-LE "transpose|videodecode"']

 env:
 BUILD_TYPE: MinSizeRel

@@ -268,10 +268,16 @@ jobs:
 command: |
 apt-get update && apt-get install -y software-properties-common wget gnupg2 &&
- wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - &&
- echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${{ matrix.rocm-version }}/ ubuntu main" | tee /etc/apt/sources.list.d/rocm.list &&
+ ROCM_VERSION=${{ matrix.rocm-version }} &&
+ ROCM_MAJOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $1}') &&
+ ROCM_MINOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $2}') &&
+ ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100) )) &&
+ echo "ROCM_MAJOR=${ROCM_MAJOR} ROCM_MINOR=${ROCM_MINOR} ROCM_VERSN=${ROCM_VERSN}" &&
+ wget -q https://repo.radeon.com/amdgpu-install/${{ matrix.rocm-version }}/ubuntu/focal/amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb &&
+ apt-get install -y ./amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb &&
 apt-get update && apt-get install -y autoconf bison build-essential clang curl gettext libfabric-dev libnuma1 libomp-dev libopenmpi-dev libpapi-dev libtool libudev1 m4 openmpi-bin python3-pip rocm-dev texinfo &&
+ apt-get install -y rocdecode-dev libavformat-dev libavcodec-dev &&
 wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin &&
 chmod +x /opt/trace_processor/bin/trace_processor_shell &&
 python3 -m pip install --upgrade pip &&
@@ -483,7 +489,7 @@ jobs:
 -DDYNINST_BUILD_STATIC_LIBS=OFF
 -DDYNINST_ELFUTILS_DOWNLOAD_VERSION=${{ env.ELFUTILS_DOWNLOAD_VERSION }}
 -DROCPROFSYS_MAX_THREADS=64
- -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl"
+ -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode"
 -DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
 -DMPI_HEADERS_ALLOW_MPICH=ON

@@ -622,5 +628,5 @@ jobs:
 -DROCPROFSYS_USE_ROCM=OFF
 -DROCPROFSYS_USE_RCCL=OFF
 -DROCPROFSYS_MAX_THREADS=64
- -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl"
+ -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode"
 -DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
diff --git a/.github/workflows/ubuntu-jammy.yml b/.github/workflows/ubuntu-jammy.yml
index 1debacf5..9a6b72ab 100644
--- a/.github/workflows/ubuntu-jammy.yml
+++ b/.github/workflows/ubuntu-jammy.yml
@@ -114,11 +114,15 @@ jobs:
 max_attempts: 5
 shell: bash
 command: |
- wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
- echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${{ matrix.rocm-version }}/ jammy main" | tee /etc/apt/sources.list.d/rocm.list
+ ROCM_VERSION=${{ matrix.rocm-version }}
+ ROCM_MAJOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $1}')
+ ROCM_MINOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $2}')
+ ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100) ))
+ echo "ROCM_MAJOR=${ROCM_MAJOR} ROCM_MINOR=${ROCM_MINOR} ROCM_VERSN=${ROCM_VERSN}"
+ wget -q https://repo.radeon.com/amdgpu-install/${{ matrix.rocm-version }}/ubuntu/jammy/amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb
+ apt-get install -y ./amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb
 apt-get update
- ROCM_VERSION=$(apt-cache search rocm-dev[0-9] | awk '{print $1}' | sed 's/rocm-dev//g')
- apt-get install -y {rocm-dev,hip-dev,roctracer-dev,rocprofiler-dev,rocm-smi-lib,rocminfo}${ROCM_VERSION}
+ apt-get install -y rocm-dev rocdecode-dev libavformat-dev libavcodec-dev
 echo "/opt/rocm/bin" >> $GITHUB_PATH
 echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV
 echo "LD_LIBRARY_PATH=/opt/rocm/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
@@ -185,6 +189,8 @@ jobs:
 -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl"
 -DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
 -DUSE_CLANG_OMP=OFF
+ --
+ -LE "transpose|rccl|videodecode"

 - name: Install
 timeout-minutes: 10
diff --git a/.github/workflows/ubuntu-noble.yml b/.github/workflows/ubuntu-noble.yml
index 68ef6648..c4ce69ef 100644
--- a/.github/workflows/ubuntu-noble.yml
+++ b/.github/workflows/ubuntu-noble.yml
@@ -84,7 +84,7 @@ jobs:
 wget -q https://repo.radeon.com/amdgpu-install/${{ matrix.rocm-version }}/ubuntu/noble/amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb
 apt-get install -y ./amdgpu-install_${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1_all.deb
 apt-get update
- apt-get install -y rocm-dev
+ apt-get install -y rocm-dev rocdecode-dev libavformat-dev libavcodec-dev

 - name: Configure
 timeout-minutes: 30
diff --git a/cmake/ConfigCPack.cmake b/cmake/ConfigCPack.cmake
index
bb10029b..27211c5c 100644
--- a/cmake/ConfigCPack.cmake
+++ b/cmake/ConfigCPack.cmake
@@ -171,6 +171,9 @@ if(ROCPROFSYS_USE_MPI)
 list(APPEND _DEBIAN_PACKAGE_DEPENDS "libmpich-dev")
 endif()
 endif()
+if(ROCPROFSYS_BUILD_TESTING)
+ list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocdecode-test")
+endif()
 string(REPLACE ";" ", " _DEBIAN_PACKAGE_DEPENDS "${_DEBIAN_PACKAGE_DEPENDS}")
 set(CPACK_DEBIAN_PACKAGE_DEPENDS "${_DEBIAN_PACKAGE_DEPENDS}"
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 4a2bda45..a4631091 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -56,3 +56,4 @@ add_subdirectory(rewrite-caller)
 add_subdirectory(causal)
 add_subdirectory(trace-time-window)
 add_subdirectory(fork)
+add_subdirectory(videodecode)
diff --git a/examples/videodecode/CMakeLists.txt b/examples/videodecode/CMakeLists.txt
new file mode 100644
index 00000000..ac737614
--- /dev/null
+++ b/examples/videodecode/CMakeLists.txt
@@ -0,0 +1,160 @@
+cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)
+
+project(rocprofiler-systems-videodecode-example LANGUAGES CXX)
+
+# This example requires hip and rocdecode.
+find_package(HIP QUIET)
+
+if(NOT HIP_FOUND)
+ message(WARNING "hip is not found. Skip videodecode example.")
+ return()
+endif()
+
+message(STATUS "hip found: ${hip_DIR}")
+
+# Set AMD Clang as default compiler
+if(NOT DEFINED CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${ROCmVersion_DIR}/bin/amdclang++)
+endif()
+
+set(CMAKE_CXX_STANDARD 17)
+
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake)
+list(APPEND CMAKE_PREFIX_PATH ${ROCmVersion_DIR}/hip ${ROCmVersion_DIR})
+list(APPEND CMAKE_MODULE_PATH ${ROCmVersion_DIR}/share/rocdecode/cmake)
+
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+string(REPLACE " " ";" _FLAGS "${CMAKE_CXX_FLAGS_DEBUG}")
+
+if(ROCPROFSYS_DISABLE_EXAMPLES)
+ get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME)
+
+ if(${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES OR ${_DIR} IN_LIST
+ ROCPROFSYS_DISABLE_EXAMPLES)
+ return()
+ endif()
+endif()
+
+function(videodecode_message _MSG_TYPE)
+ if("${CMAKE_PROJECT_NAME}" STREQUAL "rocprofiler-systems"
+ AND "$ENV{ROCPROFSYS_CI}"
+ AND "${_MSG_TYPE}" MATCHES "WARNING")
+ set(_MSG_TYPE STATUS) # don't generate warnings during CI
+ endif()
+ if("${CMAKE_PROJECT_NAME}" STREQUAL "rocprofiler-systems")
+ rocprofiler_systems_message(${_MSG_TYPE} ${ARGN})
+ else()
+ message(${_MSG_TYPE} ${ARGN})
+ endif()
+endfunction()
+
+# Find RocDecode
+find_package(rocDecode REQUIRED)
+if(NOT ROCDECODE_FOUND)
+ videodecode_message(AUTHOR_WARNING "${PROJECT_NAME} skipped. Missing RocDecode...")
+ return()
+endif()
+
+find_path(
+ ROCDECODE_INCLUDE_DIR
+ NAMES rocdecode.h
+ PATHS ${ROCmVersion_DIR}/include/rocdecode)
+find_library(
+ ROCDECODE_LIBRARY
+ NAMES rocdecode
+ HINTS ${ROCmVersion_DIR}/lib)
+
+mark_as_advanced(ROCDECODE_INCLUDE_DIR ROCDECODE_LIBRARY)
+
+if(ROCDECODE_INCLUDE_DIR AND ROCDECODE_LIBRARY)
+ set(ROCDECODE_FOUND TRUE)
+else()
+ videodecode_message(AUTHOR_WARNING "${PROJECT_NAME} skipped. Missing RocDecode ...")
+endif()
+
+# Find FFMPEG
+find_package(FFmpeg)
+if(NOT FFMPEG_FOUND)
+ videodecode_message(AUTHOR_WARNING "${PROJECT_NAME} skipped. Missing FFMPEG...")
+ return()
+endif()
+
+message(STATUS "FFMPEG libraries: ${FFMPEG_LIBRARIES}")
+message(STATUS "FFMPEG AVFORMAT version: ${_FFMPEG_AVFORMAT_VERSION}")
+message(STATUS "FFMPEG AVCODEC version: ${_FFMPEG_AVCODEC_VERSION}")
+message(STATUS "FFMPEG AVUTIL version: ${_FFMPEG_AVUTIL_VERSION}")
+
+find_path(
+ FFMPEG_INCLUDE_DIR
+ NAMES libavcodec/avcodec.h libavformat/avformat.h libavutil/avutil.h
+ PATHS ${FFMPEG_INCLUDE_DIRS}
+ PATH_SUFFIXES ffmpeg libav)
+find_library(
+ AVCODEC_LIBRARY
+ NAMES avcodec
+ PATHS ${FFMPEG_LIBRARY_DIRS})
+find_library(
+ AVFORMAT_LIBRARY
+ NAMES avformat
+ PATHS ${FFMPEG_LIBRARY_DIRS})
+find_library(
+ AVUTIL_LIBRARY
+ NAMES avutil
+ PATHS ${FFMPEG_LIBRARY_DIRS})
+
+set(FFMPEG_LIBRARIES ${AVCODEC_LIBRARY} ${AVFORMAT_LIBRARY} ${AVUTIL_LIBRARY})
+set(FFMPEG_INCLUDE_DIRS ${FFMPEG_INCLUDE_DIR})
+
+mark_as_advanced(FFMPEG_INCLUDE_DIR AVCODEC_LIBRARY AVFORMAT_LIBRARY AVUTIL_LIBRARY)
+
+if(FFMPEG_FOUND AND ROCDECODE_FOUND)
+ # HIP
+ set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::host)
+
+ # FFMPEG
+ include_directories(${FFMPEG_INCLUDE_DIR})
+ set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES})
+
+ # filesystem: c++ compilers less than equal to 8.5 need explicit link with stdc++fs
+ if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL "8.5")
+ set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs)
+ endif()
+
+ # rocDecode
+ include_directories(${ROCDECODE_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/..)
+ set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCDECODE_LIBRARY})
+
+ # Threads
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} Threads::Threads)
+
+ add_executable(videodecode videodecodebatch.cpp roc_video_dec.cpp)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
+ target_link_libraries(videodecode ${LINK_LIBRARY_LIST})
+ target_compile_options(videodecode PRIVATE ${_FLAGS})
+
+ # FFMPEG multi-version support
+ if(_FFMPEG_AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100)
+ target_compile_definitions(videodecode PUBLIC USE_AVCODEC_GREATER_THAN_58_134=0)
+ else()
+ target_compile_definitions(videodecode PUBLIC USE_AVCODEC_GREATER_THAN_58_134=1)
+ endif()
+
+ if(ROCPROFSYS_INSTALL_EXAMPLES)
+ install(
+ TARGETS videodecode
+ DESTINATION bin
+ COMPONENT rocprofiler-systems-examples)
+ endif()
+else()
+ message(
+ "-- ERROR!: videodecode excluded! please install all the dependencies and try again!"
+ )
+ if(NOT FFMPEG_FOUND)
+ message(FATAL_ERROR "-- ERROR!: FFMPEG Not Found! - please install FFMPEG!")
+ endif()
+ if(NOT ROCDECODE_FOUND)
+ message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
+ endif()
+endif()
diff --git a/examples/videodecode/common.h b/examples/videodecode/common.h
new file mode 100644
index 00000000..6a7b0179
--- /dev/null
+++ b/examples/videodecode/common.h
@@ -0,0 +1,102 @@
+/*
+Copyright (c) 2023 - 2025 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "roc_video_dec.h" + +typedef enum ReconfigFlushMode_enum +{ + RECONFIG_FLUSH_MODE_NONE = 0, /**< Just flush to get the frame count */ + RECONFIG_FLUSH_MODE_DUMP_TO_FILE = + 1, /**< The remaining frames will be dumped to file in this mode */ + RECONFIG_FLUSH_MODE_CALCULATE_MD5 = + 2, /**< Calculate the MD5 of the flushed frames */ +} ReconfigFlushMode; + +// this struct is used by videodecode and videodecodeMultiFiles to dump last frames to +// file +typedef struct ReconfigDumpFileStruct_t +{ + bool b_dump_frames_to_file; + std::string output_file_name; +} ReconfigDumpFileStruct; + +// callback function to flush last frames and save it to file when reconfigure happens +int +ReconfigureFlushCallback(void* p_viddec_obj, uint32_t flush_mode, void* p_user_struct) +{ + int n_frames_flushed = 0; + if((p_viddec_obj == nullptr) || (p_user_struct == nullptr)) return n_frames_flushed; + + RocVideoDecoder* viddec = static_cast(p_viddec_obj); + OutputSurfaceInfo* surf_info; + if(!viddec->GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" << std::endl; + return n_frames_flushed; + } + + uint8_t* pframe = nullptr; + int64_t pts; + while((pframe = viddec->GetFrame(&pts))) + { + if(flush_mode != RECONFIG_FLUSH_MODE_NONE) + { + if(flush_mode == ReconfigFlushMode::RECONFIG_FLUSH_MODE_DUMP_TO_FILE) + { + ReconfigDumpFileStruct* p_dump_file_struct = + static_cast(p_user_struct); + if(p_dump_file_struct->b_dump_frames_to_file) + { + viddec->SaveFrameToFile(p_dump_file_struct->output_file_name, pframe, + surf_info); + } + } + else if(flush_mode == ReconfigFlushMode::RECONFIG_FLUSH_MODE_CALCULATE_MD5) + { + viddec->UpdateMd5ForFrame(pframe, surf_info); + } + } + // release and flush frame + viddec->ReleaseFrame(pts, true); + n_frames_flushed++; + } + + return n_frames_flushed; +} + +int +GetEnvVar(const char* name, int& dev_count) +{ + char* v = std::getenv(name); + if(v) + { + char* p_tkn = std::strtok(v, ","); + while(p_tkn != nullptr) + { + dev_count++; + p_tkn = strtok(nullptr, ","); + } + } + return dev_count; +} diff --git a/examples/videodecode/roc_video_dec.cpp b/examples/videodecode/roc_video_dec.cpp new file mode 100644 index 00000000..30f04ed5 --- /dev/null +++ b/examples/videodecode/roc_video_dec.cpp @@ -0,0 +1,1605 @@ +/* +Copyright (c) 2023 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "roc_video_dec.h" + +RocVideoDecoder::RocVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_type, + rocDecVideoCodec codec, bool force_zero_latency, + const Rect* p_crop_rect, bool extract_user_sei_Message, + uint32_t disp_delay, int max_width, int max_height, + uint32_t clk_rate) +: device_id_{ device_id } +, out_mem_type_(out_mem_type) +, codec_id_(codec) +, b_force_zero_latency_(force_zero_latency) +, b_extract_sei_message_(extract_user_sei_Message) +, disp_delay_(disp_delay) +, max_width_(max_width) +, max_height_(max_height) +{ + if(!InitHIP(device_id_)) + { + THROW("Failed to initilize the HIP"); + } + if(p_crop_rect) crop_rect_ = *p_crop_rect; + if(b_extract_sei_message_) + { + fp_sei_ = fopen("rocdec_sei_message.txt", "wb"); + curr_sei_message_ptr_ = new RocdecSeiMessageInfo; + memset(&sei_message_display_q_, 0, sizeof(sei_message_display_q_)); + } + // create rocdec videoparser + RocdecParserParams parser_params = {}; + parser_params.codec_type = codec_id_; + parser_params.max_num_decode_surfaces = + 1; // let the parser to determine the decode buffer pool size + parser_params.clock_rate = clk_rate; + parser_params.max_display_delay = disp_delay_; + parser_params.user_data = this; + parser_params.pfn_sequence_callback = HandleVideoSequenceProc; + parser_params.pfn_decode_picture = HandlePictureDecodeProc; + parser_params.pfn_display_picture = + b_force_zero_latency_ ? NULL : HandlePictureDisplayProc; + parser_params.pfn_get_sei_msg = b_extract_sei_message_ ? HandleSEIMessagesProc : NULL; + ROCDEC_API_CALL(rocDecCreateVideoParser(&rocdec_parser_, &parser_params)); +} + +RocVideoDecoder::~RocVideoDecoder() +{ + auto start_time = StartTimer(); + if(curr_sei_message_ptr_) + { + delete curr_sei_message_ptr_; + curr_sei_message_ptr_ = nullptr; + } + + if(fp_sei_) + { + fclose(fp_sei_); + fp_sei_ = nullptr; + } + + if(rocdec_parser_) + { + rocDecDestroyVideoParser(rocdec_parser_); + rocdec_parser_ = nullptr; + } + + if(roc_decoder_) + { + rocDecDestroyDecoder(roc_decoder_); + roc_decoder_ = nullptr; + } + + if(curr_video_format_ptr_) + { + delete curr_video_format_ptr_; + curr_video_format_ptr_ = nullptr; + } + + std::lock_guard lock(mtx_vp_frame_); + if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) + { + for(auto& p_frame : vp_frames_) + { + if(p_frame.frame_ptr) + { + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + hipError_t hip_status = hipFree(p_frame.frame_ptr); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipFree failed! (" << hip_status << ")" + << std::endl; + } + } + else + delete[](p_frame.frame_ptr); + p_frame.frame_ptr = nullptr; + } + } + } + if(hip_stream_) + { + hipError_t hip_status = hipSuccess; + hip_status = hipStreamDestroy(hip_stream_); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipStream_Destroy failed! 
(" << hip_status << ")" + << std::endl; + } + } + if(fp_out_) + { + fclose(fp_out_); + fp_out_ = nullptr; + } + + double elapsed_time = StopTimer(start_time); + AddDecoderSessionOverHead(std::this_thread::get_id(), elapsed_time); +} + +static const char* +GetVideoCodecString(rocDecVideoCodec e_codec) +{ + static struct + { + rocDecVideoCodec e_codec; + const char* name; + } aCodecName[] = { + { rocDecVideoCodec_MPEG1, "MPEG-1" }, + { rocDecVideoCodec_MPEG2, "MPEG-2" }, + { rocDecVideoCodec_MPEG4, "MPEG-4 (ASP)" }, + { rocDecVideoCodec_AVC, "AVC/H.264" }, + { rocDecVideoCodec_HEVC, "H.265/HEVC" }, + { rocDecVideoCodec_AV1, "AV1" }, + { rocDecVideoCodec_VP8, "VP8" }, + { rocDecVideoCodec_VP9, "VP9" }, + { rocDecVideoCodec_JPEG, "M-JPEG" }, + { rocDecVideoCodec_NumCodecs, "Invalid" }, + }; + + if(e_codec >= 0 && e_codec <= rocDecVideoCodec_NumCodecs) + { + return aCodecName[e_codec].name; + } + for(int i = rocDecVideoCodec_NumCodecs + 1; + i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) + { + if(e_codec == aCodecName[i].e_codec) + { + return aCodecName[e_codec].name; + } + } + return "Unknown"; +} + +/** + * @brief function to return the name from codec_id + * + * @param codec_id + * @return const char* + */ +const char* +RocVideoDecoder::GetCodecFmtName(rocDecVideoCodec codec_id) +{ + return GetVideoCodecString(codec_id); +} + +static const char* +GetSurfaceFormatString(rocDecVideoSurfaceFormat surface_format_id) +{ + static struct + { + rocDecVideoSurfaceFormat surf_fmt; + const char* name; + } SurfName[] = { + { rocDecVideoSurfaceFormat_NV12, "NV12" }, + { rocDecVideoSurfaceFormat_P016, "P016" }, + { rocDecVideoSurfaceFormat_YUV444, "YUV444" }, + { rocDecVideoSurfaceFormat_YUV444_16Bit, "YUV444_16Bit" }, + }; + + if(surface_format_id >= rocDecVideoSurfaceFormat_NV12 && + surface_format_id <= rocDecVideoSurfaceFormat_YUV444_16Bit) + return SurfName[surface_format_id].name; + else + return "Unknown"; +} + +/** + * @brief function to return the name from surface_format_id + * + * @param surface_format_id - enum for surface format + * @return const char* + */ +const char* +RocVideoDecoder::GetSurfaceFmtName(rocDecVideoSurfaceFormat surface_format_id) +{ + return GetSurfaceFormatString(surface_format_id); +} + +static const char* +GetVideoChromaFormatName(rocDecVideoChromaFormat e_chroma_format) +{ + static struct + { + rocDecVideoChromaFormat chroma_fmt; + const char* name; + } ChromaFormatName[] = { + { rocDecVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)" }, + { rocDecVideoChromaFormat_420, "YUV 420" }, + { rocDecVideoChromaFormat_422, "YUV 422" }, + { rocDecVideoChromaFormat_444, "YUV 444" }, + }; + + if(e_chroma_format >= 0 && e_chroma_format <= rocDecVideoChromaFormat_444) + { + return ChromaFormatName[e_chroma_format].name; + } + return "Unknown"; +} + +static float +GetChromaHeightFactor(rocDecVideoSurfaceFormat surface_format) +{ + float factor = 0.5; + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + case rocDecVideoSurfaceFormat_P016: factor = 0.5; break; + case rocDecVideoSurfaceFormat_YUV444: + case rocDecVideoSurfaceFormat_YUV444_16Bit: factor = 1.0; break; + } + + return factor; +} + +static int +GetChromaPlaneCount(rocDecVideoSurfaceFormat surface_format) +{ + int num_planes = 1; + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + case rocDecVideoSurfaceFormat_P016: num_planes = 1; break; + case rocDecVideoSurfaceFormat_YUV444: + case rocDecVideoSurfaceFormat_YUV444_16Bit: num_planes = 2; break; + } + + return num_planes; +} + +static 
void +GetSurfaceStrideInternal(rocDecVideoSurfaceFormat surface_format, uint32_t width, + uint32_t height, uint32_t* pitch, uint32_t* vstride) +{ + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + *pitch = align(width, 256); + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_P016: + *pitch = align(width, 128) * 2; + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV444: + *pitch = align(width, 256); + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV444_16Bit: + *pitch = align(width, 128) * 2; + *vstride = align(height, 16); + break; + } + return; +} + +/* Return value from HandleVideoSequence() are interpreted as : + * 0: fail, 1: succeeded, > 1: override dpb size of parser (set by + * CUVIDPARSERPARAMS::max_num_decode_surfaces while creating parser) + */ +int +RocVideoDecoder::HandleVideoSequence(RocdecVideoFormat* p_video_format) +{ + if(p_video_format == nullptr) + { + ROCDEC_THROW("Rocdec:: Invalid video format in HandleVideoSequence: ", + ROCDEC_INVALID_PARAMETER); + return 0; + } + auto start_time = StartTimer(); + input_video_info_str_.str(""); + input_video_info_str_.clear(); + input_video_info_str_ << "Input Video Information" << std::endl + << "\tCodec : " << GetCodecFmtName(p_video_format->codec) + << std::endl; + if(p_video_format->frame_rate.numerator && p_video_format->frame_rate.denominator) + { + input_video_info_str_ << "\tFrame rate : " + << p_video_format->frame_rate.numerator << "/" + << p_video_format->frame_rate.denominator << " = " + << 1.0 * p_video_format->frame_rate.numerator / + p_video_format->frame_rate.denominator + << " fps" << std::endl; + } + input_video_info_str_ + << "\tSequence : " + << (p_video_format->progressive_sequence ? 
"Progressive" : "Interlaced") + << std::endl + << "\tCoded size : [" << p_video_format->coded_width << ", " + << p_video_format->coded_height << "]" << std::endl + << "\tDisplay area : [" << p_video_format->display_area.left << ", " + << p_video_format->display_area.top << ", " << p_video_format->display_area.right + << ", " << p_video_format->display_area.bottom << "]" << std::endl + << "\tChroma : " << GetVideoChromaFormatName(p_video_format->chroma_format) + << std::endl + << "\tBit depth : " << p_video_format->bit_depth_luma_minus8 + 8; + input_video_info_str_ << std::endl; + + int num_decode_surfaces = p_video_format->min_num_decode_surfaces; + + RocdecDecodeCaps decode_caps; + memset(&decode_caps, 0, sizeof(decode_caps)); + decode_caps.codec_type = p_video_format->codec; + decode_caps.chroma_format = p_video_format->chroma_format; + decode_caps.bit_depth_minus_8 = p_video_format->bit_depth_luma_minus8; + + rocDecGetDecoderCaps(&decode_caps); + if(!decode_caps.is_supported) + { + ROCDEC_THROW("rocDecode:: Codec not supported on this GPU ", + ROCDEC_NOT_SUPPORTED); + return 0; + } + if((p_video_format->coded_width > decode_caps.max_width) || + (p_video_format->coded_height > decode_caps.max_height)) + { + std::ostringstream errorString; + errorString << std::endl + << "Resolution : " << p_video_format->coded_width << "x" + << p_video_format->coded_height << std::endl + << "Max Supported (wxh) : " << decode_caps.max_width << "x" + << decode_caps.max_height << std::endl + << "Resolution not supported on this GPU "; + const std::string cErr = errorString.str(); + ROCDEC_THROW(cErr, ROCDEC_NOT_SUPPORTED); + return 0; + } + if(curr_video_format_ptr_ == nullptr) + { + curr_video_format_ptr_ = new RocdecVideoFormat(); + } + // store current video format: this is required to call reconfigure from application + // in case of random seek + if(curr_video_format_ptr_) + memcpy(curr_video_format_ptr_, p_video_format, sizeof(RocdecVideoFormat)); + + if(coded_width_ && coded_height_) + { + // rocdecCreateDecoder() has been called before, and now there's possible config + // change + return ReconfigureDecoder(p_video_format); + } + // e_codec has been set in the constructor (for parser). Here it's set again for + // potential correction + codec_id_ = p_video_format->codec; + video_chroma_format_ = p_video_format->chroma_format; + bitdepth_minus_8_ = p_video_format->bit_depth_luma_minus8; + byte_per_pixel_ = bitdepth_minus_8_ > 0 ? 2 : 1; + + // Set the output surface format same as chroma format + if(video_chroma_format_ == rocDecVideoChromaFormat_420 || + rocDecVideoChromaFormat_Monochrome) + video_surface_format_ = bitdepth_minus_8_ ? rocDecVideoSurfaceFormat_P016 + : rocDecVideoSurfaceFormat_NV12; + else if(video_chroma_format_ == rocDecVideoChromaFormat_444) + video_surface_format_ = bitdepth_minus_8_ ? rocDecVideoSurfaceFormat_YUV444_16Bit + : rocDecVideoSurfaceFormat_YUV444; + else if(video_chroma_format_ == rocDecVideoChromaFormat_422) + video_surface_format_ = rocDecVideoSurfaceFormat_NV12; + + // Check if output format supported. 
If not, check falback options + if(!(decode_caps.output_format_mask & (1 << video_surface_format_))) + { + if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_NV12)) + video_surface_format_ = rocDecVideoSurfaceFormat_NV12; + else if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_P016)) + video_surface_format_ = rocDecVideoSurfaceFormat_P016; + else if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_YUV444)) + video_surface_format_ = rocDecVideoSurfaceFormat_YUV444; + else if(decode_caps.output_format_mask & + (1 << rocDecVideoSurfaceFormat_YUV444_16Bit)) + video_surface_format_ = rocDecVideoSurfaceFormat_YUV444_16Bit; + else + ROCDEC_THROW("No supported output format found", ROCDEC_NOT_SUPPORTED); + } + + coded_width_ = p_video_format->coded_width; + coded_height_ = p_video_format->coded_height; + disp_rect_.top = p_video_format->display_area.top; + disp_rect_.bottom = p_video_format->display_area.bottom; + disp_rect_.left = p_video_format->display_area.left; + disp_rect_.right = p_video_format->display_area.right; + disp_width_ = p_video_format->display_area.right - p_video_format->display_area.left; + disp_height_ = p_video_format->display_area.bottom - p_video_format->display_area.top; + + // AV1 has max width/height of sequence in sequence header + if(codec_id_ == rocDecVideoCodec_AV1 && p_video_format->seqhdr_data_length > 0) + { + // dont overwrite if it is already set from cmdline or reconfig.txt + if(!(max_width_ > p_video_format->coded_width || + max_height_ > p_video_format->coded_height)) + { + RocdecVideoFormatEx* vidFormatEx = (RocdecVideoFormatEx*) p_video_format; + max_width_ = vidFormatEx->max_width; + max_height_ = vidFormatEx->max_height; + } + } + if(max_width_ < (int) p_video_format->coded_width) + max_width_ = p_video_format->coded_width; + if(max_height_ < (int) p_video_format->coded_height) + max_height_ = p_video_format->coded_height; + + RocDecoderCreateInfo videoDecodeCreateInfo = { 0 }; + videoDecodeCreateInfo.device_id = device_id_; + videoDecodeCreateInfo.codec_type = codec_id_; + videoDecodeCreateInfo.chroma_format = video_chroma_format_; + videoDecodeCreateInfo.output_format = video_surface_format_; + videoDecodeCreateInfo.bit_depth_minus_8 = bitdepth_minus_8_; + videoDecodeCreateInfo.num_decode_surfaces = num_decode_surfaces; + videoDecodeCreateInfo.width = coded_width_; + videoDecodeCreateInfo.height = coded_height_; + videoDecodeCreateInfo.max_width = max_width_; + videoDecodeCreateInfo.max_height = max_height_; + if(!(crop_rect_.right && crop_rect_.bottom)) + { + videoDecodeCreateInfo.display_rect.top = disp_rect_.top; + videoDecodeCreateInfo.display_rect.bottom = disp_rect_.bottom; + videoDecodeCreateInfo.display_rect.left = disp_rect_.left; + videoDecodeCreateInfo.display_rect.right = disp_rect_.right; + target_width_ = (disp_width_ + 1) & ~1; + target_height_ = (disp_height_ + 1) & ~1; + } + else + { + videoDecodeCreateInfo.display_rect.top = crop_rect_.top; + videoDecodeCreateInfo.display_rect.bottom = crop_rect_.bottom; + videoDecodeCreateInfo.display_rect.left = crop_rect_.left; + videoDecodeCreateInfo.display_rect.right = crop_rect_.right; + target_width_ = (crop_rect_.right - crop_rect_.left + 1) & ~1; + target_height_ = (crop_rect_.bottom - crop_rect_.top + 1) & ~1; + } + videoDecodeCreateInfo.target_width = target_width_; + videoDecodeCreateInfo.target_height = target_height_; + + chroma_height_ = + (int) (ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + num_chroma_planes_ = 
GetChromaPlaneCount(video_surface_format_); + if(video_chroma_format_ == rocDecVideoChromaFormat_Monochrome) num_chroma_planes_ = 0; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL || + out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) + GetSurfaceStrideInternal(video_surface_format_, p_video_format->coded_width, + p_video_format->coded_height, &surface_stride_, + &surface_vstride_); + else + { + surface_stride_ = + videoDecodeCreateInfo.target_width * + byte_per_pixel_; // todo:: check if we need pitched memory for faster copy + } + chroma_vstride_ = + (int) (ceil(surface_vstride_ * GetChromaHeightFactor(video_surface_format_))); + // fill output_surface_info_ + output_surface_info_.output_width = target_width_; + output_surface_info_.output_height = target_height_; + output_surface_info_.output_pitch = surface_stride_; + output_surface_info_.output_vstride = (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + ? surface_vstride_ + : videoDecodeCreateInfo.target_height; + output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; + output_surface_info_.bytes_per_pixel = byte_per_pixel_; + output_surface_info_.surface_format = video_surface_format_; + output_surface_info_.num_chroma_planes = num_chroma_planes_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_COPIED; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_HOST_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_HOST_COPIED; + } + else + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_NOT_MAPPED; + } + + input_video_info_str_ << "Video Decoding Params:" << std::endl + << "\tNum Surfaces : " + << videoDecodeCreateInfo.num_decode_surfaces << std::endl + << "\tCrop : [" + << videoDecodeCreateInfo.display_rect.left << ", " + << videoDecodeCreateInfo.display_rect.top << ", " + << videoDecodeCreateInfo.display_rect.right << ", " + << videoDecodeCreateInfo.display_rect.bottom << "]" << std::endl + << "\tResize : " << videoDecodeCreateInfo.target_width + << "x" << videoDecodeCreateInfo.target_height << std::endl; + input_video_info_str_ << std::endl; + std::cout << input_video_info_str_.str(); + + ROCDEC_API_CALL(rocDecCreateDecoder(&roc_decoder_, &videoDecodeCreateInfo)); + double elapsed_time = StopTimer(start_time); + AddDecoderSessionOverHead(std::this_thread::get_id(), elapsed_time); + return num_decode_surfaces; +} + +/** + * @brief Function to set the Reconfig Params object + * + * @param p_reconfig_params: pointer to reconfig params struct + * @return true : success + * @return false : fail + */ +bool +RocVideoDecoder::SetReconfigParams(ReconfigParams* p_reconfig_params, + bool b_force_reconfig_flush) +{ + if(!p_reconfig_params) + { + std::cerr << "ERROR: Invalid reconfig struct passed! 
" << std::endl; + return false; + } + // save it + p_reconfig_params_ = p_reconfig_params; + b_force_recofig_flush_ = b_force_reconfig_flush; + return true; +} + +/** + * @brief Function to force Reconfigure Flush: needed for random seeking to key frames + * + * @return int 1: Success 0: Fail + */ +int +RocVideoDecoder::FlushAndReconfigure() +{ + if(!p_reconfig_params_) + { + std::cerr << "ERROR: Reconfig params is not set! " << std::endl; + return 0; + } + if(!curr_video_format_ptr_) + { + std::cerr << "ERROR: video format is not initialized! " << std::endl; + return 0; + } + // call reconfigure + b_force_recofig_flush_ = true; // if not already set to force reconfigure + ReconfigureDecoder(curr_video_format_ptr_); + return true; +} + +/** + * @brief function to reconfigure decoder if there is a change in sequence params. + * + * @param p_video_format + * @return int 1: success 0: fail + */ +int +RocVideoDecoder::ReconfigureDecoder(RocdecVideoFormat* p_video_format) +{ + if(p_video_format->codec != codec_id_) + { + ROCDEC_THROW("Reconfigure Not supported for codec change", ROCDEC_NOT_SUPPORTED); + return 0; + } + if(p_video_format->chroma_format != video_chroma_format_) + { + ROCDEC_THROW("Reconfigure Not supported for chroma format change", + ROCDEC_NOT_SUPPORTED); + return 0; + } + if(p_video_format->bit_depth_luma_minus8 != bitdepth_minus_8_) + { + ROCDEC_THROW("Reconfigure Not supported for bit depth change", + ROCDEC_NOT_SUPPORTED); + return 0; + } + bool is_decode_res_changed = !(p_video_format->coded_width == coded_width_ && + p_video_format->coded_height == coded_height_); + bool is_display_rect_changed = + !(p_video_format->display_area.bottom == disp_rect_.bottom && + p_video_format->display_area.top == disp_rect_.top && + p_video_format->display_area.left == disp_rect_.left && + p_video_format->display_area.right == disp_rect_.right); + + if(!is_decode_res_changed && !is_display_rect_changed && !b_force_recofig_flush_) + { + return 1; + } + + // Flush and clear internal frame store to reconfigure when either coded size or + // display size has changed. + if(p_reconfig_params_ && p_reconfig_params_->p_fn_reconfigure_flush) + num_frames_flushed_during_reconfig_ += p_reconfig_params_->p_fn_reconfigure_flush( + this, p_reconfig_params_->reconfig_flush_mode, + static_cast(p_reconfig_params_->p_reconfig_user_struct)); + // clear the existing output buffers of different size + // note that app lose the remaining frames in the vp_frames/vp_frames_q in case + // application didn't set p_fn_reconfigure_flush_ callback + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + ReleaseInternalFrames(); + } + else + { + std::lock_guard lock(mtx_vp_frame_); + while(!vp_frames_.empty()) + { + DecFrameBuffer* p_frame = &vp_frames_.back(); + // pop decoded frame + vp_frames_.pop_back(); + if(p_frame->frame_ptr) + { + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + hipError_t hip_status = hipFree(p_frame->frame_ptr); + if(hip_status != hipSuccess) + std::cerr << "ERROR: hipFree failed! 
(" << hip_status << ")" + << std::endl; + } + else + delete[](p_frame->frame_ptr); + } + } + } + output_frame_cnt_ = 0; // reset frame_count + if(is_decode_res_changed) + { + coded_width_ = p_video_format->coded_width; + coded_height_ = p_video_format->coded_height; + } + if(is_display_rect_changed) + { + disp_rect_.left = p_video_format->display_area.left; + disp_rect_.right = p_video_format->display_area.right; + disp_rect_.top = p_video_format->display_area.top; + disp_rect_.bottom = p_video_format->display_area.bottom; + disp_width_ = + p_video_format->display_area.right - p_video_format->display_area.left; + disp_height_ = + p_video_format->display_area.bottom - p_video_format->display_area.top; + chroma_height_ = static_cast( + std::ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + if(!(crop_rect_.right && crop_rect_.bottom)) + { + target_width_ = (disp_width_ + 1) & ~1; + target_height_ = (disp_height_ + 1) & ~1; + } + else + { + target_width_ = (crop_rect_.right - crop_rect_.left + 1) & ~1; + target_height_ = (crop_rect_.bottom - crop_rect_.top + 1) & ~1; + } + } + + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL || + out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) + { + GetSurfaceStrideInternal(video_surface_format_, coded_width_, coded_height_, + &surface_stride_, &surface_vstride_); + } + else + { + surface_stride_ = target_width_ * byte_per_pixel_; + } + chroma_height_ = static_cast( + ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + num_chroma_planes_ = GetChromaPlaneCount(video_surface_format_); + if(p_video_format->chroma_format == rocDecVideoChromaFormat_Monochrome) + num_chroma_planes_ = 0; + chroma_vstride_ = static_cast( + std::ceil(surface_vstride_ * GetChromaHeightFactor(video_surface_format_))); + // Fill output_surface_info_ + output_surface_info_.output_width = target_width_; + output_surface_info_.output_height = target_height_; + output_surface_info_.output_pitch = surface_stride_; + output_surface_info_.output_vstride = (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + ? surface_vstride_ + : target_height_; + output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; + output_surface_info_.bytes_per_pixel = byte_per_pixel_; + output_surface_info_.surface_format = video_surface_format_; + output_surface_info_.num_chroma_planes = num_chroma_planes_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_COPIED; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_HOST_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_HOST_COPIED; + } + else + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_NOT_MAPPED; + } + + // If the coded_width or coded_height hasn't changed but display resolution has + // changed, then need to update width and height for correct output with cropping. + // There is no need to reconfigure the decoder. 
+ if(!is_decode_res_changed && is_display_rect_changed) + { + return 1; + } + + RocdecReconfigureDecoderInfo reconfig_params = { 0 }; + reconfig_params.width = coded_width_; + reconfig_params.height = coded_height_; + reconfig_params.target_width = target_width_; + reconfig_params.target_height = target_height_; + reconfig_params.num_decode_surfaces = p_video_format->min_num_decode_surfaces; + if(!(crop_rect_.right && crop_rect_.bottom)) + { + reconfig_params.display_rect.top = disp_rect_.top; + reconfig_params.display_rect.bottom = disp_rect_.bottom; + reconfig_params.display_rect.left = disp_rect_.left; + reconfig_params.display_rect.right = disp_rect_.right; + } + else + { + reconfig_params.display_rect.top = crop_rect_.top; + reconfig_params.display_rect.bottom = crop_rect_.bottom; + reconfig_params.display_rect.left = crop_rect_.left; + reconfig_params.display_rect.right = crop_rect_.right; + } + + if(roc_decoder_ == nullptr) + { + ROCDEC_THROW("Reconfigurition of the decoder detected but the decoder was not " + "initialized previoulsy!", + ROCDEC_NOT_SUPPORTED); + return 0; + } + ROCDEC_API_CALL(rocDecReconfigureDecoder(roc_decoder_, &reconfig_params)); + + input_video_info_str_.str(""); + input_video_info_str_.clear(); + input_video_info_str_ << "Input Video Resolution Changed:" << std::endl + << "\tCoded size : [" << p_video_format->coded_width << ", " + << p_video_format->coded_height << "]" << std::endl + << "\tDisplay area : [" << p_video_format->display_area.left + << ", " << p_video_format->display_area.top << ", " + << p_video_format->display_area.right << ", " + << p_video_format->display_area.bottom << "]" << std::endl; + input_video_info_str_ << std::endl; + input_video_info_str_ << "Video Decoding Params:" << std::endl + << "\tNum Surfaces : " << reconfig_params.num_decode_surfaces + << std::endl + << "\tResize : " << reconfig_params.target_width << "x" + << reconfig_params.target_height << std::endl; + input_video_info_str_ << std::endl; + std::cout << input_video_info_str_.str(); + + is_decoder_reconfigured_ = true; + return 1; +} + +/** + * @brief + * + * @param pPicParams + * @return int 1: success 0: fail + */ +int +RocVideoDecoder::HandlePictureDecode(RocdecPicParams* pPicParams) +{ + if(!roc_decoder_) + { + THROW("RocDecoder not initialized: failed with ErrCode: " + + TOSTR(ROCDEC_NOT_INITIALIZED)); + } + pic_num_in_dec_order_[pPicParams->curr_pic_idx] = decode_poc_++; + ROCDEC_API_CALL(rocDecDecodeFrame(roc_decoder_, pPicParams)); + last_decode_surf_idx_ = pPicParams->curr_pic_idx; + decoded_pic_cnt_++; + if(b_force_zero_latency_ && + ((!pPicParams->field_pic_flag) || (pPicParams->second_field))) + { + RocdecParserDispInfo disp_info; + memset(&disp_info, 0, sizeof(disp_info)); + disp_info.picture_index = pPicParams->curr_pic_idx; + disp_info.progressive_frame = !pPicParams->field_pic_flag; + disp_info.top_field_first = pPicParams->bottom_field_flag ^ 1; + HandlePictureDisplay(&disp_info); + } + return 1; +} + +/** + * @brief function to handle display picture + * + * @param pDispInfo + * @return int 0:fail 1: success + */ +int +RocVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo* pDispInfo) +{ + RocdecProcParams video_proc_params = {}; + video_proc_params.progressive_frame = pDispInfo->progressive_frame; + video_proc_params.top_field_first = pDispInfo->top_field_first; + + if(b_extract_sei_message_) + { + if(sei_message_display_q_[pDispInfo->picture_index].sei_data) + { + // Write SEI Message + uint8_t* sei_buffer = + (uint8_t*) 
(sei_message_display_q_[pDispInfo->picture_index].sei_data); + uint32_t sei_num_messages = + sei_message_display_q_[pDispInfo->picture_index].sei_message_count; + RocdecSeiMessage* sei_message = + sei_message_display_q_[pDispInfo->picture_index].sei_message; + if(fp_sei_) + { + for(uint32_t i = 0; i < sei_num_messages; i++) + { + if(codec_id_ == rocDecVideoCodec_AVC || rocDecVideoCodec_HEVC) + { + switch(sei_message[i].sei_message_type) + { + case SEI_TYPE_TIME_CODE: + { + // todo:: check if we need to write timecode + } + break; + case SEI_TYPE_USER_DATA_UNREGISTERED: + { + fwrite(sei_buffer, sei_message[i].sei_message_size, 1, + fp_sei_); + } + break; + } + } + if(codec_id_ == rocDecVideoCodec_AV1) + { + fwrite(sei_buffer, sei_message[i].sei_message_size, 1, fp_sei_); + } + sei_buffer += sei_message[i].sei_message_size; + } + } + free(sei_message_display_q_[pDispInfo->picture_index].sei_data); + sei_message_display_q_[pDispInfo->picture_index].sei_data = + NULL; // to avoid double free + free(sei_message_display_q_[pDispInfo->picture_index].sei_message); + sei_message_display_q_[pDispInfo->picture_index].sei_message = + NULL; // to avoid double free + } + } + if(out_mem_type_ != OUT_SURFACE_MEM_NOT_MAPPED) + { + void* src_dev_ptr[3] = { 0 }; + uint32_t src_pitch[3] = { 0 }; + ROCDEC_API_CALL(rocDecGetVideoFrame(roc_decoder_, pDispInfo->picture_index, + src_dev_ptr, src_pitch, &video_proc_params)); + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + rocDecStatus result = + rocDecGetDecodeStatus(roc_decoder_, pDispInfo->picture_index, &dec_status); + if(result == ROCDEC_SUCCESS && + (dec_status.decode_status == rocDecodeStatus_Error || + dec_status.decode_status == rocDecodeStatus_Error_Concealed)) + { + std::cerr << "Decode Error occurred for picture: " + << pic_num_in_dec_order_[pDispInfo->picture_index] << std::endl; + } + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + DecFrameBuffer dec_frame = { 0 }; + dec_frame.frame_ptr = (uint8_t*) (src_dev_ptr[0]); + dec_frame.pts = pDispInfo->pts; + dec_frame.picture_index = pDispInfo->picture_index; + std::lock_guard lock(mtx_vp_frame_); + vp_frames_q_.push(dec_frame); + output_frame_cnt_++; + } + else + { + // copy the decoded surface info device or host + uint8_t* p_dec_frame = nullptr; + { + std::lock_guard lock(mtx_vp_frame_); + // if not enough frames in stock, allocate + if((unsigned) ++output_frame_cnt_ > vp_frames_.size()) + { + num_alloced_frames_++; + DecFrameBuffer dec_frame = { 0 }; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + // allocate device memory + HIP_API_CALL( + hipMalloc((void**) &dec_frame.frame_ptr, GetFrameSize())); + } + else + { + dec_frame.frame_ptr = new uint8_t[GetFrameSize()]; + } + dec_frame.pts = pDispInfo->pts; + dec_frame.picture_index = pDispInfo->picture_index; + vp_frames_.push_back(dec_frame); + } + p_dec_frame = vp_frames_[output_frame_cnt_ - 1].frame_ptr; + } + // Copy luma data + int dst_pitch = disp_width_ * byte_per_pixel_; + uint8_t* p_src_ptr_y = static_cast(src_dev_ptr[0]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[0] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[0] == dst_pitch) + { + int luma_size = src_pitch[0] * coded_height_; + HIP_API_CALL(hipMemcpyDtoDAsync(p_dec_frame, p_src_ptr_y, luma_size, + hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync(p_dec_frame, dst_pitch, p_src_ptr_y, + src_pitch[0], dst_pitch, 
disp_height_, + hipMemcpyDeviceToDevice, hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_dec_frame, dst_pitch, p_src_ptr_y, + src_pitch[0], dst_pitch, disp_height_, + hipMemcpyDeviceToHost, hip_stream_)); + + // Copy chroma plane ( ) + // rocDec output gives pointer to luma and chroma pointers seperated for the + // decoded frame + uint8_t* p_frame_uv = p_dec_frame + dst_pitch * disp_height_; + uint8_t* p_src_ptr_uv = + (num_chroma_planes_ == 1) + ? static_cast(src_dev_ptr[1]) + + ((disp_rect_.top + crop_rect_.top) >> 1) * src_pitch[1] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_ + : static_cast(src_dev_ptr[1]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[1] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[1] == dst_pitch) + { + int chroma_size = chroma_height_ * dst_pitch; + HIP_API_CALL(hipMemcpyDtoDAsync(p_frame_uv, p_src_ptr_uv, chroma_size, + hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync(p_frame_uv, dst_pitch, p_src_ptr_uv, + src_pitch[1], dst_pitch, chroma_height_, + hipMemcpyDeviceToDevice, hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_frame_uv, dst_pitch, p_src_ptr_uv, + src_pitch[1], dst_pitch, chroma_height_, + hipMemcpyDeviceToHost, hip_stream_)); + + if(num_chroma_planes_ == 2) + { + uint8_t* p_frame_v = + p_dec_frame + dst_pitch * (disp_height_ + chroma_height_); + uint8_t* p_src_ptr_v = + static_cast(src_dev_ptr[2]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[2] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[2] == dst_pitch) + { + int chroma_size = chroma_height_ * dst_pitch; + HIP_API_CALL(hipMemcpyDtoDAsync(p_frame_v, p_src_ptr_v, + chroma_size, hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync( + p_frame_v, dst_pitch, p_src_ptr_v, src_pitch[2], dst_pitch, + chroma_height_, hipMemcpyDeviceToDevice, hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_frame_v, dst_pitch, p_src_ptr_v, + src_pitch[2], dst_pitch, chroma_height_, + hipMemcpyDeviceToHost, hip_stream_)); + } + + HIP_API_CALL(hipStreamSynchronize(hip_stream_)); + } + } + else + { + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + rocDecStatus result = + rocDecGetDecodeStatus(roc_decoder_, pDispInfo->picture_index, &dec_status); + if(result == ROCDEC_SUCCESS && + (dec_status.decode_status == rocDecodeStatus_Error || + dec_status.decode_status == rocDecodeStatus_Error_Concealed)) + { + std::cerr << "Decode Error occurred for picture: " + << pic_num_in_dec_order_[pDispInfo->picture_index] << std::endl; + } + output_frame_cnt_++; + } + + return 1; +} + +int +RocVideoDecoder::GetSEIMessage(RocdecSeiMessageInfo* pSEIMessageInfo) +{ + uint32_t sei_num_mesages = pSEIMessageInfo->sei_message_count; + if(sei_num_mesages) + { + RocdecSeiMessage* p_sei_msg_info = pSEIMessageInfo->sei_message; + size_t total_SEI_buff_size = 0; + if((pSEIMessageInfo->picIdx < 0) || (pSEIMessageInfo->picIdx >= MAX_FRAME_NUM)) + { + ERR("Invalid picture index for SEI message: " + + TOSTR(pSEIMessageInfo->picIdx)); + return 0; + } + for(uint32_t i = 0; i < sei_num_mesages; i++) + { + total_SEI_buff_size += p_sei_msg_info[i].sei_message_size; + } + if(!curr_sei_message_ptr_) + { + ERR("Out of Memory, Allocation failed for m_pCurrSEIMessage"); + return 0; + } + curr_sei_message_ptr_->sei_data = 
malloc(total_SEI_buff_size); + if(!curr_sei_message_ptr_->sei_data) + { + ERR("Out of Memory, Allocation failed for SEI Buffer"); + return 0; + } + memcpy(curr_sei_message_ptr_->sei_data, pSEIMessageInfo->sei_data, + total_SEI_buff_size); + curr_sei_message_ptr_->sei_message = + (RocdecSeiMessage*) malloc(sizeof(RocdecSeiMessage) * sei_num_mesages); + if(!curr_sei_message_ptr_->sei_message) + { + free(curr_sei_message_ptr_->sei_data); + curr_sei_message_ptr_->sei_data = NULL; + return 0; + } + memcpy(curr_sei_message_ptr_->sei_message, pSEIMessageInfo->sei_message, + sizeof(RocdecSeiMessage) * sei_num_mesages); + curr_sei_message_ptr_->sei_message_count = pSEIMessageInfo->sei_message_count; + sei_message_display_q_[pSEIMessageInfo->picIdx] = *curr_sei_message_ptr_; + } + return 1; +} + +int +RocVideoDecoder::DecodeFrame(const uint8_t* data, size_t size, int pkt_flags, int64_t pts, + int* num_decoded_pics) +{ + output_frame_cnt_ = 0, output_frame_cnt_ret_ = 0; + decoded_pic_cnt_ = 0; + RocdecSourceDataPacket packet = { 0 }; + packet.payload = data; + packet.payload_size = size; + packet.flags = pkt_flags | ROCDEC_PKT_TIMESTAMP; + packet.pts = pts; + if(!data || size == 0) + { + packet.flags |= ROCDEC_PKT_ENDOFSTREAM; + } + ROCDEC_API_CALL(rocDecParseVideoData(rocdec_parser_, &packet)); + if(num_decoded_pics) + { + *num_decoded_pics = decoded_pic_cnt_; + } + return output_frame_cnt_; +} + +uint8_t* +RocVideoDecoder::GetFrame(int64_t* pts) +{ + if(output_frame_cnt_ > 0) + { + std::lock_guard lock(mtx_vp_frame_); + output_frame_cnt_--; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL && !vp_frames_q_.empty()) + { + DecFrameBuffer* fb = &vp_frames_q_.front(); + if(pts) *pts = fb->pts; + return fb->frame_ptr; + } + else if(vp_frames_.size() > 0) + { + if(pts) *pts = vp_frames_[output_frame_cnt_ret_].pts; + return vp_frames_[output_frame_cnt_ret_++].frame_ptr; + } + } + return nullptr; +} + +/** + * @brief function to release frame after use by the application: Only used with + * "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @param pTimestamp - timestamp of the frame to be released (unmapped) + * @return true - success + * @return false - falied + */ + +bool +RocVideoDecoder::ReleaseFrame(int64_t pTimestamp, bool b_flushing) +{ + if(out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) return true; // nothing to do + if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) + { + if(!b_flushing) // if not flushing the buffers are re-used, so keep them + return true; // nothing to do + else + { + DecFrameBuffer* fb = &vp_frames_[0]; + if(pTimestamp != fb->pts) + { + std::cerr << "Decoded Frame is released out of order" << std::endl; + return false; + } + vp_frames_.erase( + vp_frames_.begin()); // get rid of the frames from the framestore + } + } + // only needed when using internal mapped buffer + if(!vp_frames_q_.empty()) + { + std::lock_guard lock(mtx_vp_frame_); + DecFrameBuffer* fb = &vp_frames_q_.front(); + void* mapped_frame_ptr = fb->frame_ptr; + + if(pTimestamp != fb->pts) + { + std::cerr << "Decoded Frame is released out of order" << std::endl; + return false; + } + // pop decoded frame + vp_frames_q_.pop(); + } + return true; +} + +/** + * @brief function to release all internal frames and clear the q (used with reconfigure): + * Only used with "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @return true - success + * @return false - falied + */ +bool +RocVideoDecoder::ReleaseInternalFrames() +{ + if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL || + out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) + return true; // 
nothing to do + // only needed when using internal mapped buffer + while(!vp_frames_q_.empty()) + { + std::lock_guard lock(mtx_vp_frame_); + // pop decoded frame + vp_frames_q_.pop(); + } + return true; +} + +void +RocVideoDecoder::SaveFrameToFile(std::string output_file_name, void* surf_mem, + OutputSurfaceInfo* surf_info, size_t rgb_image_size) +{ + uint8_t* hst_ptr = nullptr; + bool is_rgb = (rgb_image_size != 0); + uint64_t output_image_size = + is_rgb ? rgb_image_size : surf_info->output_surface_size_in_bytes; + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL || + surf_info->mem_type == OUT_SURFACE_MEM_DEV_COPIED) + { + if(hst_ptr == nullptr) + { + hst_ptr = new uint8_t[output_image_size]; + } + hipError_t hip_status = hipSuccess; + hip_status = hipMemcpyDtoH((void*) hst_ptr, surf_mem, output_image_size); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hipGetErrorName(hip_status) + << ")" << std::endl; + delete[] hst_ptr; + return; + } + } + else + hst_ptr = static_cast(surf_mem); + + if(current_output_filename.empty()) + { + current_output_filename = output_file_name; + } + + // don't overwrite to the same file if reconfigure is detected for a resolution + // changes. + if(is_decoder_reconfigured_) + { + if(fp_out_) + { + fclose(fp_out_); + fp_out_ = nullptr; + } + // Append the width and height of the new stream to the old file name to create a + // file name to save the new frames do this only if resolution changes within a + // stream (e.g., decoding a multi-resolution stream using the videoDecode app) + // don't append to the output_file_name if multiple output file name is provided + // (e.g., decoding multi-files using the videDecodeMultiFiles) + if(!current_output_filename.compare(output_file_name)) + { + std::string::size_type const pos(output_file_name.find_last_of('.')); + extra_output_file_count_++; + std::string to_append = "_" + std::to_string(surf_info->output_width) + "_" + + std::to_string(surf_info->output_height) + "_" + + std::to_string(extra_output_file_count_); + if(pos != std::string::npos) + { + output_file_name.insert(pos, to_append); + } + else + { + output_file_name += to_append; + } + } + is_decoder_reconfigured_ = false; + } + + if(fp_out_ == nullptr) + { + fp_out_ = fopen(output_file_name.c_str(), "wb"); + } + if(fp_out_) + { + if(!is_rgb) + { + uint8_t* tmp_hst_ptr = hst_ptr; + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + tmp_hst_ptr += + ((disp_rect_.top + crop_rect_.top) * surf_info->output_pitch) + + (disp_rect_.left + crop_rect_.left) * surf_info->bytes_per_pixel; + } + int img_width = surf_info->output_width; + int img_height = surf_info->output_height; + int output_stride = surf_info->output_pitch; + if(img_width * surf_info->bytes_per_pixel == output_stride && + img_height == surf_info->output_vstride) + { + fwrite(hst_ptr, 1, output_image_size, fp_out_); + } + else + { + uint32_t width = surf_info->output_width * surf_info->bytes_per_pixel; + if(surf_info->bit_depth <= 16) + { + for(int i = 0; i < surf_info->output_height; i++) + { + fwrite(tmp_hst_ptr, 1, width, fp_out_); + tmp_hst_ptr += output_stride; + } + // dump chroma + uint8_t* uv_hst_ptr = + hst_ptr + output_stride * surf_info->output_vstride; + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + uv_hst_ptr += (num_chroma_planes_ == 1) + ? 
(((disp_rect_.top + crop_rect_.top) >> 1) * + surf_info->output_pitch) + + ((disp_rect_.left + crop_rect_.left) * + surf_info->bytes_per_pixel) + : ((disp_rect_.top + crop_rect_.top) * + surf_info->output_pitch) + + ((disp_rect_.left + crop_rect_.left) * + surf_info->bytes_per_pixel); + } + for(int i = 0; i < chroma_height_; i++) + { + fwrite(uv_hst_ptr, 1, width, fp_out_); + uv_hst_ptr += output_stride; + } + if(num_chroma_planes_ == 2) + { + uv_hst_ptr = + hst_ptr + + output_stride * (surf_info->output_vstride + chroma_vstride_); + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + uv_hst_ptr += ((disp_rect_.top + crop_rect_.top) * + surf_info->output_pitch) + + ((disp_rect_.left + crop_rect_.left) * + surf_info->bytes_per_pixel); + } + for(int i = 0; i < chroma_height_; i++) + { + fwrite(uv_hst_ptr, 1, width, fp_out_); + uv_hst_ptr += output_stride; + } + } + } + } + } + else + { + fwrite(hst_ptr, 1, rgb_image_size, fp_out_); + } + } + + if(hst_ptr && (surf_info->mem_type != OUT_SURFACE_MEM_HOST_COPIED)) + { + delete[] hst_ptr; + } +} + +void +RocVideoDecoder::ResetSaveFrameToFile() +{ + if(fp_out_) + { + fclose(fp_out_); + fp_out_ = nullptr; + } +} + +void +RocVideoDecoder::InitMd5() +{ + md5_ctx_ = av_md5_alloc(); + av_md5_init(md5_ctx_); +} + +void +RocVideoDecoder::UpdateMd5ForDataBuffer(void* pDevMem, int rgb_image_size) +{ + uint8_t* hstPtr = nullptr; + hstPtr = new uint8_t[rgb_image_size]; + hipError_t hip_status = hipSuccess; + hip_status = hipMemcpyDtoH((void*) hstPtr, pDevMem, rgb_image_size); + if(hip_status != hipSuccess) + { + std::cout << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl; + delete[] hstPtr; + return; + } + av_md5_update(md5_ctx_, hstPtr, rgb_image_size); + if(hstPtr) + { + delete[] hstPtr; + } +} + +void +RocVideoDecoder::UpdateMd5ForFrame(void* surf_mem, OutputSurfaceInfo* surf_info) +{ + int i; + uint8_t* hst_ptr = nullptr; + uint64_t output_image_size = surf_info->output_surface_size_in_bytes; + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL || + surf_info->mem_type == OUT_SURFACE_MEM_DEV_COPIED) + { + if(hst_ptr == nullptr) + { + hst_ptr = new uint8_t[output_image_size]; + } + hipError_t hip_status = hipSuccess; + hip_status = hipMemcpyDtoH((void*) hst_ptr, surf_mem, output_image_size); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" + << std::endl; + delete[] hst_ptr; + return; + } + } + else + hst_ptr = static_cast(surf_mem); + + // Need to covert interleaved planar to stacked planar, assuming 4:2:0 chroma + // sampling. 
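+    // The luma plane is copied row by row, then the interleaved CbCr plane is
+    // de-interleaved into separate stacked Cb and Cr planes before the MD5 update.
+    // For example, a 1920x1080 NV12 surface becomes 1920*1080 luma bytes followed
+    // by 960*540 Cb bytes and 960*540 Cr bytes in the stacked buffer.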
+ uint8_t* stacked_ptr = new uint8_t[output_image_size]; + + uint8_t* tmp_hst_ptr = hst_ptr; + int output_stride = surf_info->output_pitch; + tmp_hst_ptr += + (disp_rect_.top * output_stride) + disp_rect_.left * surf_info->bytes_per_pixel; + uint8_t* tmp_stacked_ptr = stacked_ptr; + int img_width = surf_info->output_width; + int img_height = surf_info->output_height; + // Luma + if(img_width * surf_info->bytes_per_pixel == output_stride && + img_height == surf_info->output_vstride) + { + memcpy(stacked_ptr, hst_ptr, img_width * surf_info->bytes_per_pixel * img_height); + } + else + { + for(i = 0; i < img_height; i++) + { + memcpy(tmp_stacked_ptr, tmp_hst_ptr, img_width * surf_info->bytes_per_pixel); + tmp_hst_ptr += output_stride; + tmp_stacked_ptr += img_width * surf_info->bytes_per_pixel; + } + } + // Chroma + int img_width_chroma = img_width >> 1; + tmp_hst_ptr = hst_ptr + output_stride * surf_info->output_vstride; + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + tmp_hst_ptr += ((disp_rect_.top >> 1) * output_stride) + + (disp_rect_.left * surf_info->bytes_per_pixel); + } + tmp_stacked_ptr = + stacked_ptr + img_width * surf_info->bytes_per_pixel * img_height; // Cb + uint8_t* tmp_stacked_ptr_v = tmp_stacked_ptr + img_width_chroma * + surf_info->bytes_per_pixel * + chroma_height_; // Cr + for(i = 0; i < chroma_height_; i++) + { + for(int j = 0; j < img_width_chroma; j++) + { + uint8_t *src_ptr, *dst_ptr; + // Cb + src_ptr = &tmp_hst_ptr[j * surf_info->bytes_per_pixel * 2]; + dst_ptr = &tmp_stacked_ptr[j * surf_info->bytes_per_pixel]; + memcpy(dst_ptr, src_ptr, surf_info->bytes_per_pixel); + // Cr + src_ptr += surf_info->bytes_per_pixel; + dst_ptr = &tmp_stacked_ptr_v[j * surf_info->bytes_per_pixel]; + memcpy(dst_ptr, src_ptr, surf_info->bytes_per_pixel); + } + tmp_hst_ptr += output_stride; + tmp_stacked_ptr += img_width_chroma * surf_info->bytes_per_pixel; + tmp_stacked_ptr_v += img_width_chroma * surf_info->bytes_per_pixel; + } + + int img_size = img_width * surf_info->bytes_per_pixel * (img_height + chroma_height_); + + // For 10 bit, convert from P010 to little endian to match reference decoder output + if(surf_info->bytes_per_pixel == 2) + { + uint16_t* ptr = reinterpret_cast(stacked_ptr); + for(i = 0; i < img_size / 2; i++) + { + ptr[i] = ptr[i] >> 6; + } + } + + av_md5_update(md5_ctx_, stacked_ptr, img_size); + + if(hst_ptr && (surf_info->mem_type != OUT_SURFACE_MEM_HOST_COPIED)) + { + delete[] hst_ptr; + } + delete[] stacked_ptr; +} + +void +RocVideoDecoder::FinalizeMd5(uint8_t** digest) +{ + av_md5_final(md5_ctx_, md5_digest_); + av_freep(&md5_ctx_); + *digest = md5_digest_; +} + +void +RocVideoDecoder::GetDeviceinfo(std::string& device_name, std::string& gcn_arch_name, + int& pci_bus_id, int& pci_domain_id, int& pci_device_id) +{ + device_name = hip_dev_prop_.name; + gcn_arch_name = hip_dev_prop_.gcnArchName; + pci_bus_id = hip_dev_prop_.pciBusID; + pci_domain_id = hip_dev_prop_.pciDomainID; + pci_device_id = hip_dev_prop_.pciDeviceID; +} + +bool +RocVideoDecoder::GetOutputSurfaceInfo(OutputSurfaceInfo** surface_info) +{ + if(!disp_width_ || !disp_height_) + { + std::cerr << "ERROR: RocVideoDecoder is not intialized" << std::endl; + return false; + } + *surface_info = &output_surface_info_; + return true; +} + +bool +RocVideoDecoder::InitHIP(int device_id) +{ + HIP_API_CALL(hipGetDeviceCount(&num_devices_)); + if(num_devices_ < 1) + { + std::cerr << "ERROR: didn't find any GPU!" 
<< std::endl; + return false; + } + HIP_API_CALL(hipSetDevice(device_id)); + HIP_API_CALL(hipGetDeviceProperties(&hip_dev_prop_, device_id)); + HIP_API_CALL(hipStreamCreate(&hip_stream_)); + return true; +} + +std::chrono::_V2::system_clock::time_point +RocVideoDecoder::StartTimer() +{ + return std::chrono::_V2::system_clock::now(); +} + +double +RocVideoDecoder::StopTimer(const std::chrono::_V2::system_clock::time_point& start_time) +{ + return std::chrono::duration( + std::chrono::_V2::system_clock::now() - start_time) + .count(); +} + +bool +RocVideoDecoder::CodecSupported(int device_id, rocDecVideoCodec codec_id, + uint32_t bit_depth) +{ + RocdecDecodeCaps decode_caps; + decode_caps.device_id = device_id; + decode_caps.codec_type = codec_id; + decode_caps.chroma_format = rocDecVideoChromaFormat_420; + decode_caps.bit_depth_minus_8 = bit_depth - 8; + if(rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) + { + return false; + } + return true; +} + +void +RocVideoDecoder::WaitForDecodeCompletion() +{ + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + do + { + rocDecStatus result = + rocDecGetDecodeStatus(roc_decoder_, last_decode_surf_idx_, &dec_status); + } while(dec_status.decode_status == rocDecodeStatus_InProgress); +} diff --git a/examples/videodecode/roc_video_dec.h b/examples/videodecode/roc_video_dec.h new file mode 100644 index 00000000..40684d61 --- /dev/null +++ b/examples/videodecode/roc_video_dec.h @@ -0,0 +1,650 @@ + +/* +Copyright (c) 2023 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +extern "C" +{ +#include "libavutil/md5.h" +#include "libavutil/mem.h" +} +#include "rocdecode.h" +#include "rocparser.h" + +/*! + * \file + * \brief The AMD Video Decode Library. + * + * \defgroup group_amd_roc_video_dec rocDecode Video Decode: AMD Video Decode API + * \brief AMD The rocDecode video decoder for AMD’s GPUs. 
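+ * Declares the RocVideoDecoder helper class used by the videodecode examples.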
+ */ + +#define MAX_FRAME_NUM 16 + +typedef int(ROCDECAPI* PFNRECONFIGUEFLUSHCALLBACK)(void*, uint32_t, void*); + +typedef enum SeiAvcHevcPayloadType_enum +{ + SEI_TYPE_TIME_CODE = 136, + SEI_TYPE_USER_DATA_UNREGISTERED = 5 +} SeiAvcHevcPayloadType; + +typedef enum OutputSurfaceMemoryType_enum +{ + OUT_SURFACE_MEM_DEV_INTERNAL = 0, /**< Internal interopped decoded surface + memory(original mapped decoded surface) */ + OUT_SURFACE_MEM_DEV_COPIED = + 1, /**< decoded output will be copied to a separate device memory (the user + doesn't need to call release) **/ + OUT_SURFACE_MEM_HOST_COPIED = + 2, /**< decoded output will be copied to a separate host memory (the user doesn't + need to call release) **/ + OUT_SURFACE_MEM_NOT_MAPPED = 3 /**< < decoded output is not available (interop won't + be used): useful for decode only performance app*/ +} OutputSurfaceMemoryType; + +#define TOSTR(X) std::to_string(static_cast(X)) +#define STR(X) std::string(X) + +#if DBGINFO +# define INFO(X) \ + std::clog << "[INF] " \ + << " {" << __func__ << "} " \ + << " " << X << std::endl; +#else +# define INFO(X) ; +#endif +#define ERR(X) \ + std::cerr << "[ERR] " \ + << " {" << __func__ << "} " \ + << " " << X << std::endl; + +class RocVideoDecodeException : public std::exception +{ +public: + explicit RocVideoDecodeException(const std::string& message, const int err_code) + : _message(message) + , _err_code(err_code) + {} + explicit RocVideoDecodeException(const std::string& message) + : _message(message) + , _err_code(-1) + {} + virtual const char* what() const throw() override { return _message.c_str(); } + int Geterror_code() const { return _err_code; } + +private: + std::string _message; + int _err_code; +}; + +#define ROCDEC_THROW(X, CODE) \ + throw RocVideoDecodeException(" { " + std::string(__func__) + " } " + X, CODE); +#define THROW(X) throw RocVideoDecodeException(" { " + std::string(__func__) + " } " + X); + +#define ROCDEC_API_CALL(rocDecAPI) \ + do \ + { \ + rocDecStatus error_code = rocDecAPI; \ + if(error_code != ROCDEC_SUCCESS) \ + { \ + std::ostringstream error_log; \ + error_log << #rocDecAPI << " returned " << rocDecGetErrorName(error_code) \ + << " at " << __FILE__ << ":" << __LINE__; \ + ROCDEC_THROW(error_log.str(), error_code); \ + } \ + } while(0) + +#define HIP_API_CALL(call) \ + do \ + { \ + hipError_t hip_status = call; \ + if(hip_status != hipSuccess) \ + { \ + const char* sz_err_name = NULL; \ + sz_err_name = hipGetErrorName(hip_status); \ + std::ostringstream error_log; \ + error_log << "hip API error " << sz_err_name; \ + ROCDEC_THROW(error_log.str(), hip_status); \ + } \ + } while(0) + +struct Rect +{ + int left; + int top; + int right; + int bottom; +}; + +struct Dim +{ + int w, h; +}; + +static inline int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + +typedef struct DecFrameBuffer_ +{ + uint8_t* frame_ptr; /**< device memory pointer for the decoded frame */ + int64_t pts; /**< timestamp for the decoded frame */ + int picture_index; /**< surface index for the decoded frame */ +} DecFrameBuffer; + +typedef struct OutputSurfaceInfoType +{ + uint32_t output_width; /**< Output width of decoded surface*/ + uint32_t output_height; /**< Output height of decoded surface*/ + uint32_t output_pitch; /**< Output pitch in bytes of luma plane, chroma pitch can be + inferred based on chromaFormat*/ + uint32_t output_vstride; /**< Output vertical stride in case of using internal mem + pointer **/ + uint32_t bytes_per_pixel; /**< Output BytesPerPixel 
of decoded image*/ + uint32_t bit_depth; /**< Output BitDepth of the image*/ + uint32_t num_chroma_planes; /**< Output Chroma number of planes*/ + uint64_t output_surface_size_in_bytes; /**< Output Image Size in Bytes; including both + luma and chroma planes*/ + rocDecVideoSurfaceFormat surface_format; /**< Chroma format of the decoded image*/ + OutputSurfaceMemoryType mem_type; /**< Output mem_type of the surface*/ +} OutputSurfaceInfo; + +typedef struct ReconfigParams_t +{ + PFNRECONFIGUEFLUSHCALLBACK p_fn_reconfigure_flush; + void* p_reconfig_user_struct; + uint32_t reconfig_flush_mode; +} ReconfigParams; + +class RocVideoDecoder +{ +public: + /** + * @brief Construct a new Roc Video Decoder object + * + * @param hip_ctx + * @param b_use_device_mem + * @param codec + * @param device_id + * @param b_low_latency + * @param device_frame_pitched + * @param p_crop_rect + * @param extract_user_SEI_Message + * @param max_width + * @param max_height + * @param clk_rate + * @param force_zero_latency + */ + RocVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_type, + rocDecVideoCodec codec, bool force_zero_latency = false, + const Rect* p_crop_rect = nullptr, + bool extract_user_SEI_Message = false, uint32_t disp_delay = 0, + int max_width = 0, int max_height = 0, uint32_t clk_rate = 1000); + ~RocVideoDecoder(); + + rocDecVideoCodec GetCodecId() { return codec_id_; } + + hipStream_t GetStream() { return hip_stream_; } + + /** + * @brief Get the output frame width + */ + uint32_t GetWidth() + { + assert(disp_width_); + return disp_width_; + } + + /** + * @brief This function is used to get the actual decode width + */ + int GetDecodeWidth() + { + assert(coded_width_); + return coded_width_; + } + + /** + * @brief Get the output frame height + */ + uint32_t GetHeight() + { + assert(disp_height_); + return disp_height_; + } + + /** + * @brief This function is used to get the current chroma height. + */ + int GetChromaHeight() + { + assert(chroma_height_); + return chroma_height_; + } + + /** + * @brief This function is used to get the number of chroma planes. + */ + int GetNumChromaPlanes() + { + assert(num_chroma_planes_); + return num_chroma_planes_; + } + + /** + * @brief This function is used to get the current frame size based on pixel + * format. 
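+     * The size is disp_width * (disp_height + chroma_height * num_chroma_planes) *
+     * bytes_per_pixel.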
+ */ + int GetFrameSize() + { + assert(disp_width_); + return disp_width_ * (disp_height_ + (chroma_height_ * num_chroma_planes_)) * + byte_per_pixel_; + } + + /** + * @brief This function is used to get the current frame size based on pitch + */ + int GetFrameSizePitched() + { + assert(surface_stride_); + return surface_stride_ * (disp_height_ + (chroma_height_ * num_chroma_planes_)); + } + + /** + * @brief Get the Bit Depth and BytesPerPixel associated with the pixel format + * + * @return uint32_t + */ + uint32_t GetBitDepth() + { + assert(bitdepth_minus_8_); + return (bitdepth_minus_8_ + 8); + } + uint32_t GetBytePerPixel() + { + assert(byte_per_pixel_); + return byte_per_pixel_; + } + /** + * @brief Functions to get the output surface attributes + */ + size_t GetSurfaceSize() + { + assert(surface_size_); + return surface_size_; + } + uint32_t GetSurfaceStride() + { + assert(surface_stride_); + return surface_stride_; + } + // RocDecImageFormat GetSubsampling() { return subsampling_; } + /** + * @brief Get the name of the output format + * + * @param codec_id + * @return std::string + */ + const char* GetCodecFmtName(rocDecVideoCodec codec_id); + + /** + * @brief function to return the name from surface_format_id + * + * @param surface_format_id - enum for surface format + * @return const char* + */ + const char* GetSurfaceFmtName(rocDecVideoSurfaceFormat surface_format_id); + + /** + * @brief Get the pointer to the Output Image Info + * + * @param surface_info ptr to output surface info + * @return true + * @return false + */ + bool GetOutputSurfaceInfo(OutputSurfaceInfo** surface_info); + + /** + * @brief Function to set the Reconfig Params object + * + * @param p_reconfig_params: pointer to reconfig params struct + * @return true : success + * @return false : fail + */ + bool SetReconfigParams(ReconfigParams* p_reconfig_params, + bool b_force_reconfig_flush = false); + + /** + * @brief Function to force Reconfigure Flush: needed for random seeking to key frames + * + * @return int 1: Success 0: Fail + */ + int FlushAndReconfigure(); + /** + * @brief this function decodes a frame and returns the number of frames avalable for + * display + * + * @param data - pointer to the data buffer that is to be decode + * @param size - size of the data buffer in bytes + * @param pts - presentation timestamp + * @param flags - video packet flags + * @param num_decoded_pics - nummber of pictures decoded in this call + * @return int - num of frames to display + */ + int DecodeFrame(const uint8_t* data, size_t size, int pkt_flags, int64_t pts = 0, + int* num_decoded_pics = nullptr); + /** + * @brief This function returns a decoded frame and timestamp. 
This should be called + * in a loop fetching all the available frames + * + */ + uint8_t* GetFrame(int64_t* pts); + + /** + * @brief function to release frame after use by the application: Only used with + * "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @param pTimestamp - timestamp of the frame to be released (unmapped) + * @param b_flushing - true when flushing + * @return true - success + * @return false - falied + */ + bool ReleaseFrame(int64_t pTimestamp, bool b_flushing = false); + + /** + * @brief utility function to save image to a file + * + * @param output_file_name - file to write + * @param dev_mem - dev_memory pointer of the frame + * @param image_info - output image info + * @param is_output_RGB - to write in RGB + */ + // void SaveImage(std::string output_file_name, void* dev_mem, OutputImageInfo* + // image_info, bool is_output_RGB = 0); + + /** + * @brief Get the Device info for the current device + * + * @param device_name + * @param gcn_arch_name + * @param pci_bus_id + * @param pci_domain_id + * @param pci_device_id + */ + void GetDeviceinfo(std::string& device_name, std::string& gcn_arch_name, + int& pci_bus_id, int& pci_domain_id, int& pci_device_id); + + /** + * @brief Helper function to dump decoded output surface to file + * + * @param output_file_name - Output file name + * @param dev_mem - pointer to surface memory + * @param surf_info - surface info + * @param rgb_image_size - image size for rgb (optional). A non_zero value + * indicates the surf_mem holds an rgb interleaved image and the entire size will be + * dumped to file + */ + void SaveFrameToFile(std::string output_file_name, void* surf_mem, + OutputSurfaceInfo* surf_info, size_t rgb_image_size = 0); + + /** + * @brief Helper funtion to close a existing file and dump to new file in case of + * multiple files using same decoder + */ + void ResetSaveFrameToFile(); + + /** + * @brief Helper function to start MD5 calculation + */ + void InitMd5(); + + void UpdateMd5ForDataBuffer(void* pDevMem, int rgb_image_size); + + /** + * @brief Helper function to dump decoded output surface to file + * + * @param dev_mem - pointer to surface memory + * @param surf_info - surface info + */ + void UpdateMd5ForFrame(void* surf_mem, OutputSurfaceInfo* surf_info); + + /** + * @brief Helper function to complete MD5 calculation + * + * @param [out] digest Pointer to the 16 byte message digest + */ + void FinalizeMd5(uint8_t** digest); + /** + * @brief Get the Num Of Flushed Frames from video decoder object + * + * @return int32_t + */ + int32_t GetNumOfFlushedFrames() { return num_frames_flushed_during_reconfig_; } + + /*! 
\brief Function to wait for the decode completion of the last submitted picture + */ + void WaitForDecodeCompletion(); + + // Session overhead refers to decoder initialization and deinitialization time + void AddDecoderSessionOverHead(std::thread::id session_id, double duration) + { + session_overhead_[session_id] += duration; + } + double GetDecoderSessionOverHead(std::thread::id session_id) + { + if(session_overhead_.find(session_id) != session_overhead_.end()) + { + return session_overhead_[session_id]; + } + else + { + return 0; + } + } + + /** + * @brief Check if the given Video Codec is supported on the given GPU + * + * @return rocDecStatus + */ + bool CodecSupported(int device_id, rocDecVideoCodec codec_id, uint32_t bit_depth); + +private: + /** + * @brief Callback function to be registered for getting a callback when decoding + * of sequence starts + */ + static int ROCDECAPI HandleVideoSequenceProc(void* p_user_data, + RocdecVideoFormat* p_video_format) + { + return ((RocVideoDecoder*) p_user_data)->HandleVideoSequence(p_video_format); + } + + /** + * @brief Callback function to be registered for getting a callback when a decoded + * frame is ready to be decoded + */ + static int ROCDECAPI HandlePictureDecodeProc(void* p_user_data, + RocdecPicParams* p_pic_params) + { + return ((RocVideoDecoder*) p_user_data)->HandlePictureDecode(p_pic_params); + } + + /** + * @brief Callback function to be registered for getting a callback when a decoded + * frame is available for display + */ + static int ROCDECAPI HandlePictureDisplayProc(void* p_user_data, + RocdecParserDispInfo* p_disp_info) + { + return ((RocVideoDecoder*) p_user_data)->HandlePictureDisplay(p_disp_info); + } + + /** + * @brief Callback function to be registered for getting a callback when all the + * unregistered user SEI Messages are parsed for a frame. + */ + static int ROCDECAPI HandleSEIMessagesProc(void* p_user_data, + RocdecSeiMessageInfo* p_sei_message_info) + { + return ((RocVideoDecoder*) p_user_data)->GetSEIMessage(p_sei_message_info); + } + + /** + * @brief This function gets called when a sequence is ready to be decoded. The + function also gets called when there is format change + */ + int HandleVideoSequence(RocdecVideoFormat* p_video_format); + + /** + * @brief This function gets called when a picture is ready to be decoded. + * cuvidDecodePicture is called from this function to decode the picture + */ + int HandlePictureDecode(RocdecPicParams* p_pic_params); + + /** + * @brief This function gets called after a picture is decoded and available for + display. Frames are fetched and stored in internal buffer + */ + int HandlePictureDisplay(RocdecParserDispInfo* p_disp_info); + /** + * @brief This function gets called when all unregistered user SEI messages are + * parsed for a frame + */ + int GetSEIMessage(RocdecSeiMessageInfo* p_sei_message_info); + + /** + * @brief This function reconfigure decoder if there is a change in sequence + * params. 
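+     * @param p_video_format - new sequence parameters reported by the parser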
+ */ + int ReconfigureDecoder(RocdecVideoFormat* p_video_format); + + /** + * @brief function to release all internal frames and clear the vp_frames_q_ (used + * with reconfigure): Only used with "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @return true - success + * @return false - falied + */ + bool ReleaseInternalFrames(); + + /** + * @brief Function to Initialize GPU-HIP + * + */ + bool InitHIP(int device_id); + + /** + * @brief Function to get start time + * + */ + std::chrono::_V2::system_clock::time_point StartTimer(); + + /** + * @brief Function to get elapsed time + * + */ + double StopTimer(const std::chrono::_V2::system_clock::time_point& start_time); + + int num_devices_; + int device_id_; + RocdecVideoParser rocdec_parser_ = nullptr; + rocDecDecoderHandle roc_decoder_ = nullptr; + OutputSurfaceMemoryType out_mem_type_ = OUT_SURFACE_MEM_DEV_INTERNAL; + bool b_extract_sei_message_ = false; + bool b_force_zero_latency_ = false; + uint32_t disp_delay_; + ReconfigParams* p_reconfig_params_ = nullptr; + bool b_force_recofig_flush_ = false; + int32_t num_frames_flushed_during_reconfig_ = 0; + hipDeviceProp_t hip_dev_prop_; + hipStream_t hip_stream_; + rocDecVideoCodec codec_id_ = rocDecVideoCodec_NumCodecs; + rocDecVideoChromaFormat video_chroma_format_ = rocDecVideoChromaFormat_420; + rocDecVideoSurfaceFormat video_surface_format_ = rocDecVideoSurfaceFormat_NV12; + RocdecSeiMessageInfo* curr_sei_message_ptr_ = nullptr; + RocdecSeiMessageInfo sei_message_display_q_[MAX_FRAME_NUM]; + RocdecVideoFormat* curr_video_format_ptr_ = nullptr; + int output_frame_cnt_ = 0, output_frame_cnt_ret_ = 0; + int decoded_pic_cnt_ = 0; + int decode_poc_ = 0, pic_num_in_dec_order_[MAX_FRAME_NUM]; + int num_alloced_frames_ = 0; + int last_decode_surf_idx_ = 0; + std::ostringstream input_video_info_str_; + int bitdepth_minus_8_ = 0; + uint32_t byte_per_pixel_ = 1; + uint32_t coded_width_ = 0; + uint32_t disp_width_ = 0; + uint32_t coded_height_ = 0; + uint32_t disp_height_ = 0; + uint32_t target_width_ = 0; + uint32_t target_height_ = 0; + int max_width_ = 0, max_height_ = 0; + uint32_t chroma_height_ = 0; + uint32_t num_chroma_planes_ = 0; + uint32_t num_components_ = 0; + uint32_t surface_stride_ = 0; + uint32_t + surface_vstride_ = 0, + chroma_vstride_ = + 0; // vertical stride between planes: used when using internal dev memory + size_t surface_size_ = 0; + OutputSurfaceInfo output_surface_info_ = {}; + std::mutex mtx_vp_frame_; + std::vector vp_frames_; // vector of decoded frames + std::queue vp_frames_q_; + Rect disp_rect_ = {}; // displayable area specified in the bitstream + Rect crop_rect_ = {}; // user specified region of interest within diplayable area + // disp_rect_ + FILE* fp_sei_ = NULL; + FILE* fp_out_ = NULL; + struct AVMD5* md5_ctx_; + uint8_t md5_digest_[16]; + bool is_decoder_reconfigured_ = false; + std::string current_output_filename = ""; + uint32_t extra_output_file_count_ = 0; + std::thread::id decoder_session_id_; // Decoder session identifier. Used to gather + // session level stats. + std::unordered_map + session_overhead_; // Records session overhead of initialization+deinitialization + // time. Format is (thread id, duration) +}; diff --git a/examples/videodecode/video_demuxer.h b/examples/videodecode/video_demuxer.h new file mode 100644 index 00000000..c8f24eb5 --- /dev/null +++ b/examples/videodecode/video_demuxer.h @@ -0,0 +1,716 @@ +/* +Copyright (c) 2023 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +extern "C" +{ +#include +#include +#if USE_AVCODEC_GREATER_THAN_58_134 +# include +#endif +} + +#include "rocdecode.h" + +/*! + * \file + * \brief The AMD Video Demuxer for rocDecode Library. + * + * \defgroup group_amd_rocdecode_videodemuxer videoDemuxer: AMD rocDecode Video Demuxer + * API \brief AMD The rocDecode video demuxer API. + */ + +/** + * @brief Enum for Seek mode + * + */ +typedef enum SeekModeEnum +{ + SEEK_MODE_EXACT_FRAME = 0, + SEEK_MODE_PREV_KEY_FRAME = 1, + SEEK_MODE_NUM, +} SeekMode; + +/** + * @brief Enum for Seek Criteria + * + */ +typedef enum SeekCriteriaEnum +{ + SEEK_CRITERIA_FRAME_NUM = 0, + SEEK_CRITERIA_TIME_STAMP = 1, + SEEK_CRITERIA_NUM, +} SeekCriteria; + +struct PacketData +{ + int32_t key; + int64_t pts; + int64_t dts; + uint64_t pos; + uintptr_t bsl_data; + uint64_t bsl; + uint64_t duration; +}; + +class VideoSeekContext +{ +public: + VideoSeekContext() + : use_seek_(false) + , seek_frame_(0) + , seek_mode_(SEEK_MODE_PREV_KEY_FRAME) + , seek_crit_(SEEK_CRITERIA_FRAME_NUM) + , out_frame_pts_(0) + , out_frame_duration_(0) + , num_frames_decoded_(0U) + {} + + VideoSeekContext(uint64_t frame_id) + : use_seek_(true) + , seek_frame_(frame_id) + , seek_mode_(SEEK_MODE_PREV_KEY_FRAME) + , seek_crit_(SEEK_CRITERIA_FRAME_NUM) + , out_frame_pts_(0) + , out_frame_duration_(0) + , num_frames_decoded_(0U) + {} + + VideoSeekContext& operator=(const VideoSeekContext& other) + { + use_seek_ = other.use_seek_; + seek_frame_ = other.seek_frame_; + seek_mode_ = other.seek_mode_; + seek_crit_ = other.seek_crit_; + out_frame_pts_ = other.out_frame_pts_; + out_frame_duration_ = other.out_frame_duration_; + num_frames_decoded_ = other.num_frames_decoded_; + return *this; + } + + /* Will be set to false when not seeking, true otherwise; + */ + bool use_seek_; + + /* Frame we want to get. Set by user. + * Shall be set to frame timestamp in case seek is done by time. + */ + uint64_t seek_frame_; + + /* Mode in which we seek. */ + SeekMode seek_mode_; + + /* Criteria by which we seek. */ + SeekCriteria seek_crit_; + + /* PTS of frame found after seek. */ + int64_t out_frame_pts_; + + /* Duration of frame found after seek. */ + int64_t out_frame_duration_; + + /* Number of frames that were decoded during seek. 
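+     * Estimated from the output frame PTS and the stream frame rate when seeking
+     * to the previous key frame.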
*/ + uint64_t num_frames_decoded_; +}; + +// Video Demuxer Interface class +class VideoDemuxer +{ +public: + class StreamProvider + { + public: + virtual ~StreamProvider() {} + virtual int GetData(uint8_t* buf, int buf_size) = 0; + virtual size_t GetBufferSize() = 0; + }; + AVCodecID GetCodecID() { return av_video_codec_id_; }; + VideoDemuxer(const char* input_file_path) + : VideoDemuxer(CreateFmtContextUtil(input_file_path)) + {} + VideoDemuxer(StreamProvider* stream_provider) + : VideoDemuxer(CreateFmtContextUtil(stream_provider)) + { + av_io_ctx_ = av_fmt_input_ctx_->pb; + } + ~VideoDemuxer() + { + if(!av_fmt_input_ctx_) + { + return; + } + if(packet_) + { + av_packet_free(&packet_); + } + if(packet_filtered_) + { + av_packet_free(&packet_filtered_); + } + if(av_bsf_ctx_) + { + av_bsf_free(&av_bsf_ctx_); + } + avformat_close_input(&av_fmt_input_ctx_); + if(av_io_ctx_) + { + av_freep(&av_io_ctx_->buffer); + av_freep(&av_io_ctx_); + } + if(data_with_header_) + { + av_free(data_with_header_); + } + } + bool Demux(uint8_t** video, int* video_size, int64_t* pts = nullptr) + { + if(!av_fmt_input_ctx_) + { + return false; + } + *video_size = 0; + if(packet_->data) + { + av_packet_unref(packet_); + } + int ret = 0; + while((ret = av_read_frame(av_fmt_input_ctx_, packet_)) >= 0 && + packet_->stream_index != av_stream_) + { + av_packet_unref(packet_); + } + if(ret < 0) + { + return false; + } + if(is_h264_ || is_hevc_) + { + if(packet_filtered_->data) + { + av_packet_unref(packet_filtered_); + } + if(av_bsf_send_packet(av_bsf_ctx_, packet_) != 0) + { + std::cerr << "ERROR: av_bsf_send_packet failed!" << std::endl; + return false; + } + if(av_bsf_receive_packet(av_bsf_ctx_, packet_filtered_) != 0) + { + std::cerr << "ERROR: av_bsf_receive_packet failed!" << std::endl; + return false; + } + *video = packet_filtered_->data; + *video_size = packet_filtered_->size; + if(packet_filtered_->dts != AV_NOPTS_VALUE) + { + pkt_dts_ = packet_filtered_->dts; + } + else + { + pkt_dts_ = packet_filtered_->pts; + } + if(pts) + { + *pts = + (int64_t)(packet_filtered_->pts * default_time_scale_ * time_base_); + pkt_duration_ = packet_filtered_->duration; + } + } + else + { + if(is_mpeg4_ && (frame_count_ == 0)) + { + int ext_data_size = + av_fmt_input_ctx_->streams[av_stream_]->codecpar->extradata_size; + if(ext_data_size > 0) + { + data_with_header_ = (uint8_t*) av_malloc( + ext_data_size + packet_->size - 3 * sizeof(uint8_t)); + if(!data_with_header_) + { + std::cerr << "ERROR: av_malloc failed!" << std::endl; + return false; + } + memcpy(data_with_header_, + av_fmt_input_ctx_->streams[av_stream_]->codecpar->extradata, + ext_data_size); + memcpy(data_with_header_ + ext_data_size, packet_->data + 3, + packet_->size - 3 * sizeof(uint8_t)); + *video = data_with_header_; + *video_size = ext_data_size + packet_->size - 3 * sizeof(uint8_t); + } + } + else + { + *video = packet_->data; + *video_size = packet_->size; + } + if(packet_->dts != AV_NOPTS_VALUE) + { + pkt_dts_ = packet_->dts; + } + else + { + pkt_dts_ = packet_->pts; + } + if(pts) + { + *pts = (int64_t)(packet_->pts * default_time_scale_ * time_base_); + pkt_duration_ = packet_->duration; + } + } + frame_count_++; + return true; + } + bool Seek(VideoSeekContext& seek_ctx, uint8_t** pp_video, int* video_size) + { + /* !!! IMPORTANT !!! + * Across this function, packet decode timestamp (DTS) values are used to + * compare given timestamp against. 
This is done because DTS values shall + * monotonically increase during the course of decoding unlike PTS values + * which may be affected by frame reordering due to B frames. + */ + + if(!is_seekable_) + { + std::cerr << "ERROR: Seek isn't supported for this input." << std::endl; + return false; + } + + if(IsVFR() && (SEEK_CRITERIA_FRAME_NUM == seek_ctx.seek_crit_)) + { + std::cerr << "ERROR: Can't seek by frame number in VFR sequences. Seek by " + "timestamp instead." + << std::endl; + return false; + } + + // Seek for single frame; + auto seek_frame = [&](VideoSeekContext const& seek_ctx, int flags) { + bool seek_backward = true; + int64_t timestamp = 0; + int ret = 0; + + switch(seek_ctx.seek_crit_) + { + case SEEK_CRITERIA_FRAME_NUM: + timestamp = TsFromFrameNumber(seek_ctx.seek_frame_); + ret = av_seek_frame(av_fmt_input_ctx_, av_stream_, timestamp, + seek_backward ? AVSEEK_FLAG_BACKWARD | flags + : flags); + break; + case SEEK_CRITERIA_TIME_STAMP: + timestamp = TsFromTime(seek_ctx.seek_frame_); + ret = av_seek_frame(av_fmt_input_ctx_, av_stream_, timestamp, + seek_backward ? AVSEEK_FLAG_BACKWARD | flags + : flags); + break; + default: std::cerr << "ERROR: Invalid seek mode" << std::endl; ret = -1; + } + + if(ret < 0) + { + throw std::runtime_error("ERROR: seeking for frame"); + } + }; + + // Check if frame satisfies seek conditions; + auto is_seek_done = [&](PacketData& pkt_data, VideoSeekContext const& seek_ctx) { + int64_t target_ts = 0; + + switch(seek_ctx.seek_crit_) + { + case SEEK_CRITERIA_FRAME_NUM: + target_ts = TsFromFrameNumber(seek_ctx.seek_frame_); + break; + case SEEK_CRITERIA_TIME_STAMP: + target_ts = TsFromTime(seek_ctx.seek_frame_); + break; + default: + std::cerr << "ERROR::Invalid seek criteria" << std::endl; + return -1; + } + + if(pkt_dts_ == target_ts) + { + return 0; + } + else if(pkt_dts_ > target_ts) + { + return 1; + } + else + { + return -1; + }; + }; + + /* This will seek for exact frame number; + * Note that decoder may not be able to decode such frame; */ + auto seek_for_exact_frame = [&](PacketData& pkt_data, + VideoSeekContext& seek_ctx) { + // Repetititive seek until seek condition is satisfied; + VideoSeekContext tmp_ctx(seek_ctx.seek_frame_); + seek_frame(tmp_ctx, AVSEEK_FLAG_ANY); + + int seek_done = 0; + do + { + if(!Demux(pp_video, video_size, &pkt_data.pts)) + { + throw std::runtime_error("ERROR: Demux failed trying to seek for " + "specified frame number/timestamp"); + } + seek_done = is_seek_done(pkt_data, seek_ctx); + // TODO: one last condition, check for a target too high than available + // for timestamp + if(seek_done > 0) + { // We've gone too far and need to seek backwards; + if((tmp_ctx.seek_frame_--) >= 0) + { + seek_frame(tmp_ctx, AVSEEK_FLAG_ANY); + } + } + else if(seek_done < 0) + { // Need to read more frames until we reach requested number; + tmp_ctx.seek_frame_++; + seek_frame(tmp_ctx, AVSEEK_FLAG_ANY); + } + if(tmp_ctx.seek_frame_ == + seek_ctx.seek_frame_) // if frame 'N' is too far and frame 'N-1' is + // too less from target. 
Avoids infinite loop + // between N & N-1 + break; + } while(seek_done != 0); + + seek_ctx.out_frame_pts_ = pkt_data.pts; + seek_ctx.out_frame_duration_ = pkt_data.duration = pkt_duration_; + }; + + // Seek for closest key frame in the past; + auto seek_for_prev_key_frame = [&](PacketData& pkt_data, + VideoSeekContext& seek_ctx) { + seek_frame(seek_ctx, AVSEEK_FLAG_BACKWARD); + Demux(pp_video, video_size, &pkt_data.pts); + seek_ctx.num_frames_decoded_ = + static_cast(pkt_data.pts / 1000 * frame_rate_); + seek_ctx.out_frame_pts_ = pkt_data.pts; + seek_ctx.out_frame_duration_ = pkt_data.duration = pkt_duration_; + }; + + PacketData pktData; + pktData.bsl_data = size_t(*pp_video); + pktData.bsl = *video_size; + + switch(seek_ctx.seek_mode_) + { + case SEEK_MODE_EXACT_FRAME: seek_for_exact_frame(pktData, seek_ctx); break; + case SEEK_MODE_PREV_KEY_FRAME: + seek_for_prev_key_frame(pktData, seek_ctx); + break; + default: throw std::runtime_error("ERROR::Unsupported seek mode"); break; + } + + return true; + } + const uint32_t GetWidth() const { return width_; } + const uint32_t GetHeight() const { return height_; } + const uint32_t GetChromaHeight() const { return chroma_height_; } + const uint32_t GetBitDepth() const { return bit_depth_; } + const uint32_t GetBytePerPixel() const { return byte_per_pixel_; } + const uint32_t GetBitRate() const { return bit_rate_; } + const double GetFrameRate() const { return frame_rate_; }; + bool IsVFR() const { return frame_rate_ != avg_frame_rate_; }; + int64_t TsFromTime(double ts_sec) + { + // Convert integer timestamp representation to AV_TIME_BASE and switch to + // fixed_point + auto const ts_tbu = llround(ts_sec * AV_TIME_BASE); + // Rescale the timestamp to value represented in stream base units; + AVRational time_factor = { 1, AV_TIME_BASE }; + return av_rescale_q(ts_tbu, time_factor, + av_fmt_input_ctx_->streams[av_stream_]->time_base); + } + + int64_t TsFromFrameNumber(int64_t frame_num) + { + auto const ts_sec = static_cast(frame_num) / frame_rate_; + return TsFromTime(ts_sec); + } + +private: + VideoDemuxer(AVFormatContext* av_fmt_input_ctx) + : av_fmt_input_ctx_(av_fmt_input_ctx) + { + av_log_set_level(AV_LOG_QUIET); + if(!av_fmt_input_ctx_) + { + std::cerr << "ERROR: av_fmt_input_ctx_ is not vaild!" << std::endl; + return; + } + packet_ = av_packet_alloc(); + packet_filtered_ = av_packet_alloc(); + if(!packet_ || !packet_filtered_) + { + std::cerr << "ERROR: av_packet_alloc failed!" << std::endl; + return; + } + if(avformat_find_stream_info(av_fmt_input_ctx_, nullptr) < 0) + { + std::cerr << "ERROR: avformat_find_stream_info failed!" << std::endl; + return; + } + av_stream_ = av_find_best_stream(av_fmt_input_ctx_, AVMEDIA_TYPE_VIDEO, -1, -1, + nullptr, 0); + if(av_stream_ < 0) + { + std::cerr << "ERROR: av_find_best_stream failed!" 
<< std::endl; + av_packet_free(&packet_); + av_packet_free(&packet_filtered_); + return; + } + av_video_codec_id_ = av_fmt_input_ctx_->streams[av_stream_]->codecpar->codec_id; + width_ = av_fmt_input_ctx_->streams[av_stream_]->codecpar->width; + height_ = av_fmt_input_ctx_->streams[av_stream_]->codecpar->height; + chroma_format_ = + (AVPixelFormat) av_fmt_input_ctx_->streams[av_stream_]->codecpar->format; + bit_rate_ = av_fmt_input_ctx_->streams[av_stream_]->codecpar->bit_rate; + if(av_fmt_input_ctx_->streams[av_stream_]->r_frame_rate.den != 0) + frame_rate_ = static_cast( + av_fmt_input_ctx_->streams[av_stream_]->r_frame_rate.num) / + static_cast( + av_fmt_input_ctx_->streams[av_stream_]->r_frame_rate.den); + if(av_fmt_input_ctx_->streams[av_stream_]->avg_frame_rate.den != 0) + avg_frame_rate_ = + static_cast( + av_fmt_input_ctx_->streams[av_stream_]->avg_frame_rate.num) / + static_cast( + av_fmt_input_ctx_->streams[av_stream_]->avg_frame_rate.den); + + switch(chroma_format_) + { + case AV_PIX_FMT_YUV420P10LE: + case AV_PIX_FMT_GRAY10LE: + bit_depth_ = 10; + chroma_height_ = (height_ + 1) >> 1; + byte_per_pixel_ = 2; + break; + case AV_PIX_FMT_YUV420P12LE: + bit_depth_ = 12; + chroma_height_ = (height_ + 1) >> 1; + byte_per_pixel_ = 2; + break; + case AV_PIX_FMT_YUV444P10LE: + bit_depth_ = 10; + chroma_height_ = height_ << 1; + byte_per_pixel_ = 2; + break; + case AV_PIX_FMT_YUV444P12LE: + bit_depth_ = 12; + chroma_height_ = height_ << 1; + byte_per_pixel_ = 2; + break; + case AV_PIX_FMT_YUV444P: + bit_depth_ = 8; + chroma_height_ = height_ << 1; + byte_per_pixel_ = 1; + break; + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUVJ420P: + case AV_PIX_FMT_YUVJ422P: + case AV_PIX_FMT_YUVJ444P: + case AV_PIX_FMT_GRAY8: + bit_depth_ = 8; + chroma_height_ = (height_ + 1) >> 1; + byte_per_pixel_ = 1; + break; + default: + chroma_format_ = AV_PIX_FMT_YUV420P; + bit_depth_ = 8; + chroma_height_ = (height_ + 1) >> 1; + byte_per_pixel_ = 1; + } + + AVRational time_base = av_fmt_input_ctx_->streams[av_stream_]->time_base; + time_base_ = av_q2d(time_base); + + is_h264_ = av_video_codec_id_ == AV_CODEC_ID_H264 && + (!strcmp(av_fmt_input_ctx_->iformat->long_name, "QuickTime / MOV") || + !strcmp(av_fmt_input_ctx_->iformat->long_name, "FLV (Flash Video)") || + !strcmp(av_fmt_input_ctx_->iformat->long_name, "Matroska / WebM")); + is_hevc_ = av_video_codec_id_ == AV_CODEC_ID_HEVC && + (!strcmp(av_fmt_input_ctx_->iformat->long_name, "QuickTime / MOV") || + !strcmp(av_fmt_input_ctx_->iformat->long_name, "FLV (Flash Video)") || + !strcmp(av_fmt_input_ctx_->iformat->long_name, "Matroska / WebM")); + is_mpeg4_ = + av_video_codec_id_ == AV_CODEC_ID_MPEG4 && + (!strcmp(av_fmt_input_ctx_->iformat->long_name, "QuickTime / MOV") || + !strcmp(av_fmt_input_ctx_->iformat->long_name, "FLV (Flash Video)") || + !strcmp(av_fmt_input_ctx_->iformat->long_name, "Matroska / WebM")); + + // Check if the input file allow seek functionality. + is_seekable_ = av_fmt_input_ctx_->iformat->read_seek || + av_fmt_input_ctx_->iformat->read_seek2; + + if(is_h264_) + { + const AVBitStreamFilter* bsf = av_bsf_get_by_name("h264_mp4toannexb"); + if(!bsf) + { + std::cerr << "ERROR: av_bsf_get_by_name() failed" << std::endl; + av_packet_free(&packet_); + av_packet_free(&packet_filtered_); + return; + } + if(av_bsf_alloc(bsf, &av_bsf_ctx_) != 0) + { + std::cerr << "ERROR: av_bsf_alloc failed!" 
<< std::endl; + return; + } + avcodec_parameters_copy(av_bsf_ctx_->par_in, + av_fmt_input_ctx_->streams[av_stream_]->codecpar); + if(av_bsf_init(av_bsf_ctx_) < 0) + { + std::cerr << "ERROR: av_bsf_init failed!" << std::endl; + return; + } + } + if(is_hevc_) + { + const AVBitStreamFilter* bsf = av_bsf_get_by_name("hevc_mp4toannexb"); + if(!bsf) + { + std::cerr << "ERROR: av_bsf_get_by_name() failed" << std::endl; + av_packet_free(&packet_); + av_packet_free(&packet_filtered_); + return; + } + if(av_bsf_alloc(bsf, &av_bsf_ctx_) != 0) + { + std::cerr << "ERROR: av_bsf_alloc failed!" << std::endl; + return; + } + avcodec_parameters_copy(av_bsf_ctx_->par_in, + av_fmt_input_ctx_->streams[av_stream_]->codecpar); + if(av_bsf_init(av_bsf_ctx_) < 0) + { + std::cerr << "ERROR: av_bsf_init failed!" << std::endl; + return; + } + } + } + AVFormatContext* CreateFmtContextUtil(StreamProvider* stream_provider) + { + AVFormatContext* ctx = nullptr; + if(!(ctx = avformat_alloc_context())) + { + std::cerr << "ERROR: avformat_alloc_context failed" << std::endl; + return nullptr; + } + uint8_t* avioc_buffer = nullptr; + int avioc_buffer_size = stream_provider->GetBufferSize(); + avioc_buffer = (uint8_t*) av_malloc(avioc_buffer_size); + if(!avioc_buffer) + { + std::cerr << "ERROR: av_malloc failed!" << std::endl; + return nullptr; + } + av_io_ctx_ = avio_alloc_context(avioc_buffer, avioc_buffer_size, 0, + stream_provider, &ReadPacket, nullptr, nullptr); + if(!av_io_ctx_) + { + std::cerr << "ERROR: avio_alloc_context failed!" << std::endl; + return nullptr; + } + ctx->pb = av_io_ctx_; + + if(avformat_open_input(&ctx, nullptr, nullptr, nullptr) != 0) + { + std::cerr << "ERROR: avformat_open_input failed!" << std::endl; + return nullptr; + } + return ctx; + } + AVFormatContext* CreateFmtContextUtil(const char* input_file_path) + { + avformat_network_init(); + AVFormatContext* ctx = nullptr; + if(avformat_open_input(&ctx, input_file_path, nullptr, nullptr) != 0) + { + std::cerr << "ERROR: avformat_open_input failed!" 
<< std::endl; + return nullptr; + } + return ctx; + } + static int ReadPacket(void* data, uint8_t* buf, int buf_size) + { + return ((StreamProvider*) data)->GetData(buf, buf_size); + } + AVFormatContext* av_fmt_input_ctx_ = nullptr; + AVIOContext* av_io_ctx_ = nullptr; + AVPacket* packet_ = nullptr; + AVPacket* packet_filtered_ = nullptr; + AVBSFContext* av_bsf_ctx_ = nullptr; + AVCodecID av_video_codec_id_; + AVPixelFormat chroma_format_; + double frame_rate_ = 0.0; + double avg_frame_rate_ = 0.0; + uint8_t* data_with_header_ = nullptr; + int av_stream_ = 0; + bool is_h264_ = false; + bool is_hevc_ = false; + bool is_mpeg4_ = false; + bool is_seekable_ = false; + int64_t default_time_scale_ = 1000; + double time_base_ = 0.0; + uint32_t frame_count_ = 0; + uint32_t width_ = 0; + uint32_t height_ = 0; + uint32_t chroma_height_ = 0; + uint32_t bit_depth_ = 0; + uint32_t byte_per_pixel_ = 0; + uint32_t bit_rate_ = 0; + // used for Seek Exact frame + int64_t pkt_dts_ = 0; + int64_t pkt_duration_ = 0; +}; + +static inline rocDecVideoCodec +AVCodec2RocDecVideoCodec(AVCodecID av_codec) +{ + switch(av_codec) + { + case AV_CODEC_ID_MPEG1VIDEO: return rocDecVideoCodec_MPEG1; + case AV_CODEC_ID_MPEG2VIDEO: return rocDecVideoCodec_MPEG2; + case AV_CODEC_ID_MPEG4: return rocDecVideoCodec_MPEG4; + case AV_CODEC_ID_H264: return rocDecVideoCodec_AVC; + case AV_CODEC_ID_HEVC: return rocDecVideoCodec_HEVC; + case AV_CODEC_ID_VP8: return rocDecVideoCodec_VP8; + case AV_CODEC_ID_VP9: return rocDecVideoCodec_VP9; + case AV_CODEC_ID_MJPEG: return rocDecVideoCodec_JPEG; + case AV_CODEC_ID_AV1: return rocDecVideoCodec_AV1; + default: return rocDecVideoCodec_NumCodecs; + } +} diff --git a/examples/videodecode/videodecodebatch.cpp b/examples/videodecode/videodecodebatch.cpp new file mode 100644 index 00000000..89920f35 --- /dev/null +++ b/examples/videodecode/videodecodebatch.cpp @@ -0,0 +1,729 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() +# include +#else +# include +#endif +#include "common.h" +#include "roc_video_dec.h" +#include "video_demuxer.h" + +class ThreadPool +{ +public: + ThreadPool(int nthreads) + : shutdown_(false) + { + // Create the specified number of threads + threads_.reserve(nthreads); + for(int i = 0; i < nthreads; ++i) + threads_.emplace_back(std::bind(&ThreadPool::ThreadEntry, this, i)); + } + + ~ThreadPool() {} + + void JoinThreads() + { + { + // Unblock any threads and tell them to stop + std::unique_lock lock(mutex_); + shutdown_ = true; + cond_var_.notify_all(); + } + + // Wait for all threads to stop + for(auto& thread : threads_) + thread.join(); + } + + void ExecuteJob(std::function func) + { + // Place a job on the queue and unblock a thread + std::unique_lock lock(mutex_); + decode_jobs_queue_.emplace(std::move(func)); + cond_var_.notify_one(); + } + +protected: + void ThreadEntry(int i) + { + std::function execute_decode_job; + + while(true) + { + { + std::unique_lock lock(mutex_); + cond_var_.wait(lock, + [&] { return shutdown_ || !decode_jobs_queue_.empty(); }); + if(decode_jobs_queue_.empty()) + { + // No jobs to do; shutting down + return; + } + + execute_decode_job = std::move(decode_jobs_queue_.front()); + decode_jobs_queue_.pop(); + } + + // Execute the decode job without holding any locks + execute_decode_job(); + } + } + + std::mutex mutex_; + std::condition_variable cond_var_; + bool shutdown_; + std::queue> decode_jobs_queue_; + std::vector threads_; +}; + +struct DecoderInfo +{ + int dec_device_id; + std::unique_ptr viddec; + std::uint32_t bit_depth; + rocDecVideoCodec rocdec_codec_id; + std::atomic_bool decoding_complete; + + DecoderInfo() + : dec_device_id(0) + , viddec(nullptr) + , bit_depth(8) + , decoding_complete(false) + {} +}; + +void +DecProc(RocVideoDecoder* p_dec, VideoDemuxer* demuxer, int* pn_frame, double* pn_fps, + std::atomic_bool& decoding_complete, bool& b_dump_output_frames, + std::string& output_file_name, OutputSurfaceMemoryType mem_type) +{ + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + uint8_t * p_video = nullptr, *p_frame = nullptr; + int64_t pts = 0; + double total_dec_time = 0.0; + OutputSurfaceInfo* surf_info; + auto start_time = std::chrono::high_resolution_clock::now(); + do + { + demuxer->Demux(&p_video, &n_video_bytes, &pts); + n_frame_returned = p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts); + n_frame += n_frame_returned; + if(b_dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) + { + if(n_frame_returned) + { + if(!p_dec->GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" 
<< std::endl; + break; + } + } + for(int i = 0; i < n_frame_returned; i++) + { + p_frame = p_dec->GetFrame(&pts); + p_dec->SaveFrameToFile(output_file_name, p_frame, surf_info); + // release frame + p_dec->ReleaseFrame(pts); + } + } + } while(n_video_bytes); + n_frame += p_dec->GetNumOfFlushedFrames(); + + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_decode = + std::chrono::duration(end_time - start_time).count(); + + // Calculate average decoding time + total_dec_time = time_per_decode; + double average_decoding_time = total_dec_time / n_frame; + double n_fps = 1000 / average_decoding_time; + *pn_fps = n_fps; + *pn_frame = n_frame; + p_dec->ResetSaveFrameToFile(); + decoding_complete = true; +} + +void +ShowHelpAndExit(const char* option = NULL) +{ + std::cout + << "Options:" << std::endl + << "-i " << std::endl + << "-t Number of threads ( 1 >= n_thread <= 64) - optional; default: 4" + << std::endl + << "-d Device ID (>= 0) - optional; default: 0" << std::endl + << "-o Directory for output YUV files - optional" << std::endl + << "-m output_surface_memory_type - decoded surface memory; optional; default - 3" + << " [0 : OUT_SURFACE_MEM_DEV_INTERNAL/ 1 : OUT_SURFACE_MEM_DEV_COPIED/ 2 : " + "OUT_SURFACE_MEM_HOST_COPIED/ 3 : OUT_SURFACE_MEM_NOT_MAPPED]" + << std::endl + << "-disp_delay -specify the number of frames to be delayed for display; " + "optional; default: 1" + << std::endl; + exit(0); +} + +void +ParseCommandLine(std::string& input_folder_path, std::string& output_folder_path, + int& device_id, int& n_thread, bool& b_dump_output_frames, + OutputSurfaceMemoryType& mem_type, int& disp_delay, int argc, + char* argv[]) +{ + // Parse command-line arguments + if(argc <= 1) + { + ShowHelpAndExit(); + } + for(int i = 1; i < argc; i++) + { + if(!strcmp(argv[i], "-h")) + { + ShowHelpAndExit(); + } + if(!strcmp(argv[i], "-i")) + { + if(++i == argc) + { + ShowHelpAndExit("-i"); + } + input_folder_path = argv[i]; + continue; + } + if(!strcmp(argv[i], "-t")) + { + if(++i == argc) + { + ShowHelpAndExit("-t"); + } + n_thread = atoi(argv[i]); + if(n_thread <= 0 || n_thread > 64) + { + ShowHelpAndExit(argv[i]); + } + continue; + } + if(!strcmp(argv[i], "-d")) + { + if(++i == argc) + { + ShowHelpAndExit("-d"); + } + device_id = atoi(argv[i]); + if(device_id < 0) + { + ShowHelpAndExit(argv[i]); + } + continue; + } + if(!strcmp(argv[i], "-o")) + { + if(++i == argc) + { + ShowHelpAndExit("-o"); + } + output_folder_path = argv[i]; +#if __cplusplus >= 201703L && __has_include() + if(std::filesystem::is_directory(output_folder_path)) + { + std::filesystem::remove_all(output_folder_path); + } + std::filesystem::create_directory(output_folder_path); +#else + if(std::experimental::filesystem::is_directory(output_folder_path)) + { + std::experimental::filesystem::remove_all(output_folder_path); + } + std::experimental::filesystem::create_directory(output_folder_path); +#endif + b_dump_output_frames = true; + continue; + } + if(!strcmp(argv[i], "-m")) + { + if(++i == argc) + { + ShowHelpAndExit("-m"); + } + mem_type = static_cast(atoi(argv[i])); + continue; + } + if(!strcmp(argv[i], "-disp_delay")) + { + if(++i == argc) + { + ShowHelpAndExit("-disp_delay"); + } + disp_delay = atoi(argv[i]); + continue; + } + ShowHelpAndExit(argv[i]); + } +} + +int +main(int argc, char** argv) +{ + std::string input_folder_path, output_folder_path; + int device_id = 0, num_files = 0; + int n_thread = 4; + int disp_delay = 1; + Rect* p_crop_rect = nullptr; + bool b_extract_sei_messages = false; + 
OutputSurfaceMemoryType mem_type = + OUT_SURFACE_MEM_DEV_INTERNAL; // set to decode only for performance + bool b_force_zero_latency = false, b_dump_output_frames = false; + std::vector input_file_names; + ParseCommandLine(input_folder_path, output_folder_path, device_id, n_thread, + b_dump_output_frames, mem_type, disp_delay, argc, argv); + + try + { +#if __cplusplus >= 201703L && __has_include() + for(const auto& entry : std::filesystem::directory_iterator(input_folder_path)) + { +#else + for(const auto& entry : + std::experimental::filesystem::directory_iterator(input_folder_path)) + { +#endif + input_file_names.push_back(entry.path()); + num_files++; + } + + std::vector output_file_names(num_files); + n_thread = ((n_thread > num_files) ? num_files : n_thread); + int num_devices = 0, sd = 0; + hipError_t hip_status = hipSuccess; + hipDeviceProp_t hip_dev_prop; + std::string gcn_arch_name; + hip_status = hipGetDeviceCount(&num_devices); + if(hip_status != hipSuccess) + { + std::cout << "ERROR: hipGetDeviceCount failed! (" << hip_status << ")" + << std::endl; + return -1; + } + if(num_devices < 1) + { + ERR("ERROR: didn't find any GPU!"); + return -1; + } + + hip_status = hipGetDeviceProperties(&hip_dev_prop, device_id); + if(hip_status != hipSuccess) + { + ERR("ERROR: hipGetDeviceProperties for device (" + TOSTR(device_id) + + " ) failed! (" + hipGetErrorName(hip_status) + ")"); + return -1; + } + + gcn_arch_name = hip_dev_prop.gcnArchName; + std::size_t pos = gcn_arch_name.find_first_of(":"); + std::string gcn_arch_name_base = + (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; + + // gfx90a has two GCDs as two separate devices + if(!gcn_arch_name_base.compare("gfx90a") && num_devices > 1) + { + sd = 1; + } + + std::string device_name; + int pci_bus_id, pci_domain_id, pci_device_id; + double total_fps = 0; + int n_total = 0; + std::vector v_fps; + std::vector v_frame; + v_fps.resize(num_files, 0); + v_frame.resize(num_files, 0); + int hip_vis_dev_count = 0; + GetEnvVar("HIP_VISIBLE_DEVICES", hip_vis_dev_count); + + std::cout << "info: Number of threads: " << n_thread << std::endl; + + std::vector> v_demuxer(num_files); + std::unique_ptr dec_8bit_avc(nullptr), dec_8bit_hevc(nullptr), + dec_10bit_hevc(nullptr), dec_8bit_av1(nullptr), dec_10bit_av1(nullptr), + dec_8bit_vp9(nullptr), dec_10bit_vp9(nullptr); + std::vector> v_dec_info; + ThreadPool thread_pool(n_thread); + + // reconfig parameters + ReconfigParams reconfig_params = { 0 }; + ReconfigDumpFileStruct reconfig_user_struct = { 0 }; + reconfig_params.p_fn_reconfigure_flush = ReconfigureFlushCallback; + if(!b_dump_output_frames) + { + reconfig_user_struct.b_dump_frames_to_file = false; + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_DUMP_TO_FILE; + } + else + { + reconfig_user_struct.b_dump_frames_to_file = true; + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_NONE; + } + reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; + + for(int i = 0; i < num_files; i++) + { + std::unique_ptr demuxer( + new VideoDemuxer(input_file_names[i].c_str())); + v_demuxer[i] = std::move(demuxer); + std::size_t found_file = input_file_names[i].find_last_of('/'); + input_file_names[i] = input_file_names[i].substr(found_file + 1); + if(b_dump_output_frames) + { + std::size_t found_ext = input_file_names[i].find_last_of('.'); + std::string path = output_folder_path + "/output_" + + input_file_names[i].substr(0, found_ext) + ".yuv"; + output_file_names[i] = path; + } + } + + for(int i = 0; i < 
n_thread; i++) + { + v_dec_info.emplace_back(std::make_unique()); + if(!hip_vis_dev_count) + { + if(device_id % 2 == 0) + { + v_dec_info[i]->dec_device_id = + (i % 2 == 0) ? device_id : device_id + sd; + } + else + v_dec_info[i]->dec_device_id = + (i % 2 == 0) ? device_id - sd : device_id; + } + else + { + v_dec_info[i]->dec_device_id = i % hip_vis_dev_count; + } + + v_dec_info[i]->rocdec_codec_id = + AVCodec2RocDecVideoCodec(v_demuxer[i]->GetCodecID()); + v_dec_info[i]->bit_depth = v_demuxer[i]->GetBitDepth(); + if(v_dec_info[i]->bit_depth == 8) + { + if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_AVC) + { + std::unique_ptr dec_8bit_avc(new RocVideoDecoder( + v_dec_info[i]->dec_device_id, mem_type, + v_dec_info[i]->rocdec_codec_id, b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[i]->viddec = std::move(dec_8bit_avc); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_HEVC) + { + std::unique_ptr dec_8bit_hevc(new RocVideoDecoder( + v_dec_info[i]->dec_device_id, mem_type, + v_dec_info[i]->rocdec_codec_id, b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[i]->viddec = std::move(dec_8bit_hevc); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_AV1) + { + std::unique_ptr dec_8bit_av1(new RocVideoDecoder( + v_dec_info[i]->dec_device_id, mem_type, + v_dec_info[i]->rocdec_codec_id, b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[i]->viddec = std::move(dec_8bit_av1); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_VP9) + { + std::unique_ptr dec_8bit_vp9(new RocVideoDecoder( + v_dec_info[i]->dec_device_id, mem_type, + v_dec_info[i]->rocdec_codec_id, b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[i]->viddec = std::move(dec_8bit_vp9); + } + else + { + ERR("ERROR: codec type is not supported!"); + return -1; + } + } + else + { // bit depth = 10bit + if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_HEVC) + { + std::unique_ptr dec_10bit_hevc(new RocVideoDecoder( + v_dec_info[i]->dec_device_id, mem_type, + v_dec_info[i]->rocdec_codec_id, b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[i]->viddec = std::move(dec_10bit_hevc); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_AV1) + { + std::unique_ptr dec_10bit_av1(new RocVideoDecoder( + v_dec_info[i]->dec_device_id, mem_type, + v_dec_info[i]->rocdec_codec_id, b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[i]->viddec = std::move(dec_10bit_av1); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_VP9) + { + std::unique_ptr dec_10bit_vp9(new RocVideoDecoder( + v_dec_info[i]->dec_device_id, mem_type, + v_dec_info[i]->rocdec_codec_id, b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[i]->viddec = std::move(dec_10bit_vp9); + } + else + { + ERR("ERROR: codec type is not supported!"); + return -1; + } + } + + v_dec_info[i]->viddec->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, + pci_domain_id, pci_device_id); + std::cout << "info: decoding " << input_file_names[i] << " using GPU device " + << v_dec_info[i]->dec_device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_bus_id << ":" + << std::setfill('0') << std::setw(2) << std::right << std::hex + << pci_domain_id << "." 
<< pci_device_id << std::dec << std::endl; + } + + std::mutex mutex; + + for(int j = 0; j < num_files; j++) + { + int thread_idx = j % n_thread; + if(j >= n_thread) + { + { + std::unique_lock<std::mutex> lock(mutex); + while(!v_dec_info[thread_idx]->decoding_complete) + ; + v_dec_info[thread_idx]->decoding_complete = false; + } + uint32_t bit_depth = v_demuxer[j]->GetBitDepth(); + rocDecVideoCodec codec_id = + AVCodec2RocDecVideoCodec(v_demuxer[j]->GetCodecID()); + if(v_dec_info[thread_idx]->bit_depth != bit_depth || + v_dec_info[thread_idx]->rocdec_codec_id != codec_id) + { + if(bit_depth == 8) + { // can be H.264, HEVC, AV1, or VP9 + if(dec_8bit_avc == nullptr && codec_id == rocDecVideoCodec_AVC) + { + std::unique_ptr<RocVideoDecoder> dec_8bit_avc( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, codec_id, + b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_8bit_avc); + } + else if(dec_8bit_hevc == nullptr && + codec_id == rocDecVideoCodec_HEVC) + { + std::unique_ptr<RocVideoDecoder> dec_8bit_hevc( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, codec_id, + b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_8bit_hevc); + } + else if(dec_8bit_av1 == nullptr && + codec_id == rocDecVideoCodec_AV1) + { + std::unique_ptr<RocVideoDecoder> dec_8bit_av1( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, codec_id, + b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_8bit_av1); + } + else if(dec_8bit_vp9 == nullptr && + codec_id == rocDecVideoCodec_VP9) + { + std::unique_ptr<RocVideoDecoder> dec_8bit_vp9( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, codec_id, + b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_8bit_vp9); + } + else + { + if(codec_id == rocDecVideoCodec_AVC) + { + v_dec_info[thread_idx]->viddec.swap(dec_8bit_avc); + } + else if(codec_id == rocDecVideoCodec_HEVC) + { + v_dec_info[thread_idx]->viddec.swap(dec_8bit_hevc); + } + else if(codec_id == rocDecVideoCodec_AV1) + { + v_dec_info[thread_idx]->viddec.swap(dec_8bit_av1); + } + else if(codec_id == rocDecVideoCodec_VP9) + { + v_dec_info[thread_idx]->viddec.swap(dec_8bit_vp9); + } + else + { + ERR("ERROR: codec type is not supported!"); + return -1; + } + } + v_dec_info[thread_idx]->bit_depth = bit_depth; + v_dec_info[thread_idx]->rocdec_codec_id = codec_id; + } + else + { // bit_depth = 10bit; HEVC, AV1, or VP9 + if(dec_10bit_hevc == nullptr && codec_id == rocDecVideoCodec_HEVC) + { + std::unique_ptr<RocVideoDecoder> dec_10bit_hevc( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, codec_id, + b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_10bit_hevc); + } + else if(dec_10bit_av1 == nullptr && + codec_id == rocDecVideoCodec_AV1) + { + std::unique_ptr<RocVideoDecoder> dec_10bit_av1( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, codec_id, + b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_10bit_av1); + } + else if(dec_10bit_vp9 == nullptr && + codec_id == rocDecVideoCodec_VP9) + { + std::unique_ptr<RocVideoDecoder> dec_10bit_vp9( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, codec_id, + b_force_zero_latency, p_crop_rect, + b_extract_sei_messages, disp_delay)); + 
v_dec_info[thread_idx]->viddec = std::move(dec_10bit_vp9); + } + else + { + if(codec_id == rocDecVideoCodec_HEVC) + { + v_dec_info[thread_idx]->viddec.swap(dec_10bit_hevc); + } + else if(codec_id == rocDecVideoCodec_AV1) + { + v_dec_info[thread_idx]->viddec.swap(dec_10bit_av1); + } + else if(codec_id == rocDecVideoCodec_VP9) + { + v_dec_info[thread_idx]->viddec.swap(dec_10bit_vp9); + } + else + { + ERR("ERROR: codec type is not supported!"); + return -1; + } + } + v_dec_info[thread_idx]->bit_depth = bit_depth; + v_dec_info[thread_idx]->rocdec_codec_id = codec_id; + } + } + v_dec_info[thread_idx]->viddec->GetDeviceinfo( + device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id); + std::cout << "info: decoding " << input_file_names[j] + << " using GPU device " << v_dec_info[thread_idx]->dec_device_id + << " - " << device_name << "[" << gcn_arch_name + << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" + << std::setfill('0') << std::setw(2) << std::right << std::hex + << pci_domain_id << "." << pci_device_id << std::dec + << std::endl; + } + if(!v_dec_info[thread_idx]->viddec->CodecSupported( + v_dec_info[thread_idx]->dec_device_id, + v_dec_info[thread_idx]->rocdec_codec_id, + v_dec_info[thread_idx]->bit_depth)) + { + std::cerr << "Codec not supported on GPU, skipping this file!" + << std::endl; + v_dec_info[thread_idx]->decoding_complete = true; + continue; + } + thread_pool.ExecuteJob( + std::bind(DecProc, v_dec_info[thread_idx]->viddec.get(), + v_demuxer[j].get(), &v_frame[j], &v_fps[j], + std::ref(v_dec_info[thread_idx]->decoding_complete), + b_dump_output_frames, output_file_names[j], mem_type)); + } + + thread_pool.JoinThreads(); + for(int i = 0; i < num_files; i++) + { + total_fps += + v_fps[i] * static_cast(n_thread) / static_cast(num_files); + n_total += v_frame[i]; + } + if(!b_dump_output_frames) + { + std::cout << "info: Total frame decoded: " << n_total << std::endl; + std::cout << "info: avg decoding time per frame: " << 1000 / total_fps + << " ms" << std::endl; + std::cout << "info: avg FPS: " << total_fps << std::endl; + } + else + { + if(mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + std::cout << "info: saving frames with -m 3 option is not supported!" 
+ << std::endl; + } + else + { + for(int i = 0; i < num_files; i++) + std::cout << "info: saved frames into " << output_file_names[i] + << std::endl; + } + } + + } catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + } + + return 0; +} \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5f24137e..6272de84 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -23,5 +23,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-overflow-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-annotate-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-causal-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-python-tests.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-videodecode-tests.cmake) add_subdirectory(source) diff --git a/tests/rocprof-sys-videodecode-tests.cmake b/tests/rocprof-sys-videodecode-tests.cmake new file mode 100644 index 00000000..d109b533 --- /dev/null +++ b/tests/rocprof-sys-videodecode-tests.cmake @@ -0,0 +1,20 @@ +# -------------------------------------------------------------------------------------- # +# +# video decode tests +# +# -------------------------------------------------------------------------------------- # + +rocprofiler_systems_add_test( + SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE + NAME videodecode + TARGET videodecode + GPU ON + RUN_ARGS -i ${ROCmVersion_DIR}/share/rocdecode/video/ -t 2 + LABELS "videodecode") + +rocprofiler_systems_add_validation_test( + NAME videodecode-sampling + PERFETTO_METRIC "host" + PERFETTO_FILE "perfetto-trace.proto" + LABELS "videodecode" + ARGS -l videodecode -c 1 -d 0 --counter-names "GPU VCN Activity") diff --git a/tests/validate-perfetto-proto.py b/tests/validate-perfetto-proto.py index 2d991042..01d59cb1 100755 --- a/tests/validate-perfetto-proto.py +++ b/tests/validate-perfetto-proto.py @@ -101,6 +101,13 @@ def validate_perfetto(data, labels, counts, depths): default=[], nargs="*", ) + parser.add_argument( + "--counter-names", + type=str, + help="Require counter name in the traces", + default=[], + nargs="*", + ) args = parser.parse_args() @@ -175,6 +182,18 @@ def validate_perfetto(data, labels, counts, depths): if key_count != count: ret = 1 + for counter_name in args.counter_names: + sum_counter_values = tp.query( + f"""SELECT SUM(counter.value) AS total_value FROM counter_track JOIN counter ON + counter.track_id = counter_track.id WHERE counter_track.name LIKE + '{counter_name}%'""" + ) + total_value = 0 + for row in sum_counter_values: + total_value = row.total_value if row.total_value is not None else -1 + if total_value < 0: + ret = 1 + if ret == 0: print(f"{args.input} validated") else:
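For reference, the check added by --counter-names above can be reproduced outside the test harness. The following is a minimal standalone sketch, not part of this change set: it assumes the perfetto Python package is installed and that a rocprof-sys run of the videodecode sampling test produced a local perfetto-trace.proto; the trace file name and the "GPU VCN Activity" counter prefix are taken from the new test and are illustrative.

#!/usr/bin/env python3
# Standalone sketch: sum a named counter track in a Perfetto trace, mirroring the
# SUM-over-counter_track query added to validate-perfetto-proto.py above.
from perfetto.trace_processor import TraceProcessor

trace_file = "perfetto-trace.proto"   # assumed output of the videodecode sampling test
counter_name = "GPU VCN Activity"     # counter prefix required by the new test

tp = TraceProcessor(trace=trace_file)
result = tp.query(
    f"""SELECT SUM(counter.value) AS total_value FROM counter_track JOIN counter ON
        counter.track_id = counter_track.id WHERE counter_track.name LIKE
        '{counter_name}%'"""
)
total_value = None
for row in result:
    total_value = row.total_value  # NULL (None) when no matching track or samples exist
tp.close()

if total_value is None:
    raise SystemExit(f"counter '{counter_name}' not found or empty")
print(f"counter '{counter_name}' total: {total_value}")

A non-empty sum indicates the VCN activity counter was actually sampled while the videodecode workload ran, which is the condition the validation test enforces.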