diff --git a/CMakeLists.txt b/CMakeLists.txt index ac8484172b..aebd984989 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,9 @@ include(ROCMCreatePackage) include(CheckCXXCompilerFlag) include(ROCMHeaderWrapper) +# Build library with Beta APIs +add_definitions("-DMIOPEN_BETA_API=1") + set(MIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK On CACHE BOOL "Enable AI-based fallback for Immediate Mode") set(MIOPEN_ENABLE_AI_KERNEL_TUNING On CACHE BOOL "Enable AI heuristic for kernel tuning") set(MIOPEN_ENABLE_SQLITE On CACHE BOOL "") diff --git a/docs/.sphinx/requirements.txt b/docs/.sphinx/requirements.txt index f358d19826..eb52b0503e 100644 --- a/docs/.sphinx/requirements.txt +++ b/docs/.sphinx/requirements.txt @@ -40,7 +40,7 @@ fastjsonschema==2.16.3 # via rocm-docs-core gitdb==4.0.10 # via gitpython -gitpython==3.1.35 +gitpython==3.1.37 # via rocm-docs-core idna==3.4 # via requests diff --git a/docs/datatypes.md b/docs/datatypes.md index 1a46e5fd62..f5d8f28931 100644 --- a/docs/datatypes.md +++ b/docs/datatypes.md @@ -10,7 +10,7 @@ typedef enum { miopenFloat = 1, miopenInt32 = 2, miopenInt8 = 3, - miopenInt8x4 = 4, + /* Value 4 is reserved. */ miopenBFloat16 = 5, } miopenDataType_t; ``` @@ -22,7 +22,6 @@ Type descriptions: * `miopenFloat` - 32-bit floating point * `miopenInt32` - 32-bit integer, used primarily for `int8` convolution outputs * `miopenInt8` - 8-bit integer, currently only supported by `int8` convolution forward path, tensor set, tensor copy, tensor cast, tensor transform, tensor transpose, and im2col. - * `miopenInt8x4` - 8-bit 4 element vector type used primarily with `int8` convolutions forward path. * `miopenBFloat16` - brain float fp-16 (8-bit exponent, 7-bit fraction), currently only supported by convolutions, tensor set, and tensor copy. diff --git a/driver/layernorm_driver.hpp b/driver/layernorm_driver.hpp index 8251472625..e43f3f2f37 100644 --- a/driver/layernorm_driver.hpp +++ b/driver/layernorm_driver.hpp @@ -255,19 +255,19 @@ int LayerNormDriver<Tgpu, Tref>::AllocateBuffersAndCopy() for(int i = 0; i < in_sz; i++) { - in[i] = RAN_GEN<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0)); + in[i] = prng::gen_A_to_B(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0)); } status = in_dev->ToGPU(q, in.data()); for(int i = 0; i < weight_sz; i++) { - weight[i] = RAN_GEN<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0)); + weight[i] = prng::gen_A_to_B(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0)); } status = weight_dev->ToGPU(q, weight.data()); for(int i = 0; i < bias_sz; i++) { - bias[i] = RAN_GEN<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0)); + bias[i] = prng::gen_A_to_B(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0)); } status = bias_dev->ToGPU(q, bias.data()); diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index a59181acf3..0c55b3becd 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -112,11 +112,13 @@ typedef enum miopenStatusVersionMismatch = 10, /*!< Version mismatch of the supplied binary data argument. */ } miopenStatus_t; +#ifdef MIOPEN_BETA_API typedef enum { miopenF8RoundingModeStandard = 0, miopenF8RoundingModeStochastic = 1, } miopenF8RoundingMode_t; +#endif /*! @brief Get character string for an error code.
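[Editorial aside] With the library now built with `MIOPEN_BETA_API=1`, the gated declarations above (`miopenF8RoundingMode_t`, and `miopenSetTensorCastType` further down) become visible to clients that also define the macro. A minimal consumer sketch under that assumption; the shape and the FP8 cast choice are illustrative, not part of this patch:

```cpp
// Sketch: exercising the beta-gated cast-type API. Build with the macro defined,
// e.g.  g++ -DMIOPEN_BETA_API=1 sample.cpp -lMIOpen
// so the #ifdef MIOPEN_BETA_API blocks in miopen.h are compiled in.
#include <miopen/miopen.h>
#include <cstdio>

int main()
{
    miopenTensorDescriptor_t xDesc;
    miopenCreateTensorDescriptor(&xDesc);

    const int dims[4]    = {1, 8, 16, 16};               // NCHW, illustrative shape
    const int strides[4] = {8 * 16 * 16, 16 * 16, 16, 1}; // packed strides
    miopenSetTensorDescriptor(xDesc, miopenFloat, 4, dims, strides);

    // New beta call: elements are converted to the cast type before the
    // consuming operation reads them.
    const miopenStatus_t rc = miopenSetTensorCastType(xDesc, miopenFloat8);
    std::printf("SetTensorCastType: %d\n", static_cast<int>(rc));

    miopenDestroyTensorDescriptor(xDesc);
    return 0;
}
```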
* @@ -346,17 +348,21 @@ MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor); */ typedef enum { - miopenHalf = 0, /*!< 16-bit floating point (Fully supported) */ - miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ - miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ - miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ - miopenInt8x4 = - 4, /*!< Pack of four 8-bit int points in NCHW_VECT_C format (Partially supported) */ + miopenHalf = 0, /*!< 16-bit floating point (Fully supported) */ + miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ + miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ + miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ + miopenInt8x4 = 4, /*!< Pack of four Int8 in NCHW_VECT_C format (Support discontinued) */ miopenBFloat16 = 5, /*!< 16-bit binary floating point (8-bit exponent, 7-bit fraction) (Partially supported) */ - miopenDouble = 6, /*!< 64-bit floating point (Partially supported) */ + miopenDouble = 6, /*!< 64-bit floating point (Partially supported) */ +#ifdef MIOPEN_BETA_API miopenFloat8 = 7, - miopenBFloat8 = 8 + miopenBFloat8 = 8, +#else +// miopenReserved1 = 7, +// miopenReserved2 = 8, +#endif } miopenDataType_t; /*! @ingroup tensor @@ -601,11 +607,15 @@ typedef enum MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC = 1, /*!< Restrict MIOpen convolutions to kernels which produce numerically deterministic results. 0 - disabled (default), 1 - enabled >*/ +#ifdef MIOPEN_BETA_API MIOPEN_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE = 2, /*!*/ +#else +// miopenReserved1 = 2, +#endif } miopenConvolutionAttrib_t; /** @addtogroup tensor @@ -723,6 +733,7 @@ MIOPEN_EXPORT miopenStatus_t miopenSetTensorDescriptor(miopenTensorDescriptor_t const int* dimsA, const int* stridesA); +#ifdef MIOPEN_BETA_API /*! @brief Set the tensor cast type * * For tensors where the cast_type attribute is set, the tensor elements would be converted to the @@ -734,6 +745,7 @@ MIOPEN_EXPORT miopenStatus_t miopenSetTensorDescriptor(miopenTensorDescriptor_t */ MIOPEN_EXPORT miopenStatus_t miopenSetTensorCastType(miopenTensorDescriptor_t tensorDesc, miopenDataType_t cast_type); +#endif /*! @brief Set shape of N-dimensional tensor * diff --git a/src/activ/problem_description.cpp b/src/activ/problem_description.cpp index b1e21a76e8..9bc484259d 100644 --- a/src/activ/problem_description.cpp +++ b/src/activ/problem_description.cpp @@ -68,6 +68,8 @@ NetworkConfig ProblemDescription::MakeNetworkConfig() const ss << ((packed) ? "11" : "10"); // + lite bit ss << xDesc.GetType(); + if(const auto ct = xDesc.GetCastType()) + ss << GetDataTypeName(*ct); ss << activDesc.GetMode(); ss << read_unit; ss << MAP_RD; diff --git a/src/check_numerics.cpp b/src/check_numerics.cpp index b0882995e9..50cb0af4af 100644 --- a/src/check_numerics.cpp +++ b/src/check_numerics.cpp @@ -64,7 +64,7 @@ std::string GetKernelName(miopenDataType_t data_type) case miopenBFloat8: return {"check_numerics_bf8"}; case miopenInt32: case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. 
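// [Editorial note] In GetKernelName() above, the remaining labels (miopenInt32, miopenInt8,
// the discontinued miopenInt8x4, and miopenDouble) all return an empty kernel name, which
// makes the numerics check a silent no-op for types that have no checking kernel.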
case miopenDouble: default: return {""}; } diff --git a/src/conv/problem_description.cpp b/src/conv/problem_description.cpp index 76c47cbcd9..40fdd2f4c9 100644 --- a/src/conv/problem_description.cpp +++ b/src/conv/problem_description.cpp @@ -127,6 +127,19 @@ void ProblemDescription::BuildConfKey(std::string& conf_key) const ss << 'x' << GetOutLayout(); } ss << 'x' << EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType()); + + std::ostringstream optional; + if(const auto ct = GetInCastType()) + optional << "ci" << GetDataTypeName(*ct); + if(const auto ct = GetWeightsCastType()) + optional << "cw" << GetDataTypeName(*ct); + if(const auto ct = GetOutCastType()) + optional << "co" << GetDataTypeName(*ct); + if(!optional.str().empty()) + { + ss << 'x' << optional.str(); + } + ss << 'x' << PrintDHW('x', GetSpatialDims(), GetPadD(), GetPadH(), GetPadW()); ss << 'x' << PrintDHW( @@ -175,11 +188,18 @@ void ProblemDescription::Serialize(std::ostream& stream) const { // Group count > 1 identifies Group/Depthwise modes. if(GetGroupCount() != 1) - optional << 'g' << GetGroupCount(); + optional << "_g" << GetGroupCount(); + + if(const auto ct = GetInCastType()) + optional << "_ci" << GetDataTypeName(*ct); + if(const auto ct = GetWeightsCastType()) + optional << "_cw" << GetDataTypeName(*ct); + if(const auto ct = GetOutCastType()) + optional << "_co" << GetDataTypeName(*ct); } if(!optional.str().empty()) { - stream << '_' << optional.str(); + stream << optional.str(); } } diff --git a/src/convolution.cpp b/src/convolution.cpp index 403ff777cd..ac7c28fdc4 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -358,7 +358,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& std::vector out_strides; tensor_layout_to_strides( out_lens, default_layout, yLayout, xDesc.GetVectorLength(), out_strides); - return {(xDesc.GetType() == miopenInt8 || xDesc.GetType() == miopenInt8x4 + return {(xDesc.GetType() == miopenInt8 ? (yType) : xDesc.GetType()), // TODO: This function overrides the output type with // essentially the input which is incorrect. diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index 19e302f166..fad06870ce 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -63,10 +63,6 @@ /// "disabled expansion of recursive macro" injected by rocblas headers. 
#define AVOID_ROCBLAS_WRAPPERS_204 (MIOPEN_ROCBLAS_VERSION_FLAT >= 2004000) -/// Maintain API compatibility with various rocBLAS version -#define USE_GEMM_FLAGS_PACK_INT8X4 \ - ((MIOPEN_ROCBLAS_VERSION_FLAT >= 2038000) && (MIOPEN_ROCBLAS_VERSION_FLAT < 4000000)) - /// Maintain API compatibility for versions not supporting FP16 alternate implementations #define USE_GEMM_FLAGS_FP16_ALT_IMPL (MIOPEN_ROCBLAS_VERSION_FLAT >= 2043000) /// Some 2.42 versions have rocblas_gemm_flags_fp16_alt_impl, but @@ -110,7 +106,7 @@ static inline rocblas_datatype rocBlasComputeType(const miopen::GemmDescriptor& { // Complex compute types are only supported in newer version of the API assert(desc.dataType == desc.a_cast_type && desc.dataType == desc.b_cast_type); - if(desc.dataType == miopenInt8 || desc.dataType == miopenInt8x4) + if(desc.dataType == miopenInt8) return rocblas_datatype::rocblas_datatype_i32_r; else return rocblas_datatype::rocblas_datatype_f32_r; @@ -413,6 +409,7 @@ miopenStatus_t CallGemm(const Handle& handle, gemm_desc.isColMajor = !gemm_desc.isColMajor; std::swap(A, B); std::swap(a_offset, b_offset); + std::swap(gemm_desc.a_cast_type, gemm_desc.b_cast_type); std::swap(gemm_desc.transA, gemm_desc.transB); std::swap(gemm_desc.m, gemm_desc.n); std::swap(gemm_desc.lda, gemm_desc.ldb); @@ -440,7 +437,6 @@ miopenStatus_t CallGemm(const Handle& handle, switch(gemm_desc.dataType) { - case miopenInt8x4: case miopenInt8: { assert(gemm_desc.k % 4 == 0); @@ -472,12 +468,7 @@ miopenStatus_t CallGemm(const Handle& handle, rocBlasComputeType(gemm_desc), // rocblas_datatype::rocblas_datatype_i32_r, rocblas_gemm_algo::rocblas_gemm_algo_standard, 0, -#if USE_GEMM_FLAGS_PACK_INT8X4 - rocblas_gemm_flags_pack_int8x4 -#else - 0 -#endif - ); + 0); } break; case miopenInt32: break; @@ -621,9 +612,9 @@ miopenStatus_t CallGemm(const Handle& handle, }; break; + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); }; break; } @@ -665,6 +656,7 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, gemm_desc.isColMajor = !gemm_desc.isColMajor; std::swap(A, B); std::swap(a_offset, b_offset); + std::swap(gemm_desc.a_cast_type, gemm_desc.b_cast_type); std::swap(gemm_desc.transA, gemm_desc.transB); std::swap(gemm_desc.m, gemm_desc.n); std::swap(gemm_desc.lda, gemm_desc.ldb); @@ -693,7 +685,6 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, switch(gemm_desc.dataType) { - case miopenInt8x4: case miopenInt8: { assert(gemm_desc.k % 4 == 0); @@ -729,12 +720,7 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, rocblas_datatype::rocblas_datatype_i32_r, rocblas_gemm_algo::rocblas_gemm_algo_standard, 0, -#if USE_GEMM_FLAGS_PACK_INT8X4 - rocblas_gemm_flags_pack_int8x4 -#else - 0 -#endif - ); + 0); } break; case miopenInt32: break; @@ -893,10 +879,10 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, break; } + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); - } + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); + }; break; } @@ -938,6 +924,7 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, gemm_desc.isColMajor = !gemm_desc.isColMajor; std::swap(A, B); std::swap(a_offset, b_offset); + std::swap(gemm_desc.a_cast_type, gemm_desc.b_cast_type); std::swap(gemm_desc.transA, gemm_desc.transB); 
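// [Editorial note] The std::swap(gemm_desc.a_cast_type, gemm_desc.b_cast_type) added to each
// CallGemm* transposition block is a correctness fix for casted (FP8/BF8) GEMMs: once A and B
// trade places for the column-major formulation, their per-operand cast types must trade
// places too, or each operand would be converted using the other's cast type.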
std::swap(gemm_desc.m, gemm_desc.n); std::swap(gemm_desc.lda, gemm_desc.ldb); @@ -968,7 +955,6 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, switch(gemm_desc.dataType) { - case miopenInt8x4: case miopenInt8: { assert(gemm_desc.k % 4 == 0); @@ -1002,12 +988,7 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, rocBlasComputeType(gemm_desc), // rocblas_datatype::rocblas_datatype_i32_r, rocblas_gemm_algo::rocblas_gemm_algo_standard, 0, -#if USE_GEMM_FLAGS_PACK_INT8X4 - rocblas_gemm_flags_pack_int8x4 -#else - 0 -#endif - ); + 0); } } break; @@ -1163,10 +1144,10 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, break; } + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); - } + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); + }; break; } @@ -1196,7 +1177,7 @@ GemmDescriptor CreateGemmDescriptorConvFwd(const TensorDescriptor& wDesc, { #ifndef NDEBUG assert(wDesc.GetType() == xDesc.GetType()); - if(wDesc.GetType() != miopenInt8 && wDesc.GetType() != miopenInt8x4) + if(wDesc.GetType() != miopenInt8) assert(wDesc.GetType() == yDesc.GetType()); #endif @@ -1351,7 +1332,7 @@ GemmDescriptor CreateGemmDescriptorConvCNHWFwd(const TensorDescriptor& wDesc, { #ifndef NDEBUG assert(wDesc.GetType() == xDesc.GetType()); - if(wDesc.GetType() != miopenInt8 && wDesc.GetType() != miopenInt8x4) + if(wDesc.GetType() != miopenInt8) assert(wDesc.GetType() == yDesc.GetType()); #endif @@ -1455,7 +1436,7 @@ GemmDescriptor CreateGemmStridedBatchedDescriptorConv1x1Fwd(const TensorDescript { #ifndef NDEBUG assert(wDesc.GetType() == xDesc.GetType()); - if(wDesc.GetType() != miopenInt8 && wDesc.GetType() != miopenInt8x4) + if(wDesc.GetType() != miopenInt8) assert(wDesc.GetType() == yDesc.GetType()); #else (void)yDesc; diff --git a/src/hip/batched_transpose_sol.cpp b/src/hip/batched_transpose_sol.cpp index 4ffcf34b77..01349775ca 100644 --- a/src/hip/batched_transpose_sol.cpp +++ b/src/hip/batched_transpose_sol.cpp @@ -322,7 +322,11 @@ BatchedTransposeSolution::BatchedTransposeSolution(const ExecutionContext& ctx, uint32_t width_) : data_type(data_type_), batch(batch_), height(height_), width(width_) { - if(data_type == miopenInt8x4 || data_type == miopenDouble) + if(!(data_type == miopenHalf // + || data_type == miopenFloat // + || data_type == miopenInt32 // + || data_type == miopenInt8 // + || data_type == miopenBFloat16)) MIOPEN_THROW("These data type are not supported"); num_cu = ctx.GetStream().GetMaxComputeUnits(); std::size_t data_size = miopen::GetTypeSize(data_type); diff --git a/src/include/miopen/activ/problem_description.hpp b/src/include/miopen/activ/problem_description.hpp index a22a1ba0d8..d5c09850c3 100644 --- a/src/include/miopen/activ/problem_description.hpp +++ b/src/include/miopen/activ/problem_description.hpp @@ -27,8 +27,8 @@ #pragma once #include -#include #include +#include #include diff --git a/src/include/miopen/conv/problem_description.hpp b/src/include/miopen/conv/problem_description.hpp index 438e1d306a..d6b735291e 100644 --- a/src/include/miopen/conv/problem_description.hpp +++ b/src/include/miopen/conv/problem_description.hpp @@ -26,16 +26,17 @@ #pragma once +#include #include -#include #include + +#include +#include +#include + #if MIOPEN_ENABLE_SQLITE #include #endif -#include -#include - -#include namespace miopen { @@ -44,24 +45,6 @@ struct ExecutionContext; std::string EncodeDataTypesForKey(miopenDataType_t in, 
miopenDataType_t weights, miopenDataType_t out); -inline std::string GetDataTypeName(miopenDataType_t data_type) -{ - switch(data_type) - { - case miopenFloat: return "FP32"; - case miopenHalf: return "FP16"; - case miopenInt8: return "INT8"; - case miopenInt8x4: return "INT8x4"; - case miopenInt32: return "INT32"; - case miopenBFloat16: return "BF16"; - case miopenDouble: return "FP64"; - case miopenFloat8: return "FP8"; - case miopenBFloat8: return "BFP8"; - } - - return "Unknown(" + std::to_string(data_type) + ")"; -} - template constexpr auto GetDHW(unsigned spatial_dims, const std::vector& data) { diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index d94211ed2e..35c494eab2 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -407,7 +407,7 @@ struct ConvolutionDescriptor : miopenConvolutionDescriptor friend void from_json(const nlohmann::json& json, ConvolutionDescriptor& conv); private: - void ValidateConvTensors(const ConvTensors& conv_tensors) const; + void ValidateTensors(const ConvTensors& conv_tensors) const; }; void ConvolutionBackwardBias(const Handle& handle, diff --git a/src/include/miopen/datatype.hpp b/src/include/miopen/datatype.hpp index 485bdb3d67..29057e3de9 100644 --- a/src/include/miopen/datatype.hpp +++ b/src/include/miopen/datatype.hpp @@ -53,7 +53,10 @@ inline std::string GetDataType(miopenDataType_t type) type_str = "bfloat16"; } break; - case miopenInt8x4: + case miopenInt8x4: { + type_str = "UNSUPPORTED_TYPE"; + } + break; case miopenInt8: { type_str = "int8_t"; } @@ -137,7 +140,6 @@ inline KernelBuildParameters GetDataTypeKBP(miopenDataType_t type) int use_fp16x8 = 0; int use_fp32 = 0; int use_int8 = 0; - int use_int8x4 = 0; int use_int32 = 0; int use_bfp16 = 0; int use_fp64 = 0; @@ -150,15 +152,14 @@ inline KernelBuildParameters GetDataTypeKBP(miopenDataType_t type) case miopenHalf: use_fp16 = 1; break; case miopenFloat: use_fp32 = 1; break; case miopenInt8: use_int8 = 1; break; - case miopenInt8x4: use_int8x4 = 1; break; case miopenBFloat16: use_bfp16 = 1; break; case miopenInt32: use_int32 = 1; break; case miopenDouble: use_fp64 = 1; break; case miopenFloat8: use_fp8 = 1; break; case miopenBFloat8: use_bfp8 = 1; break; + case miopenInt8x4: // fallthrough default: - MIOPEN_THROW( - "Only float, half, bfloat16, int8, int8x4, float8, bfloat8 data type is supported."); + MIOPEN_THROW("Only float, half, bfloat16, int8, float8, bfloat8 data types are supported."); break; } @@ -168,7 +169,6 @@ inline KernelBuildParameters GetDataTypeKBP(miopenDataType_t type) {"MIOPEN_USE_FP16x8", use_fp16x8}, {"MIOPEN_USE_FP32", use_fp32}, {"MIOPEN_USE_INT8", use_int8}, - {"MIOPEN_USE_INT8x4", use_int8x4}, {"MIOPEN_USE_BFP16", use_bfp16}, {"MIOPEN_USE_INT32", use_int32}, {"MIOPEN_USE_RNE_BFLOAT16", use_rne_bfloat16}, diff --git a/src/include/miopen/layernorm.hpp b/src/include/miopen/layernorm.hpp index 8ec2d96055..f897e79eea 100644 --- a/src/include/miopen/layernorm.hpp +++ b/src/include/miopen/layernorm.hpp @@ -49,8 +49,8 @@ miopenStatus_t LayerNormForward(const Handle& handle, const TensorDescriptor& rstdDesc, Data_t rstd, miopenLayerNormMode_t mode, - const float epsilon, - const int32_t normalized_dim); + float epsilon, + int32_t normalized_dim); } // namespace miopen #endif // _MIOPEN_LAYERNORM_HPP_ diff --git a/src/include/miopen/pooling/problem_description.hpp b/src/include/miopen/pooling/problem_description.hpp index 962c73c7dc..dc3e1c25b6 100644 --- 
a/src/include/miopen/pooling/problem_description.hpp +++ b/src/include/miopen/pooling/problem_description.hpp @@ -26,6 +26,7 @@ #pragma once +#include #include #include diff --git a/src/include/miopen/problem_description_base.hpp b/src/include/miopen/problem_description_base.hpp index 3ca8a178b1..a3d51f120e 100644 --- a/src/include/miopen/problem_description_base.hpp +++ b/src/include/miopen/problem_description_base.hpp @@ -26,8 +26,28 @@ #pragma once +#include + namespace miopen { +inline std::string GetDataTypeName(miopenDataType_t data_type) +{ + switch(data_type) + { + case miopenFloat: return "FP32"; + case miopenHalf: return "FP16"; + case miopenInt8: return "INT8"; + case miopenInt8x4: return "INT8x4"; + case miopenInt32: return "INT32"; + case miopenBFloat16: return "BF16"; + case miopenDouble: return "FP64"; + case miopenFloat8: return "FP8"; + case miopenBFloat8: return "BF8"; + } + + return "Unknown(" + std::to_string(data_type) + ")"; +} + struct ProblemDescriptionBase { }; diff --git a/src/include/miopen/tensor.hpp b/src/include/miopen/tensor.hpp index b8d72df67c..a133ab4adc 100644 --- a/src/include/miopen/tensor.hpp +++ b/src/include/miopen/tensor.hpp @@ -101,13 +101,13 @@ inline std::size_t GetTypeSize(miopenDataType_t d) case miopenFloat: return 4; case miopenHalf: case miopenBFloat16: return 2; - case miopenInt8x4: + case miopenInt8x4: break; case miopenInt8: case miopenFloat8: case miopenBFloat8: return 1; case miopenDouble: return 8; } - MIOPEN_THROW("Unknown data type"); + MIOPEN_THROW("Unknown or unsupported data type"); } template diff --git a/src/include/miopen/visit_float.hpp b/src/include/miopen/visit_float.hpp index d26afba9dd..35e1ae6ae7 100644 --- a/src/include/miopen/visit_float.hpp +++ b/src/include/miopen/visit_float.hpp @@ -79,7 +79,6 @@ void visit_float(miopenDataType_t t, F f) } case miopenFloat8: case miopenBFloat8: - case miopenInt8x4: case miopenInt8: { f(as_float{}); break; @@ -92,6 +91,7 @@ void visit_float(miopenDataType_t t, F f) f(as_float{}); break; } + case miopenInt8x4: MIOPEN_THROW("miopenInt8x4: Support discontinued."); } } diff --git a/src/kernels/MIOpenIm2d2Col.cl b/src/kernels/MIOpenIm2d2Col.cl index 7b1522db6f..852ccff955 100644 --- a/src/kernels/MIOpenIm2d2Col.cl +++ b/src/kernels/MIOpenIm2d2Col.cl @@ -40,10 +40,6 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - #ifndef MIOPEN_USE_INT32 #define MIOPEN_USE_INT32 0 #endif @@ -58,8 +54,6 @@ #if MIOPEN_USE_INT8 || MIOPEN_USE_FP8 || MIOPEN_USE_BFP8 typedef char data_t; -#elif MIOPEN_USE_INT8x4 -typedef uint data_t; #elif MIOPEN_USE_INT32 typedef int data_t; #elif(MIOPEN_USE_FP16 || MIOPEN_USE_BFP16) diff --git a/src/kernels/MIOpenIm3d2Col.cl b/src/kernels/MIOpenIm3d2Col.cl index 7ccb8363b7..5ee437a068 100644 --- a/src/kernels/MIOpenIm3d2Col.cl +++ b/src/kernels/MIOpenIm3d2Col.cl @@ -40,18 +40,12 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - #ifndef MIOPEN_USE_INT32 #define MIOPEN_USE_INT32 0 #endif #if MIOPEN_USE_INT8 typedef char data_t; -#elif MIOPEN_USE_INT8x4 -typedef uint data_t; #elif MIOPEN_USE_INT32 typedef int data_t; #elif(MIOPEN_USE_FP16 || MIOPEN_USE_BFP16) diff --git a/src/kernels/MIOpenSubTensorOpWithScalarKernel.cl b/src/kernels/MIOpenSubTensorOpWithScalarKernel.cl index 87fd82a7fd..dc8c96fc60 100644 --- a/src/kernels/MIOpenSubTensorOpWithScalarKernel.cl +++ b/src/kernels/MIOpenSubTensorOpWithScalarKernel.cl @@ -46,17 +46,13 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef 
MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - #ifndef MIOPEN_USE_INT32 #define MIOPEN_USE_INT32 0 #endif #include "float_types.h" -#if MIOPEN_USE_INT8 == 1 || MIOPEN_USE_INT8x4 == 1 +#if MIOPEN_USE_INT8 == 1 #define _FLOAT char #endif diff --git a/src/kernels/MIOpenSubTensorOpWithSubTensorKernel.cl b/src/kernels/MIOpenSubTensorOpWithSubTensorKernel.cl index d0d1762a10..1de4843712 100644 --- a/src/kernels/MIOpenSubTensorOpWithSubTensorKernel.cl +++ b/src/kernels/MIOpenSubTensorOpWithSubTensorKernel.cl @@ -46,11 +46,7 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - -#if MIOPEN_USE_INT8 == 1 || MIOPEN_USE_INT8x4 == 1 +#if MIOPEN_USE_INT8 == 1 #define _FLOAT char #ifndef FLT_MAX #define MAX_VAL 127 /* max value */ diff --git a/src/kernels/MIOpenSubTensorOpWithTransformKernel.cl b/src/kernels/MIOpenSubTensorOpWithTransformKernel.cl index bc17f2ecf4..bd06f89626 100644 --- a/src/kernels/MIOpenSubTensorOpWithTransformKernel.cl +++ b/src/kernels/MIOpenSubTensorOpWithTransformKernel.cl @@ -43,11 +43,7 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - -#if MIOPEN_USE_INT8 == 1 || MIOPEN_USE_INT8x4 == 1 +#if MIOPEN_USE_INT8 == 1 #define _FLOAT char #ifndef FLT_MAX #define MAX_VAL 127 /* max value */ diff --git a/src/kernels/MIOpenUtilKernels4.cl b/src/kernels/MIOpenUtilKernels4.cl index 861e563012..09e6b73cff 100644 --- a/src/kernels/MIOpenUtilKernels4.cl +++ b/src/kernels/MIOpenUtilKernels4.cl @@ -40,10 +40,6 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - #ifndef MIOPEN_USE_INT32 #define MIOPEN_USE_INT32 0 #endif @@ -58,8 +54,6 @@ #if MIOPEN_USE_INT8 || MIOPEN_USE_FP8 || MIOPEN_USE_BFP8 typedef char data_t; -#elif MIOPEN_USE_INT8x4 -typedef uint data_t; #elif MIOPEN_USE_INT32 typedef int data_t; #elif(MIOPEN_USE_FP16 || MIOPEN_USE_BFP16) diff --git a/src/kernels/gpu_reference_kernel/fp8_kern_types.h b/src/kernels/gpu_reference_kernel/fp8_kern_types.h index b14302e0c2..d714a0514c 100644 --- a/src/kernels/gpu_reference_kernel/fp8_kern_types.h +++ b/src/kernels/gpu_reference_kernel/fp8_kern_types.h @@ -58,6 +58,6 @@ #define KERNEL_NAME_SUFFIX CAT(CAT(INPUT_TYPE, _), CAT(CAT(WEIGHTS_TYPE, _), OUTPUT_TYPE)) -#define FWD_KERNEL_NAME CAT(naive_conv_packed_fwd_nchw_, KERNEL_NAME_SUFFIX) -#define BWD_KERNEL_NAME CAT(naive_conv_packed_bwd_nchw_, KERNEL_NAME_SUFFIX) -#define WRW_KERNEL_NAME CAT(naive_conv_packed_wrw_nchw_, KERNEL_NAME_SUFFIX) +#define FWD_KERNEL_NAME CAT(naive_conv_nonpacked_fwd_nchw_, KERNEL_NAME_SUFFIX) +#define BWD_KERNEL_NAME CAT(naive_conv_nonpacked_bwd_nchw_, KERNEL_NAME_SUFFIX) +#define WRW_KERNEL_NAME CAT(naive_conv_nonpacked_wrw_nchw_, KERNEL_NAME_SUFFIX) diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp index e6b2945beb..3b4eabecfb 100644 --- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp @@ -26,7 +26,7 @@ #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include #include -#include +//#include #include #endif diff --git a/src/layer_norm.cpp b/src/layer_norm.cpp index 3d52bc771f..33030887ee 100644 --- a/src/layer_norm.cpp +++ b/src/layer_norm.cpp @@ -48,8 +48,8 @@ miopenStatus_t LayerNormForward(const Handle& handle, const TensorDescriptor& rstdDesc, Data_t rstd, miopenLayerNormMode_t mode, - const float epsilon, - const int32_t normalized_dim) + float epsilon, + int32_t normalized_dim) { 
if(x == nullptr || y == nullptr) { diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index b9d221f8a9..bc60fc17f4 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -377,7 +377,7 @@ static void ConvForwardCheckNumerics(const Handle& handle, } } -void ConvolutionDescriptor::ValidateConvTensors(const ConvTensors& tensors) const +void ConvolutionDescriptor::ValidateTensors(const ConvTensors& tensors) const { // Group stride in current TensorDescriptor is implicit. When invoking kernels, @@ -471,14 +471,9 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle, MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; - ValidateConvTensors(tensors); + ValidateTensors(tensors); ValidateAlphaBeta(alpha, beta); - if(algo != miopenConvolutionFwdAlgoGEMM && xDesc.GetType() == miopenInt8x4) - { - MIOPEN_THROW(miopenStatusBadParm); - } - ConvForwardCheckNumerics(handle, tensors, [&]() { ValidateGroupCount(xDesc, wDesc, *this); @@ -789,7 +784,7 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle, MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y}; - ValidateConvTensors(tensors); + ValidateTensors(tensors); if(!solver_id.IsValid()) MIOPEN_THROW(miopenStatusBadParm); @@ -925,7 +920,7 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle, auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx}; - ValidateConvTensors(tensors); + ValidateTensors(tensors); ValidateAlphaBeta(alpha, beta); ConvBwdCheckNumerics(handle, tensors, beta, [&]() { @@ -991,7 +986,7 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle, MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx}; - ValidateConvTensors(tensors); + ValidateTensors(tensors); static const float beta = 0.0f; ConvBwdCheckNumerics(handle, tensors, &beta, [&]() { @@ -1125,7 +1120,7 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle, { MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize); decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; - ValidateConvTensors(tensors); + ValidateTensors(tensors); ValidateAlphaBeta(alpha, beta); if(xDesc.GetType() == miopenInt8) @@ -1188,7 +1183,7 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle, { MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize); auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw}; - ValidateConvTensors(tensors); + ValidateTensors(tensors); if(xDesc.GetType() == miopenInt8) MIOPEN_THROW(miopenStatusBadParm); diff --git a/src/ocl/tensorocl.cpp b/src/ocl/tensorocl.cpp index 9c7bff6992..1e1f8b1df9 100644 --- a/src/ocl/tensorocl.cpp +++ b/src/ocl/tensorocl.cpp @@ -1597,10 +1597,13 @@ void ScaleTensor(const Handle& handle, assert(yDim_flat > 0 && yDim_flat <= 5); const miopenDataType_t dataType = yDesc_flat.GetType(); - if(dataType == miopenInt8 || dataType == miopenInt8x4 || dataType == miopenBFloat16) + + if(!(dataType == miopenHalf // + || dataType == miopenFloat // + || dataType == miopenInt32 // + || dataType == miopenDouble)) { - MIOPEN_THROW(miopenStatusBadParm, - "Tensor scale operation is not supported for int8, int8x4, and bfloat16."); + MIOPEN_THROW(miopenStatusBadParm, 
"ScaleTensor: unsupported data type."); } std::string kernel_name = "SubTensorOpWithScalar" + std::to_string(yDim_flat) + "d"; @@ -1941,8 +1944,7 @@ std::string GetCastTensorBuildOptionFromType(const std::string& buildOption, mio case miopenDouble: // TODO MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported in cast tensor."); - case miopenInt8x4: - MIOPEN_THROW(miopenStatusBadParm, "miopenInt8x4 data type not supported in cast tensor."); + case miopenInt8x4: // fallthrough default: MIOPEN_THROW(miopenStatusBadParm, "Invalid data type in cast tensor desc."); } } @@ -2237,24 +2239,6 @@ void TransformTensor(const Handle& handle, } } } - else if(xDesc.GetType() == miopenInt8 && yDesc.GetType() == miopenInt8x4 && x_len.size() >= 3) - { - if(x_len[1] <= (y_len[1] - 4) || y_len[1] % 4 != 0) - { - MIOPEN_THROW("Invalid y channel size"); - } - - transpose_NCHW2Vec(handle, x_len, x, y, 4, false, true, alpha, beta); - } - else if(xDesc.GetType() == miopenInt8x4 && yDesc.GetType() == miopenInt8 && x_len.size() >= 3) - { - if(y_len[1] <= (x_len[1] - 4) || x_len[1] % 4 != 0) - { - MIOPEN_THROW("Invalid x channel size"); - } - - transpose_NCHW2Vec(handle, y_len, x, y, 4, false, false, alpha, beta); - } else { auto x_y_len = boost::combine(x_len, y_len); @@ -2294,12 +2278,20 @@ void TransformTensor(const Handle& handle, const miopenDataType_t dataTypex = xDesc_flat.GetType(); const miopenDataType_t dataTypey = yDesc_flat.GetType(); - if(dataTypex == miopenInt8 || dataTypex == miopenInt8x4) + if(!(dataTypex == miopenHalf // + || dataTypex == miopenFloat // + || dataTypex == miopenInt32 // + || dataTypex == miopenBFloat16 // + || dataTypex == miopenDouble)) { MIOPEN_THROW("Tensor x is a unsupported data type"); } - if(dataTypey == miopenInt8 || dataTypey == miopenInt8x4) + if(!(dataTypey == miopenHalf // + || dataTypey == miopenFloat // + || dataTypey == miopenInt32 // + || dataTypey == miopenBFloat16 // + || dataTypey == miopenDouble)) { MIOPEN_THROW("Tensor y is a unsupported data type"); } diff --git a/src/ocl/utilocl.cpp b/src/ocl/utilocl.cpp index 0a88efb2f1..d536e819e6 100644 --- a/src/ocl/utilocl.cpp +++ b/src/ocl/utilocl.cpp @@ -80,8 +80,8 @@ float Im2d2ColGPU(const Handle& handle, int data_size_bound = c * in_h * in_w; - int data_size_bound_pack = type == miopenInt8x4 ? data_size_bound * 4 : data_size_bound; - int im_offset_pack = type == miopenInt8x4 ? im_offset / 4 : im_offset; + int data_size_bound_pack = data_size_bound; + int im_offset_pack = im_offset; if(!kernels.empty()) { @@ -105,7 +105,7 @@ float Im2d2ColGPU(const Handle& handle, } else { - const int c_pack = type == miopenInt8x4 ? c / 4 : c; + const int c_pack = c; std::string params; int num_ch_per_wg; @@ -331,9 +331,8 @@ float Im3d2ColGPU(const Handle& handle, auto&& kernels = handle.GetKernels("miopenIm3d2Col", network_config); - // int8x4 vectorize-c format - int im_offset_pack = type == miopenInt8x4 ? im_offset / 4 : im_offset; - int im_c_pack = type == miopenInt8x4 ? 
im_c / 4 : im_c; + int im_offset_pack = im_offset; + int im_c_pack = im_c; if(!kernels.empty()) { @@ -772,13 +771,6 @@ float transpose_NCHW2CNHW(const Handle& handle, std::string params = GetDataTypeKernelParams(type); - if(type == miopenInt8x4) - { - c /= 4; - in_offset /= 4; - out_offset /= 4; - } - if(h_stride == 1 && w_stride == 1 && type == miopenFloat) { kernel_name += "_V1"; @@ -910,13 +902,6 @@ float transpose_CNHW2NCHW(const Handle& handle, std::string params = GetDataTypeKernelParams(type); - if(type == miopenInt8x4) - { - c /= 4; - in_offset /= 4; - out_offset /= 4; - } - if(h_stride == 1 && w_stride == 1 && type == miopenFloat) { kernel_name += "_V1"; @@ -1170,14 +1155,8 @@ float transpose_packed_MN2NM(const Handle& handle, auto&& kernels = handle.GetKernels(kernel_name, network_config); std::string params = GetDataTypeKernelParams(type); - if(type == miopenInt8x4) - { - m /= 4; - in_offset /= 4; - out_offset /= 4; - } - if(!(type == miopenInt8x4 || type == miopenInt8)) + if(type != miopenInt8) { MIOPEN_THROW("transpose_packed_MN2NM only meant for int8 variants."); } diff --git a/src/pooling/problem_description.cpp b/src/pooling/problem_description.cpp index 0ee5469684..8e171a4ac0 100644 --- a/src/pooling/problem_description.cpp +++ b/src/pooling/problem_description.cpp @@ -61,6 +61,8 @@ NetworkConfig ProblemDescription::MakeNetworkConfig() const ss << "m" + std::to_string(pooling_method); ss << "_dt" << xDesc.GetType(); + if(const auto ct = xDesc.GetCastType()) + ss << "_dct" << GetDataTypeName(*ct); ss << "_ker" << get_vect_config(pooling.lens); ss << "_str" << get_vect_config(pooling.strides); ss << "_pad" << get_vect_config(pooling.pads); diff --git a/src/pooling_api.cpp b/src/pooling_api.cpp index ef526804cf..bf318f7b78 100644 --- a/src/pooling_api.cpp +++ b/src/pooling_api.cpp @@ -50,7 +50,7 @@ inline void Pooling_logging_cmd(const miopenPoolingDescriptor_t poolDesc, case miopenFloat: ss << "pool"; break; case miopenInt32: case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 6cf29c2f64..9cabbc03f6 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -211,11 +211,10 @@ inline int GetDataTypeSize(miopenDataType_t t) case miopenFloat8: case miopenBFloat8: case miopenInt8: return (1); - case miopenInt8x4: return (4); + case miopenInt8x4: return (4); // Support discontinued. case miopenBFloat16: return (2); case miopenInt32: return (4); - default: - MIOPEN_THROW("Only float, half, double, bfloat16, int8, int8x4 data type is supported."); + default: MIOPEN_THROW("Only float, half, double, bfloat16, int8 data types are supported."); }; }; @@ -269,7 +268,7 @@ inline int GetDataTypeId(miopenDataType_t t) case miopenBFloat16: return (static_cast<int>('B')); case miopenDouble: return (static_cast<int>('D')); case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenInt32: return (static_cast<int>('O')); diff --git a/src/reducetensor_api.cpp b/src/reducetensor_api.cpp index 685b2fb430..10b1ff73bf 100644 --- a/src/reducetensor_api.cpp +++ b/src/reducetensor_api.cpp @@ -43,7 +43,7 @@ static void LogCmdRedux(const miopen::ReduceTensorDescriptor reduceTensorDesc, ss << "reducefp16"; else if(aDesc.GetType() == miopenBFloat16) ss << "reducebfp16"; - else if(aDesc.GetType() == miopenInt8 || aDesc.GetType() == miopenInt8x4) + else if(aDesc.GetType() == miopenInt8) ss << "reduceint8"; else if(aDesc.GetType() == miopenDouble) ss << "reducefp64"; diff --git a/src/solver/batchnorm/backward_ck.cpp b/src/solver/batchnorm/backward_ck.cpp index fba8724990..6fecc24049 100644 --- a/src/solver/batchnorm/backward_ck.cpp +++ b/src/solver/batchnorm/backward_ck.cpp @@ -156,11 +156,11 @@ static bool CheckCKApplicability(const miopen::batchnorm::ProblemDescription& pr #endif -bool BnCKBwdBackward::IsApplicable(const ExecutionContext& ctx, +bool BnCKBwdBackward::IsApplicable(const ExecutionContext& context, const miopen::batchnorm::ProblemDescription& bn_problem) const { #if !MIOPEN_BACKEND_HIP || !MIOPEN_USE_COMPOSABLEKERNEL - std::ignore = ctx; + std::ignore = context; std::ignore = bn_problem; return false; #else @@ -168,7 +168,7 @@ bool BnCKBwdBackward::IsApplicable(const ExecutionContext& ctx, return false; if(!bn_problem.IsLayoutNHWC()) return false; - if(!ck_utility::is_ck_supported_hardware(ctx.GetStream())) + if(!ck_utility::is_ck_supported_hardware(context.GetStream())) return false; if(bn_problem.GetXDesc().GetType() != bn_problem.GetScaleBiasDiffDesc().GetType()) return false; diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index 5a7918cc64..ff17432a62 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -200,7 +200,7 @@ bool BnCKFwdInference::IsApplicable(const ExecutionContext& context, return (CheckCKApplicability(bn_problem) != -1); case miopenInt32: case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: default: MIOPEN_THROW("Unsupported datatype"); @@ -244,7 +244,7 @@ BnCKFwdInference::GetSolution(const ExecutionContext& context, break; case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: default: MIOPEN_THROW("Unsupported datatype"); diff --git a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp index 9520a79a90..e2df6f8097 100644 --- a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp +++ b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp @@ -287,7 +287,7 @@ void PerformanceConfigConvCKIgemmFwdBiasActivFused::HeuristicInit( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -342,7 +342,7 @@ bool PerformanceConfigConvCKIgemmFwdBiasActivFused::IsValid( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued.
case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -435,7 +435,7 @@ bool ConvCKIgemmFwdBiasActivFused::IsApplicable(const FusionContext& ctx, case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -469,7 +469,7 @@ ConvSolution ConvCKIgemmFwdBiasActivFused::GetSolution( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); diff --git a/src/solver/conv_direct_naive_conv.cpp b/src/solver/conv_direct_naive_conv.cpp index fb3e42bfb1..f87511f911 100644 --- a/src/solver/conv_direct_naive_conv.cpp +++ b/src/solver/conv_direct_naive_conv.cpp @@ -49,7 +49,7 @@ bool ConvDirectNaiveConvIsAssemblyKernel(const ExecutionContext& ctx, { const auto device_name = ctx.GetStream().GetDeviceName(); return (device_name == "gfx906" || device_name == "gfx908") && ctx.rmv.IsV3() && - problem.IsLayoutDefault() && (!problem.IsInt8()); + problem.IsLayoutDefault() && (problem.IsFp16() || problem.IsFp32() || problem.IsBfp16()); } // Check tensor data type respectively diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 2602c54320..94983c7f0e 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -210,7 +210,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -253,7 +253,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -324,7 +324,7 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -352,7 +352,7 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index f0623c642d..a21c9ba300 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -210,7 +210,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. 
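// [Editorial note] This pattern repeats across the solvers: miopenInt8x4 stays a valid case
// label (the enum value itself is kept, marked reserved in miopen.h) so the switches remain
// exhaustive without compiler warnings, while the label now always falls through to the
// unsupported-datatype path.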
case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -253,7 +253,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -322,7 +322,7 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -350,7 +350,7 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 3c94374b4e..6225410599 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -206,7 +206,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -249,7 +249,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -320,7 +320,7 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -348,7 +348,7 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::WrWInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp index 57c63267d2..5aa17e75bf 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp @@ -172,7 +172,7 @@ void PerformanceConfigHipImplicitGemmBwdXdlops::HeuristicInit( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -214,7 +214,7 @@ bool PerformanceConfigHipImplicitGemmBwdXdlops::IsValid( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. 
case miopenBFloat16: case miopenDouble: break; } @@ -294,7 +294,7 @@ bool ConvHipImplicitGemmBwdXdlops::IsApplicable( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -318,7 +318,7 @@ ConvSolution ConvHipImplicitGemmBwdXdlops::GetSolution( problem, config.kernel_id); case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp index 73907eb788..7a9c322bc7 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp @@ -172,7 +172,7 @@ void PerformanceConfigHipImplicitGemmFwdXdlops::HeuristicInit( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -215,7 +215,7 @@ bool PerformanceConfigHipImplicitGemmFwdXdlops::IsValid( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -294,7 +294,7 @@ bool ConvHipImplicitGemmFwdXdlops::IsApplicable( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -320,7 +320,7 @@ ConvSolution ConvHipImplicitGemmFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp index c2d0b83141..0a8b6eeb03 100644 --- a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp @@ -198,7 +198,7 @@ void PerformanceConfigHipImplicitGemmGroupFwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -240,7 +240,7 @@ bool PerformanceConfigHipImplicitGemmGroupFwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -311,7 +311,7 @@ bool ConvHipImplicitGemmGroupFwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -339,7 +339,7 @@ ConvSolution ConvHipImplicitGemmGroupFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. 
case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/gemm.cpp b/src/solver/gemm.cpp index 7ad238e7fa..6ae4f5dde7 100644 --- a/src/solver/gemm.cpp +++ b/src/solver/gemm.cpp @@ -87,13 +87,15 @@ bool GemmFwdBase::IsApplicable(const ExecutionContext& ctx, const auto& xDesc = problem.GetIn(); const auto& wDesc = problem.GetWeights(); const auto& yDesc = problem.GetOut(); - if(xDesc.GetType() == miopenInt8x4 || xDesc.GetType() == miopenInt8) - { - // rocBlas needs the output to be int32 always - if(yDesc.GetType() != miopenFloat && yDesc.GetType() != miopenInt32 && - yDesc.GetType() != miopenInt8x4) - return false; - } + + // rocBlas needs the output to be 32-bit always + if(xDesc.GetType() == miopenInt8x4) + return false; + if(xDesc.GetType() == miopenInt8 // + && (yDesc.GetType() != miopenFloat // + && yDesc.GetType() != miopenInt32)) + return false; + const auto rblas_fp8_supported = miopen::StartsWith(ctx.GetStream().GetDeviceName(), "gfx94"); if(problem.IsTensorsCasted()) { @@ -185,8 +187,7 @@ float GemmFwdBase::GetWti(const ExecutionContext&, const conv::ProblemDescriptio n_transpose_packed_MN2NM = 1; n_gemm_strided_batched = conv.group_count; n_transpose_CNHW2NCHW = 1; - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) n_CastTensor = 1; } // 1x1_stride=1 with GEMM and zero workspace @@ -205,8 +206,7 @@ float GemmFwdBase::GetWti(const ExecutionContext&, const conv::ProblemDescriptio n_gemm_strided_batched = conv.group_count; n_gemm_runs = in_n; } - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) n_CastTensor = 1; } else // not 1x1 @@ -216,8 +216,7 @@ float GemmFwdBase::GetWti(const ExecutionContext&, const conv::ProblemDescriptio n_transpose_packed_MN2NM = in_n; n_gemm_strided_batched = conv.group_count; n_gemm_runs = in_n; - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) n_CastTensor = 1; } @@ -410,7 +409,7 @@ ConvSolution GemmFwd1x1_0_2::GetSolution(const ExecutionContext& context, x_t_size *= 2; } - if(wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) + if(wDesc.GetType() == miopenInt8) { const auto xts = GetTypeSize(xDesc.GetType()); if(xts > 0) @@ -494,8 +493,7 @@ ConvSolution GemmFwd1x1_0_2::GetSolution(const ExecutionContext& context, if(handle.IsProfilingEnabled()) time_gemm += handle.GetKernelTime(); - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) { TensorDescriptor ygemmDesc(miopenInt32, yDesc.GetLengths(), yDesc.GetStrides()); @@ -782,7 +780,6 @@ ConvSolution GemmFwd1x1_0_1::GetSolution(const ExecutionContext& context, auto solution = ConvSolution{miopenStatusSuccess}; const auto group_count = conv.group_count; - const auto lowp_quant = conv.lowp_quant; if(group_count > 1) { @@ -881,14 +878,6 @@ ConvSolution GemmFwd1x1_0_1::GetSolution(const ExecutionContext& context, } } - if(wDesc.GetType() == miopenInt8x4 && yDesc.GetType() != miopenInt32) - { - TensorDescriptor ygemmDesc(miopenInt32, yDesc.GetLengths(), yDesc.GetStrides()); - CastTensor(handle, &lowp_quant, ygemmDesc, y, yDesc, y, 0, 0); - if(handle.IsProfilingEnabled()) 
- time_gemm += handle.GetKernelTime(); - } - if(handle.IsProfilingEnabled()) { handle.ResetKernelTime(); @@ -967,14 +956,6 @@ ConvSolution GemmFwd1x1_0_1::GetSolution(const ExecutionContext& context, if(handle.IsProfilingEnabled()) time += handle.GetKernelTime(); - if(wDesc.GetType() == miopenInt8x4 && yDesc.GetType() != miopenInt32) - { - TensorDescriptor ygemmDesc(miopenInt32, yDesc.GetLengths(), yDesc.GetStrides()); - CastTensor(handle, &lowp_quant, ygemmDesc, y, yDesc, y, 0, 0); - if(handle.IsProfilingEnabled()) - time += handle.GetKernelTime(); - } - if(handle.IsProfilingEnabled()) { handle.ResetKernelTime(); @@ -1268,8 +1249,7 @@ ConvSolution GemmFwdRest::GetSolution(const ExecutionContext& context, } } - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) { TensorDescriptor ygemmDesc(miopenInt32, yDesc.GetLengths(), yDesc.GetStrides()); diff --git a/src/solver/mlir_common.cpp b/src/solver/mlir_common.cpp index 4101db86f9..4e41b8944a 100644 --- a/src/solver/mlir_common.cpp +++ b/src/solver/mlir_common.cpp @@ -57,7 +57,7 @@ static const char* DTypeName(miopenDataType_t ty) case miopenBFloat16: return "bf16"; case miopenInt32: return "i32"; case miopenInt8: return "i8"; - case miopenInt8x4: return "i8x4"; + case miopenInt8x4: return "i8x4"; // Support discontinued. case miopenFloat8: return "fp8"; case miopenBFloat8: return "bfp8"; } diff --git a/src/tensor.cpp b/src/tensor.cpp index ca4f1afc7a..df0d7c2819 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -50,7 +50,7 @@ bool IsDataTypeSupported(miopenDataType_t t) case miopenFloat8: case miopenBFloat8: case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: return true; } diff --git a/test/conv_common.hpp b/test/conv_common.hpp index 99f964a0f6..e3fa9766e6 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -217,12 +217,7 @@ tensor get_output_tensor(const miopen::ConvolutionDescriptor& filter, ? input.desc.GetLayout(miopen::tensor_layout_get_default(input.desc.GetSize())) : out_layout; return tensor{filter.GetForwardOutputTensorWithLayout( - input.desc, - weights.desc, - yLayout, - weights.desc.GetType() == miopenInt8x4 - ? (std::is_same{} ? miopenInt32 : miopenFloat) - : miopen_type{})}; + input.desc, weights.desc, yLayout, miopen_type{})}; } enum class ConvApi @@ -572,8 +567,7 @@ struct verify_forward_conv : conv_base auto rout = ref_conv_fwd(input, weights, out, filter); if(filter.mode != miopenTranspose) { - bool is_int8 = - weights.desc.GetType() == miopenInt8 || weights.desc.GetType() == miopenInt8x4; + bool is_int8 = weights.desc.GetType() == miopenInt8; bool is_vect_c = weights.desc.GetVectorLength() > 1; rout.par_for_each([&](auto... is) { if(is_int8 && !is_vect_c) @@ -774,7 +768,7 @@ struct verify_forward_conv : conv_base break; case ConvApi::Find_1_0: case ConvApi::Find_2_0: - if(weights.desc.GetType() == miopenInt8 || weights.desc.GetType() == miopenInt8x4) + if(weights.desc.GetType() == miopenInt8) { bool is_transform = (input.desc.GetLengths()[1] % 4 != 0 || is_vect); @@ -786,10 +780,8 @@ struct verify_forward_conv : conv_base in_len[1] = ((in_len[1] + 3) / 4) * 4; wei_len[1] = ((wei_len[1] + 3) / 4) * 4; - miopen::TensorDescriptor input_vpad_desc(is_vect ? miopenInt8x4 : miopenInt8, - in_len); - miopen::TensorDescriptor weight_vpad_desc(is_vect ? 
miopenInt8x4 : miopenInt8, - wei_len); + miopen::TensorDescriptor input_vpad_desc(miopenInt8, in_len); + miopen::TensorDescriptor weight_vpad_desc(miopenInt8, wei_len); auto input_vpad = tensor{in_len}; auto weights_vpad = tensor{wei_len}; @@ -1738,8 +1730,8 @@ struct verify_forward_conv_int8 : conv_base in_len[1] = ((in_len[1] + 3) / 4) * 4; wei_len[1] = ((wei_len[1] + 3) / 4) * 4; - miopen::TensorDescriptor input_vpad_desc(is_vect ? miopenInt8x4 : miopenInt8, in_len); - miopen::TensorDescriptor weight_vpad_desc(is_vect ? miopenInt8x4 : miopenInt8, wei_len); + miopen::TensorDescriptor input_vpad_desc(miopenInt8, in_len); + miopen::TensorDescriptor weight_vpad_desc(miopenInt8, wei_len); auto input_vpad = tensor{in_len}; auto weights_vpad = tensor{wei_len}; @@ -2046,7 +2038,7 @@ struct conv_driver : test_driver filter.spatialDim = get_spatial_dim(); else filter.spatialDim = filter_dims.size(); - bool is_int8 = (input.desc.GetType() == miopenInt8 || input.desc.GetType() == miopenInt8x4); + bool is_int8 = (input.desc.GetType() == miopenInt8); filter.mode = cmode_lookup[miopen::ToUpper(conv_mode)]; filter.paddingMode = pmode_lookup[miopen::ToUpper(pad_mode)]; @@ -2360,8 +2352,7 @@ struct conv_driver : test_driver bool skip_backward_weights = is_int8; #if TEST_DIRECT_SUPPORTED_CONFIG_ONLY - if(input.desc.GetType() == miopenInt8 || input.desc.GetType() == miopenInt8x4 || - input.desc.GetType() == miopenBFloat16) + if(input.desc.GetType() == miopenInt8 || input.desc.GetType() == miopenBFloat16) { show_command(); std::cout << "Direct path doesn't support Int8 or BFloat16 type." << std::endl; @@ -2405,7 +2396,8 @@ struct conv_driver : test_driver size_t total_mem; if(is_int8) { - // TODO: Tout here was float which should have been int32 + /// \todo Properly construct the `output` tensor descriptor + /// and get rid of this special "int8" stuff. auto output_int8 = get_output_tensor(filter, input, weights, out_layout); const auto problem = ConvProblemDescription{input.desc, @@ -2422,6 +2414,9 @@ struct conv_driver : test_driver } else { + /// \todo Take into account `skip_forward`, `skip_backward_data`, + /// `skip_backward_weights` and use this path to compute `total_mem` for int8 + /// variations. 
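// [Editorial note] The two \todo items above are related: once the int8 path constructs its
// output descriptor with the proper type, this branch can fold into the general one below and
// honor the skip_forward/skip_backward_* flags uniformly.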
                 const auto fwd_problem = miopen::conv::ProblemDescription{
                     input.desc,
                     weights.desc,
diff --git a/test/driver.hpp b/test/driver.hpp
index 0a8e2d3080..fd83dd1fc5 100644
--- a/test/driver.hpp
+++ b/test/driver.hpp
@@ -274,7 +274,7 @@ struct test_driver
         {
         case miopenHalf: ss << "--half "; break;
         case miopenBFloat16: ss << "--bfloat16 "; break;
-        case miopenInt8x4:
+        case miopenInt8x4: ss << "--UNSUPPORTED_TYPE "; break;
         case miopenInt8: ss << "--int8 "; break;
         case miopenInt32: ss << "--int32 "; break;
         case miopenFloat: ss << "--float "; break;
@@ -303,7 +303,7 @@ struct test_driver
         {
         case miopenHalf: ret.emplace_back("--half"); break;
         case miopenBFloat16: ret.emplace_back("--bf16"); break;
-        case miopenInt8x4:
+        case miopenInt8x4: ret.emplace_back("--UNSUPPORTED_TYPE"); break;
         case miopenInt8: ret.emplace_back("--int8"); break;
         case miopenInt32: ret.emplace_back("--int32"); break;
         case miopenFloat: ret.emplace_back("--float"); break;
diff --git a/test/gpu_reference_kernel.cpp b/test/gpu_reference_kernel.cpp
index b3677487fe..e166781b9b 100644
--- a/test/gpu_reference_kernel.cpp
+++ b/test/gpu_reference_kernel.cpp
@@ -297,8 +297,6 @@ static std::string miopen_type_to_string(miopenDataType_t type)
         return "int32";
     if(type == miopenInt8)
         return "int8";
-    if(type == miopenInt8x4)
-        return "int8x4";
     if(type == miopenBFloat16)
         return "bf16";
     return "n/a";
diff --git a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp
index b69fde1b5e..4672bb0404 100644
--- a/test/gtest/conv_embed_db.cpp
+++ b/test/gtest/conv_embed_db.cpp
@@ -73,12 +73,12 @@ void Run2dDriver(miopenDataType_t prec)
     case miopenHalf: params = ConfigWithHalf::GetParam(); break;
     case miopenInt8: params = ConfigWithInt8::GetParam(); break;
     case miopenBFloat16: params = ConfigWithBFloat16::GetParam(); break;
-    case miopenInt8x4:
+    case miopenInt8x4: // Support discontinued.
     case miopenInt32:
     case miopenFloat8:
     case miopenBFloat8:
     case miopenDouble:
-        FAIL() << "miopenInt8x4, miopenInt32, miopenFloat8, miopenBFloat8, miopenDouble data type "
+        FAIL() << "miopenInt32, miopenFloat8, miopenBFloat8, miopenDouble data type "
                   "not supported by conv_embed_db test";
     default: params = ConfigWithFloat::GetParam();
diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp
index 508624c847..3407446557 100644
--- a/test/gtest/conv_hip_igemm_xdlops.cpp
+++ b/test/gtest/conv_hip_igemm_xdlops.cpp
@@ -65,10 +65,10 @@ void Run2dDriver(miopenDataType_t prec)
     case miopenHalf:
     case miopenBFloat16:
     case miopenFloat:
-    case miopenInt8x4:
+    case miopenInt8x4: // Support discontinued.
     case miopenInt32:
     case miopenDouble:
-        FAIL() << "miopenHalf, miopenBFloat16, miopenFloat, miopenInt8x4, miopenInt32, "
+        FAIL() << "miopenHalf, miopenBFloat16, miopenFloat, miopenInt32, "
                   "miopenDouble data "
                   "type not supported by "
                   "test_conv_hip_igemm_xdlops test";
diff --git a/test/gtest/conv_igemm_dynamic.cpp b/test/gtest/conv_igemm_dynamic.cpp
index 25a4e179c5..59341ac8a0 100644
--- a/test/gtest/conv_igemm_dynamic.cpp
+++ b/test/gtest/conv_igemm_dynamic.cpp
@@ -68,12 +68,12 @@ void Run2dDriver(miopenDataType_t prec)
     case miopenHalf:
     case miopenInt8:
     case miopenBFloat16:
-    case miopenInt8x4:
+    case miopenInt8x4: // Support discontinued.
     case miopenInt32:
     case miopenDouble:
     case miopenFloat8:
     case miopenBFloat8:
-        FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt8x4, miopenInt32, "
+        FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, "
                   "miopenDouble, miopenFloat8, miopenBFloat8 "
                   "data type not supported by conv_igemm_dynamic test";
diff --git a/test/gtest/conv_igemm_mlir.cpp b/test/gtest/conv_igemm_mlir.cpp
index d5fc0d426c..29d240645c 100644
--- a/test/gtest/conv_igemm_mlir.cpp
+++ b/test/gtest/conv_igemm_mlir.cpp
@@ -81,13 +81,13 @@ void Run2dDriver(miopenDataType_t prec)
     case miopenInt8: params = ConfigWithInt8::GetParam(); break;
     case miopenFloat: params = ConfigWithFloat::GetParam(); break;
     case miopenBFloat16:
-    case miopenInt8x4:
+    case miopenInt8x4: // Support discontinued.
     case miopenInt32:
     case miopenFloat8:
     case miopenBFloat8:
     case miopenDouble:
         MIOPEN_THROW(miopenStatusBadParm,
-                     "miopenBFloat16, miopenInt8x4, miopenInt32, miopenFloat8, miopenBFloat8, "
+                     "miopenBFloat16, miopenInt32, miopenFloat8, miopenBFloat8, "
                      "miopenDouble data type not supported by conv_igemm_mlir test");
     default: params = ConfigWithFloat::GetParam();
diff --git a/test/gtest/conv_igemm_mlir_xdlops.cpp b/test/gtest/conv_igemm_mlir_xdlops.cpp
index 19913093c0..e2c5a3ed8f 100644
--- a/test/gtest/conv_igemm_mlir_xdlops.cpp
+++ b/test/gtest/conv_igemm_mlir_xdlops.cpp
@@ -53,13 +53,13 @@ void Run2dDriver(miopenDataType_t prec)
     case miopenInt8: params = ConfigWithInt8::GetParam(); break;
     case miopenBFloat16:
     case miopenFloat:
-    case miopenInt8x4:
+    case miopenInt8x4: // Support discontinued.
     case miopenInt32:
     case miopenDouble:
     case miopenFloat8:
     case miopenBFloat8:
         MIOPEN_THROW(miopenStatusBadParm,
-                     "miopenBFloat16, miopenFloat, miopenInt8x4, miopenInt32, miopenDouble data "
+                     "miopenBFloat16, miopenFloat, miopenInt32, miopenDouble data "
                      "type not supported by "
                      "conv_igemm_mlir_xdlops test");
diff --git a/test/gtest/conv_trans.cpp b/test/gtest/conv_trans.cpp
index 20015336a0..185a459947 100644
--- a/test/gtest/conv_trans.cpp
+++ b/test/gtest/conv_trans.cpp
@@ -55,10 +55,10 @@ void Run2dDriver(miopenDataType_t prec)
     case miopenBFloat8:
     case miopenInt8:
     case miopenBFloat16:
-    case miopenInt8x4:
+    case miopenInt8x4: // Support discontinued.
     case miopenInt32:
     case miopenDouble:
-        FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt8x4, miopenInt32, miopenDouble "
+        FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, miopenDouble "
                   "data type not supported by "
                   "conv_trans test";
diff --git a/test/gtest/db_sync.cpp b/test/gtest/db_sync.cpp
index d7e59468a2..13b647ceac 100644
--- a/test/gtest/db_sync.cpp
+++ b/test/gtest/db_sync.cpp
@@ -73,7 +73,7 @@ miopenDataType_t GetDataTypeFromString(const std::string& data_type)
     else if(data_type == "INT8")
         return miopenInt8;
     else if(data_type == "INT8x4")
-        return miopenInt8x4;
+        return miopenInt8x4; // Support discontinued. Maintain compatibility with old databases.
     else if(data_type == "INT32")
         return miopenInt32;
     else if(data_type == "BF16")
diff --git a/test/tensor_holder.hpp b/test/tensor_holder.hpp
index 0b05a1e5e8..3fda3b5cf7 100644
--- a/test/tensor_holder.hpp
+++ b/test/tensor_holder.hpp
@@ -195,9 +195,10 @@ struct tensor
     tensor(miopen::TensorDescriptor rhs) : desc(std::move(rhs))
     {
-        assert(desc.GetType() == miopen_type<T>{} ||
-               ((miopen_type<T>{} == miopenInt8 || miopen_type<T>{} == miopenInt8x4) &&
-                (desc.GetType() == miopenFloat || desc.GetType() == miopenInt32)));
+        assert(desc.GetType() == miopen_type<T>{} //
+               || (miopen_type<T>{} == miopenInt8 //
+                   && (desc.GetType() == miopenFloat //
+                       || desc.GetType() == miopenInt32)));
         data.resize(desc.GetElementSpace());
     }
diff --git a/test/tensor_transform.cpp b/test/tensor_transform.cpp
index a0585853a0..e87420bb7a 100644
--- a/test/tensor_transform.cpp
+++ b/test/tensor_transform.cpp
@@ -422,7 +422,7 @@ struct tensor_transform_driver : test_driver
     }
 
     // Test tensor scale addition
-    if(miopen_type<T>{} == miopenInt8 || miopen_type<T>{} == miopenInt8x4)
+    if(miopen_type<T>{} == miopenInt8)
         return;
 
     super_src = tensor<T>{superLens_src}.generate(tensor_elem_gen_integer{max_value});
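
Migration note (not part of the patch): taken together, the hunks above point client code in one direction — construct plain miopenInt8 tensor descriptors instead of miopenInt8x4 ones, and drop any caller-side padding of the channel dimension to a multiple of 4; vector formats are now expressed through the descriptor's layout and vector length (compare the GetVectorLength() check in the conv_common.hpp hunk) rather than through a separate data type. Below is a minimal sketch of the post-patch pattern, assuming a caller that previously selected miopenInt8x4; the helper name make_int8_input_desc and its parameters are illustrative, not from this changeset:

    #include <miopen/miopen.h>

    // Hypothetical helper: build a 4-D NCHW tensor descriptor for int8 data.
    // With miopenInt8x4 support discontinued, plain miopenInt8 is the only
    // int8 descriptor type a client should request.
    miopenStatus_t make_int8_input_desc(miopenTensorDescriptor_t* desc,
                                        int n, int c, int h, int w)
    {
        miopenStatus_t rc = miopenCreateTensorDescriptor(desc);
        if(rc != miopenStatusSuccess)
            return rc;
        return miopenSet4dTensorDescriptor(*desc, miopenInt8, n, c, h, w);
    }

Old performance-database entries that still read "INT8x4" continue to parse (see the db_sync.cpp hunk), so user databases need no migration.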