Skip to content

Commit

Permalink
Merge branch 'master' into tj/core/serialize-validation
Browse files Browse the repository at this point in the history
  • Loading branch information
t-jankowski authored Aug 23, 2024
2 parents 1fcccae + fa2d87a commit 8d3bb34
Show file tree
Hide file tree
Showing 28 changed files with 885 additions and 219 deletions.
3 changes: 1 addition & 2 deletions docs/articles_en/about-openvino/performance-benchmarks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,11 @@ You can also test performance for your system yourself, following the guide on

<h2>Disclaimers</h2>


* Intel® Distribution of OpenVINO™ toolkit performance results are based on release
2024.3, as of July 31, 2024.

* OpenVINO Model Server performance results are based on release
2024.2, as of June 28, 2024.
2024.3, as of Aug. 19, 2024.

The results may not reflect all publicly available updates. Intel technologies' features and
benefits depend on system configuration and may require enabled hardware, software, or service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ After broadcasting input tensors *a* and *b*, *BitwiseLeftShift* performs a bitw
.. note::

If the number of shifts is negative, or if it equals or exceeds the total number of bits in the type **T**, the behavior can be undefined or implementation-defined (depends on the hardware).

Unsigned integer shift is always performed modulo 2^n where n is the number of bits in the type **T**.

When signed integer shift operation overflows (the result does not fit in the result type), the behavior is undefined.

**Attributes**:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ After broadcasting input tensors *a* and *b*, *BitwiseRightShift* performs a bit

If the number of shifts is negative, or if it equals or exceeds the total number of bits in the type **T**, the behavior can be undefined or implementation-defined (depends on the hardware).

Unsigned integer shift is always performed modulo 2^n where n is the number of bits in the type **T**.

When signed integer shift operation overflows (the result does not fit in the result type), the behavior is undefined.

**Attributes**:

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
156 changes: 78 additions & 78 deletions docs/sphinx_setup/_static/benchmarks_files/OVMS-benchmark-data.csv
Original file line number Diff line number Diff line change
@@ -1,78 +1,78 @@
Network model,Release,IE-Type,Platform name,Throughput-OVMS-INT8,Throughput-OV-INT8,Throughput-OVMS-FP32,Throughput-OV-FP32,
begin_rec,,,,,,,,
bert-base-cased,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,484.765,486.962,181.829,179.94
bert-base-cased,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,430.151,434.276,157.835,158.277
bert-base-cased,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,101.044,102.838,35.727,36.57
bert-base-cased,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,100.741,103.322,35.046,36.607
bert-base-cased,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,26.124,26.329,17.155,17.387
end_rec,,,,,,,,
begin_rec,,,,,,,,
bert-large-uncased,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,41.302,41.994,14.937,14.482
bert-large-uncased,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,36.595,37.1,13.114,13.03
bert-large-uncased,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,10.076,10.145,3.267,3.246
bert-large-uncased,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,10.161,10.203,3.287,3.26
bert-large-uncased,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,2.422,2.424,1.447,1.427
end_rec,,,,,,,,
begin_rec,,,,,,,,
Efficientdet-D0,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,433.166,479.055,285.65,287.547
Efficientdet-D0,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,367.395,407,251.401,257.516
Efficientdet-D0,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,132.153,149.424,57.682,61.811
Efficientdet-D0,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,124.984,142.514,50.265,53.089
Efficientdet-D0,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,47.048,50.328,30.026,31.473
end_rec,,,,,,,,
begin_rec,,,,,,,,
mask_rcnn_resnet50_atrous_coco,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,6.323,6.488,1.978,1.868
mask_rcnn_resnet50_atrous_coco,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,5.637,5.742,1.715,1.633
mask_rcnn_resnet50_atrous_coco,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,1.302,1.276,0.396,0.373
mask_rcnn_resnet50_atrous_coco,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,1.307,1.28,0.374,0.36
mask_rcnn_resnet50_atrous_coco,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,0.381,0.35,0.181,0.15
end_rec,,,,,,,,
begin_rec,,,,,,,,
Mobilenet-V2 ,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,7578.115,12346.3,3354.203,3938.523
Mobilenet-V2 ,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,7513.034,10367.947,2915.906,3349.306
Mobilenet-V2 ,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,2152.015,2740.691,745.81,882.839
Mobilenet-V2 ,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,2093.311,2822.613,667.391,795.616
Mobilenet-V2 ,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,615.392,719.715,381.686,454.574
end_rec,,,,,,,,
begin_rec,,,,,,,,
Resnet-50,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,2354.173,2482.832,639.577,645.443
Resnet-50,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,2070.726,2177.751,571.252,575.778
Resnet-50,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,440.402,458.622,114.169,116.577
Resnet-50,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,448.464,470.586,111.785,114.628
Resnet-50,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,114.267,118.502,57.29,58.233
end_rec,,,,,,,,
begin_rec,,,,,,,,
SSD-Resnet34-1200 ,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,44.587,47.293,12.111,12.248
SSD-Resnet34-1200 ,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,38.784,40.602,10.521,10.613
SSD-Resnet34-1200 ,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,7.736,7.821,2.034,2.011
SSD-Resnet34-1200 ,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,7.953,8.033,2.083,2.058
SSD-Resnet34-1200 ,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,1.951,1.936,1.04,1.014
end_rec,,,,,,,,
begin_rec,,,,,,,,
SSD_Mobilenet_V1_Coco,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,4753.674,4933.241,1370.423,1379.026
SSD_Mobilenet_V1_Coco,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,4165.318,4276.949,1197.151,1222.112
SSD_Mobilenet_V1_Coco,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,920.957,1001.56,270.597,281.293
SSD_Mobilenet_V1_Coco,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,941.323,1030.464,256.649,266.715
SSD_Mobilenet_V1_Coco,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,256.291,266.245,129.84,135.453
end_rec,,,,,,,,
begin_rec,,,,,,,,
Unet-Camvid--0001 ,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,73.871,78.016,18.23,18.374
Unet-Camvid--0001 ,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,64.573,67.713,15.815,16.023
Unet-Camvid--0001 ,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,12.572,12.669,3.28,3.254
Unet-Camvid--0001 ,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,12.779,12.894,3.315,3.299
Unet-Camvid--0001 ,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,2.99,2.971,1.549,1.542
end_rec,,,,,,,,
begin_rec,,,,,,,,
Yolo_V3_Tiny,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,1863.229,2344.128,775.001,786.09
Yolo_V3_Tiny,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,1669.35,2066.437,675.625,703.203
Yolo_V3_Tiny,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,430.441,505.532,146.284,151.09
Yolo_V3_Tiny,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,419.347,513.112,142.009,148.117
Yolo_V3_Tiny,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,117.889,133.312,63.598,69.377
end_rec,,,,,,,,
begin_rec,,,,,,,,
Yolo_V8n,OV-2024.2,xeon,Intel® Xeon® 8260M CPU-only,,705.714,845.484,316.319,388.763
Yolo_V8n,OV-2024.2,xeon,Intel® Xeon® Gold 6238M CPU-only,,641.815,746.965,278.824,338.806
Yolo_V8n,OV-2024.2,core,Intel® Core™ i9-11900K CPU-only,,154.54,205.294,67.744,75.634
Yolo_V8n,OV-2024.2,core,Intel® Core™ i7-11700K CPU-only,,149.289,199.997,65.243,72.677
Yolo_V8n,OV-2024.2,core,Intel® Core™ i3-10100 CPU-only,,55.999,68.155,34.454,38.544
end_rec,,,,,,,,
Network model,Release,IE-Type,Platform name,Throughput-OVMS-INT8,Throughput-OV-INT8,Throughput-OVMS-FP32,Throughput-OV-FP32,UOM_T
begin_rec,,,,,,,,
bert-base-cased,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,479.649,482.878,180.7,179.541,FPS
bert-base-cased,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,428.173,430.397,156.73,159.276,FPS
bert-base-cased,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,100.783,101.983,35.711,36.35,FPS
bert-base-cased,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,98.441,102.62,34.303,36.096,FPS
bert-base-cased,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,26.185,26.436,17.108,17.395,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
bert-large-uncased,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,41.872,42.401,14.949,14.473,FPS
bert-large-uncased,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,37.05,37.864,13.075,13.031,FPS
bert-large-uncased,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,10.047,10.111,3.259,3.237,FPS
bert-large-uncased,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,9.961,10.167,3.236,3.224,FPS
bert-large-uncased,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,2.43,2.427,1.447,1.428,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
Efficientdet-D0,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,439.435,485.287,274.772,272.856,FPS
Efficientdet-D0,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,376.1,415.275,253.829,259.188,FPS
Efficientdet-D0,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,131.735,148.558,57.036,59.907,FPS
Efficientdet-D0,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,119.798,140.129,,,FPS
Efficientdet-D0,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,47.382,50.573,30.226,31.492,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
mask_rcnn_resnet50_atrous_coco,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,6.306,6.364,1.96,1.868,FPS
mask_rcnn_resnet50_atrous_coco,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,5.652,5.771,1.714,1.639,FPS
mask_rcnn_resnet50_atrous_coco,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,1.309,1.267,0.396,0.371,FPS
mask_rcnn_resnet50_atrous_coco,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,1.293,1.271,0.355,0.346,FPS
mask_rcnn_resnet50_atrous_coco,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,0.38,0.352,0.182,0.151,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
Mobilenet-V2 ,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,7563.199,12406.597,3336.015,3972.673,FPS
Mobilenet-V2 ,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,7475.62,10373.146,2934.976,3381.725,FPS
Mobilenet-V2 ,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,2158.818,2742.363,740.988,874.037,FPS
Mobilenet-V2 ,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,2042.633,2809.471,631.59,759.984,FPS
Mobilenet-V2 ,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,614.174,718.416,381.882,455.793,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
Resnet-50,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,2356.238,2483.3,628.616,635.411,FPS
Resnet-50,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,2071.836,2202.317,568.945,575.057,FPS
Resnet-50,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,440.533,458.665,113.442,116.116,FPS
Resnet-50,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,441.7,469.848,107.395,113.605,FPS
Resnet-50,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,114.045,118.024,57.165,58.366,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
SSD-Resnet34-1200 ,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,44.499,47.251,12.074,12.167,FPS
SSD-Resnet34-1200 ,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,38.714,40.662,10.504,10.653,FPS
SSD-Resnet34-1200 ,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,7.756,7.818,2.029,2.005,FPS
SSD-Resnet34-1200 ,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,7.929,8.032,2.072,2.054,FPS
SSD-Resnet34-1200 ,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,1.947,1.937,1.037,1.008,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
SSD_Mobilenet_V1_Coco,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,4732.691,4875.291,1362.268,1375.237,FPS
SSD_Mobilenet_V1_Coco,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,4168.575,4279.825,1199.883,1226.189,FPS
SSD_Mobilenet_V1_Coco,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,921.041,1001.672,268.066,280.987,FPS
SSD_Mobilenet_V1_Coco,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,915.4,1028.233,244.534,260.822,FPS
SSD_Mobilenet_V1_Coco,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,256.018,266.401,129.917,135.312,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
Unet-Camvid--0001 ,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,73.429,77.693,18.104,17.938,FPS
Unet-Camvid--0001 ,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,64.29,67.517,15.777,15.927,FPS
Unet-Camvid--0001 ,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,12.574,12.628,3.267,3.253,FPS
Unet-Camvid--0001 ,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,12.718,12.881,3.272,3.297,FPS
Unet-Camvid--0001 ,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,2.995,2.976,1.555,1.53,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
Yolo_V3_Tiny,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,1842.129,2317.052,755.451,777.681,FPS
Yolo_V3_Tiny,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,1667.812,2056.27,675.447,704.412,FPS
Yolo_V3_Tiny,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,431.387,504.093,145.92,151.499,FPS
Yolo_V3_Tiny,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,409.268,516.794,139.903,147.235,FPS
Yolo_V3_Tiny,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,117.276,133.578,65.341,69.29,FPS
end_rec,,,,,,,,
begin_rec,,,,,,,,
Yolo_V8n,OV-2024.3,xeon,Intel® Xeon® 8260M CPU-only,,,314.652,386.299,FPS
Yolo_V8n,OV-2024.3,xeon,Intel® Xeon® Gold 6238M CPU-only,,,282.302,340.845,FPS
Yolo_V8n,OV-2024.3,core,Intel® Core™ i9-11900K CPU-only,153.817,204.691,67.421,74.996,FPS
Yolo_V8n,OV-2024.3,core,Intel® Core™ i7-11700K CPU-only,143.19,197.409,62.948,70.913,FPS
Yolo_V8n,OV-2024.3,core,Intel® Core™ i3-10100 CPU-only,56.244,67.968,34.396,38.576,FPS
end_rec,,,,,,,,
4 changes: 4 additions & 0 deletions src/plugins/intel_cpu/src/cpu_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ static const TypeToNameMap& get_type_to_name_tbl() {
{"BitwiseNot", Type::Eltwise},
{"BitwiseOr", Type::Eltwise},
{"BitwiseXor", Type::Eltwise},
{"BitwiseLeftShift", Type::Eltwise},
{"BitwiseRightShift", Type::Eltwise},
{"Reshape", Type::Reshape},
{"Squeeze", Type::Reshape},
{"Unsqueeze", Type::Reshape},
Expand Down Expand Up @@ -445,6 +447,8 @@ std::string algToString(const Algorithm alg) {
CASE(EltwiseBitwiseNot);
CASE(EltwiseBitwiseOr);
CASE(EltwiseBitwiseXor);
CASE(EltwiseBitwiseLeftShift);
CASE(EltwiseBitwiseRightShift);
CASE(FQCommon);
CASE(FQQuantization);
CASE(FQBinarization);
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/src/cpu_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ enum class Algorithm {
EltwiseBitwiseNot,
EltwiseBitwiseOr,
EltwiseBitwiseXor,
EltwiseBitwiseLeftShift,
EltwiseBitwiseRightShift,

// FakeQuantize algorithms
FQCommon,
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,8 @@ bool DnnlExtensionUtils::isUnarySupportedAsPostOp(Algorithm alg) {
#endif
}

std::string DnnlExtensionUtils::computeWeightsStringHash(const std::shared_ptr<const IMemory> memory,
const std::shared_ptr<DnnlMemoryDesc> dstDesc) {
std::string DnnlExtensionUtils::computeWeightsStringHash(const std::shared_ptr<const IMemory>& memory,
const std::shared_ptr<DnnlMemoryDesc>& dstDesc) {
const auto desc_hash = dnnl::impl::primitive_hashing::get_md_hash(*dstDesc->getDnnlDesc().get());
return std::to_string(desc_hash) + "_" + std::to_string(reinterpret_cast<uint64_t>(memory->getData()));
}
Expand Down
4 changes: 3 additions & 1 deletion src/plugins/intel_cpu/src/dnnl_extension_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,15 @@ class DnnlExtensionUtils {
static dnnl_memory_desc_t clone_desc(const_dnnl_memory_desc_t cdesc);
static const char* query_pd_info(const_dnnl_primitive_desc_t pd);
static bool isUnarySupportedAsPostOp(Algorithm alg);

/**
* @brief Computes weights string hash based on weights memory and requested descriptor
* @param memory Weights memory pointer
* @param dstDesc descriptor defining weights representation after repacking
* @return string hash
*/
static std::string computeWeightsStringHash(const std::shared_ptr<const IMemory> memory, const std::shared_ptr<DnnlMemoryDesc> dstDesc);
static std::string computeWeightsStringHash(const std::shared_ptr<const IMemory>& memory,
const std::shared_ptr<DnnlMemoryDesc>& dstDesc);
};

} // namespace intel_cpu
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,7 @@ class Node {
impl_desc_type implType);

void prepareMemory(const std::vector<DnnlMemoryDescPtr>& intDescs);
void prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx);
virtual void prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx);
void prepareMemory(dnnl::primitive_desc_iterator& itpd);

MemoryPtr prepareWeightMemory(DnnlMemoryDescPtr dstWeightDesc, DnnlMemoryDescPtr srcWeightDesc = nullptr);
Expand Down
12 changes: 6 additions & 6 deletions src/plugins/intel_cpu/src/nodes/batch_to_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,11 @@ void BatchToSpace::batchToSpaceKernel() {
begin[1] = (blockShape[1] - 1 - oAdd[1]) / blockShape[1] / blockSize;
finish[1] = (outShape5D[1] - 1 - oAdd[1]) / blockShape[1] / blockSize;
begin[2] = (blockShape[2] - 1 - oAdd[2]) / blockShape[2];
finish[2] = (outShape5D[2] - 1 - oAdd[2]) / blockShape[2];
finish[2] = (outShape5D[2] + blockShape[2] - 1 - oAdd[2]) / blockShape[2];
begin[3] = (blockShape[3] - 1 - oAdd[3]) / blockShape[3];
finish[3] = (outShape5D[3] - 1 - oAdd[3]) / blockShape[3];
finish[3] = (outShape5D[3] + blockShape[3] - 1 - oAdd[3]) / blockShape[3];
begin[4] = (blockShape[4] - 1 - oAdd[4]) / blockShape[4];
finish[4] = (outShape5D[4] - 1 - oAdd[4]) / blockShape[4];
finish[4] = (outShape5D[4] + blockShape[4] - 1 - oAdd[4]) / blockShape[4];
const int64_t addTmpOC = blocked ? 0lu : oAdd[1];
const int64_t addTmpOc = blocked ? oAdd[1] : 0lu;
indxStart[1] = begin[1] > indxStart[1] ? begin[1] : indxStart[1];
Expand All @@ -198,15 +198,15 @@ void BatchToSpace::batchToSpaceKernel() {
const size_t srcIdx1 = srcIdx0 + indxStart[1] * inSpatialStep * blockSize;
const size_t dstIdx1 = dstIdx0 + tmpOC * outSpatialStep * blockSize;
const size_t itEnd = blocked ? ((block - 1) * blockShape[1] + oAdd[1]) / blockSize : 0lu;
for (size_t i2 = begin[2]; i2 < finish[2] + 1; ++i2) {
for (size_t i2 = begin[2]; i2 < finish[2]; ++i2) {
const int64_t tmpOd = i2 * blockShape[2] + oAdd[2];
const size_t srcIdx2 = srcIdx1 + i2 * inShape5D[3] * inShape5D[4] * blockSize;
const size_t dstIdx2 = dstIdx1 + tmpOd * outShape5D[3] * outShape5D[4] * blockSize;
for (size_t i3 = begin[3]; i3 < finish[3] + 1; ++i3) {
for (size_t i3 = begin[3]; i3 < finish[3]; ++i3) {
const int64_t tmpOh = i3 * blockShape[3] + oAdd[3];
const size_t srcIdx3 = srcIdx2 + i3 * inShape5D[4] * blockSize;
const size_t dstIdx3 = dstIdx2 + tmpOh * outShape5D[4] * blockSize;
for (size_t i4 = begin[4]; i4 < finish[4] + 1; ++i4) {
for (size_t i4 = begin[4]; i4 < finish[4]; ++i4) {
const int64_t tmpOw = i4 * blockShape[4] + oAdd[4];
const size_t srcIdx4 = srcIdx3 + i4 * blockSize;
const size_t dstIdx4 = dstIdx3 + tmpOw * blockSize;
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_cpu/src/nodes/batch_to_space.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ class BatchToSpace : public Node {

void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;

// The output shape of this node can potentially be empty, so the output side
// is checked in addition to the input side before reporting the node as
// executable.
bool isExecutable() const override {
    const bool anyEmptyTensor = hasEmptyInputTensors() || hasEmptyOutputTensors();
    return !anyEmptyTensor;
}

void execute(dnnl::stream strm) override;
bool created() const override;

Expand Down
Loading

0 comments on commit 8d3bb34

Please sign in to comment.