From ab0dc80bc6a1fb0bded10b1d01cc926cc2769c22 Mon Sep 17 00:00:00 2001
From: sivanov-work
Date: Mon, 27 Nov 2023 15:38:31 +0000
Subject: [PATCH 1/5] Set batched input

---
 .../cpp/main.cpp                              | 142 +++++++++++-------
 .../cpp/models/include/models/image_model.h   |   4 +-
 .../cpp/models/include/models/model_base.h    |   3 +
 demos/common/cpp/models/src/image_model.cpp   |  54 +++++++
 .../include/pipelines/async_pipeline.h        |   3 +
 .../pipelines/include/pipelines/metadata.h    |  23 +++
 .../cpp/pipelines/src/async_pipeline.cpp      |  54 +++++++
 7 files changed, 226 insertions(+), 57 deletions(-)

diff --git a/demos/classification_benchmark_demo/cpp/main.cpp b/demos/classification_benchmark_demo/cpp/main.cpp
index 42dbb6d3e74..c835757c9a8 100644
--- a/demos/classification_benchmark_demo/cpp/main.cpp
+++ b/demos/classification_benchmark_demo/cpp/main.cpp
@@ -51,6 +51,7 @@ static const char target_device_message[] = "Optional. Specify the target device
 static const char num_threads_message[] = "Optional. Specify count of threads.";
 static const char num_streams_message[] = "Optional. Specify count of streams.";
 static const char num_inf_req_message[] = "Optional. Number of infer requests.";
+static const char num_inf_req_per_batch_message[] = "Optional. Number of infer requests per batch.";
 static const char image_grid_resolution_message[] = "Optional. Set image grid resolution in format WxH. "
                                                     "Default value is 1280x720.";
 static const char ntop_message[] = "Optional. Number of top results. Default value is 5. Must be >= 1.";
@@ -75,6 +76,7 @@ DEFINE_string(d, "CPU", target_device_message);
 DEFINE_uint32(nthreads, 0, num_threads_message);
 DEFINE_string(nstreams, "", num_streams_message);
 DEFINE_uint32(nireq, 0, num_inf_req_message);
+DEFINE_uint32(nireq_per_batch, 2, num_inf_req_per_batch_message);
 DEFINE_uint32(nt, 5, ntop_message);
 DEFINE_string(res, "1280x720", image_grid_resolution_message);
 DEFINE_bool(auto_resize, false, input_resizable_message);
@@ -265,6 +267,15 @@ int main(int argc, char* argv[]) {
         std::size_t nextImageIndex = 0;
         std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now();
 
+        // batch setup
+        std::vector<ImageInputData> inputDataVector {inputImages.begin(), inputImages.end()};
+        auto inputImagesBeginIt = inputDataVector.begin();
+        auto inputImagesEndIt = inputDataVector.begin();
+        std::advance(inputImagesEndIt, FLAGS_nireq_per_batch);
+
+        auto classIndicesBeginIt = classIndices.begin();
+        auto classIndicesEndIt = classIndicesBeginIt;
+        std::advance(classIndicesEndIt, FLAGS_nireq_per_batch);
         while (keepRunning && elapsedSeconds < std::chrono::seconds(FLAGS_time)) {
             if (elapsedSeconds >= testDuration - fpsCalculationDuration && framesNumOnCalculationStart == 0) {
                 framesNumOnCalculationStart = framesNum;
@@ -287,14 +298,30 @@ int main(int argc, char* argv[]) {
 
             if (pipeline.isReadyToProcess()) {
                 auto imageStartTime = std::chrono::steady_clock::now();
 
-                pipeline.submitData(ImageInputData(inputImages[nextImageIndex]),
-                                    std::make_shared<ClassificationImageMetaData>(inputImages[nextImageIndex],
+                pipeline.submitData(inputImagesBeginIt, inputImagesEndIt,
+                                    std::make_shared<ClassificationImageBatchMetaData>(inputImagesBeginIt, inputImagesEndIt
                                                                                   imageStartTime,
-                                                                                  classIndices[nextImageIndex]));
-                nextImageIndex++;
-                if (nextImageIndex == imageNames.size()) {
-                    nextImageIndex = 0;
+                                                                                  classIndicesBeginIt, classIndicesEndIt));
+                //nextImageIndex++;
+                //if (nextImageIndex == imageNames.size()) {
+                    //nextImageIndex = 0;
+                //}
+
+                ++inputImagesBeginIt;
+                ++inputImagesEndIt;
+                ++classIndicesBeginIt;
+                ++classIndicesEndIt;
+
+                if (inputImagesEndIt == inputDataVector.end()) {
+                    inputImagesBeginIt = inputDataVector.begin();
+                    inputImagesEndIt = inputImagesBeginIt;
+                    std::advance(inputImagesEndIt, FLAGS_nireq_per_batch);
+
+                    classIndicesBeginIt = classIndices.begin();
+                    classIndicesEndIt = classIndicesBeginIt;
+                    std::advance(classIndicesEndIt, FLAGS_nireq_per_batch);
                 }
+
             }
 
             //--- Waiting for free input slot or output data available. Function will return immediately if any of them
@@ -308,58 +335,61 @@ int main(int argc, char* argv[]) {
                 if (!classificationResult.metaData) {
                     throw std::invalid_argument("Renderer: metadata is null");
                 }
-                const ClassificationImageMetaData& classificationImageMetaData =
-                    classificationResult.metaData->asRef<ClassificationImageMetaData>();
-
-                auto outputImg = classificationImageMetaData.img;
-
-                if (outputImg.empty()) {
-                    throw std::invalid_argument("Renderer: image provided in metadata is empty");
-                }
-                PredictionResult predictionResult = PredictionResult::Incorrect;
-                std::string label = classificationResult.topLabels.front().label;
-                if (!FLAGS_gt.empty()) {
-                    for (size_t i = 0; i < FLAGS_nt; i++) {
-                        unsigned predictedClass = classificationResult.topLabels[i].id;
-                        if (predictedClass == classificationImageMetaData.groundTruthId) {
-                            predictionResult = PredictionResult::Correct;
-                            correctPredictionsCount++;
-                            label = classificationResult.topLabels[i].label;
-                            break;
-                        }
+                const ClassificationImageBatchMetaData& classificationImageBatchMetaData =
+                    classificationResult.metaData->asRef<ClassificationImageBatchMetaData>();
+
+                //auto outputImg = classificationImageMetaData.img;
+                const std::vector<ClassificationImageMetaData> &outputImagesMD = classificationImageBatchMetaData.metadatas;
+                for (const ClassificationImageMetaData &classificationImageMetaData : outputImagesMD) {
+                    auto outputImg = classificationImageMetaData.img;
+                    if (outputImg.empty()) {
+                        throw std::invalid_argument("Renderer: image provided in metadata is empty");
                     }
-                } else {
-                    predictionResult = PredictionResult::Unknown;
-                }
-                framesNum++;
-                gridMat.updateMat(outputImg, label, predictionResult);
-                accuracy = static_cast<double>(correctPredictionsCount) / framesNum;
-                gridMat.textUpdate(metrics,
-                                   classificationResult.metaData->asRef<ImageMetaData>().timeStamp,
-                                   accuracy,
-                                   FLAGS_nt,
-                                   isTestMode,
-                                   !FLAGS_gt.empty(),
-                                   presenter);
-                renderMetrics.update(renderingStart);
-                elapsedSeconds = std::chrono::steady_clock::now() - startTime;
-                if (!FLAGS_no_show) {
-                    cv::imshow("classification_demo", gridMat.outImg);
-                    //--- Processing keyboard events
-                    int key = cv::waitKey(1);
-                    if (27 == key || 'q' == key || 'Q' == key) {  // Esc
-                        keepRunning = false;
-                    } else if (32 == key || 'r' == key ||
-                               'R' == key) {  // press space or r to restart testing if needed
-                        isTestMode = true;
-                        framesNum = 0;
-                        framesNumOnCalculationStart = 0;
-                        correctPredictionsCount = 0;
-                        accuracy = 0;
-                        elapsedSeconds = std::chrono::steady_clock::duration(0);
-                        startTime = std::chrono::steady_clock::now();
+                    PredictionResult predictionResult = PredictionResult::Incorrect;
+                    std::string label = classificationResult.topLabels.front().label;
+                    if (!FLAGS_gt.empty()) {
+                        for (size_t i = 0; i < FLAGS_nt; i++) {
+                            unsigned predictedClass = classificationResult.topLabels[i].id;
+                            if (predictedClass == classificationImageMetaData.groundTruthId) {
+                                predictionResult = PredictionResult::Correct;
+                                correctPredictionsCount++;
+                                label = classificationResult.topLabels[i].label;
+                                break;
+                            }
+                        }
                     } else {
-                        presenter.handleKey(key);
+                        predictionResult = PredictionResult::Unknown;
+                    }
+                    framesNum += 1;
+                    gridMat.updateMat(outputImg, label, predictionResult);
+                    accuracy = static_cast<double>(correctPredictionsCount) / framesNum;
+                    gridMat.textUpdate(metrics,
+                                       classificationResult.metaData->asRef<ImageMetaData>().timeStamp,
+                                       accuracy,
+                                       FLAGS_nt,
+                                       isTestMode,
+                                       !FLAGS_gt.empty(),
+                                       presenter);
+                    renderMetrics.update(renderingStart);
+                    elapsedSeconds = std::chrono::steady_clock::now() - startTime;
+                    if (!FLAGS_no_show) {
+                        cv::imshow("classification_demo", gridMat.outImg);
+                        //--- Processing keyboard events
+                        int key = cv::waitKey(1);
+                        if (27 == key || 'q' == key || 'Q' == key) {  // Esc
+                            keepRunning = false;
+                        } else if (32 == key || 'r' == key ||
+                                   'R' == key) {  // press space or r to restart testing if needed
+                            isTestMode = true;
+                            framesNum = 0;
+                            framesNumOnCalculationStart = 0;
+                            correctPredictionsCount = 0;
+                            accuracy = 0;
+                            elapsedSeconds = std::chrono::steady_clock::duration(0);
+                            startTime = std::chrono::steady_clock::now();
+                        } else {
+                            presenter.handleKey(key);
+                        }
                     }
                 }
             }
diff --git a/demos/common/cpp/models/include/models/image_model.h b/demos/common/cpp/models/include/models/image_model.h
index aa8ab609b9e..707c16d0820 100644
--- a/demos/common/cpp/models/include/models/image_model.h
+++ b/demos/common/cpp/models/include/models/image_model.h
@@ -38,7 +38,9 @@ class ImageModel : public ModelBase {
     ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = "");
 
     std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
-
+    std::shared_ptr<InternalModelData> preprocess(std::vector<InputData>::iterator inputDataBegin,
+                                                  std::vector<InputData>::iterator inputDataEnd,
+                                                  ov::InferRequest& request) override;
 protected:
     bool useAutoResize;
 
diff --git a/demos/common/cpp/models/include/models/model_base.h b/demos/common/cpp/models/include/models/model_base.h
index 00e5a3bc104..2487871cd5a 100644
--- a/demos/common/cpp/models/include/models/model_base.h
+++ b/demos/common/cpp/models/include/models/model_base.h
@@ -40,6 +40,9 @@ class ModelBase {
     virtual ~ModelBase() {}
 
     virtual std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) = 0;
+    virtual std::shared_ptr<InternalModelData> preprocess(std::vector<InputData>::iterator inputDataBegin,
+                                                          std::vector<InputData>::iterator inputDataEnd,
+                                                          ov::InferRequest& request) {};
     virtual ov::CompiledModel compileModel(const ModelConfig& config, ov::Core& core);
     virtual void onLoadCompleted(const std::vector<ov::InferRequest>& requests) {}
     virtual std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) = 0;
diff --git a/demos/common/cpp/models/src/image_model.cpp b/demos/common/cpp/models/src/image_model.cpp
index 817c94d9c79..3271f8b8fed 100644
--- a/demos/common/cpp/models/src/image_model.cpp
+++ b/demos/common/cpp/models/src/image_model.cpp
@@ -32,6 +32,60 @@ ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, con
     : ModelBase(modelFileName, layout),
       useAutoResize(useAutoResize) {}
 
+std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<InputData>::iterator inputDataBegin,
+                                                          std::vector<InputData>::iterator inputDataEnd,
+                                                          ov::InferRequest& request) {
+
+    const ov::Tensor& frameTensor = request.get_tensor(inputsNames[0]);  // first input should be image
+    const ov::Shape& tensorShape = frameTensor.get_shape();
+    const ov::Layout layout("NHWC");
+    const size_t batch = tensorShape[ov::layout::batch_idx(layout)];
+    const size_t width = tensorShape[ov::layout::width_idx(layout)];
+    const size_t height = tensorShape[ov::layout::height_idx(layout)];
+    const size_t channels = tensorShape[ov::layout::channels_idx(layout)];
+
+    char* memoryBlob = nullptr;
+    size_t image_index = 0;
+    bool isMatFloat = false;
+    for (auto inputDataIt = inputDataBegin; inputDataIt != inputDataEnd; ++inputDataIt ) {
+        const auto& origImg = inputDataIt->asRef<ImageInputData>().inputImage;
+        auto img = inputTransform(origImg);
+
+        auto matType = mat.type() & CV_MAT_DEPTH_MASK;
+        if (matType != CV_8U && matType != CV_32F) {
+            throw std::runtime_error("Unsupported mat type for wrapping");
+        }
+        isMatFloat = matType == CV_32F;
+
+        if (!useAutoResize) {
+            // /* Resize and copy data from the image to the input tensor */
+
+            if (static_cast<size_t>(img.channels()) != channels) {
+                throw std::runtime_error(std::string("The number of channels for model input: ") +
+                                         std::to_string(channels) + " and image: " +
+                                         std::to_string(img.channels()) + " - must match");
+            }
+            if (channels != 1 && channels != 3) {
+                throw std::runtime_error("Unsupported number of channels");
+            }
+            img = resizeImageExt(img, width, height, resizeMode, interpolationMode);
+        }
+        size_t sizeInBytes = img.total() * img.elemSize();
+        if (!memoryBlob) {
+            memoryBlob = new char[sizeInBytes * batch];  // intended memory leak
+        }
+
+        // fill continuous batch
+        memcpy(memoryBlob + sizeInBytes * image_index, img.ptr(), sizeInBytes);
+        image_index++;
+    }
+
+    auto precision = isMatFloat ? ov::element::f32 : ov::element::u8;
+    auto batched_tensor = ov::Tensor(precision, ov::Shape{ batch, height, width, channels }, memoryBlob);
+    request.set_tensor(inputsNames[0],batched_tensor);
+    return std::make_shared<InternalImageModelData>(origImg.cols, origImg.rows);
+}
+
 std::shared_ptr<InternalModelData> ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
     const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
     auto img = inputTransform(origImg);
diff --git a/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h b/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h
index 308c65cc8b8..28b3820a65f 100644
--- a/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h
+++ b/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h
@@ -75,6 +75,9 @@ class AsyncPipeline {
     /// Otherwise returns unique sequential frame ID for this particular request. Same frame ID will be written in the
     /// result structure.
     virtual int64_t submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData);
+    virtual int64_t submitData(std::vector<InputData>::iterator inputDataBegin,
+                               std::vector<InputData>::iterator inputDataEnd,
+                               const std::shared_ptr<MetaData>& metaData);
 
     /// Gets available data from the queue
     /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is
diff --git a/demos/common/cpp/pipelines/include/pipelines/metadata.h b/demos/common/cpp/pipelines/include/pipelines/metadata.h
index 4bbe1456a47..64e714c09dc 100644
--- a/demos/common/cpp/pipelines/include/pipelines/metadata.h
+++ b/demos/common/cpp/pipelines/include/pipelines/metadata.h
@@ -49,3 +49,26 @@ struct ClassificationImageMetaData : public ImageMetaData {
         : ImageMetaData(img, timeStamp),
           groundTruthId(groundTruthId) {}
 };
+
+
+struct ClassificationImageBatchMetaData : public MetaData {
+    std::vector<std::shared_ptr<ClassificationImageMetaData>> metadatas;
+
+    ClassificationImageMetaData(const std::vector<cv::Mat>::iterator imagesBeginIt,
+                                const std::vector<cv::Mat>::iterator imagesEndIt,
+                                std::chrono::steady_clock::time_point timeStamp,
+                                std::vector<unsigned>::iterator groundTruthIdsBeginIt,
+                                const std::vector<unsigned>::iterator groundTruthIdsEndIt)
+        : MetaData(){
+        size_t images_count = std::distance(imagesBeginIt, imagesEndIt);
+        size_t gt_count = std::distance(groundTruthIdsBeginIt, groundTruthIdsEndIt);
+        if (images_count != gt_count) {
+            throw std::runtime_error("images.size() != groundTruthIds.size()");
+        }
+
+        metadatas.reserve(images_count);
+        for (; imagesBeginIt != imagesEndIt;) {
+            metadatas.push_back(std::make_shared<ClassificationImageMetaData>(*it++, timeStamp, *groundTruthIdsBeginIt++));
+        }
+    }
+};
diff --git a/demos/common/cpp/pipelines/src/async_pipeline.cpp b/demos/common/cpp/pipelines/src/async_pipeline.cpp
index 42ff6bbd7fe..ef799b2897f 100644
--- a/demos/common/cpp/pipelines/src/async_pipeline.cpp
+++ b/demos/common/cpp/pipelines/src/async_pipeline.cpp
@@ -76,6 +76,60 @@ void AsyncPipeline::waitForData(bool shouldKeepOrder) {
     }
 }
 
+int64_t AsyncPipeline::submitData(std::vector<InputData>::iterator inputDataBegin,
+                                  std::vector<InputData>::iterator inputDataEnd,
+                                  const std::shared_ptr<MetaData>& metaData) {
+    auto frameID = inputFrameId;
+
+    auto request = requestsPool->getIdleRequest();
+    if (!request) {
+        return -1;
+    }
+
+    auto startTime = std::chrono::steady_clock::now();
+    auto internalModelData = model->preprocess(inputDataBegin, inputDataEnd, request);
+    preprocessMetrics.update(startTime);
+
+    request.set_callback(
+        [this, request, frameID, internalModelData, metaData, startTime](std::exception_ptr ex) mutable {
+            {
+                const std::lock_guard<std::mutex> lock(mtx);
+                inferenceMetrics.update(startTime);
+                try {
+                    if (ex) {
+                        std::rethrow_exception(ex);
+                    }
+                    InferenceResult result;
+
+                    result.frameId = frameID;
+                    result.metaData = std::move(metaData);
+                    result.internalModelData = std::move(internalModelData);
+
+                    for (const auto& outName : model->getOutputsNames()) {
+                        auto tensor = request.get_tensor(outName);
+                        result.outputsData.emplace(outName, tensor);
+                    }
+
+                    completedInferenceResults.emplace(frameID, result);
+                    requestsPool->setRequestIdle(request);
+                } catch (...) {
+                    if (!callbackException) {
+                        callbackException = std::current_exception();
+                    }
+                }
+            }
+            condVar.notify_one();
+        });
+
+    inputFrameId++;
+    if (inputFrameId < 0)
+        inputFrameId = 0;
+
+    request.start_async();
+
+    return frameID;
+}
+
 int64_t AsyncPipeline::submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData) {
     auto frameID = inputFrameId;
 

From 5aa33658d69ecfb4de5c37eac958b084a7b81636 Mon Sep 17 00:00:00 2001
From: sivanov-work
Date: Mon, 27 Nov 2023 17:16:46 +0000
Subject: [PATCH 2/5] Make compiled

---
 .../cpp/main.cpp                              | 37 +++++++++++++------
 .../cpp/models/include/models/image_model.h   |  4 +-
 .../cpp/models/include/models/model_base.h    |  6 +--
 demos/common/cpp/models/src/image_model.cpp   | 14 ++++---
 .../include/pipelines/async_pipeline.h        |  4 +-
 .../pipelines/include/pipelines/metadata.h    |  4 +-
 .../cpp/pipelines/src/async_pipeline.cpp      |  4 +-
 7 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/demos/classification_benchmark_demo/cpp/main.cpp b/demos/classification_benchmark_demo/cpp/main.cpp
index c835757c9a8..665e6933464 100644
--- a/demos/classification_benchmark_demo/cpp/main.cpp
+++ b/demos/classification_benchmark_demo/cpp/main.cpp
@@ -76,7 +76,7 @@ DEFINE_string(d, "CPU", target_device_message);
 DEFINE_uint32(nthreads, 0, num_threads_message);
 DEFINE_string(nstreams, "", num_streams_message);
 DEFINE_uint32(nireq, 0, num_inf_req_message);
-DEFINE_uint32(nireq_per_batch, 2, num_inf_req_per_batch_message);
+DEFINE_uint32(nireq_per_batch, 1, num_inf_req_per_batch_message);
 DEFINE_uint32(nt, 5, ntop_message);
 DEFINE_string(res, "1280x720", image_grid_resolution_message);
 DEFINE_bool(auto_resize, false, input_resizable_message);
@@ -268,9 +268,16 @@ int main(int argc, char* argv[]) {
         std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now();
 
         // batch setup
-        std::vector<ImageInputData> inputDataVector {inputImages.begin(), inputImages.end()};
-        auto inputImagesBeginIt = inputDataVector.begin();
-        auto inputImagesEndIt = inputDataVector.begin();
+        std::vector<std::shared_ptr<ImageInputData>> inputDataVector;
+        std::transform(inputImages.begin(), inputImages.end(), std::back_inserter(inputDataVector), [](const auto &src) {
+            return std::make_shared<ImageInputData>(src);
+        });
+        auto inputImagesDataBeginIt = inputDataVector.begin();
+        auto inputImagesDataEndIt = inputImagesDataBeginIt;
+        std::advance(inputImagesDataEndIt, FLAGS_nireq_per_batch);
+
+        auto inputImagesBeginIt = inputImages.begin();
+        auto inputImagesEndIt = inputImagesBeginIt;
         std::advance(inputImagesEndIt, FLAGS_nireq_per_batch);
 
         auto classIndicesBeginIt = classIndices.begin();
@@ -298,8 +305,8 @@ int main(int argc, char* argv[]) {
             if (pipeline.isReadyToProcess()) {
                 auto imageStartTime = std::chrono::steady_clock::now();
 
-                pipeline.submitData(inputImagesBeginIt, inputImagesEndIt,
-                                    std::make_shared<ClassificationImageBatchMetaData>(inputImagesBeginIt, inputImagesEndIt
+                pipeline.submitData(inputImagesDataBeginIt, inputImagesDataEndIt,
+                                    std::make_shared<ClassificationImageBatchMetaData>(inputImagesBeginIt, inputImagesEndIt,
                                                                                   imageStartTime,
                                                                                   classIndicesBeginIt, classIndicesEndIt));
                 //nextImageIndex++;
@@ -307,16 +314,22 @@ int main(int argc, char* argv[]) {
                 //nextImageIndex = 0;
                 //}
 
+                ++inputImagesDataBeginIt;
+                ++inputImagesDataEndIt;
                 ++inputImagesBeginIt;
                 ++inputImagesEndIt;
                 ++classIndicesBeginIt;
                 ++classIndicesEndIt;
 
-                if (inputImagesEndIt == inputDataVector.end()) {
-                    inputImagesBeginIt = inputDataVector.begin();
+                if (inputImagesEndIt == inputImages.end()) {
+                    inputImagesBeginIt = inputImages.begin();
                     inputImagesEndIt = inputImagesBeginIt;
                     std::advance(inputImagesEndIt, FLAGS_nireq_per_batch);
+                    inputImagesDataBeginIt = inputDataVector.begin();
+                    inputImagesDataEndIt = inputImagesDataBeginIt;
+                    std::advance(inputImagesDataEndIt, FLAGS_nireq_per_batch);
+
                     classIndicesBeginIt = classIndices.begin();
                     classIndicesEndIt = classIndicesBeginIt;
                     std::advance(classIndicesEndIt, FLAGS_nireq_per_batch);
@@ -339,9 +352,9 @@ int main(int argc, char* argv[]) {
                     classificationResult.metaData->asRef<ClassificationImageBatchMetaData>();
 
                 //auto outputImg = classificationImageMetaData.img;
-                const std::vector<ClassificationImageMetaData> &outputImagesMD = classificationImageBatchMetaData.metadatas;
-                for (const ClassificationImageMetaData &classificationImageMetaData : outputImagesMD) {
-                    auto outputImg = classificationImageMetaData.img;
+                const std::vector<std::shared_ptr<ClassificationImageMetaData>> &outputImagesMD = classificationImageBatchMetaData.metadatas;
+                for (const std::shared_ptr<ClassificationImageMetaData> &classificationImageMetaData : outputImagesMD) {
+                    auto outputImg = classificationImageMetaData->img;
                     if (outputImg.empty()) {
                         throw std::invalid_argument("Renderer: image provided in metadata is empty");
                     }
@@ -350,7 +363,7 @@ int main(int argc, char* argv[]) {
                     if (!FLAGS_gt.empty()) {
                         for (size_t i = 0; i < FLAGS_nt; i++) {
                             unsigned predictedClass = classificationResult.topLabels[i].id;
-                            if (predictedClass == classificationImageMetaData.groundTruthId) {
+                            if (predictedClass == classificationImageMetaData->groundTruthId) {
                                 predictionResult = PredictionResult::Correct;
                                 correctPredictionsCount++;
                                 label = classificationResult.topLabels[i].label;
diff --git a/demos/common/cpp/models/include/models/image_model.h b/demos/common/cpp/models/include/models/image_model.h
index 707c16d0820..df8ceb40384 100644
--- a/demos/common/cpp/models/include/models/image_model.h
+++ b/demos/common/cpp/models/include/models/image_model.h
@@ -38,8 +38,8 @@ class ImageModel : public ModelBase {
     ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = "");
 
     std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
-    std::shared_ptr<InternalModelData> preprocess(std::vector<InputData>::iterator inputDataBegin,
-                                                  std::vector<InputData>::iterator inputDataEnd,
+    std::shared_ptr<InternalModelData> preprocess(std::vector<std::shared_ptr<InputData>>::iterator inputDataBegin,
+                                                  std::vector<std::shared_ptr<InputData>>::iterator inputDataEnd,
                                                   ov::InferRequest& request) override;
 protected:
     bool useAutoResize;
diff --git a/demos/common/cpp/models/include/models/model_base.h b/demos/common/cpp/models/include/models/model_base.h
index 2487871cd5a..d86dba8ae21 100644
--- a/demos/common/cpp/models/include/models/model_base.h
+++ b/demos/common/cpp/models/include/models/model_base.h
@@ -40,9 +40,9 @@ class ModelBase {
     virtual ~ModelBase() {}
 
     virtual std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) = 0;
-    virtual std::shared_ptr<InternalModelData> preprocess(std::vector<InputData>::iterator inputDataBegin,
-                                                          std::vector<InputData>::iterator inputDataEnd,
-                                                          ov::InferRequest& request) {};
+    virtual std::shared_ptr<InternalModelData> preprocess(std::vector<std::shared_ptr<InputData>>::iterator inputDataBegin,
+                                                          std::vector<std::shared_ptr<InputData>>::iterator inputDataEnd,
+                                                          ov::InferRequest& request) {return {};};
     virtual ov::CompiledModel compileModel(const ModelConfig& config, ov::Core& core);
     virtual void onLoadCompleted(const std::vector<ov::InferRequest>& requests) {}
     virtual std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) = 0;
diff --git a/demos/common/cpp/models/src/image_model.cpp b/demos/common/cpp/models/src/image_model.cpp
index 3271f8b8fed..40de9f2b89e 100644
--- a/demos/common/cpp/models/src/image_model.cpp
+++ b/demos/common/cpp/models/src/image_model.cpp
@@ -32,8 +32,8 @@ ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, con
     : ModelBase(modelFileName, layout),
       useAutoResize(useAutoResize) {}
 
-std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<InputData>::iterator inputDataBegin,
-                                                          std::vector<InputData>::iterator inputDataEnd,
+std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<std::shared_ptr<InputData>>::iterator inputDataBegin,
+                                                          std::vector<std::shared_ptr<InputData>>::iterator inputDataEnd,
                                                           ov::InferRequest& request) {
 
     const ov::Tensor& frameTensor = request.get_tensor(inputsNames[0]);  // first input should be image
@@ -47,11 +47,15 @@ std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<std::shar
     char* memoryBlob = nullptr;
     size_t image_index = 0;
     bool isMatFloat = false;
+    int origImg_cols = 0;
+    int origImg_rows = 0;
     for (auto inputDataIt = inputDataBegin; inputDataIt != inputDataEnd; ++inputDataIt ) {
-        const auto& origImg = inputDataIt->asRef<ImageInputData>().inputImage;
+        const auto& origImg = (*inputDataIt)->asRef<ImageInputData>().inputImage;
+        origImg_cols = origImg.cols;
+        origImg_rows = origImg.rows;
         auto img = inputTransform(origImg);
 
-        auto matType = mat.type() & CV_MAT_DEPTH_MASK;
+        auto matType = img.type() & CV_MAT_DEPTH_MASK;
         if (matType != CV_8U && matType != CV_32F) {
             throw std::runtime_error("Unsupported mat type for wrapping");
         }
@@ -83,7 +87,7 @@ std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<std::shar
     auto precision = isMatFloat ? ov::element::f32 : ov::element::u8;
     auto batched_tensor = ov::Tensor(precision, ov::Shape{ batch, height, width, channels }, memoryBlob);
     request.set_tensor(inputsNames[0],batched_tensor);
-    return std::make_shared<InternalImageModelData>(origImg.cols, origImg.rows);
+    return std::make_shared<InternalImageModelData>(origImg_cols, origImg_rows);
 }
 
 std::shared_ptr<InternalModelData> ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
diff --git a/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h b/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h
index 28b3820a65f..52c74eee542 100644
--- a/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h
+++ b/demos/common/cpp/pipelines/include/pipelines/async_pipeline.h
@@ -75,8 +75,8 @@ class AsyncPipeline {
     /// Otherwise returns unique sequential frame ID for this particular request. Same frame ID will be written in the
     /// result structure.
     virtual int64_t submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData);
-    virtual int64_t submitData(std::vector<InputData>::iterator inputDataBegin,
-                               std::vector<InputData>::iterator inputDataEnd,
+    virtual int64_t submitData(std::vector<std::shared_ptr<InputData>>::iterator inputDataBegin,
+                               std::vector<std::shared_ptr<InputData>>::iterator inputDataEnd,
                                const std::shared_ptr<MetaData>& metaData);
 
     /// Gets available data from the queue
diff --git a/demos/common/cpp/pipelines/include/pipelines/metadata.h b/demos/common/cpp/pipelines/include/pipelines/metadata.h
index 64e714c09dc..5cdecd4629a 100644
--- a/demos/common/cpp/pipelines/include/pipelines/metadata.h
+++ b/demos/common/cpp/pipelines/include/pipelines/metadata.h
@@ -54,7 +54,7 @@ struct ClassificationImageBatchMetaData : public MetaData {
     std::vector<std::shared_ptr<ClassificationImageMetaData>> metadatas;
 
-    ClassificationImageMetaData(const std::vector<cv::Mat>::iterator imagesBeginIt,
+    ClassificationImageBatchMetaData(std::vector<cv::Mat>::iterator imagesBeginIt,
                                 const std::vector<cv::Mat>::iterator imagesEndIt,
                                 std::chrono::steady_clock::time_point timeStamp,
                                 std::vector<unsigned>::iterator groundTruthIdsBeginIt,
@@ -68,7 +68,7 @@ struct ClassificationImageBatchMetaData : public MetaData {
 
         metadatas.reserve(images_count);
         for (; imagesBeginIt != imagesEndIt;) {
-            metadatas.push_back(std::make_shared<ClassificationImageMetaData>(*it++, timeStamp, *groundTruthIdsBeginIt++));
+            metadatas.push_back(std::make_shared<ClassificationImageMetaData>(*imagesBeginIt++, timeStamp, *groundTruthIdsBeginIt++));
         }
     }
 };
diff --git a/demos/common/cpp/pipelines/src/async_pipeline.cpp b/demos/common/cpp/pipelines/src/async_pipeline.cpp
index ef799b2897f..34efb937ec4 100644
--- a/demos/common/cpp/pipelines/src/async_pipeline.cpp
+++ b/demos/common/cpp/pipelines/src/async_pipeline.cpp
@@ -76,8 +76,8 @@ void AsyncPipeline::waitForData(bool shouldKeepOrder) {
     }
 }
 
-int64_t AsyncPipeline::submitData(std::vector<InputData>::iterator inputDataBegin,
-                                  std::vector<InputData>::iterator inputDataEnd,
+int64_t AsyncPipeline::submitData(std::vector<std::shared_ptr<InputData>>::iterator inputDataBegin,
+                                  std::vector<std::shared_ptr<InputData>>::iterator inputDataEnd,
                                   const std::shared_ptr<MetaData>& metaData) {
     auto frameID = inputFrameId;
 

From 55f5d4313839945f5f2f58c793f1c1cc31d39f90 Mon Sep 17 00:00:00 2001
From: sivanov-work
Date: Tue, 28 Nov 2023 11:51:15 +0000
Subject: [PATCH 3/5] Add config & logs

---
 .../cpp/main.cpp                              | 46 ++++++++++++++++++-
 .../cpp/models/src/classification_model.cpp   |  2 +-
 .../cpp/pipelines/src/async_pipeline.cpp      |  3 ++
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/demos/classification_benchmark_demo/cpp/main.cpp b/demos/classification_benchmark_demo/cpp/main.cpp
index 665e6933464..3db6c86b03d 100644
--- a/demos/classification_benchmark_demo/cpp/main.cpp
+++ b/demos/classification_benchmark_demo/cpp/main.cpp
@@ -86,6 +86,7 @@ DEFINE_string(u, "", utilization_monitors_message);
 DEFINE_bool(reverse_input_channels, false, reverse_input_channels_message);
 DEFINE_string(mean_values, "", mean_values_message);
 DEFINE_string(scale_values, "", scale_values_message);
+DEFINE_string(config, "", "Path to the configuration file (optional)");
 
 static void showUsage() {
     std::cout << std::endl;
@@ -111,6 +112,7 @@ static void showUsage() {
     std::cout << "    -reverse_input_channels   " << reverse_input_channels_message << std::endl;
     std::cout << "    -mean_values              " << mean_values_message << std::endl;
     std::cout << "    -scale_values             " << scale_values_message << std::endl;
    std::cout << "    -config                   " << "Path to config file" << std::endl;
 }
 
 bool ParseAndCheckCommandLine(int argc, char* argv[]) {
@@ -137,6 +139,44 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
     return true;
 }
 
+
+std::map<std::string, std::string> parseConfigFile() {
+    std::map<std::string, std::string> config;
+
+    std::ifstream file(FLAGS_config);
+    if(!file.is_open()) {
+        std::cerr << "Can't open file " << FLAGS_config << " for read" << std::endl;
+        exit(-1);
+    }
+
+    std::string option;
+    while (std::getline(file, option)) {
+        if (option.empty() || option[0] == '#') {
+            continue;
+        }
+        size_t spacePos = option.find_first_of(" \t\n\r");
+        if(spacePos == std::string::npos) {
+            std::cerr << "Invalid config parameter format. Space separator required here: " << option;
+            exit(-1);
+        }
+
+        std::string key, value;
+        if (spacePos != std::string::npos) {
+            key = option.substr(0, spacePos);
+            size_t valueStart = option.find_first_not_of(" \t\n\r", spacePos);
+            if(valueStart == std::string::npos) {
+                std::cerr << "An invalid config parameter value detected, it mustn't be empty: " << option;
+                exit(-1);
+            }
+            size_t valueEnd = option.find_last_not_of(" \t\n\r");
+            value = option.substr(valueStart, valueEnd - valueStart + 1);
+            config[key] = value;
+        }
+    }
+
+    return config;
+}
+
 cv::Mat centerSquareCrop(const cv::Mat& image) {
     if (image.cols >= image.rows) {
         return image(cv::Rect((image.cols - image.rows) / 2, 0, image.rows, image.rows));
@@ -235,6 +275,10 @@ int main(int argc, char* argv[]) {
         slog::info << ov::get_openvino_version() << slog::endl;
         ov::Core core;
+        if (!FLAGS_config.empty()) {
+            const auto configs = parseConfigFile();
+            core.set_property(FLAGS_d, {configs.begin(), configs.end()});
+        }
 
         std::unique_ptr<ClassificationModel> model(new ClassificationModel(FLAGS_m, FLAGS_nt, FLAGS_auto_resize, labels, FLAGS_layout));
         model->setInputsPreprocessing(FLAGS_reverse_input_channels, FLAGS_mean_values, FLAGS_scale_values);
@@ -377,7 +421,7 @@ int main(int argc, char* argv[]) {
                     gridMat.updateMat(outputImg, label, predictionResult);
                     accuracy = static_cast<double>(correctPredictionsCount) / framesNum;
                     gridMat.textUpdate(metrics,
-                                       classificationResult.metaData->asRef<ImageMetaData>().timeStamp,
+                                       classificationImageMetaData->timeStamp,
                                        accuracy,
                                        FLAGS_nt,
                                        isTestMode,
diff --git a/demos/common/cpp/models/src/classification_model.cpp b/demos/common/cpp/models/src/classification_model.cpp
index be80d897747..89e20c0b6b7 100644
--- a/demos/common/cpp/models/src/classification_model.cpp
+++ b/demos/common/cpp/models/src/classification_model.cpp
@@ -55,7 +55,7 @@ std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& in
     for (size_t i = 0; i < scoresTensor.get_size(); ++i) {
         int ind = indicesPtr[i];
         if (ind < 0 || ind >= static_cast<int>(labels.size())) {
-            throw std::runtime_error("Invalid index for the class label is found during postprocessing");
+            throw std::runtime_error(std::string("Invalid index: ") + std::to_string(ind) + " for the class label is found during postprocessing, label size: " + std::to_string(labels.size()));
         }
         result->topLabels.emplace_back(ind, labels[ind], scoresPtr[i]);
     }
diff --git a/demos/common/cpp/pipelines/src/async_pipeline.cpp b/demos/common/cpp/pipelines/src/async_pipeline.cpp
index 34efb937ec4..5833ba705cc 100644
--- a/demos/common/cpp/pipelines/src/async_pipeline.cpp
+++ b/demos/common/cpp/pipelines/src/async_pipeline.cpp
@@ -95,6 +95,7 @@ int64_t AsyncPipeline::submitData(std::vector<std::shared_ptr<InputData>>::itera
             {
                 const std::lock_guard<std::mutex> lock(mtx);
                 inferenceMetrics.update(startTime);
+                std::cout << "callback has been called" << std::endl;
                 try {
                     if (ex) {
                         std::rethrow_exception(ex);
@@ -111,7 +112,9 @@ int64_t AsyncPipeline::submitData(std::vector<std::shared_ptr<InputData>>::itera
                     }
 
                     completedInferenceResults.emplace(frameID, result);
+                    std::cout << "before setRequestIdle: " << std::endl;
                     requestsPool->setRequestIdle(request);
+                    std::cout << "after setRequestIdle: " << std::endl;
                 } catch (...) {
                     if (!callbackException) {
                         callbackException = std::current_exception();

From 3128d4f0e346bb3429db994d062fb5038d5f41e6 Mon Sep 17 00:00:00 2001
From: sivanov-work
Date: Thu, 30 Nov 2023 16:44:09 +0000
Subject: [PATCH 4/5] stash

---
 demos/common/cpp/models/src/classification_model.cpp | 11 +++++++++++
 demos/common/cpp/models/src/image_model.cpp          |  6 ++++--
 demos/common/cpp/pipelines/src/async_pipeline.cpp    |  1 +
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/demos/common/cpp/models/src/classification_model.cpp b/demos/common/cpp/models/src/classification_model.cpp
index 89e20c0b6b7..62342305572 100644
--- a/demos/common/cpp/models/src/classification_model.cpp
+++ b/demos/common/cpp/models/src/classification_model.cpp
@@ -44,9 +44,19 @@ ClassificationModel::ClassificationModel(const std::string& modelFileName,
 std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& infResult) {
     const ov::Tensor& indicesTensor = infResult.outputsData.find(outputsNames[0])->second;
+    const void* indicesTensorBuffer = reinterpret_cast<const void*>(indicesTensor.data());
+    std::cout << "-S- indices tensor data: " << indicesTensorBuffer << ", size: " << indicesTensor.get_size() << std::endl;
     const int* indicesPtr = indicesTensor.data<int>();
+    for (int i = 0; i < indicesTensor.get_size(); i++){
+        std::cout << "-S- index[" << i << "]: " << indicesPtr[i] << std::endl;
+    }
+
     const ov::Tensor& scoresTensor = infResult.outputsData.find(outputsNames[1])->second;
     const float* scoresPtr = scoresTensor.data<float>();
+    const void* scoresTensorBuffer = reinterpret_cast<const void*>(scoresTensor.data());
+    std::cout << "-S- scores tensor data: " << scoresTensorBuffer << ", size: " << scoresTensor.get_size() << std::endl;
 
     ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData);
     auto retVal = std::unique_ptr<ResultBase>(result);
@@ -54,6 +64,7 @@ std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& in
     result->topLabels.reserve(scoresTensor.get_size());
     for (size_t i = 0; i < scoresTensor.get_size(); ++i) {
         int ind = indicesPtr[i];
+        std::cout << "-S- index???[" << i << "]: " << ind << ", labels size: " << labels.size() << std::endl;
         if (ind < 0 || ind >= static_cast<int>(labels.size())) {
             throw std::runtime_error(std::string("Invalid index: ") + std::to_string(ind) + " for the class label is found during postprocessing, label size: " + std::to_string(labels.size()));
         }
diff --git a/demos/common/cpp/models/src/image_model.cpp b/demos/common/cpp/models/src/image_model.cpp
index 40de9f2b89e..688bdf7a55c 100644
--- a/demos/common/cpp/models/src/image_model.cpp
+++ b/demos/common/cpp/models/src/image_model.cpp
@@ -43,7 +43,7 @@ std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<std::shar
     char* memoryBlob = nullptr;
     size_t image_index = 0;
     bool isMatFloat = false;
-    int origImg_cols = 0;
-    int origImg_rows = 0;
+    int origImg_cols = 0;
+    int origImg_rows = 0;
+
     for (auto inputDataIt = inputDataBegin; inputDataIt != inputDataEnd; ++inputDataIt ) {
         const auto& origImg = (*inputDataIt)->asRef<ImageInputData>().inputImage;
         origImg_cols = origImg.cols;
@@ -75,7 +78,6 @@ std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<std::shar
             img = resizeImageExt(img, width, height, resizeMode, interpolationMode);
         }
         size_t sizeInBytes = img.total() * img.elemSize();
-
         if (!memoryBlob) {
             memoryBlob = new char[sizeInBytes * batch];  // intended memory leak
         }
@@ -87,7 +89,7 @@ std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<std::shar
     auto precision = isMatFloat ? ov::element::f32 : ov::element::u8;
     auto batched_tensor = ov::Tensor(precision, ov::Shape{ batch, height, width, channels }, memoryBlob);
     request.set_tensor(inputsNames[0],batched_tensor);
     return std::make_shared<InternalImageModelData>(origImg_cols, origImg_rows);
 }
diff --git a/demos/common/cpp/pipelines/src/async_pipeline.cpp b/demos/common/cpp/pipelines/src/async_pipeline.cpp
index 5833ba705cc..19c977efab4 100644
--- a/demos/common/cpp/pipelines/src/async_pipeline.cpp
+++ b/demos/common/cpp/pipelines/src/async_pipeline.cpp
@@ -108,6 +108,7 @@ int64_t AsyncPipeline::submitData(std::vector<std::shared_ptr<InputData>>::itera
 
                     for (const auto& outName : model->getOutputsNames()) {
                         auto tensor = request.get_tensor(outName);
+                        std::cout << "-S- output tensorName: " << outName << ", tensor ptr: " << reinterpret_cast<const void*>(tensor.data()) << ", size: " << tensor.get_size() << std::endl;
                         result.outputsData.emplace(outName, tensor);
                     }
 

From 700c86349c9de437e68de0afdd6636265c572dd8 Mon Sep 17 00:00:00 2001
From: sivanov-work
Date: Thu, 14 Dec 2023 15:38:56 +0000
Subject: [PATCH 5/5] Adapt object_detection_demo for batch processing

---
 .../include/models/detection_model_ssd.h      |   3 +
 .../cpp/models/src/classification_model.cpp   |  11 -
 .../cpp/models/src/detection_model_ssd.cpp    |  15 ++
 demos/common/cpp/models/src/image_model.cpp   |   7 +-
 .../pipelines/include/pipelines/metadata.h    |  25 +++
 .../cpp/pipelines/src/async_pipeline.cpp      |   4 -
 demos/object_detection_demo/cpp/main.cpp      | 201 ++++++++++++++----
 7 files changed, 207 insertions(+), 59 deletions(-)

diff --git a/demos/common/cpp/models/include/models/detection_model_ssd.h b/demos/common/cpp/models/include/models/detection_model_ssd.h
index 5fe34dfd66d..da0990fd9b4 100644
--- a/demos/common/cpp/models/include/models/detection_model_ssd.h
+++ b/demos/common/cpp/models/include/models/detection_model_ssd.h
@@ -50,6 +50,9 @@ class ModelSSD : public DetectionModel {
              const std::string& layout = "");
 
     std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+    std::shared_ptr<InternalModelData> preprocess(std::vector<std::shared_ptr<InputData>>::iterator inputDataBegin,
+                                                  std::vector<std::shared_ptr<InputData>>::iterator inputDataEnd,
+                                                  ov::InferRequest& request) override;
     std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
 
 protected:
diff --git a/demos/common/cpp/models/src/classification_model.cpp b/demos/common/cpp/models/src/classification_model.cpp
index 62342305572..89e20c0b6b7 100644
--- a/demos/common/cpp/models/src/classification_model.cpp
+++ b/demos/common/cpp/models/src/classification_model.cpp
@@ -44,19 +44,9 @@ ClassificationModel::ClassificationModel(const std::string& modelFileName,
 std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& infResult) {
     const ov::Tensor& indicesTensor = infResult.outputsData.find(outputsNames[0])->second;
-    const void* indicesTensorBuffer = reinterpret_cast<const void*>(indicesTensor.data());
-    std::cout << "-S- indices tensor data: " << indicesTensorBuffer << ", size: " << indicesTensor.get_size() << std::endl;
     const int* indicesPtr = indicesTensor.data<int>();
-    for (int i = 0; i < indicesTensor.get_size(); i++){
-        std::cout << "-S- index[" << i << "]: " << indicesPtr[i] << std::endl;
-    }
-
     const ov::Tensor& scoresTensor = infResult.outputsData.find(outputsNames[1])->second;
     const float* scoresPtr = scoresTensor.data<float>();
-    const void* scoresTensorBuffer = reinterpret_cast<const void*>(scoresTensor.data());
-    std::cout << "-S- scores tensor data: " << scoresTensorBuffer << ", size: " << scoresTensor.get_size() << std::endl;
 
     ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData);
     auto retVal = std::unique_ptr<ResultBase>(result);
@@ -64,7 +54,6 @@ std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& in
     result->topLabels.reserve(scoresTensor.get_size());
     for (size_t i = 0; i < scoresTensor.get_size(); ++i) {
         int ind = indicesPtr[i];
-        std::cout << "-S- index???[" << i << "]: " << ind << ", labels size: " << labels.size() << std::endl;
         if (ind < 0 || ind >= static_cast<int>(labels.size())) {
             throw std::runtime_error(std::string("Invalid index: ") + std::to_string(ind) + " for the class label is found during postprocessing, label size: " + std::to_string(labels.size()));
         }
diff --git a/demos/common/cpp/models/src/detection_model_ssd.cpp b/demos/common/cpp/models/src/detection_model_ssd.cpp
index 992b41ffe6a..394c422ac93 100644
--- a/demos/common/cpp/models/src/detection_model_ssd.cpp
+++ b/demos/common/cpp/models/src/detection_model_ssd.cpp
@@ -53,6 +53,21 @@ std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputDa
     return DetectionModel::preprocess(inputData, request);
 }
 
+std::shared_ptr<InternalModelData> ModelSSD::preprocess(std::vector<std::shared_ptr<InputData>>::iterator inputDataBegin,
+                                                        std::vector<std::shared_ptr<InputData>>::iterator inputDataEnd,
+                                                        ov::InferRequest& request) {
+    if (inputsNames.size() > 1) {
+        const auto& imageInfoTensor = request.get_tensor(inputsNames[1]);
+        const auto info = imageInfoTensor.data<float>();
+        info[0] = static_cast<float>(netInputHeight);
+        info[1] = static_cast<float>(netInputWidth);
+        info[2] = 1;
+        request.set_tensor(inputsNames[1], imageInfoTensor);
+    }
+
+    return DetectionModel::preprocess(inputDataBegin, inputDataEnd, request);
+}
+
 std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) {
     return outputsNames.size() > 1 ? postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult);
 }
diff --git a/demos/common/cpp/models/src/image_model.cpp b/demos/common/cpp/models/src/image_model.cpp
index 688bdf7a55c..94541d737dd 100644
--- a/demos/common/cpp/models/src/image_model.cpp
+++ b/demos/common/cpp/models/src/image_model.cpp
@@ -43,12 +43,15 @@ std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<std::shar
     char* memoryBlob = nullptr;
     size_t image_index = 0;
     bool isMatFloat = false;
     int origImg_cols = 0;
     int origImg_rows = 0;
+
+    size_t batch = std::distance(inputDataBegin, inputDataEnd);
     for (auto inputDataIt = inputDataBegin; inputDataIt != inputDataEnd; ++inputDataIt ) {
         const auto& origImg = (*inputDataIt)->asRef<ImageInputData>().inputImage;
         origImg_cols = origImg.cols;
@@ -75,7 +78,6 @@ std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<std::shar
             img = resizeImageExt(img, width, height, resizeMode, interpolationMode);
         }
         size_t sizeInBytes = img.total() * img.elemSize();
-
         if (!memoryBlob) {
             memoryBlob = new char[sizeInBytes * batch];  // intended memory leak
         }
diff --git a/demos/common/cpp/pipelines/include/pipelines/metadata.h b/demos/common/cpp/pipelines/include/pipelines/metadata.h
index 5cdecd4629a..... 100644
--- a/demos/common/cpp/pipelines/include/pipelines/metadata.h
+++ b/demos/common/cpp/pipelines/include/pipelines/metadata.h
@@ -33,6 +33,31 @@ struct ImageMetaData : public MetaData {
 
+struct ImageBatchMetaData : public MetaData {
+    std::chrono::steady_clock::time_point timeStamp;
+    std::vector<std::shared_ptr<ImageMetaData>> metadatas;
+
+    ImageBatchMetaData() {}
+
+    ImageBatchMetaData(std::vector<cv::Mat>::iterator imagesBeginIt,
+                       const std::vector<cv::Mat>::iterator imagesEndIt,
+                       std::chrono::steady_clock::time_point timeStamp) : timeStamp(timeStamp) {
+        size_t images_count = std::distance(imagesBeginIt, imagesEndIt);
+        metadatas.reserve(images_count);
+        for (; imagesBeginIt != imagesEndIt;) {
+            metadatas.push_back(std::make_shared<ImageMetaData>(*imagesBeginIt++, timeStamp));
+        }
+    }
+
+    void add(cv::Mat img, std::chrono::steady_clock::time_point timeStamp) {
+        metadatas.push_back(std::make_shared<ImageMetaData>(img, timeStamp));
+        this->timeStamp = timeStamp;
+    }
+    void clear() {
+        metadatas.clear();
+    }
+};
+
 struct ClassificationImageMetaData : public ImageMetaData {
     unsigned int groundTruthId;
 
diff --git a/demos/common/cpp/pipelines/src/async_pipeline.cpp b/demos/common/cpp/pipelines/src/async_pipeline.cpp
index 19c977efab4..34efb937ec4 100644
--- a/demos/common/cpp/pipelines/src/async_pipeline.cpp
+++ b/demos/common/cpp/pipelines/src/async_pipeline.cpp
@@ -95,7 +95,6 @@ int64_t AsyncPipeline::submitData(std::vector<std::shared_ptr<InputData>>::itera
             {
                 const std::lock_guard<std::mutex> lock(mtx);
                 inferenceMetrics.update(startTime);
-                std::cout << "callback has been called" << std::endl;
                 try {
                     if (ex) {
                         std::rethrow_exception(ex);
@@ -108,14 +107,11 @@ int64_t AsyncPipeline::submitData(std::vector<std::shared_ptr<InputData>>::itera
 
                     for (const auto& outName : model->getOutputsNames()) {
                         auto tensor = request.get_tensor(outName);
-                        std::cout << "-S- output tensorName: " << outName << ", tensor ptr: " << reinterpret_cast<const void*>(tensor.data()) << ", size: " << tensor.get_size() << std::endl;
                         result.outputsData.emplace(outName, tensor);
                     }
 
                     completedInferenceResults.emplace(frameID, result);
-                    std::cout << "before setRequestIdle: " << std::endl;
                     requestsPool->setRequestIdle(request);
-                    std::cout << "after setRequestIdle: " << std::endl;
                 } catch (...) {
                     if (!callbackException) {
                         callbackException = std::current_exception();
diff --git a/demos/object_detection_demo/cpp/main.cpp b/demos/object_detection_demo/cpp/main.cpp
index b69ceffbd2a..97cad98b21e 100644
--- a/demos/object_detection_demo/cpp/main.cpp
+++ b/demos/object_detection_demo/cpp/main.cpp
@@ -21,6 +21,7 @@
 #include <memory>
 #include <stdexcept>
 #include <string>
+#include <fstream>
 #include <utility>
 #include <vector>
 #include <exception>
@@ -104,6 +105,7 @@ static const char mean_values_message[] =
     "Optional. Normalize input by subtracting the mean values per channel. Example: \"255.0 255.0 255.0\"";
 static const char scale_values_message[] = "Optional. Divide input by scale values per channel. Division is applied "
                                            "after mean values subtraction. Example: \"255.0 255.0 255.0\"";
+static const char num_inf_req_per_batch_message[] = "Optional. Number of infer requests per batch.";
 
 DEFINE_bool(h, false, help_message);
 DEFINE_string(at, "", at_message);
@@ -115,6 +117,7 @@ DEFINE_bool(r, false, raw_output_message);
 DEFINE_double(t, 0.5, thresh_output_message);
 DEFINE_double(iou_t, 0.5, iou_thresh_output_message);
 DEFINE_bool(auto_resize, false, input_resizable_message);
+DEFINE_uint32(nireq_per_batch, 1, num_inf_req_per_batch_message);
 DEFINE_uint32(nireq, 0, nireq_message);
 DEFINE_uint32(nthreads, 0, num_threads_message);
 DEFINE_string(nstreams, "", num_streams_message);
@@ -127,6 +130,7 @@ DEFINE_string(masks, "", masks_message);
 DEFINE_bool(reverse_input_channels, false, reverse_input_channels_message);
 DEFINE_string(mean_values, "", mean_values_message);
 DEFINE_string(scale_values, "", scale_values_message);
+DEFINE_string(config, "", "Path to the configuration file (optional)");
 
 /**
  * \brief This function shows a help message
@@ -262,6 +266,100 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
     return true;
 }
 
+
+std::map<std::string, std::string> parseConfigFile() {
+    std::map<std::string, std::string> config;
+
+    std::ifstream file(FLAGS_config);
+    if(!file.is_open()) {
+        std::cerr << "Can't open file " << FLAGS_config << " for read" << std::endl;
+        exit(-1);
+    }
+
+    std::string option;
+    while (std::getline(file, option)) {
+        if (option.empty() || option[0] == '#') {
+            continue;
+        }
+        size_t spacePos = option.find_first_of(" \t\n\r");
+        if(spacePos == std::string::npos) {
+            std::cerr << "Invalid config parameter format. Space separator required here: " << option;
+            exit(-1);
+        }
+
+        std::string key, value;
+        if (spacePos != std::string::npos) {
+            key = option.substr(0, spacePos);
+            size_t valueStart = option.find_first_not_of(" \t\n\r", spacePos);
+            if(valueStart == std::string::npos) {
+                std::cerr << "An invalid config parameter value detected, it mustn't be empty: " << option;
+                exit(-1);
+            }
+            size_t valueEnd = option.find_last_not_of(" \t\n\r");
+            value = option.substr(valueStart, valueEnd - valueStart + 1);
+            config[key] = value;
+        }
+    }
+
+    return config;
+}
+
+
+std::vector<cv::Mat> renderDetectionBatchData(DetectionResult& result, const ColorPalette& palette, OutputTransform& outputTransform) {
+    if (!result.metaData) {
+        throw std::invalid_argument("Renderer: metadata is null");
+    }
+
+    const auto &batchedMetadata = result.metaData->asRef<ImageBatchMetaData>();
+    std::vector<cv::Mat> outputs;
+    outputs.reserve(batchedMetadata.metadatas.size());
+    for (auto &metadata : batchedMetadata.metadatas) {
+        auto outputImg = metadata->img;
+
+        if (outputImg.empty()) {
+            throw std::invalid_argument("Renderer: image provided in metadata is empty");
+        }
+        outputTransform.resize(outputImg);
+        // Visualizing result data over source image
+        if (FLAGS_r) {
+            slog::debug << " -------------------- Frame # " << result.frameId << "--------------------" << slog::endl;
+            slog::debug << "     Class ID  | Confidence | XMIN | YMIN | XMAX | YMAX " << slog::endl;
+        }
+
+        for (auto& obj : result.objects) {
+            if (FLAGS_r) {
+                slog::debug << " " << std::left << std::setw(9) << obj.label << " | " << std::setw(10) << obj.confidence
+                            << " | " << std::setw(4) << int(obj.x) << " | " << std::setw(4) << int(obj.y) << " | "
+                            << std::setw(4) << int(obj.x + obj.width) << " | " << std::setw(4) << int(obj.y + obj.height)
+                            << slog::endl;
+            }
+            outputTransform.scaleRect(obj);
+            std::ostringstream conf;
+            conf << ":" << std::fixed << std::setprecision(1) << obj.confidence * 100 << '%';
+            const auto& color = palette[obj.labelID];
+            putHighlightedText(outputImg,
+                               obj.label + conf.str(),
+                               cv::Point2f(obj.x, obj.y - 5),
+                               cv::FONT_HERSHEY_COMPLEX_SMALL,
+                               1,
+                               color,
+                               2);
+            cv::rectangle(outputImg, obj, color, 2);
+        }
+
+        try {
+            for (auto& lmark : result.asRef<RetinaFaceDetectionResult>().landmarks) {
+                outputTransform.scaleCoord(lmark);
+                cv::circle(outputImg, lmark, 2, cv::Scalar(0, 255, 255), -1);
+            }
+        } catch (const std::bad_cast&) {}
+        outputs.push_back(outputImg);
+    }
+    return outputs;
+}
+
+
+
 // Input image is stored inside metadata, as we put it there during submission stage
 cv::Mat renderDetectionData(DetectionResult& result, const ColorPalette& palette, OutputTransform& outputTransform) {
     if (!result.metaData) {
@@ -399,6 +497,10 @@ int main(int argc, char* argv[]) {
         slog::info << ov::get_openvino_version() << slog::endl;
         ov::Core core;
+        if (!FLAGS_config.empty()) {
+            const auto configs = parseConfigFile();
+            core.set_property(FLAGS_d, {configs.begin(), configs.end()});
+        }
 
         AsyncPipeline pipeline(std::move(model),
                                ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads),
@@ -418,20 +520,33 @@ int main(int argc, char* argv[]) {
         OutputTransform outputTransform = OutputTransform();
         size_t found = FLAGS_output_resolution.find("x");
 
+        // batch setup
+        std::vector<std::shared_ptr<InputData>> inputDataVector;
+        inputDataVector.reserve(FLAGS_nireq_per_batch);
+        auto image_batch_metadata = std::make_shared<ImageBatchMetaData>();
         while (keepRunning) {
             if (pipeline.isReadyToProcess()) {
                 auto startTime = std::chrono::steady_clock::now();
 
-                //--- Capturing frame
-                curr_frame = cap->read();
-
-                if (curr_frame.empty()) {
-                    // Input stream is over
+                //--- Capturing nireq_per_batch frames
+                inputDataVector.clear();
+                image_batch_metadata->clear();
+                for (int i = 0; i < FLAGS_nireq_per_batch; i++) {
+                    curr_frame = cap->read();
+                    if (curr_frame.empty()) {
+                        // Input stream is over
+                        break;
+                    }
+                    inputDataVector.push_back(std::make_shared<ImageInputData>(curr_frame));
+                    image_batch_metadata->add(curr_frame, startTime);
+                }
+                if (inputDataVector.size() != FLAGS_nireq_per_batch) {
                     break;
                 }
-
-                frameNum = pipeline.submitData(ImageInputData(curr_frame),
-                                               std::make_shared<ImageMetaData>(curr_frame, startTime));
+                auto inputImagesDataBeginIt = inputDataVector.begin();
+                auto inputImagesDataEndIt = inputImagesDataBeginIt;
+                std::advance(inputImagesDataEndIt, FLAGS_nireq_per_batch);
+                frameNum = pipeline.submitData(inputImagesDataBeginIt, inputImagesDataEndIt, image_batch_metadata);
             }
 
             if (frameNum == 0) {
@@ -455,28 +570,30 @@ int main(int argc, char* argv[]) {
             //  and use your own processing instead of calling renderDetectionData().
             while (keepRunning && (result = pipeline.getResult())) {
                 auto renderingStart = std::chrono::steady_clock::now();
-                cv::Mat outFrame = renderDetectionData(result->asRef<DetectionResult>(), palette, outputTransform);
+                std::vector<cv::Mat> outFrames = renderDetectionBatchData(result->asRef<DetectionResult>(), palette, outputTransform);
                 //--- Showing results and device information
-                presenter.drawGraphs(outFrame);
-                renderMetrics.update(renderingStart);
-                metrics.update(result->metaData->asRef<ImageMetaData>().timeStamp,
-                               outFrame,
-                               {10, 22},
-                               cv::FONT_HERSHEY_COMPLEX,
-                               0.65);
-
-                videoWriter.write(outFrame);
-                framesProcessed++;
-
-                if (!FLAGS_no_show) {
-                    cv::imshow("Detection Results", outFrame);
-                    //--- Processing keyboard events
-                    int key = cv::waitKey(1);
-                    if (27 == key || 'q' == key || 'Q' == key) {  // Esc
-                        keepRunning = false;
-                    } else {
-                        presenter.handleKey(key);
+                for (cv::Mat outFrame : outFrames) {
+                    presenter.drawGraphs(outFrame);
+                    renderMetrics.update(renderingStart);
+                    metrics.update(result->metaData->asRef<ImageBatchMetaData>().timeStamp,
+                                   outFrame,
+                                   {10, 22},
+                                   cv::FONT_HERSHEY_COMPLEX,
+                                   0.65);
+
+                    videoWriter.write(outFrame);
+                    framesProcessed++;
+
+                    if (!FLAGS_no_show) {
+                        cv::imshow("Detection Results", outFrame);
+                        //--- Processing keyboard events
+                        int key = cv::waitKey(1);
+                        if (27 == key || 'q' == key || 'Q' == key) {  // Esc
+                            keepRunning = false;
+                        } else {
+                            presenter.handleKey(key);
+                        }
                     }
                 }
             }
@@ -489,20 +606,22 @@ int main(int argc, char* argv[]) {
             result = pipeline.getResult();
             if (result != nullptr) {
                 auto renderingStart = std::chrono::steady_clock::now();
-                cv::Mat outFrame = renderDetectionData(result->asRef<DetectionResult>(), palette, outputTransform);
+                std::vector<cv::Mat> outFrames = renderDetectionBatchData(result->asRef<DetectionResult>(), palette, outputTransform);
                 //--- Showing results and device information
-                presenter.drawGraphs(outFrame);
-                renderMetrics.update(renderingStart);
-                metrics.update(result->metaData->asRef<ImageMetaData>().timeStamp,
-                               outFrame,
-                               {10, 22},
-                               cv::FONT_HERSHEY_COMPLEX,
-                               0.65);
-                videoWriter.write(outFrame);
-                if (!FLAGS_no_show) {
-                    cv::imshow("Detection Results", outFrame);
-                    //--- Updating output window
-                    cv::waitKey(1);
+                for (cv::Mat outFrame : outFrames) {
+                    presenter.drawGraphs(outFrame);
+                    renderMetrics.update(renderingStart);
+                    metrics.update(result->metaData->asRef<ImageBatchMetaData>().timeStamp,
+                                   outFrame,
+                                   {10, 22},
+                                   cv::FONT_HERSHEY_COMPLEX,
+                                   0.65);
+                    videoWriter.write(outFrame);
+                    if (!FLAGS_no_show) {
+                        cv::imshow("Detection Results", outFrame);
+                        //--- Updating output window
+                        cv::waitKey(1);
+                    }
                 }
             }
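
--- Editor's note (appended commentary, not part of the patches) ---

The core technique across this series is packing FLAGS_nireq_per_batch preprocessed frames into one contiguous NHWC blob and wrapping it in a single ov::Tensor, so that one infer request carries a whole batch (see ImageModel::preprocess in PATCH 1). Below is a minimal standalone sketch of that packing step. It assumes a u8, 3-channel NHWC model input and uses OpenCV for resizing; the function and variable names are illustrative and do not come from the patches.

    // batch_pack_sketch.cpp - minimal sketch, assuming OpenCV + the OpenVINO 2.0 C++ API.
    #include <cstring>
    #include <stdexcept>
    #include <vector>

    #include <opencv2/imgproc.hpp>
    #include <openvino/openvino.hpp>

    // Packs N HWC BGR images into one contiguous [N, H, W, C] u8 tensor.
    // Unlike the patch, the tensor owns its allocation, so no buffer is leaked.
    ov::Tensor packBatchNHWC(const std::vector<cv::Mat>& images, size_t height, size_t width) {
        const size_t channels = 3;
        ov::Tensor batched(ov::element::u8, ov::Shape{images.size(), height, width, channels});
        uint8_t* dst = batched.data<uint8_t>();
        const size_t imageBytes = height * width * channels;  // 1 byte per u8 element
        for (size_t n = 0; n < images.size(); ++n) {
            cv::Mat resized;
            cv::resize(images[n], resized, cv::Size(static_cast<int>(width), static_cast<int>(height)));
            if (resized.type() != CV_8UC3) {
                throw std::runtime_error("sketch assumes CV_8UC3 input");
            }
            if (!resized.isContinuous()) {
                resized = resized.clone();  // memcpy below requires contiguous pixel data
            }
            std::memcpy(dst + n * imageBytes, resized.data, imageBytes);
        }
        return batched;
    }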
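Usage would look roughly like request.set_tensor(inputsNames[0], packBatchNHWC(frames, netHeight, netWidth)), with netHeight/netWidth taken from the compiled model's input shape as the patches do via ov::layout::height_idx/width_idx. The design trade-off versus PATCH 1: letting ov::Tensor own the memory avoids the "intended memory leak" of the raw new char[] buffer (the buffer is freed when the last copy of the tensor is destroyed), at the cost of one fresh allocation per submitted batch instead of reusing a single leaked block.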