yolov5_TRT_C-_python_api

Convert the YOLOv5 C++ TensorRT inference into a Python package; Python then uses the package to get labels and coordinates. You can first refer to my blog post to set up the environment:

https://blog.csdn.net/weixin_43269994/article/details/117219986?spm=1001.2014.3001.5502

Write CMakeLists.txt

cmake_minimum_required(VERSION 2.6)

project(yolov5)

# add_definitions(-std=c++11)

option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

find_package(CUDA REQUIRED)

include_directories(${PROJECT_SOURCE_DIR}/include)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda-11.1/include)
link_directories(/usr/local/cuda-11.1/lib64)

# tensorrt
include_directories(/home/lindsay/TensorRT-7.2.2.3/include)
link_directories(/home/lindsay/TensorRT-7.2.2.3/lib)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")

cuda_add_library(yolov5_trt SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu ${PROJECT_SOURCE_DIR}/yolov5_lib.cpp)

find_package(OpenCV)
# find_package(OpenCV 4.4.0  REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

target_link_libraries(yolov5_trt nvinfer cudart ${OpenCV_LIBS})

# add_executable(yolov5 ${PROJECT_SOURCE_DIR}/yolov5.cpp)
add_executable(yolov5 ${PROJECT_SOURCE_DIR}/calibrator.cpp ${PROJECT_SOURCE_DIR}/yolov5.cpp)
target_link_libraries(yolov5 nvinfer)
target_link_libraries(yolov5 cudart)
target_link_libraries(yolov5 yolov5_trt)
target_link_libraries(yolov5 ${OpenCV_LIBS})

message(${OpenCV_LIBS})

add_definitions(-O2 -pthread)

Wrap YOLOv5 TensorRT as a C++ library

//yolov5_lib.h

#pragma once

#include <opencv2/opencv.hpp>  // needed for cv::Mat in the detect declaration
 
#ifdef __cplusplus
extern "C" 
{
#endif 
 
void * yolov5_trt_create(const char * engine_name);
 
const char * yolov5_trt_detect(void *h, cv::Mat &img, float threshold);
 
void yolov5_trt_destroy(void *h);
 
#ifdef __cplusplus
}
#endif 

//yolov5_lib.cpp

#include <iostream>
#include <chrono>
#include "cuda_runtime_api.h"
#include "logging.h"
#include "common.hpp"
#include "yolov5_lib.h"
 
#define USE_FP16  // comment out this if want to use FP32
#define DEVICE 0  // GPU id
#define NMS_THRESH 0.4
#define CONF_THRESH 0.5
#define BATCH_SIZE 1
 
// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1;  // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
static Logger gLogger;
 
 
static void doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* input, float* output, int batchSize) {
    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
    CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
    context.enqueue(batchSize, buffers, stream, nullptr);
    CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    cudaStreamSynchronize(stream);
}
 
 
typedef struct 
{
 
    float *data;
    float *prob;
    IRuntime *runtime;
    ICudaEngine *engine;
    IExecutionContext *exe_context;
    void* buffers[2];
    cudaStream_t cuda_stream;
    int inputIndex;
    int outputIndex;
    char result_json_str[16384];
 
}Yolov5TRTContext;
 
void * yolov5_trt_create(const char * engine_name)
{
    size_t size = 0;
    char *trtModelStream = NULL;
    Yolov5TRTContext * trt_ctx = NULL;
 
    trt_ctx = new Yolov5TRTContext();
 
    std::ifstream file(engine_name, std::ios::binary);
    printf("yolov5_trt_create  ... \n");
    if (file.good()) {
        file.seekg(0, file.end);
        size = file.tellg();
        file.seekg(0, file.beg);
        trtModelStream = new char[size];
        assert(trtModelStream);
        file.read(trtModelStream, size);
        file.close();
    }else
        return NULL;
 
    trt_ctx->data = new float[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    trt_ctx->prob = new float[BATCH_SIZE * OUTPUT_SIZE];
    trt_ctx->runtime = createInferRuntime(gLogger);
    assert(trt_ctx->runtime != nullptr);
 
    printf("yolov5_trt_create  cuda engine... \n");
    trt_ctx->engine = trt_ctx->runtime->deserializeCudaEngine(trtModelStream, size);
    assert(trt_ctx->engine != nullptr);
    trt_ctx->exe_context = trt_ctx->engine->createExecutionContext();
 
 
    delete[] trtModelStream;
    assert(trt_ctx->engine->getNbBindings() == 2);
 
    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    trt_ctx->inputIndex = trt_ctx->engine->getBindingIndex(INPUT_BLOB_NAME);
    trt_ctx->outputIndex = trt_ctx->engine->getBindingIndex(OUTPUT_BLOB_NAME);
 
    assert(trt_ctx->inputIndex == 0);
    assert(trt_ctx->outputIndex == 1);
    // Create GPU buffers on device
 
    printf("yolov5_trt_create  buffer ... \n");
    CHECK(cudaMalloc(&trt_ctx->buffers[trt_ctx->inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CHECK(cudaMalloc(&trt_ctx->buffers[trt_ctx->outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    // Create stream
 
    printf("yolov5_trt_create  stream ... \n");
    CHECK(cudaStreamCreate(&trt_ctx->cuda_stream));
    printf("yolov5_trt_create  done ... \n");
    return (void *)trt_ctx;
 
 
}
 
 
const char * yolov5_trt_detect(void *h, cv::Mat &img, float threshold)
{
    Yolov5TRTContext *trt_ctx;
    int i;
    int delay_preprocess;
    int delay_infer;
 
    trt_ctx = (Yolov5TRTContext *)h;
 
 
    trt_ctx->result_json_str[0] = 0;
 
    if (img.empty()) return trt_ctx->result_json_str;
 
    auto start0 = std::chrono::system_clock::now();
 
    //printf("yolov5_trt_detect start preprocess img \n");
    cv::Mat pr_img = preprocess_img(img);
 
 
 
    //printf("yolov5_trt_detect start convert img to float\n");
    // letterbox BGR to RGB
    i = 0;
    for (int row = 0; row < INPUT_H; ++row) {
        uchar* uc_pixel = pr_img.data + row * pr_img.step;
        for (int col = 0; col < INPUT_W; ++col) {
            trt_ctx->data[i] = (float)uc_pixel[2] / 255.0;
            trt_ctx->data[i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
            trt_ctx->data[i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
            uc_pixel += 3;
            ++i;
        }
    }
    auto end0 = std::chrono::system_clock::now();
 
    delay_preprocess =  std::chrono::duration_cast<std::chrono::milliseconds>(end0 - start0).count();
 
    // Run inference
    //printf("yolov5_trt_detect start do inference\n");
    auto start = std::chrono::system_clock::now();
    doInference(*trt_ctx->exe_context, trt_ctx->cuda_stream, trt_ctx->buffers, trt_ctx->data, trt_ctx->prob, BATCH_SIZE);
 
    auto end = std::chrono::system_clock::now();
    delay_infer = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
 
    std::cout <<"delay_proress:" << delay_preprocess << "ms, " << "delay_infer:" << delay_infer << "ms" << std::endl;
 
    //printf("yolov5_trt_detect start do process infer result \n");
 
    int fcount = 1;
    int str_len;
    std::vector<std::vector<Yolo::Detection>> batch_res(1);
    auto& res = batch_res[0];
    nms(res, &trt_ctx->prob[0], threshold, NMS_THRESH);
 
    sprintf(trt_ctx->result_json_str,
                "{\"delay_preprocess\": %d,"
                "\"delay_infer\": %d,"
                "\"num_det\":%d, \"objects\":[", delay_preprocess, delay_infer, (int) res.size());
 
    str_len = strlen(trt_ctx->result_json_str);
 
    i = 0;
    for(i = 0 ; i < res.size(); i++){
        int x1, y1, x2, y2;
        int class_id;
 
        cv::Rect r = get_rect(img, res[i].bbox);
 
        x1 = r.x;
        y1 = r.y;
        x2 = r.x + r.width;
        y2 = r.y + r.height;
        class_id = (int)res[i].class_id;
 
 
        if (0 == i){
            sprintf(trt_ctx->result_json_str + str_len, "(%d,%d,%d,%d,%d)", class_id, x1, y1, x2, y2);
        }else {
            sprintf(trt_ctx->result_json_str + str_len, ",(%d,%d,%d,%d,%d)", class_id, x1, y1, x2, y2);
        }
        str_len = strlen(trt_ctx->result_json_str);
 
        if (str_len >= 16300)
            break;
 
    }
 
    sprintf(trt_ctx->result_json_str + str_len, "]}");
 
 
    return trt_ctx->result_json_str;
 
}
 
 
void yolov5_trt_destroy(void *h)
{
    Yolov5TRTContext *trt_ctx;
 
    trt_ctx = (Yolov5TRTContext *)h;
 
    // Release stream and buffers
    cudaStreamDestroy(trt_ctx->cuda_stream);
    CHECK(cudaFree(trt_ctx->buffers[trt_ctx->inputIndex]));
    CHECK(cudaFree(trt_ctx->buffers[trt_ctx->outputIndex]));
    // Destroy the engine
    trt_ctx->exe_context->destroy();
    trt_ctx->engine->destroy();
    trt_ctx->runtime->destroy();
 
    delete[] trt_ctx->data;   // allocated with new[], so release with delete[]
    delete[] trt_ctx->prob;
 
    delete trt_ctx;
 
}

Run the following commands:

mkdir build && cd build
cmake ..
make -j10
sudo ./yolov5 -s ../yolov5s.wts ../yolov5s.engine s   # generate the engine and libyolov5_trt.so

The build produces libyolov5_trt.so.

Wrap YOLOv5 TensorRT as a Python library (based on the C++ library, calling libyolov5_trt.so)

Python module, reference:

https://github.com/walletiger/tensorrt_retinaface_with_python/tree/main/python

yolov5_trt_py_module.cpp

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <Python.h>

#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include "../yolov5_lib.h"
#include "pyboostcvconverter/pyboostcvconverter.hpp"
#include <boost/python.hpp>


using namespace cv;
using namespace boost::python;



static PyObject * mpyCreate(PyObject *self,  PyObject *args)
{
    char *engine_path = NULL;
    void *trt_engine = NULL;

    if (!PyArg_ParseTuple(args, "s", &engine_path)){
        return  Py_BuildValue("K", (unsigned long long)trt_engine);
    }

    trt_engine = yolov5_trt_create(engine_path);

    printf("create yolov5-trt , instance = %p\n", trt_engine);

    return Py_BuildValue("K", (unsigned long long)trt_engine);
}

static PyObject *mpyDetect(PyObject *self, PyObject *args)
{
    void *trt_engine = NULL;
    PyObject *ndArray = NULL;
    float conf_thresh = 0.45;
    const char *ret = NULL;
    unsigned long long v; 

    if (!PyArg_ParseTuple(args, "KOf", &v, &ndArray, &conf_thresh))
        return Py_BuildValue("s", "");

    Mat mat = pbcvt::fromNDArrayToMat(ndArray);

    trt_engine = (void *)v;

    ret = yolov5_trt_detect(trt_engine, mat, conf_thresh);

    return Py_BuildValue("s", ret);
}

static PyObject * mPyDestroy(PyObject *self, PyObject *args)
{
    void *engine = NULL;
    unsigned long long v;
    if (!PyArg_ParseTuple(args, "K", &v))
        Py_RETURN_NONE;  // Py_BuildValue("O", NULL) returns NULL without setting an exception, so return None instead

    printf(" destroy engine , engine = %llu\n", v);  // %llu matches unsigned long long
    engine = (void *)v;

    yolov5_trt_destroy(engine);

    Py_RETURN_NONE;

}

static PyMethodDef TRTYolov5MeThods[] = {
    {"create", mpyCreate, METH_VARARGS, "Create the engine."},
    {"detect", mpyDetect, METH_VARARGS, "use the engine to detect image"},    
    {"destroy", mPyDestroy, METH_VARARGS, "destroy the engine"},        
    {NULL, NULL, 0, NULL}
};

static struct PyModuleDef TRTYolov5Module = {
    PyModuleDef_HEAD_INIT,
    "TRTYolov5",     /* name of module */
    "",          /* module documentation, may be NULL */
    -1,          /* size of per-interpreter state of the module, or -1 if the module keeps state in global variables. */
    TRTYolov5MeThods
};

PyMODINIT_FUNC PyInit_TRTYolov5(void) {
    printf("init module ... \n");

    return PyModule_Create(&TRTYolov5Module);
}
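
The string returned by detect is JSON-like, but the objects array holds tuples of the form (class_id,x1,y1,x2,y2) rather than strict JSON, so json.loads cannot parse it directly. A minimal parsing sketch on the Python side (the function name and regular expressions are my own, not part of this repo):

import re

def parse_result(result_str):
    """Split the string returned by TRTYolov5.detect into timing fields and
    a list of (class_id, x1, y1, x2, y2) tuples."""
    delay_preprocess = int(re.search(r'"delay_preprocess":\s*(-?\d+)', result_str).group(1))
    delay_infer = int(re.search(r'"delay_infer":\s*(-?\d+)', result_str).group(1))
    # each detection is printed by the C++ side as (class_id,x1,y1,x2,y2)
    objects = [tuple(int(v) for v in m)
               for m in re.findall(r'\((-?\d+),(-?\d+),(-?\d+),(-?\d+),(-?\d+)\)', result_str)]
    return delay_preprocess, delay_infer, objects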

setup.py

from setuptools import setup, Extension, find_packages
import distutils.command.clean
from torch.utils.cpp_extension import BuildExtension
import numpy as np
setup(
    name='TRTYolov5',
    version='1.0',
    author="lindsay",
    author_email="lindsayshuo@foxmail.com",
    url="lindsayshuo@foxmail.com",
    description='Python wrapper around the YOLOv5 TensorRT C++ library (libyolov5_trt.so)',

    # Package info
    packages=find_packages(exclude=('test',)),
    zip_safe=False,

    ext_modules=[
        Extension(
            'TRTYolov5',
            sources=['pyboostcvconverter/pyboost_cv4_converter.cpp', 'yolov5_trt_py_module.cpp'],
            include_dirs=['/home/lindsay/anaconda3/lib/python3.8/site-packages/numpy/core/include/numpy',
                          '/usr/local/cuda-11.1/include/',
                          '/usr/local/include/',
                          '../include'
                         ],
            libraries=['gstvideo-1.0', 'yolov5_trt',  'opencv_features2d', 'opencv_flann',  'opencv_imgcodecs', 'opencv_imgproc', 'opencv_core', 'opencv_highgui', 'opencv_videoio',   "boost_python3"],
            # libraries=['gstvideo-1.0', 'yolov5_trt',  'opencv_features2d',  'opencv_core', 'opencv_highgui',  "boost_python3"],
            library_dirs=[ '../build','/home/lindsay/anaconda3/lib'],
            py_limited_api=True)
    ],
    include_dirs=[np.get_include()]
)

Run the following steps:

	sudo vim ~/.bashrc
	# add these lines at the end:
	export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/lindsay/Desktop/yolov5_tensorrtx_python-master/build
	export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64

	cd ../python/
	python setup.py install   # build and install the Python package
	python detect.py          # verify (a minimal sketch of detect.py follows below)
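
A minimal sketch of what detect.py can look like (the engine path, image path, and 0.5 threshold below are examples, not values fixed by this repo):

import cv2
import TRTYolov5 as t

# create the TensorRT engine wrapper from a serialized .engine file
engine = t.create('../build/yolov5s.engine')

# read an image with OpenCV (a numpy array) and run detection
img = cv2.imread('../samples/bus.jpg')
result = t.detect(engine, img, 0.5)   # returns the JSON-like result string
print(result)

t.destroy(engine)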

If the build fails and the Boost.Python library cannot be found under /usr/lib/x86_64-linux-gnu, and libboost-python is not installed, run:

sudo apt-get install libboost-python-dev

After it finishes, libboost_python-py27.so.1.58.0 appears in that directory, the problem is resolved, and make passes.

Package the Python module as a pip-installable wheel

python setup.py bdist_wheel

This produces the following output:

running bdist_wheel
running build
running build_ext
installing to build/bdist.linux-x86_64/wheel
running install
running install_lib
creating build/bdist.linux-x86_64/wheel
copying build/lib.linux-x86_64-3.8/TRTYolov5.abi3.so -> build/bdist.linux-x86_64/wheel
running install_egg_info
running egg_info
writing TRTYolov5.egg-info/PKG-INFO
writing dependency_links to TRTYolov5.egg-info/dependency_links.txt
writing top-level names to TRTYolov5.egg-info/top_level.txt
reading manifest file 'TRTYolov5.egg-info/SOURCES.txt'
writing manifest file 'TRTYolov5.egg-info/SOURCES.txt'
Copying TRTYolov5.egg-info to build/bdist.linux-x86_64/wheel/TRTYolov5-1.0-py3.8.egg-info
running install_scripts
creating build/bdist.linux-x86_64/wheel/TRTYolov5-1.0.dist-info/WHEEL
creating 'dist/TRTYolov5-1.0-cp38-cp38-linux_x86_64.whl' and adding 'build/bdist.linux-x86_64/wheel' to it
adding 'TRTYolov5.abi3.so'
adding 'TRTYolov5-1.0.dist-info/METADATA'
adding 'TRTYolov5-1.0.dist-info/WHEEL'
adding 'TRTYolov5-1.0.dist-info/top_level.txt'
adding 'TRTYolov5-1.0.dist-info/RECORD'
removing build/bdist.linux-x86_64/wheel

The wheel is generated in the dist folder; when deploying to another machine later, it can be installed directly with pip.
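
For example, on the machine where the package will be used (the wheel filename below comes from the build log above; libyolov5_trt.so must still be reachable through LD_LIBRARY_PATH):

pip install dist/TRTYolov5-1.0-cp38-cp38-linux_x86_64.whl
python -c "import TRTYolov5; print(TRTYolov5.detect.__doc__)"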