modeld: PC Thneed prereqs (commaai#25615)
* pc thneed prereqs

* ugh, out of date

* that can stay private

* memcpy here is fine in SNPE variant

* release files

* thneed docs don't work anymore. they didn't look too useful

Co-authored-by: Comma Device <device@comma.ai>
2 people authored and rjsmith1999 committed Oct 8, 2022
1 parent d9c0576 commit 4d1b1cf
Showing 16 changed files with 366 additions and 263 deletions.
2 changes: 0 additions & 2 deletions docs/c_docs.rst
@@ -78,8 +78,6 @@ modeld
:project: selfdrive_modeld_transforms
.. autodoxygenindex::
:project: selfdrive_modeld_models
.. autodoxygenindex::
:project: selfdrive_modeld_thneed
.. autodoxygenindex::
:project: selfdrive_modeld_runners

4 changes: 3 additions & 1 deletion release/files_common
@@ -356,7 +356,9 @@ selfdrive/modeld/transforms/transform.h
selfdrive/modeld/transforms/transform.cl

selfdrive/modeld/thneed/*.py
selfdrive/modeld/thneed/thneed.*
selfdrive/modeld/thneed/thneed.h
selfdrive/modeld/thneed/thneed_common.cc
selfdrive/modeld/thneed/thneed_qcom2.cc
selfdrive/modeld/thneed/serialize.cc
selfdrive/modeld/thneed/compile.cc
selfdrive/modeld/thneed/optimizer.cc
3 changes: 2 additions & 1 deletion selfdrive/modeld/SConscript
@@ -23,7 +23,8 @@ common_src = [
]

thneed_src = [
"thneed/thneed.cc",
"thneed/thneed_common.cc",
"thneed/thneed_qcom2.cc",
"thneed/serialize.cc",
"thneed/optimizer.cc",
"runners/thneedmodel.cc",
2 changes: 1 addition & 1 deletion selfdrive/modeld/models/driving.cc
@@ -38,7 +38,7 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
#else
s->m = std::make_unique<SNPEModel>("models/supercombo.dlc",
#endif
&s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true);
&s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true, false, context);

#ifdef TEMPORAL
s->m->addRecurrent(&s->output[OUTPUT_SIZE], TEMPORAL_SIZE);
35 changes: 24 additions & 11 deletions selfdrive/modeld/runners/onnx_runner.py
@@ -9,36 +9,46 @@

import onnxruntime as ort # pylint: disable=import-error

def read(sz):
def read(sz, tf8=False):
dd = []
gt = 0
while gt < sz * 4:
st = os.read(0, sz * 4 - gt)
szof = 1 if tf8 else 4
while gt < sz * szof:
st = os.read(0, sz * szof - gt)
assert(len(st) > 0)
dd.append(st)
gt += len(st)
return np.frombuffer(b''.join(dd), dtype=np.float32)
r = np.frombuffer(b''.join(dd), dtype=np.uint8 if tf8 else np.float32).astype(np.float32)
if tf8:
r = r / 255.
return r

def write(d):
os.write(1, d.tobytes())

def run_loop(m):
def run_loop(m, tf8_input=False):
ishapes = [[1]+ii.shape[1:] for ii in m.get_inputs()]
keys = [x.name for x in m.get_inputs()]

# run once to initialize CUDA provider
if "CUDAExecutionProvider" in m.get_providers():
m.run(None, dict(zip(keys, [np.zeros(shp, dtype=np.float32) for shp in ishapes])))

print("ready to run onnx model", keys, ishapes, file=sys.stderr)
while 1:
inputs = []
for shp in ishapes:
for k, shp in zip(keys, ishapes):
ts = np.product(shp)
#print("reshaping %s with offset %d" % (str(shp), offset), file=sys.stderr)
inputs.append(read(ts).reshape(shp))
inputs.append(read(ts, (k=='input_img' and tf8_input)).reshape(shp))
ret = m.run(None, dict(zip(keys, inputs)))
#print(ret, file=sys.stderr)
for r in ret:
write(r)


if __name__ == "__main__":
print(sys.argv, file=sys.stderr)
print("Onnx available providers: ", ort.get_available_providers(), file=sys.stderr)
options = ort.SessionOptions()
options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
@@ -54,7 +64,10 @@ def run_loop(m):
options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
provider = 'CPUExecutionProvider'

print("Onnx selected provider: ", [provider], file=sys.stderr)
ort_session = ort.InferenceSession(sys.argv[1], options, providers=[provider])
print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
run_loop(ort_session)
try:
print("Onnx selected provider: ", [provider], file=sys.stderr)
ort_session = ort.InferenceSession(sys.argv[1], options, providers=[provider])
print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
run_loop(ort_session, tf8_input=("--use_tf8" in sys.argv))
except KeyboardInterrupt:
pass
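
The new tf8 path in read() moves the image input across the pipe as one uint8 per element instead of four bytes of float32, then rescales to [0, 1] on arrival — the same 1/255 scale the SNPE side declares with UserBufferEncodingTf8(0, 1./255). A minimal standalone sketch of that round trip (the shape is a made-up example, not the model's real input):

```python
import numpy as np

# Producer side: quantize a float image in [0, 1) down to one byte per element.
img_f32 = np.random.rand(1, 12, 128, 256).astype(np.float32)  # hypothetical shape
wire = (img_f32 * 255.).astype(np.uint8).tobytes()            # 4x smaller on the pipe

# Consumer side: what read(sz, tf8=True) does with the bytes it collects.
r = np.frombuffer(wire, dtype=np.uint8).astype(np.float32) / 255.

# Truncating to uint8 costs at most 1/255 of precision per element.
assert np.abs(r - img_f32.ravel()).max() < 1 / 255.
```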
6 changes: 4 additions & 2 deletions selfdrive/modeld/runners/onnxmodel.cc
@@ -14,12 +14,13 @@
#include "selfdrive/common/swaglog.h"
#include "selfdrive/common/util.h"

ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra) {
ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra, bool _use_tf8, cl_context context) {
LOGD("loading model %s", path);

output = _output;
output_size = _output_size;
use_extra = _use_extra;
use_tf8 = _use_tf8;

int err = pipe(pipein);
assert(err == 0);
@@ -28,11 +29,12 @@ ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int

std::string exe_dir = util::dir_name(util::readlink("/proc/self/exe"));
std::string onnx_runner = exe_dir + "/runners/onnx_runner.py";
std::string tf8_arg = use_tf8 ? "--use_tf8" : "";

proc_pid = fork();
if (proc_pid == 0) {
LOGD("spawning onnx process %s", onnx_runner.c_str());
char *argv[] = {(char*)onnx_runner.c_str(), (char*)path, nullptr};
char *argv[] = {(char*)onnx_runner.c_str(), (char*)path, (char*)tf8_arg.c_str(), nullptr};
dup2(pipein[0], 0);
dup2(pipeout[1], 1);
close(pipein[0]);
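On PC the ONNX runner is a forked child speaking raw bytes over stdin/stdout, and the optional --use_tf8 flag is simply appended to its argv. A hypothetical stand-in for the C++ parent, driving the runner from Python — the model path, input shape, and output size here are assumptions, not values from this commit:

```python
import subprocess
import numpy as np

OUTPUT_SIZE = 6504  # hypothetical; must match the model's output element count

proc = subprocess.Popen(
    ["python3", "onnx_runner.py", "supercombo.onnx", "--use_tf8"],
    stdin=subprocess.PIPE, stdout=subprocess.PIPE)

# With --use_tf8, the 'input_img' tensor travels as one uint8 per element;
# every other input would still be sent as raw float32.
img = np.zeros((1, 12, 128, 256), dtype=np.uint8)  # hypothetical shape
proc.stdin.write(img.tobytes())
proc.stdin.flush()

# The runner answers with each output tensor as raw float32 bytes.
out = np.frombuffer(proc.stdout.read(OUTPUT_SIZE * 4), dtype=np.float32)
print(out[:5])
```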
3 changes: 2 additions & 1 deletion selfdrive/modeld/runners/onnxmodel.h
@@ -6,7 +6,7 @@

class ONNXModel : public RunModel {
public:
ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false);
ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false, bool _use_tf8 = false, cl_context context = NULL);
~ONNXModel();
void addRecurrent(float *state, int state_size);
void addDesire(float *state, int state_size);
@@ -31,6 +31,7 @@ class ONNXModel : public RunModel {
int calib_size;
float *image_input_buf = NULL;
int image_buf_size;
bool use_tf8;
float *extra_input_buf = NULL;
int extra_buf_size;
bool use_extra;
1 change: 1 addition & 0 deletions selfdrive/modeld/runners/runmodel.h
@@ -1,4 +1,5 @@
#pragma once
#include "selfdrive/common/clutil.h"
class RunModel {
public:
virtual ~RunModel() {}
31 changes: 22 additions & 9 deletions selfdrive/modeld/runners/snpemodel.cc
@@ -6,19 +6,20 @@
#include <cstdlib>
#include <cstring>

#include "selfdrive/common/util.h"
#include "selfdrive/common/timing.h"
#include "common/util.h"
#include "common/timing.h"

void PrintErrorStringAndExit() {
std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
std::exit(EXIT_FAILURE);
}

SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra) {
SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) {
output = loutput;
output_size = loutput_size;
use_extra = luse_extra;
#if defined(QCOM) || defined(QCOM2)
use_tf8 = luse_tf8;
#ifdef QCOM2
if (runtime==USE_GPU_RUNTIME) {
Runtime = zdl::DlSystem::Runtime_t::GPU;
} else if (runtime==USE_DSP_RUNTIME) {
@@ -39,7 +40,7 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
// create model runner
zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
while (!snpe) {
#if defined(QCOM) || defined(QCOM2)
#ifdef QCOM2
snpe = snpeBuilder.setOutputLayers({})
.setRuntimeProcessor(Runtime)
.setUseUserSuppliedBuffers(true)
@@ -70,14 +71,16 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
printf("model: %s -> %s\n", input_tensor_name, output_tensor_name);

zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat;
zdl::DlSystem::UserBufferEncodingTf8 userBufferEncodingTf8(0, 1./255); // network takes 0-1
zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
size_t size_of_input = use_tf8 ? sizeof(uint8_t) : sizeof(float);

// create input buffer
{
const auto &inputDims_opt = snpe->getInputDimensions(input_tensor_name);
const zdl::DlSystem::TensorShape& bufferShape = *inputDims_opt;
std::vector<size_t> strides(bufferShape.rank());
strides[strides.size() - 1] = sizeof(float);
strides[strides.size() - 1] = size_of_input;
size_t product = 1;
for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i];
size_t stride = strides[strides.size() - 1];
@@ -86,7 +89,10 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
strides[i-1] = stride;
}
printf("input product is %lu\n", product);
inputBuffer = ubFactory.createUserBuffer(NULL, product*sizeof(float), strides, &userBufferEncodingFloat);
inputBuffer = ubFactory.createUserBuffer(NULL,
product*size_of_input,
strides,
use_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&userBufferEncodingTf8 : (zdl::DlSystem::UserBufferEncoding*)&userBufferEncodingFloat);

inputMap.add(input_tensor_name, inputBuffer.get());
}
@@ -123,6 +129,12 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
outputBuffer = ubFactory.createUserBuffer(output, output_size * sizeof(float), outputStrides, &userBufferEncodingFloat);
outputMap.add(output_tensor_name, outputBuffer.get());
}

#ifdef USE_THNEED
if (Runtime == zdl::DlSystem::Runtime_t::GPU) {
thneed.reset(new Thneed());
}
#endif
}

void SNPEModel::addRecurrent(float *state, int state_size) {
@@ -176,7 +188,7 @@ std::unique_ptr<zdl::DlSystem::IUserBuffer> SNPEModel::addExtra(float *state, in
void SNPEModel::execute() {
#ifdef USE_THNEED
if (Runtime == zdl::DlSystem::Runtime_t::GPU) {
if (thneed == NULL) {
if (!thneed_recorded) {
bool ret = inputBuffer->setBufferAddress(input);
assert(ret == true);
if (use_extra) {
@@ -188,7 +200,7 @@ void SNPEModel::execute() {
PrintErrorStringAndExit();
}
memset(recurrent, 0, recurrent_size*sizeof(float));
thneed = new Thneed();
thneed->record = true;
if (!snpe->execute(inputMap, outputMap)) {
PrintErrorStringAndExit();
}
@@ -220,6 +232,7 @@ void SNPEModel::execute() {
assert(false);
}
free(outputs_golden);
thneed_recorded = true;
} else {
if (use_extra) {
float *inputs[5] = {recurrent, trafficConvention, desire, extra, input};
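The buffer setup derives byte strides innermost-first from the tensor shape; the only tf8-specific change is that the element size becomes sizeof(uint8_t) instead of sizeof(float). The same arithmetic in Python, with a made-up NHWC shape for illustration:

```python
def byte_strides(shape, elem_size):
    # Row-major: the stride of axis i is elem_size times the product of
    # every dimension after i — the loop mirrors the one in snpemodel.cc.
    strides = [elem_size] * len(shape)
    stride = elem_size
    for i in range(len(shape) - 1, 0, -1):
        stride *= shape[i]
        strides[i - 1] = stride
    return strides

print(byte_strides([1, 128, 256, 12], 1))  # tf8:     [393216, 3072, 12, 1]
print(byte_strides([1, 128, 256, 12], 4))  # float32: [1572864, 12288, 48, 4]
```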
9 changes: 6 additions & 3 deletions selfdrive/modeld/runners/snpemodel.h
@@ -1,4 +1,5 @@
#pragma once
#pragma clang diagnostic ignored "-Wdeprecated-declarations"

#include <DlContainer/IDlContainer.hpp>
#include <DlSystem/DlError.hpp>
@@ -22,7 +23,7 @@

class SNPEModel : public RunModel {
public:
SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false);
SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL);
void addRecurrent(float *state, int state_size);
void addTrafficConvention(float *state, int state_size);
void addCalib(float *state, int state_size);
@@ -32,13 +33,14 @@ class SNPEModel : public RunModel {
void execute();

#ifdef USE_THNEED
Thneed *thneed = NULL;
std::unique_ptr<Thneed> thneed;
bool thneed_recorded = false;
#endif

private:
std::string model_data;

#if defined(QCOM) || defined(QCOM2)
#ifdef QCOM2
zdl::DlSystem::Runtime_t Runtime;
#endif

@@ -50,6 +52,7 @@
std::unique_ptr<zdl::DlSystem::IUserBuffer> inputBuffer;
float *input;
size_t input_size;
bool use_tf8;

// snpe output stuff
zdl::DlSystem::UserBufferMap outputMap;
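With the Thneed object now constructed up front and owned by a unique_ptr, execute() can no longer use "thneed == NULL" to mean "not captured yet"; the new thneed_recorded flag carries that state instead. The control flow reduces to a record-once, replay-after pattern — sketched here as toy Python, not the real Thneed API:

```python
class RecordOnce:
    """Toy stand-in for the thneed/thneed_recorded split in SNPEModel::execute()."""
    def __init__(self, slow_fn):
        self.slow_fn = slow_fn   # plays the role of snpe->execute()
        self.recorded = False
        self.trace = None        # plays the role of the captured GPU command stream

    def __call__(self, x):
        if not self.recorded:
            out = self.slow_fn(x)      # first call: run the full graph once...
            self.trace = self.slow_fn  # ...and keep the "recording" for later
            self.recorded = True
            return out
        return self.trace(x)           # later calls: replay, skipping SNPE entirely

model = RecordOnce(lambda x: x * 2)    # toy "network"
print(model(3), model(5))              # 6 10 — the second call takes the replay path
```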
5 changes: 2 additions & 3 deletions selfdrive/modeld/runners/thneedmodel.cc
@@ -2,9 +2,8 @@

#include <cassert>

ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra) {
thneed = new Thneed(true);
thneed->record = 0;
ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) {
thneed = new Thneed(true, context);
thneed->load(path);
thneed->clexec();
thneed->find_inputs_outputs();
2 changes: 1 addition & 1 deletion selfdrive/modeld/runners/thneedmodel.h
@@ -5,7 +5,7 @@

class ThneedModel : public RunModel {
public:
ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false);
ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL);
void addRecurrent(float *state, int state_size);
void addTrafficConvention(float *state, int state_size);
void addDesire(float *state, int state_size);