Skip to content

Commit

Permalink
modeld: PC Thneed prereqs (commaai#25615)
Browse files Browse the repository at this point in the history
* pc thneed prereqs

* ugh, out of date

* that can stay private

* memcpy here is fine in SNPE variant

* release files

* thneed docs don't work anymore. they didn't look too useful

Co-authored-by: Comma Device <device@comma.ai>
  • Loading branch information
geohot and Comma Device authored Aug 31, 2022
1 parent 452d5e4 commit b6e355a
Show file tree
Hide file tree
Showing 15 changed files with 312 additions and 240 deletions.
2 changes: 0 additions & 2 deletions docs/c_docs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ modeld
:project: selfdrive_modeld_transforms
.. autodoxygenindex::
:project: selfdrive_modeld_models
.. autodoxygenindex::
:project: selfdrive_modeld_thneed
.. autodoxygenindex::
:project: selfdrive_modeld_runners

Expand Down
4 changes: 3 additions & 1 deletion release/files_common
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,9 @@ selfdrive/modeld/transforms/transform.h
selfdrive/modeld/transforms/transform.cl

selfdrive/modeld/thneed/*.py
selfdrive/modeld/thneed/thneed.*
selfdrive/modeld/thneed/thneed.h
selfdrive/modeld/thneed/thneed_common.cc
selfdrive/modeld/thneed/thneed_qcom2.cc
selfdrive/modeld/thneed/serialize.cc
selfdrive/modeld/thneed/compile.cc
selfdrive/modeld/thneed/optimizer.cc
Expand Down
3 changes: 2 additions & 1 deletion selfdrive/modeld/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ common_src = [
]

thneed_src = [
"thneed/thneed.cc",
"thneed/thneed_common.cc",
"thneed/thneed_qcom2.cc",
"thneed/serialize.cc",
"thneed/optimizer.cc",
"runners/thneedmodel.cc",
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/modeld/models/driving.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
#else
s->m = std::make_unique<SNPEModel>("models/supercombo.dlc",
#endif
&s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true);
&s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true, false, context);

#ifdef TEMPORAL
s->m->addRecurrent(&s->output[OUTPUT_SIZE], TEMPORAL_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/modeld/runners/onnxmodel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "common/swaglog.h"
#include "common/util.h"

ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra, bool _use_tf8) {
ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra, bool _use_tf8, cl_context context) {
LOGD("loading model %s", path);

output = _output;
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/modeld/runners/onnxmodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class ONNXModel : public RunModel {
public:
ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false, bool _use_tf8 = false);
ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false, bool _use_tf8 = false, cl_context context = NULL);
~ONNXModel();
void addRecurrent(float *state, int state_size);
void addDesire(float *state, int state_size);
Expand Down
1 change: 1 addition & 0 deletions selfdrive/modeld/runners/runmodel.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#pragma once
#include "common/clutil.h"
class RunModel {
public:
virtual ~RunModel() {}
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/modeld/runners/snpemodel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void PrintErrorStringAndExit() {
std::exit(EXIT_FAILURE);
}

SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8) {
SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) {
output = loutput;
output_size = loutput_size;
use_extra = luse_extra;
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/modeld/runners/snpemodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

class SNPEModel : public RunModel {
public:
SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false);
SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL);
void addRecurrent(float *state, int state_size);
void addTrafficConvention(float *state, int state_size);
void addCalib(float *state, int state_size);
Expand Down
4 changes: 2 additions & 2 deletions selfdrive/modeld/runners/thneedmodel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

#include <cassert>

ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra) {
thneed = new Thneed(true);
ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) {
thneed = new Thneed(true, context);
thneed->load(path);
thneed->clexec();
thneed->find_inputs_outputs();
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/modeld/runners/thneedmodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class ThneedModel : public RunModel {
public:
ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false);
ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL);
void addRecurrent(float *state, int state_size);
void addTrafficConvention(float *state, int state_size);
void addDesire(float *state, int state_size);
Expand Down
59 changes: 52 additions & 7 deletions selfdrive/modeld/thneed/serialize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ void Thneed::load(const char *filename) {

string buf = util::read_file(filename);
int jsz = *(int *)buf.data();
string err;
string jsonerr;
string jj(buf.data() + sizeof(int), jsz);
Json jdat = Json::parse(jj, err);
Json jdat = Json::parse(jj, jsonerr);

map<cl_mem, cl_mem> real_mem;
real_mem[NULL] = NULL;
Expand Down Expand Up @@ -48,13 +48,33 @@ void Thneed::load(const char *filename) {
desc.image_width = mobj["width"].int_value();
desc.image_height = mobj["height"].int_value();
desc.image_row_pitch = mobj["row_pitch"].int_value();
assert(sz == desc.image_height*desc.image_row_pitch);
#ifdef QCOM2
desc.buffer = clbuf;

cl_image_format format;
#else
// TODO: we are creating unused buffers on PC
clReleaseMemObject(clbuf);
#endif
cl_image_format format = {0};
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_HALF_FLOAT;
format.image_channel_data_type = mobj["float32"].bool_value() ? CL_FLOAT : CL_HALF_FLOAT;

cl_int errcode;

clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, NULL);
#ifndef QCOM2
if (mobj["needs_load"].bool_value()) {
clbuf = clCreateImage(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, &format, &desc, &buf[ptr-sz], &errcode);
} else {
clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &errcode);
}
#else
clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &errcode);
#endif
if (clbuf == NULL) {
printf("clError: %s create image %zux%zu rp %zu with buffer %p\n", cl_get_error_string(errcode),
desc.image_width, desc.image_height, desc.image_row_pitch, desc.buffer
);
}
assert(clbuf != NULL);
}

Expand All @@ -67,6 +87,30 @@ void Thneed::load(const char *filename) {
g_programs[name] = cl_program_from_source(context, device_id, source.string_value());
}

for (auto &obj : jdat["inputs"].array_items()) {
auto mobj = obj.object_items();
int sz = mobj["size"].int_value();
cl_mem aa = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
input_clmem.push_back(aa);
input_sizes.push_back(sz);
printf("Thneed::load: adding input %s with size %d\n", mobj["name"].string_value().data(), sz);

cl_int cl_err;
void *ret = clEnqueueMapBuffer(command_queue, aa, CL_TRUE, CL_MAP_WRITE, 0, sz, 0, NULL, NULL, &cl_err);
if (cl_err != CL_SUCCESS) printf("clError: %s map %p %d\n", cl_get_error_string(cl_err), aa, sz);
assert(cl_err == CL_SUCCESS);
inputs.push_back(ret);
}

for (auto &obj : jdat["outputs"].array_items()) {
auto mobj = obj.object_items();
int sz = mobj["size"].int_value();
printf("Thneed::save: adding output with size %d\n", sz);
// TODO: support multiple outputs
output = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
assert(output != NULL);
}

for (auto &obj : jdat["binaries"].array_items()) {
string name = obj["name"].string_value();
size_t length = obj["length"].int_value();
Expand Down Expand Up @@ -135,7 +179,7 @@ void Thneed::save(const char *filename, bool save_binaries) {
});

if (k->arg_types[i] == "image2d_t" || k->arg_types[i] == "image1d_t") {
cl_mem buf;
cl_mem buf = NULL;
clGetImageInfo(val, CL_IMAGE_BUFFER, sizeof(buf), &buf, NULL);
string aa = string((char *)&buf, sizeof(buf));
jj["buffer_id"] = aa;
Expand All @@ -149,6 +193,7 @@ void Thneed::save(const char *filename, bool save_binaries) {
jj["row_pitch"] = (int)row_pitch;
jj["size"] = (int)(height * row_pitch);
jj["needs_load"] = false;
jj["float32"] = false;

if (saved_objects.find(aa) == saved_objects.end()) {
saved_objects.insert(aa);
Expand Down
8 changes: 7 additions & 1 deletion selfdrive/modeld/thneed/thneed.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

using namespace std;

// Thneed-prefixed counterparts of the OpenCL entry points clSetKernelArg and
// clCreateProgramWithSource: the parameter lists and return types declared
// here are the same as those of the corresponding OpenCL functions.
// Only the declarations appear here; the definitions are not visible in this
// part of the header.
// NOTE(review): presumably these are the hooks Thneed uses to observe kernel
// arguments and program sources as they are set/created -- confirm against
// the implementation files before relying on that.
cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);
cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret);

namespace json11 {
class Json;
}
Expand Down Expand Up @@ -89,7 +92,7 @@ class CachedCommand: public CachedIoctl {

class Thneed {
public:
Thneed(bool do_clinit=false);
Thneed(bool do_clinit=false, cl_context _context = NULL);
void stop();
void execute(float **finputs, float *foutput, bool slow=false);
void wait();
Expand All @@ -110,9 +113,12 @@ class Thneed {
bool record = false;
int debug;
int timestamp;

#ifdef QCOM2
unique_ptr<GPUMalloc> ram;
vector<unique_ptr<CachedIoctl> > cmds;
int fd;
#endif

// all CL kernels
void find_inputs_outputs();
Expand Down
Loading

0 comments on commit b6e355a

Please sign in to comment.