Skip to content

Commit

Permalink
thneed: add flag to enable optimizer (commaai#24568)
Browse files Browse the repository at this point in the history
* improve the thneed compiler

* only init thneed if we are using the GPU

Co-authored-by: Comma Device <device@comma.ai>
  • Loading branch information
2 people authored and pull[bot] committed Oct 15, 2023
1 parent 66db4b6 commit a3ba5d0
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 22 deletions.
2 changes: 1 addition & 1 deletion selfdrive/modeld/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ common_model = lenv.Object(common_src)
if use_thneed and arch == "larch64":
fn = File("models/supercombo").abspath
compiler = lenv.Program('thneed/compile', ["thneed/compile.cc"]+common_model, LIBS=libs)
cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} {fn}.dlc {fn}_badweights.thneed --binary"
cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} --in {fn}.dlc --out {fn}_badweights.thneed --binary --optimize"

lib_paths = ':'.join(Dir(p).abspath for p in lenv["LIBPATH"])
kernel_path = os.path.join(Dir('.').abspath, "thneed", "kernels")
Expand Down
11 changes: 9 additions & 2 deletions selfdrive/modeld/runners/snpemodel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
outputBuffer = ubFactory.createUserBuffer(output, output_size * sizeof(float), outputStrides, &userBufferEncodingFloat);
outputMap.add(output_tensor_name, outputBuffer.get());
}

#ifdef USE_THNEED
if (Runtime == zdl::DlSystem::Runtime_t::GPU) {
thneed.reset(new Thneed());
}
#endif
}

void SNPEModel::addRecurrent(float *state, int state_size) {
Expand Down Expand Up @@ -176,7 +182,7 @@ std::unique_ptr<zdl::DlSystem::IUserBuffer> SNPEModel::addExtra(float *state, in
void SNPEModel::execute() {
#ifdef USE_THNEED
if (Runtime == zdl::DlSystem::Runtime_t::GPU) {
if (thneed == NULL) {
if (!thneed_recorded) {
bool ret = inputBuffer->setBufferAddress(input);
assert(ret == true);
if (use_extra) {
Expand All @@ -188,7 +194,7 @@ void SNPEModel::execute() {
PrintErrorStringAndExit();
}
memset(recurrent, 0, recurrent_size*sizeof(float));
thneed = new Thneed();
thneed->record = true;
if (!snpe->execute(inputMap, outputMap)) {
PrintErrorStringAndExit();
}
Expand Down Expand Up @@ -220,6 +226,7 @@ void SNPEModel::execute() {
assert(false);
}
free(outputs_golden);
thneed_recorded = true;
} else {
if (use_extra) {
float *inputs[5] = {recurrent, trafficConvention, desire, extra, input};
Expand Down
3 changes: 2 additions & 1 deletion selfdrive/modeld/runners/snpemodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ class SNPEModel : public RunModel {
void execute();

#ifdef USE_THNEED
Thneed *thneed = NULL;
std::unique_ptr<Thneed> thneed;
bool thneed_recorded = false;
#endif

private:
Expand Down
1 change: 0 additions & 1 deletion selfdrive/modeld/runners/thneedmodel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra) {
thneed = new Thneed(true);
thneed->record = 0;
thneed->load(path);
thneed->clexec();
thneed->find_inputs_outputs();
Expand Down
41 changes: 37 additions & 4 deletions selfdrive/modeld/thneed/compile.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <cstring>
#include <getopt.h>

#include "selfdrive/modeld/runners/snpemodel.h"
#include "selfdrive/modeld/thneed/thneed.h"
Expand All @@ -10,10 +11,36 @@

// TODO: This should probably use SNPE directly.
int main(int argc, char* argv[]) {
bool run_optimizer = false, save_binaries = false;
const char *input_file = NULL, *output_file = NULL;
static struct option long_options[] = {
{"in", required_argument, 0, 'i' },
{"out", required_argument, 0, 'o' },
{"binary", no_argument, 0, 'b' },
{"optimize", no_argument, 0, 'f' },
{0, 0, 0, 0 }
};
int long_index = 0, opt = 0;
while ((opt = getopt_long_only(argc, argv,"", long_options, &long_index)) != -1) {
switch (opt) {
case 'i': input_file = optarg; break;
case 'o': output_file = optarg; break;
case 'b': save_binaries = true; break;
case 'f': run_optimizer = true; break;
}
}

// no input?
if (!input_file) {
printf("usage: -i <input file> -o <output file> --binary --optimize\n");
return -1;
}

#define OUTPUT_SIZE 0x10000

float *output = (float*)calloc(OUTPUT_SIZE, sizeof(float));
SNPEModel mdl(argv[1], output, 0, USE_GPU_RUNTIME, true);
SNPEModel mdl(input_file, output, 0, USE_GPU_RUNTIME, true);
mdl.thneed->run_optimizer = run_optimizer;

float state[TEMPORAL_SIZE] = {0};
float desire[DESIRE_LEN] = {0};
Expand All @@ -32,14 +59,20 @@ int main(int argc, char* argv[]) {
memset(output, 0, OUTPUT_SIZE * sizeof(float));
mdl.execute();

// don't save?
if (!output_file) {
printf("no output file, exiting\n");
return 0;
}

// save model
bool save_binaries = (argc > 3) && (strcmp(argv[3], "--binary") == 0);
mdl.thneed->save(argv[2], save_binaries);
printf("saving %s with binary %d\n", output_file, save_binaries);
mdl.thneed->save(output_file, save_binaries);

// test model
auto thneed = new Thneed(true);
thneed->record = false;
thneed->load(argv[2]);
thneed->load(output_file);
thneed->clexec();
thneed->find_inputs_outputs();

Expand Down
14 changes: 2 additions & 12 deletions selfdrive/modeld/thneed/thneed.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@

#include "selfdrive/common/clutil.h"
#include "selfdrive/common/timing.h"
//#define RUN_DISASSEMBLER
#define RUN_OPTIMIZER

Thneed *g_thneed = NULL;
int g_fd = -1;
Expand Down Expand Up @@ -203,11 +201,6 @@ void CachedCommand::exec() {
for (auto &it : kq) {
it->debug_print(false);
}
#ifdef RUN_DISASSEMBLER
// assuming 2 commands
disassemble(0);
disassemble(1);
#endif
}

assert(ret == 0);
Expand All @@ -220,7 +213,6 @@ Thneed::Thneed(bool do_clinit) {
assert(g_fd != -1);
fd = g_fd;
ram = make_unique<GPUMalloc>(0x80000, fd);
record = true;
timestamp = -1;
g_thneed = this;
char *thneed_debug_env = getenv("THNEED_DEBUG");
Expand All @@ -230,7 +222,7 @@ Thneed::Thneed(bool do_clinit) {
// Stop recording CL commands: resolve the recorded command graph's input and
// output buffers, report how many commands were captured, and clear the
// record flag so later clFinish interceptions pass straight through.
void Thneed::stop() {
  find_inputs_outputs();
  printf("Thneed::stop: recorded %lu commands\n", cmds.size());
  // NOTE: the diff render kept the pre-change line "record = 0;" alongside its
  // replacement; only the boolean form from the commit is correct here.
  record = false;
}

void Thneed::find_inputs_outputs() {
Expand Down Expand Up @@ -416,9 +408,7 @@ cl_int thneed_clFinish(cl_command_queue command_queue) {
Thneed *thneed = g_thneed;

if (thneed != NULL && thneed->record) {
#ifdef RUN_OPTIMIZER
thneed->optimize();
#endif
if (thneed->run_optimizer) thneed->optimize();
return thneed->clexec();
} else {
return clFinish(command_queue);
Expand Down
3 changes: 2 additions & 1 deletion selfdrive/modeld/thneed/thneed.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class Thneed {
void execute(float **finputs, float *foutput, bool slow=false);
void wait();
int optimize();
bool run_optimizer = false;

vector<cl_mem> input_clmem;
vector<void *> inputs;
Expand All @@ -106,7 +107,7 @@ class Thneed {
int context_id;

// protected?
bool record;
bool record = false;
int debug;
int timestamp;
unique_ptr<GPUMalloc> ram;
Expand Down

0 comments on commit a3ba5d0

Please sign in to comment.