diff --git a/selfdrive/modeld/thneed/compile.cc b/selfdrive/modeld/thneed/compile.cc index a432ce4d0fbc248..c2a357fcf1d1b7e 100644 --- a/selfdrive/modeld/thneed/compile.cc +++ b/selfdrive/modeld/thneed/compile.cc @@ -35,6 +35,14 @@ int main(int argc, char* argv[]) { // save model bool save_binaries = (argc > 3) && (strcmp(argv[3], "--binary") == 0); mdl.thneed->save(argv[2], save_binaries); + + // test model + auto thneed = new Thneed(true); + thneed->record &= ~THNEED_RECORD; + thneed->load(argv[2]); + thneed->clexec(); + thneed->find_inputs_outputs(); + return 0; } diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads.cl index bc8add79aa36a66..fcea88ce90e97d5 100644 --- a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads.cl +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads.cl @@ -1,4 +1,3 @@ #define SUPPORT_DILATION __kernel void convolution_horizontal_reduced_reads( -#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_1x1.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_1x1.cl index 75a090ca22829fa..0d15d8058179ff0 100644 --- a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_1x1.cl +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_1x1.cl @@ -2,4 +2,3 @@ #define SUPPORT_ACCUMULATION __kernel void convolution_horizontal_reduced_reads_1x1( -#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_5_outputs.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_5_outputs.cl index 980e7d1f672109a..69421fc2a938c92 100644 --- a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_5_outputs.cl +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_5_outputs.cl @@ -1,4 +1,3 @@ #define NUM_OUTPUTS 5 __kernel void convolution_horizontal_reduced_reads_5_outputs( -#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise.cl index 80be0da9242dcd9..50e39941d404227 100644 --- a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise.cl +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise.cl @@ -2,4 +2,3 @@ #define SUPPORT_DILATION __kernel void convolution_horizontal_reduced_reads_depthwise( -#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise_stride_1.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise_stride_1.cl index 3d651c229bd49be..b347cb6c7186b6c 100644 --- a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise_stride_1.cl +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise_stride_1.cl @@ -1,4 +1,3 @@ #define DEPTHWISE __kernel void convolution_horizontal_reduced_reads_depthwise_stride_1( -#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/optimizer.cc b/selfdrive/modeld/thneed/optimizer.cc index 3c7c41873d84d14..b516b5fa504b8cc 100644 --- a/selfdrive/modeld/thneed/optimizer.cc +++ b/selfdrive/modeld/thneed/optimizer.cc @@ -4,6 +4,9 @@ #include #include "thneed.h" +#include "selfdrive/common/util.h" +#include "selfdrive/common/clutil.h" + extern map g_program_source; static int is_same_size_image(cl_mem a, cl_mem b) { @@ -63,6 +66,14 @@ static cl_mem make_image_like(cl_context context, cl_mem val) { int Thneed::optimize() { const char *kernel_path = getenv("KERNEL_PATH"); if (!kernel_path) { kernel_path = "/data/openpilot/selfdrive/modeld/thneed/kernels"; printf("no KERNEL_PATH set, defaulting to %s\n", kernel_path); } + + string convolution_; + { + char fn[0x100]; + snprintf(fn, sizeof(fn), "%s/%s.cl", kernel_path, "convolution_"); + convolution_ = util::read_file(fn); + } + // load custom kernels map g_programs; for (auto &k : kq) { @@ -70,33 +81,17 @@ int Thneed::optimize() { if (g_programs.find(k->name) == g_programs.end()) { char fn[0x100]; snprintf(fn, sizeof(fn), "%s/%s.cl", kernel_path, k->name.c_str()); - FILE *g = fopen(fn, "rb"); - if (g != NULL) { - char *src[0x10000]; - const char *srcs[1]; srcs[0] = (const char *)src; - memset(src, 0, sizeof(src)); - size_t length = fread(src, 1, sizeof(src), g); - fclose(g); - - printf("building kernel %s\n", k->name.c_str()); - k->program = clCreateProgramWithSource(context, 1, srcs, &length, NULL); - char options[0x100]; - snprintf(options, sizeof(options)-1, "-I %s", kernel_path); - int err = clBuildProgram(k->program, 1, &device_id, options, NULL, NULL); - - if (err != 0) { - printf("got err %d\n", err); - size_t err_length; - char buffer[2048]; - clGetProgramBuildInfo(k->program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &err_length); - buffer[err_length] = '\0'; - printf("%s\n", buffer); + if (util::file_exists(fn)) { + string kernel_src = util::read_file(fn); + if (k->name.rfind("convolution_", 0) == 0) { + kernel_src += convolution_; } - assert(err == 0); + printf("building kernel %s with len %lu\n", k->name.c_str(), kernel_src.length()); + k->program = cl_program_from_source(context, device_id, kernel_src); // save in cache g_programs[k->name] = k->program; - g_program_source[k->program] = string((char *)src, length); + g_program_source[k->program] = kernel_src; } else { g_programs[k->name] = NULL; } diff --git a/selfdrive/modeld/thneed/thneed.cc b/selfdrive/modeld/thneed/thneed.cc index 0f36d6834a94809..aa1caa5cd92dc39 100644 --- a/selfdrive/modeld/thneed/thneed.cc +++ b/selfdrive/modeld/thneed/thneed.cc @@ -70,7 +70,7 @@ int ioctl(int filedes, unsigned long request, void *argp) { struct kgsl_gpuobj_sync *cmd = (struct kgsl_gpuobj_sync *)argp; struct kgsl_gpuobj_sync_obj *objs = (struct kgsl_gpuobj_sync_obj *)(cmd->objs); - if (thneed->record & THNEED_DEBUG) { + if (thneed->record & THNEED_VERBOSE_DEBUG) { printf("IOCTL_KGSL_GPUOBJ_SYNC count:%d ", cmd->count); for (int i = 0; i < cmd->count; i++) { printf(" -- offset:0x%lx len:0x%lx id:%d op:%d ", objs[i].offset, objs[i].length, objs[i].id, objs[i].op);