minor cleanups, fix non binary compile (#23882)
Co-authored-by: Comma Device <device@comma.ai>pull/23890/head
parent
8eab496ce3
commit
825c924c1c
|
@ -35,6 +35,14 @@ int main(int argc, char* argv[]) {
|
||||||
// save model
|
// save model
|
||||||
bool save_binaries = (argc > 3) && (strcmp(argv[3], "--binary") == 0);
|
bool save_binaries = (argc > 3) && (strcmp(argv[3], "--binary") == 0);
|
||||||
mdl.thneed->save(argv[2], save_binaries);
|
mdl.thneed->save(argv[2], save_binaries);
|
||||||
|
|
||||||
|
// test model
|
||||||
|
auto thneed = new Thneed(true);
|
||||||
|
thneed->record &= ~THNEED_RECORD;
|
||||||
|
thneed->load(argv[2]);
|
||||||
|
thneed->clexec();
|
||||||
|
thneed->find_inputs_outputs();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
#define SUPPORT_DILATION
|
#define SUPPORT_DILATION
|
||||||
|
|
||||||
__kernel void convolution_horizontal_reduced_reads(
|
__kernel void convolution_horizontal_reduced_reads(
|
||||||
#include "convolution_.cl"
|
|
||||||
|
|
|
@ -2,4 +2,3 @@
|
||||||
#define SUPPORT_ACCUMULATION
|
#define SUPPORT_ACCUMULATION
|
||||||
|
|
||||||
__kernel void convolution_horizontal_reduced_reads_1x1(
|
__kernel void convolution_horizontal_reduced_reads_1x1(
|
||||||
#include "convolution_.cl"
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
#define NUM_OUTPUTS 5
|
#define NUM_OUTPUTS 5
|
||||||
|
|
||||||
__kernel void convolution_horizontal_reduced_reads_5_outputs(
|
__kernel void convolution_horizontal_reduced_reads_5_outputs(
|
||||||
#include "convolution_.cl"
|
|
||||||
|
|
|
@ -2,4 +2,3 @@
|
||||||
#define SUPPORT_DILATION
|
#define SUPPORT_DILATION
|
||||||
|
|
||||||
__kernel void convolution_horizontal_reduced_reads_depthwise(
|
__kernel void convolution_horizontal_reduced_reads_depthwise(
|
||||||
#include "convolution_.cl"
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
#define DEPTHWISE
|
#define DEPTHWISE
|
||||||
|
|
||||||
__kernel void convolution_horizontal_reduced_reads_depthwise_stride_1(
|
__kernel void convolution_horizontal_reduced_reads_depthwise_stride_1(
|
||||||
#include "convolution_.cl"
|
|
||||||
|
|
|
@ -4,6 +4,9 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "thneed.h"
|
#include "thneed.h"
|
||||||
|
|
||||||
|
#include "selfdrive/common/util.h"
|
||||||
|
#include "selfdrive/common/clutil.h"
|
||||||
|
|
||||||
extern map<cl_program, string> g_program_source;
|
extern map<cl_program, string> g_program_source;
|
||||||
|
|
||||||
static int is_same_size_image(cl_mem a, cl_mem b) {
|
static int is_same_size_image(cl_mem a, cl_mem b) {
|
||||||
|
@ -63,6 +66,14 @@ static cl_mem make_image_like(cl_context context, cl_mem val) {
|
||||||
int Thneed::optimize() {
|
int Thneed::optimize() {
|
||||||
const char *kernel_path = getenv("KERNEL_PATH");
|
const char *kernel_path = getenv("KERNEL_PATH");
|
||||||
if (!kernel_path) { kernel_path = "/data/openpilot/selfdrive/modeld/thneed/kernels"; printf("no KERNEL_PATH set, defaulting to %s\n", kernel_path); }
|
if (!kernel_path) { kernel_path = "/data/openpilot/selfdrive/modeld/thneed/kernels"; printf("no KERNEL_PATH set, defaulting to %s\n", kernel_path); }
|
||||||
|
|
||||||
|
string convolution_;
|
||||||
|
{
|
||||||
|
char fn[0x100];
|
||||||
|
snprintf(fn, sizeof(fn), "%s/%s.cl", kernel_path, "convolution_");
|
||||||
|
convolution_ = util::read_file(fn);
|
||||||
|
}
|
||||||
|
|
||||||
// load custom kernels
|
// load custom kernels
|
||||||
map<string, cl_program> g_programs;
|
map<string, cl_program> g_programs;
|
||||||
for (auto &k : kq) {
|
for (auto &k : kq) {
|
||||||
|
@ -70,33 +81,17 @@ int Thneed::optimize() {
|
||||||
if (g_programs.find(k->name) == g_programs.end()) {
|
if (g_programs.find(k->name) == g_programs.end()) {
|
||||||
char fn[0x100];
|
char fn[0x100];
|
||||||
snprintf(fn, sizeof(fn), "%s/%s.cl", kernel_path, k->name.c_str());
|
snprintf(fn, sizeof(fn), "%s/%s.cl", kernel_path, k->name.c_str());
|
||||||
FILE *g = fopen(fn, "rb");
|
if (util::file_exists(fn)) {
|
||||||
if (g != NULL) {
|
string kernel_src = util::read_file(fn);
|
||||||
char *src[0x10000];
|
if (k->name.rfind("convolution_", 0) == 0) {
|
||||||
const char *srcs[1]; srcs[0] = (const char *)src;
|
kernel_src += convolution_;
|
||||||
memset(src, 0, sizeof(src));
|
|
||||||
size_t length = fread(src, 1, sizeof(src), g);
|
|
||||||
fclose(g);
|
|
||||||
|
|
||||||
printf("building kernel %s\n", k->name.c_str());
|
|
||||||
k->program = clCreateProgramWithSource(context, 1, srcs, &length, NULL);
|
|
||||||
char options[0x100];
|
|
||||||
snprintf(options, sizeof(options)-1, "-I %s", kernel_path);
|
|
||||||
int err = clBuildProgram(k->program, 1, &device_id, options, NULL, NULL);
|
|
||||||
|
|
||||||
if (err != 0) {
|
|
||||||
printf("got err %d\n", err);
|
|
||||||
size_t err_length;
|
|
||||||
char buffer[2048];
|
|
||||||
clGetProgramBuildInfo(k->program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &err_length);
|
|
||||||
buffer[err_length] = '\0';
|
|
||||||
printf("%s\n", buffer);
|
|
||||||
}
|
}
|
||||||
assert(err == 0);
|
printf("building kernel %s with len %lu\n", k->name.c_str(), kernel_src.length());
|
||||||
|
k->program = cl_program_from_source(context, device_id, kernel_src);
|
||||||
|
|
||||||
// save in cache
|
// save in cache
|
||||||
g_programs[k->name] = k->program;
|
g_programs[k->name] = k->program;
|
||||||
g_program_source[k->program] = string((char *)src, length);
|
g_program_source[k->program] = kernel_src;
|
||||||
} else {
|
} else {
|
||||||
g_programs[k->name] = NULL;
|
g_programs[k->name] = NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,7 +70,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
|
||||||
struct kgsl_gpuobj_sync *cmd = (struct kgsl_gpuobj_sync *)argp;
|
struct kgsl_gpuobj_sync *cmd = (struct kgsl_gpuobj_sync *)argp;
|
||||||
struct kgsl_gpuobj_sync_obj *objs = (struct kgsl_gpuobj_sync_obj *)(cmd->objs);
|
struct kgsl_gpuobj_sync_obj *objs = (struct kgsl_gpuobj_sync_obj *)(cmd->objs);
|
||||||
|
|
||||||
if (thneed->record & THNEED_DEBUG) {
|
if (thneed->record & THNEED_VERBOSE_DEBUG) {
|
||||||
printf("IOCTL_KGSL_GPUOBJ_SYNC count:%d ", cmd->count);
|
printf("IOCTL_KGSL_GPUOBJ_SYNC count:%d ", cmd->count);
|
||||||
for (int i = 0; i < cmd->count; i++) {
|
for (int i = 0; i < cmd->count; i++) {
|
||||||
printf(" -- offset:0x%lx len:0x%lx id:%d op:%d ", objs[i].offset, objs[i].length, objs[i].id, objs[i].op);
|
printf(" -- offset:0x%lx len:0x%lx id:%d op:%d ", objs[i].offset, objs[i].length, objs[i].id, objs[i].op);
|
||||||
|
|
Loading…
Reference in New Issue