remove the clCreateProgramWithSource interceptor (#1559)

* remove the clCreateProgramWithSource interceptor

* that's old code, thneed is better

* label them thneed_, we shouldn't need to touch CL for anything not SNPE related
pull/1560/head
George Hotz 2020-05-24 03:33:36 -07:00 committed by GitHub
parent 86057f785b
commit b8571710e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 168 deletions

View File

@ -1,3 +0,0 @@
#!/bin/sh
gcc -fPIC -I /data/openpilot/phonelibs/opencl/include -shared hook.c

View File

@ -1,155 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <CL/cl.h>
#include <stdint.h>
#include <time.h>
static inline uint64_t nanos_since_boot() {
struct timespec t;
clock_gettime(CLOCK_BOOTTIME, &t);
return t.tv_sec * 1000000000ULL + t.tv_nsec;
}
struct kernel {
cl_kernel k;
const char *name;
cl_program p;
};
int k_index = 0;
struct kernel kk[0x1000] = {0};
FILE *f = NULL;
cl_program clCreateProgramWithSource(cl_context context,
cl_uint count,
const char **strings,
const size_t *lengths,
cl_int *errcode_ret) {
printf("clCreateProgramWithSource: %d\n", count);
if (f == NULL) {
f = fopen("/tmp/kernels.cl", "w");
}
fprintf(f, "/* ************************ PROGRAM BREAK ****************************/\n");
for (int i = 0; i < count; i++) {
fprintf(f, "%s\n", strings[i]);
if (i != 0) fprintf(f, "/* ************************ SECTION BREAK ****************************/\n");
}
fflush(f);
cl_program (*my_clCreateProgramWithSource)(cl_context context,
cl_uint count,
const char **strings,
const size_t *lengths,
cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource");
return my_clCreateProgramWithSource(context, count, strings, lengths, errcode_ret);
}
cl_program clCreateProgramWithBinary(cl_context context,
cl_uint num_devices,
const cl_device_id *device_list,
const size_t *lengths,
const unsigned char **binaries,
cl_int *binary_status,
cl_int *errcode_ret) {
printf("clCreateProgramWithBinary\n");
cl_program (*my_clCreateProgramWithBinary)(cl_context context,
cl_uint num_devices,
const cl_device_id *device_list,
const size_t *lengths,
const unsigned char **binaries,
cl_int *binary_status,
cl_int *errcode_ret) = dlsym(RTLD_NEXT, "REAL_clCreateProgramWithBinary");
return my_clCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret);
}
cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) {
cl_kernel (*my_clCreateKernel)(cl_program program, const char *kernel_name, cl_int *errcode_ret);
my_clCreateKernel = dlsym(RTLD_NEXT, "REAL_clCreateKernel");
cl_kernel ret = my_clCreateKernel(program, kernel_name, errcode_ret);
//printf("clCreateKernel: %s -> %p\n", kernel_name, ret);
char *tmp = (char*)malloc(strlen(kernel_name)+1);
strcpy(tmp, kernel_name);
kk[k_index].k = ret;
kk[k_index].name = tmp;
kk[k_index].p = program;
k_index++;
return ret;
}
uint64_t start_time = 0;
int cnt = 0;
cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue,
cl_kernel kernel,
cl_uint work_dim,
const size_t *global_work_offset,
const size_t *global_work_size,
const size_t *local_work_size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) {
cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint,
const size_t *, const size_t *, const size_t *,
cl_uint, const cl_event *, cl_event *) = NULL;
my_clEnqueueNDRangeKernel = dlsym(RTLD_NEXT, "REAL_clEnqueueNDRangeKernel");
if (start_time == 0) {
start_time = nanos_since_boot();
}
// get kernel name
const char *name = NULL;
cl_program p;
for (int i = 0; i < k_index; i++) {
if (kk[i].k == kernel) {
name = kk[i].name;
p = kk[i].p;
break;
}
}
uint64_t tb = nanos_since_boot();
cl_int ret = my_clEnqueueNDRangeKernel(command_queue, kernel, work_dim,
global_work_offset, global_work_size, local_work_size,
num_events_in_wait_list, event_wait_list, event);
uint64_t te = nanos_since_boot();
printf("%10lu run%8d in %5ld us command_queue:%p work_dim:%d event:%p ", (tb-start_time)/1000, cnt++, (te-tb)/1000,
command_queue, work_dim, event);
for (int i = 0; i < work_dim; i++) {
printf("%4zu ", global_work_size[i]);
}
printf("%p %s\n", p, name);
return ret;
}
void *dlsym(void *handle, const char *symbol) {
void *(*my_dlsym)(void *handle, const char *symbol) = (void*)dlopen-0x2d4;
if (memcmp("REAL_", symbol, 5) == 0) {
return my_dlsym(handle, symbol+5);
} else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) {
return clEnqueueNDRangeKernel;
} else if (strcmp("clCreateKernel", symbol) == 0) {
return clCreateKernel;
} else if (strcmp("clCreateProgramWithSource", symbol) == 0) {
return clCreateProgramWithSource;
} else if (strcmp("clCreateProgramWithBinary", symbol) == 0) {
return clCreateProgramWithBinary;
} else {
printf("dlsym %s\n", symbol);
return my_dlsym(handle, symbol);
}
}

View File

@ -269,8 +269,10 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) {
}
}
// TODO: with a different way of getting the input and output buffers, we don't have to intercept CL at all
cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) = NULL;
cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) {
cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) {
if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast<decltype(my_clSetKernelArg)>(dlsym(RTLD_NEXT, "REAL_clSetKernelArg"));
if (arg_value != NULL) {
g_args[std::make_pair(kernel, arg_index)] = std::string((char*)arg_value, arg_size);
@ -280,7 +282,7 @@ cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, cons
}
cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) = NULL;
cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue,
cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
cl_kernel kernel,
cl_uint work_dim,
const size_t *global_work_offset,
@ -403,17 +405,15 @@ cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue,
//#define SAVE_KERNELS
#ifdef SAVE_KERNELS
std::map<cl_program, std::string> program_source;
#endif
std::map<cl_program, std::string> program_source;
cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL;
cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) {
cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) {
if (my_clCreateProgramWithSource == NULL) my_clCreateProgramWithSource = reinterpret_cast<decltype(my_clCreateProgramWithSource)>(dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource"));
assert(count == 1);
size_t my_lengths[1];
my_lengths[0] = lengths[0];
#ifdef SAVE_KERNELS
char fn[0x100];
snprintf(fn, sizeof(fn), "/tmp/program_%zu.cl", strlen(strings[0]));
FILE *f = fopen(fn, "wb");
@ -433,22 +433,24 @@ cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const ch
}
program_source[ret] = strings[0];
#endif
cl_program ret = my_clCreateProgramWithSource(context, count, strings, my_lengths, errcode_ret);
return ret;
}
#endif
void *dlsym(void *handle, const char *symbol) {
void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4);
if (memcmp("REAL_", symbol, 5) == 0) {
return my_dlsym(handle, symbol+5);
} else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) {
return (void*)clEnqueueNDRangeKernel;
return (void*)thneed_clEnqueueNDRangeKernel;
} else if (strcmp("clSetKernelArg", symbol) == 0) {
return (void*)clSetKernelArg;
return (void*)thneed_clSetKernelArg;
#ifdef SAVE_KERNELS
} else if (strcmp("clCreateProgramWithSource", symbol) == 0) {
return (void*)clCreateProgramWithSource;
return (void*)thneed_clCreateProgramWithSource;
#endif
} else {
return my_dlsym(handle, symbol);
}