c++ify thneed to remove memory leaks (#1737)
Co-authored-by: Comma Device <device@comma.ai>albatross
parent
78b428e534
commit
01a486308d
|
@ -8,7 +8,7 @@
|
|||
|
||||
Thneed *g_thneed = NULL;
|
||||
int g_fd = -1;
|
||||
std::map<std::pair<cl_kernel, int>, std::string> g_args;
|
||||
map<pair<cl_kernel, int>, string> g_args;
|
||||
|
||||
static inline uint64_t nanos_since_boot() {
|
||||
struct timespec t;
|
||||
|
@ -43,9 +43,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
|
|||
if (thneed->record & 1) {
|
||||
thneed->timestamp = cmd->timestamp;
|
||||
thneed->context_id = cmd->context_id;
|
||||
CachedCommand *ccmd = new CachedCommand(thneed, cmd);
|
||||
//ccmd->disassemble();
|
||||
thneed->cmds.push_back(ccmd);
|
||||
thneed->cmds.push_back(unique_ptr<CachedCommand>(new CachedCommand(thneed, cmd)));
|
||||
}
|
||||
if (thneed->record & 2) {
|
||||
printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx context_id: %u timestamp: %u\n",
|
||||
|
@ -66,9 +64,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
|
|||
}
|
||||
|
||||
if (thneed->record & 1) {
|
||||
struct kgsl_gpuobj_sync_obj *new_objs = (struct kgsl_gpuobj_sync_obj *)malloc(sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count);
|
||||
memcpy(new_objs, objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count);
|
||||
thneed->syncobjs.push_back(std::make_pair(cmd->count, new_objs));
|
||||
thneed->syncobjs.push_back(string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count));
|
||||
}
|
||||
} else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) {
|
||||
struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp;
|
||||
|
@ -111,6 +107,10 @@ GPUMalloc::GPUMalloc(int size, int fd) {
|
|||
remaining = size;
|
||||
}
|
||||
|
||||
GPUMalloc::~GPUMalloc() {
|
||||
// TODO: free the GPU malloced area
|
||||
}
|
||||
|
||||
void *GPUMalloc::alloc(int size) {
|
||||
if (size > remaining) return NULL;
|
||||
remaining -= size;
|
||||
|
@ -170,7 +170,7 @@ void CachedCommand::exec(bool wait) {
|
|||
Thneed::Thneed() {
|
||||
assert(g_fd != -1);
|
||||
fd = g_fd;
|
||||
ram = new GPUMalloc(0x40000, fd);
|
||||
ram = make_unique<GPUMalloc>(0x40000, fd);
|
||||
record = 1;
|
||||
timestamp = -1;
|
||||
g_thneed = this;
|
||||
|
@ -235,9 +235,9 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) {
|
|||
for (auto it = syncobjs.begin(); it != syncobjs.end(); ++it) {
|
||||
struct kgsl_gpuobj_sync cmd;
|
||||
|
||||
cmd.objs = (uint64_t)it->second;
|
||||
cmd.obj_len = it->first * sizeof(struct kgsl_gpuobj_sync_obj);
|
||||
cmd.count = it->first;
|
||||
cmd.objs = (uint64_t)it->data();
|
||||
cmd.obj_len = it->length();
|
||||
cmd.count = it->length() / sizeof(struct kgsl_gpuobj_sync_obj);
|
||||
|
||||
ret = ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd);
|
||||
assert(ret == 0);
|
||||
|
@ -275,7 +275,7 @@ cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size
|
|||
cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) {
|
||||
if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast<decltype(my_clSetKernelArg)>(dlsym(RTLD_NEXT, "REAL_clSetKernelArg"));
|
||||
if (arg_value != NULL) {
|
||||
g_args[std::make_pair(kernel, arg_index)] = std::string((char*)arg_value, arg_size);
|
||||
g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size);
|
||||
}
|
||||
cl_int ret = my_clSetKernelArg(kernel, arg_index, arg_size, arg_value);
|
||||
return ret;
|
||||
|
@ -310,7 +310,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
|
|||
for (int i = 0; i < num_args; i++) {
|
||||
char arg_name[0x100];
|
||||
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
|
||||
std::string arg = g_args[std::make_pair(kernel, i)];
|
||||
string arg = g_args[make_pair(kernel, i)];
|
||||
|
||||
if (strcmp(arg_name, "input") == 0 && strcmp(name, "zero_pad_image_float") == 0) {
|
||||
cl_mem mem;
|
||||
|
@ -343,7 +343,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
|
|||
char arg_name[0x100];
|
||||
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_type), arg_type, NULL);
|
||||
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
|
||||
std::string arg = g_args[std::make_pair(kernel, i)];
|
||||
string arg = g_args[make_pair(kernel, i)];
|
||||
printf(" %s %s", arg_type, arg_name);
|
||||
void *arg_value = (void*)arg.data();
|
||||
int arg_size = arg.size();
|
||||
|
@ -405,7 +405,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
|
|||
//#define SAVE_KERNELS
|
||||
|
||||
#ifdef SAVE_KERNELS
|
||||
std::map<cl_program, std::string> program_source;
|
||||
map<cl_program, string> program_source;
|
||||
|
||||
cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL;
|
||||
cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) {
|
||||
|
|
|
@ -5,11 +5,14 @@
|
|||
#include <vector>
|
||||
#include <CL/cl.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
class Thneed;
|
||||
|
||||
class GPUMalloc {
|
||||
public:
|
||||
GPUMalloc(int size, int fd);
|
||||
~GPUMalloc();
|
||||
void *alloc(int size);
|
||||
private:
|
||||
uint64_t base;
|
||||
|
@ -34,7 +37,7 @@ class Thneed {
|
|||
void stop();
|
||||
void execute(float **finputs, float *foutput, bool slow=false);
|
||||
|
||||
std::vector<cl_mem> inputs;
|
||||
vector<cl_mem> inputs;
|
||||
cl_mem output;
|
||||
|
||||
cl_command_queue command_queue;
|
||||
|
@ -43,9 +46,9 @@ class Thneed {
|
|||
// protected?
|
||||
int record;
|
||||
int timestamp;
|
||||
GPUMalloc *ram;
|
||||
std::vector<CachedCommand *> cmds;
|
||||
std::vector<std::pair<int, struct kgsl_gpuobj_sync_obj *> > syncobjs;
|
||||
unique_ptr<GPUMalloc> ram;
|
||||
vector<unique_ptr<CachedCommand> > cmds;
|
||||
vector<string> syncobjs;
|
||||
int fd;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue