c++ify thneed to remove memory leaks (#1737)

Co-authored-by: Comma Device <device@comma.ai>
albatross
George Hotz 2020-06-19 15:53:48 -07:00 committed by GitHub
parent 78b428e534
commit 01a486308d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 19 deletions

View File

@ -8,7 +8,7 @@
Thneed *g_thneed = NULL;
int g_fd = -1;
std::map<std::pair<cl_kernel, int>, std::string> g_args;
map<pair<cl_kernel, int>, string> g_args;
static inline uint64_t nanos_since_boot() {
struct timespec t;
@ -43,9 +43,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
if (thneed->record & 1) {
thneed->timestamp = cmd->timestamp;
thneed->context_id = cmd->context_id;
CachedCommand *ccmd = new CachedCommand(thneed, cmd);
//ccmd->disassemble();
thneed->cmds.push_back(ccmd);
thneed->cmds.push_back(unique_ptr<CachedCommand>(new CachedCommand(thneed, cmd)));
}
if (thneed->record & 2) {
printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx context_id: %u timestamp: %u\n",
@ -66,9 +64,7 @@ int ioctl(int filedes, unsigned long request, void *argp) {
}
if (thneed->record & 1) {
struct kgsl_gpuobj_sync_obj *new_objs = (struct kgsl_gpuobj_sync_obj *)malloc(sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count);
memcpy(new_objs, objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count);
thneed->syncobjs.push_back(std::make_pair(cmd->count, new_objs));
thneed->syncobjs.push_back(string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count));
}
} else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) {
struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp;
@ -111,6 +107,10 @@ GPUMalloc::GPUMalloc(int size, int fd) {
remaining = size;
}
GPUMalloc::~GPUMalloc() {
// TODO: free the GPU malloced area
}
void *GPUMalloc::alloc(int size) {
if (size > remaining) return NULL;
remaining -= size;
@ -170,7 +170,7 @@ void CachedCommand::exec(bool wait) {
Thneed::Thneed() {
assert(g_fd != -1);
fd = g_fd;
ram = new GPUMalloc(0x40000, fd);
ram = make_unique<GPUMalloc>(0x40000, fd);
record = 1;
timestamp = -1;
g_thneed = this;
@ -235,9 +235,9 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) {
for (auto it = syncobjs.begin(); it != syncobjs.end(); ++it) {
struct kgsl_gpuobj_sync cmd;
cmd.objs = (uint64_t)it->second;
cmd.obj_len = it->first * sizeof(struct kgsl_gpuobj_sync_obj);
cmd.count = it->first;
cmd.objs = (uint64_t)it->data();
cmd.obj_len = it->length();
cmd.count = it->length() / sizeof(struct kgsl_gpuobj_sync_obj);
ret = ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd);
assert(ret == 0);
@ -275,7 +275,7 @@ cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size
cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) {
if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast<decltype(my_clSetKernelArg)>(dlsym(RTLD_NEXT, "REAL_clSetKernelArg"));
if (arg_value != NULL) {
g_args[std::make_pair(kernel, arg_index)] = std::string((char*)arg_value, arg_size);
g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size);
}
cl_int ret = my_clSetKernelArg(kernel, arg_index, arg_size, arg_value);
return ret;
@ -310,7 +310,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
for (int i = 0; i < num_args; i++) {
char arg_name[0x100];
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
std::string arg = g_args[std::make_pair(kernel, i)];
string arg = g_args[make_pair(kernel, i)];
if (strcmp(arg_name, "input") == 0 && strcmp(name, "zero_pad_image_float") == 0) {
cl_mem mem;
@ -343,7 +343,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
char arg_name[0x100];
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_type), arg_type, NULL);
clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
std::string arg = g_args[std::make_pair(kernel, i)];
string arg = g_args[make_pair(kernel, i)];
printf(" %s %s", arg_type, arg_name);
void *arg_value = (void*)arg.data();
int arg_size = arg.size();
@ -405,7 +405,7 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue,
//#define SAVE_KERNELS
#ifdef SAVE_KERNELS
std::map<cl_program, std::string> program_source;
map<cl_program, string> program_source;
cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL;
cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) {

View File

@ -5,11 +5,14 @@
#include <vector>
#include <CL/cl.h>
using namespace std;
class Thneed;
class GPUMalloc {
public:
GPUMalloc(int size, int fd);
~GPUMalloc();
void *alloc(int size);
private:
uint64_t base;
@ -34,7 +37,7 @@ class Thneed {
void stop();
void execute(float **finputs, float *foutput, bool slow=false);
std::vector<cl_mem> inputs;
vector<cl_mem> inputs;
cl_mem output;
cl_command_queue command_queue;
@ -43,9 +46,9 @@ class Thneed {
// protected?
int record;
int timestamp;
GPUMalloc *ram;
std::vector<CachedCommand *> cmds;
std::vector<std::pair<int, struct kgsl_gpuobj_sync_obj *> > syncobjs;
unique_ptr<GPUMalloc> ram;
vector<unique_ptr<CachedCommand> > cmds;
vector<string> syncobjs;
int fd;
};