More governance work, fix thneed (#2610)
* more governance work * fix thneed on qcom2 * waste doesn't OOM Co-authored-by: Comma Device <device@comma.ai>pull/2639/head
parent
327aa123f4
commit
a14ce09018
|
@ -8,6 +8,11 @@ source "$BASEDIR/launch_env.sh"
|
||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
|
||||||
|
|
||||||
|
function tici_init {
|
||||||
|
sudo su -c 'echo "performance" > /sys/class/devfreq/soc:qcom,memlat-cpu0/governor'
|
||||||
|
sudo su -c 'echo "performance" > /sys/class/devfreq/soc:qcom,memlat-cpu4/governor'
|
||||||
|
}
|
||||||
|
|
||||||
function two_init {
|
function two_init {
|
||||||
# Restrict Android and other system processes to the first two cores
|
# Restrict Android and other system processes to the first two cores
|
||||||
echo 0-1 > /dev/cpuset/background/cpus
|
echo 0-1 > /dev/cpuset/background/cpus
|
||||||
|
@ -123,6 +128,10 @@ function launch {
|
||||||
two_init
|
two_init
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -f /TICI ]; then
|
||||||
|
tici_init
|
||||||
|
fi
|
||||||
|
|
||||||
# handle pythonpath
|
# handle pythonpath
|
||||||
ln -sfn $(pwd) /data/pythonpath
|
ln -sfn $(pwd) /data/pythonpath
|
||||||
export PYTHONPATH="$PWD"
|
export PYTHONPATH="$PWD"
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
@ -22,21 +23,21 @@ void waste(int pid) {
|
||||||
int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
|
int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
|
||||||
printf("set affinity to %d: %d\n", pid, ret);
|
printf("set affinity to %d: %d\n", pid, ret);
|
||||||
|
|
||||||
// 256 MB
|
// 128 MB
|
||||||
float32x4_t *tmp = (float32x4_t *)malloc(0x1000000*sizeof(float32x4_t));
|
float32x4_t *tmp = (float32x4_t *)malloc(0x800000*sizeof(float32x4_t));
|
||||||
|
|
||||||
// comment out the memset for CPU only and not RAM
|
// comment out the memset for CPU only and not RAM
|
||||||
// otherwise we need this to avoid the zero page
|
// otherwise we need this to avoid the zero page
|
||||||
#ifdef MEM
|
#ifdef MEM
|
||||||
memset(tmp, 0xaa, 0x1000000*sizeof(float32x4_t));
|
memset(tmp, 0xaa, 0x800000*sizeof(float32x4_t));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
float32x4_t out;
|
float32x4_t out;
|
||||||
|
|
||||||
double sec = seconds_since_boot();
|
double sec = seconds_since_boot();
|
||||||
while (1) {
|
while (1) {
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 0x10; i++) {
|
||||||
for (int j = 0; j < 0x1000000; j+=0x20) {
|
for (int j = 0; j < 0x800000; j+=0x20) {
|
||||||
out = vmlaq_f32(out, tmp[j+0], tmp[j+1]);
|
out = vmlaq_f32(out, tmp[j+0], tmp[j+1]);
|
||||||
out = vmlaq_f32(out, tmp[j+2], tmp[j+3]);
|
out = vmlaq_f32(out, tmp[j+2], tmp[j+3]);
|
||||||
out = vmlaq_f32(out, tmp[j+4], tmp[j+5]);
|
out = vmlaq_f32(out, tmp[j+4], tmp[j+5]);
|
||||||
|
@ -80,7 +81,7 @@ int main() {
|
||||||
iavg += 1/ttime[i];
|
iavg += 1/ttime[i];
|
||||||
printf("%4.2f ", ttime[i]);
|
printf("%4.2f ", ttime[i]);
|
||||||
}
|
}
|
||||||
double mb_per_sec = (8.*0x1000000/(1024*1024))*sizeof(float32x4_t)*iavg;
|
double mb_per_sec = (16.*0x800000/(1024*1024))*sizeof(float32x4_t)*iavg;
|
||||||
printf("-- %4.2f -- %.2f MB/s \n", avg/CORES, mb_per_sec);
|
printf("-- %4.2f -- %.2f MB/s \n", avg/CORES, mb_per_sec);
|
||||||
sleep(1);
|
sleep(1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,11 @@ if arch == "aarch64":
|
||||||
lenv['CFLAGS'].append("-DUSE_THNEED")
|
lenv['CFLAGS'].append("-DUSE_THNEED")
|
||||||
lenv['CXXFLAGS'].append("-DUSE_THNEED")
|
lenv['CXXFLAGS'].append("-DUSE_THNEED")
|
||||||
elif arch == "larch64":
|
elif arch == "larch64":
|
||||||
libs += ['gsl', 'CB', 'pthread']
|
libs += ['gsl', 'CB', 'pthread', 'dl']
|
||||||
|
if not TEST_THNEED:
|
||||||
|
common_src += ["thneed/thneed.cc"]
|
||||||
|
lenv['CFLAGS'].append("-DUSE_THNEED")
|
||||||
|
lenv['CXXFLAGS'].append("-DUSE_THNEED")
|
||||||
else:
|
else:
|
||||||
libs += ['pthread']
|
libs += ['pthread']
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#pragma clang diagnostic ignored "-Wexceptions"
|
#pragma clang diagnostic ignored "-Wexceptions"
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "common/util.h"
|
#include "common/util.h"
|
||||||
#include "snpemodel.h"
|
#include "snpemodel.h"
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
#include "thneed.h"
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <string.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#include "thneed.h"
|
||||||
|
|
||||||
Thneed *g_thneed = NULL;
|
Thneed *g_thneed = NULL;
|
||||||
int g_fd = -1;
|
int g_fd = -1;
|
||||||
|
@ -31,6 +32,7 @@ extern "C" {
|
||||||
int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL;
|
int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL;
|
||||||
#undef ioctl
|
#undef ioctl
|
||||||
int ioctl(int filedes, unsigned long request, void *argp) {
|
int ioctl(int filedes, unsigned long request, void *argp) {
|
||||||
|
request &= 0xFFFFFFFF; // needed on QCOM2
|
||||||
if (my_ioctl == NULL) my_ioctl = reinterpret_cast<decltype(my_ioctl)>(dlsym(RTLD_NEXT, "ioctl"));
|
if (my_ioctl == NULL) my_ioctl = reinterpret_cast<decltype(my_ioctl)>(dlsym(RTLD_NEXT, "ioctl"));
|
||||||
Thneed *thneed = g_thneed;
|
Thneed *thneed = g_thneed;
|
||||||
|
|
||||||
|
@ -440,7 +442,14 @@ cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, c
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void *dlsym(void *handle, const char *symbol) {
|
void *dlsym(void *handle, const char *symbol) {
|
||||||
|
// TODO: Find dlsym in a better way. Currently this is hand looked up in libdl.so
|
||||||
|
#if defined QCOM
|
||||||
void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4);
|
void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4);
|
||||||
|
#elif defined QCOM2
|
||||||
|
void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen+0x138);
|
||||||
|
#else
|
||||||
|
#error "Unsupported platform for thneed"
|
||||||
|
#endif
|
||||||
if (memcmp("REAL_", symbol, 5) == 0) {
|
if (memcmp("REAL_", symbol, 5) == 0) {
|
||||||
return my_dlsym(handle, symbol+5);
|
return my_dlsym(handle, symbol+5);
|
||||||
} else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) {
|
} else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) {
|
||||||
|
|
|
@ -1,8 +1,14 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef __user
|
||||||
|
#define __user __attribute__(())
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "include/msm_kgsl.h"
|
#include "include/msm_kgsl.h"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
Loading…
Reference in New Issue