More governance work, fix thneed (#2610)
* more governance work * fix thneed on qcom2 * waste doesn't OOM Co-authored-by: Comma Device <device@comma.ai>albatross
parent
327aa123f4
commit
a14ce09018
|
@ -8,6 +8,11 @@ source "$BASEDIR/launch_env.sh"
|
|||
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
|
||||
|
||||
function tici_init {
|
||||
sudo su -c 'echo "performance" > /sys/class/devfreq/soc:qcom,memlat-cpu0/governor'
|
||||
sudo su -c 'echo "performance" > /sys/class/devfreq/soc:qcom,memlat-cpu4/governor'
|
||||
}
|
||||
|
||||
function two_init {
|
||||
# Restrict Android and other system processes to the first two cores
|
||||
echo 0-1 > /dev/cpuset/background/cpus
|
||||
|
@ -123,6 +128,10 @@ function launch {
|
|||
two_init
|
||||
fi
|
||||
|
||||
if [ -f /TICI ]; then
|
||||
tici_init
|
||||
fi
|
||||
|
||||
# handle pythonpath
|
||||
ln -sfn $(pwd) /data/pythonpath
|
||||
export PYTHONPATH="$PWD"
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <sched.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <pthread.h>
|
||||
|
@ -22,21 +23,21 @@ void waste(int pid) {
|
|||
int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
|
||||
printf("set affinity to %d: %d\n", pid, ret);
|
||||
|
||||
// 256 MB
|
||||
float32x4_t *tmp = (float32x4_t *)malloc(0x1000000*sizeof(float32x4_t));
|
||||
// 128 MB
|
||||
float32x4_t *tmp = (float32x4_t *)malloc(0x800000*sizeof(float32x4_t));
|
||||
|
||||
// comment out the memset for CPU only and not RAM
|
||||
// otherwise we need this to avoid the zero page
|
||||
#ifdef MEM
|
||||
memset(tmp, 0xaa, 0x1000000*sizeof(float32x4_t));
|
||||
memset(tmp, 0xaa, 0x800000*sizeof(float32x4_t));
|
||||
#endif
|
||||
|
||||
float32x4_t out;
|
||||
|
||||
double sec = seconds_since_boot();
|
||||
while (1) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
for (int j = 0; j < 0x1000000; j+=0x20) {
|
||||
for (int i = 0; i < 0x10; i++) {
|
||||
for (int j = 0; j < 0x800000; j+=0x20) {
|
||||
out = vmlaq_f32(out, tmp[j+0], tmp[j+1]);
|
||||
out = vmlaq_f32(out, tmp[j+2], tmp[j+3]);
|
||||
out = vmlaq_f32(out, tmp[j+4], tmp[j+5]);
|
||||
|
@ -80,7 +81,7 @@ int main() {
|
|||
iavg += 1/ttime[i];
|
||||
printf("%4.2f ", ttime[i]);
|
||||
}
|
||||
double mb_per_sec = (8.*0x1000000/(1024*1024))*sizeof(float32x4_t)*iavg;
|
||||
double mb_per_sec = (16.*0x800000/(1024*1024))*sizeof(float32x4_t)*iavg;
|
||||
printf("-- %4.2f -- %.2f MB/s \n", avg/CORES, mb_per_sec);
|
||||
sleep(1);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,11 @@ if arch == "aarch64":
|
|||
lenv['CFLAGS'].append("-DUSE_THNEED")
|
||||
lenv['CXXFLAGS'].append("-DUSE_THNEED")
|
||||
elif arch == "larch64":
|
||||
libs += ['gsl', 'CB', 'pthread']
|
||||
libs += ['gsl', 'CB', 'pthread', 'dl']
|
||||
if not TEST_THNEED:
|
||||
common_src += ["thneed/thneed.cc"]
|
||||
lenv['CFLAGS'].append("-DUSE_THNEED")
|
||||
lenv['CXXFLAGS'].append("-DUSE_THNEED")
|
||||
else:
|
||||
libs += ['pthread']
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma clang diagnostic ignored "-Wexceptions"
|
||||
|
||||
#include <cassert>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "common/util.h"
|
||||
#include "snpemodel.h"
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
#include "thneed.h"
|
||||
#include <cassert>
|
||||
#include <sys/mman.h>
|
||||
#include <dlfcn.h>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include "thneed.h"
|
||||
|
||||
Thneed *g_thneed = NULL;
|
||||
int g_fd = -1;
|
||||
|
@ -31,6 +32,7 @@ extern "C" {
|
|||
int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL;
|
||||
#undef ioctl
|
||||
int ioctl(int filedes, unsigned long request, void *argp) {
|
||||
request &= 0xFFFFFFFF; // needed on QCOM2
|
||||
if (my_ioctl == NULL) my_ioctl = reinterpret_cast<decltype(my_ioctl)>(dlsym(RTLD_NEXT, "ioctl"));
|
||||
Thneed *thneed = g_thneed;
|
||||
|
||||
|
@ -440,7 +442,14 @@ cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, c
|
|||
#endif
|
||||
|
||||
void *dlsym(void *handle, const char *symbol) {
|
||||
// TODO: Find dlsym in a better way. Currently this is hand looked up in libdl.so
|
||||
#if defined QCOM
|
||||
void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4);
|
||||
#elif defined QCOM2
|
||||
void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen+0x138);
|
||||
#else
|
||||
#error "Unsupported platform for thneed"
|
||||
#endif
|
||||
if (memcmp("REAL_", symbol, 5) == 0) {
|
||||
return my_dlsym(handle, symbol+5);
|
||||
} else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) {
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
#pragma once
|
||||
|
||||
#ifndef __user
|
||||
#define __user __attribute__(())
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include "include/msm_kgsl.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <CL/cl.h>
|
||||
|
||||
using namespace std;
|
||||
|
|
Loading…
Reference in New Issue