More governance work, fix thneed (#2610)

* more governance work

* fix thneed on qcom2

* waste doesn't OOM

Co-authored-by: Comma Device <device@comma.ai>
pull/2639/head
George Hotz 2020-11-23 17:52:28 -08:00 committed by GitHub
parent 327aa123f4
commit a14ce09018
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 38 additions and 8 deletions

View File

@ -8,6 +8,11 @@ source "$BASEDIR/launch_env.sh"
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
function tici_init {
  # Switch the memlat devfreq governor to "performance" for both the cpu0 and
  # cpu4 nodes. Each write runs in a root shell (sudo su -c) so that the
  # redirection into sysfs is performed with root privileges.
  local memlat_node
  for memlat_node in cpu0 cpu4; do
    sudo su -c "echo \"performance\" > /sys/class/devfreq/soc:qcom,memlat-${memlat_node}/governor"
  done
}
function two_init {
# Restrict Android and other system processes to the first two cores
echo 0-1 > /dev/cpuset/background/cpus
@ -123,6 +128,10 @@ function launch {
two_init
fi
if [ -f /TICI ]; then
tici_init
fi
# handle pythonpath
ln -sfn $(pwd) /data/pythonpath
export PYTHONPATH="$PWD"

View File

@ -5,6 +5,7 @@
#include <stdio.h>
#include <math.h>
#include <sched.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
@ -22,21 +23,21 @@ void waste(int pid) {
int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
printf("set affinity to %d: %d\n", pid, ret);
// 256 MB
float32x4_t *tmp = (float32x4_t *)malloc(0x1000000*sizeof(float32x4_t));
// 128 MB
float32x4_t *tmp = (float32x4_t *)malloc(0x800000*sizeof(float32x4_t));
// build without MEM defined to skip the memset and load the CPU only, not RAM
// otherwise the memset is needed so the reads do not all hit the zero page
#ifdef MEM
memset(tmp, 0xaa, 0x1000000*sizeof(float32x4_t));
memset(tmp, 0xaa, 0x800000*sizeof(float32x4_t));
#endif
float32x4_t out;
double sec = seconds_since_boot();
while (1) {
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 0x1000000; j+=0x20) {
for (int i = 0; i < 0x10; i++) {
for (int j = 0; j < 0x800000; j+=0x20) {
out = vmlaq_f32(out, tmp[j+0], tmp[j+1]);
out = vmlaq_f32(out, tmp[j+2], tmp[j+3]);
out = vmlaq_f32(out, tmp[j+4], tmp[j+5]);
@ -80,7 +81,7 @@ int main() {
iavg += 1/ttime[i];
printf("%4.2f ", ttime[i]);
}
double mb_per_sec = (8.*0x1000000/(1024*1024))*sizeof(float32x4_t)*iavg;
double mb_per_sec = (16.*0x800000/(1024*1024))*sizeof(float32x4_t)*iavg;
printf("-- %4.2f -- %.2f MB/s \n", avg/CORES, mb_per_sec);
sleep(1);
}

View File

@ -19,7 +19,11 @@ if arch == "aarch64":
lenv['CFLAGS'].append("-DUSE_THNEED")
lenv['CXXFLAGS'].append("-DUSE_THNEED")
elif arch == "larch64":
libs += ['gsl', 'CB', 'pthread']
libs += ['gsl', 'CB', 'pthread', 'dl']
if not TEST_THNEED:
common_src += ["thneed/thneed.cc"]
lenv['CFLAGS'].append("-DUSE_THNEED")
lenv['CXXFLAGS'].append("-DUSE_THNEED")
else:
libs += ['pthread']

View File

@ -1,6 +1,7 @@
#pragma clang diagnostic ignored "-Wexceptions"
#include <cassert>
#include <string.h>
#include <stdlib.h>
#include "common/util.h"
#include "snpemodel.h"

View File

@ -1,10 +1,11 @@
#include "thneed.h"
#include <cassert>
#include <sys/mman.h>
#include <dlfcn.h>
#include <map>
#include <string>
#include <string.h>
#include <errno.h>
#include "thneed.h"
Thneed *g_thneed = NULL;
int g_fd = -1;
@ -31,6 +32,7 @@ extern "C" {
int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL;
#undef ioctl
int ioctl(int filedes, unsigned long request, void *argp) {
request &= 0xFFFFFFFF; // needed on QCOM2
if (my_ioctl == NULL) my_ioctl = reinterpret_cast<decltype(my_ioctl)>(dlsym(RTLD_NEXT, "ioctl"));
Thneed *thneed = g_thneed;
@ -440,7 +442,14 @@ cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, c
#endif
void *dlsym(void *handle, const char *symbol) {
// TODO: Find dlsym in a better way. Currently the offset from dlopen is looked up by hand in libdl.so
#if defined QCOM
void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4);
#elif defined QCOM2
void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen+0x138);
#else
#error "Unsupported platform for thneed"
#endif
if (memcmp("REAL_", symbol, 5) == 0) {
return my_dlsym(handle, symbol+5);
} else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) {

View File

@ -1,8 +1,14 @@
#pragma once
#ifndef __user
#define __user __attribute__(())
#endif
#include <stdlib.h>
#include <stdint.h>
#include "include/msm_kgsl.h"
#include <vector>
#include <memory>
#include <CL/cl.h>
using namespace std;