// gcc -O2 waste.c -lpthread -owaste // gcc -O2 waste.c -lpthread -owaste -DMEM #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include "../selfdrive/common/timing.h" int get_nprocs(void); double *ttime, *oout; void waste(int pid) { cpu_set_t my_set; CPU_ZERO(&my_set); CPU_SET(pid, &my_set); int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set); printf("set affinity to %d: %d\n", pid, ret); // 128 MB float32x4_t *tmp = (float32x4_t *)malloc(0x800000*sizeof(float32x4_t)); // comment out the memset for CPU only and not RAM // otherwise we need this to avoid the zero page #ifdef MEM memset(tmp, 0xaa, 0x800000*sizeof(float32x4_t)); #endif float32x4_t out; double sec = seconds_since_boot(); while (1) { for (int i = 0; i < 0x10; i++) { for (int j = 0; j < 0x800000; j+=0x20) { out = vmlaq_f32(out, tmp[j+0], tmp[j+1]); out = vmlaq_f32(out, tmp[j+2], tmp[j+3]); out = vmlaq_f32(out, tmp[j+4], tmp[j+5]); out = vmlaq_f32(out, tmp[j+6], tmp[j+7]); out = vmlaq_f32(out, tmp[j+8], tmp[j+9]); out = vmlaq_f32(out, tmp[j+10], tmp[j+11]); out = vmlaq_f32(out, tmp[j+12], tmp[j+13]); out = vmlaq_f32(out, tmp[j+14], tmp[j+15]); out = vmlaq_f32(out, tmp[j+16], tmp[j+17]); out = vmlaq_f32(out, tmp[j+18], tmp[j+19]); out = vmlaq_f32(out, tmp[j+20], tmp[j+21]); out = vmlaq_f32(out, tmp[j+22], tmp[j+23]); out = vmlaq_f32(out, tmp[j+24], tmp[j+25]); out = vmlaq_f32(out, tmp[j+26], tmp[j+27]); out = vmlaq_f32(out, tmp[j+28], tmp[j+29]); out = vmlaq_f32(out, tmp[j+30], tmp[j+31]); } } double nsec = seconds_since_boot(); ttime[pid] = nsec-sec; oout[pid] = out[0] + out[1] + out[2] + out[3]; sec = nsec; } } int main() { int CORES = get_nprocs(); ttime = (double *)malloc(CORES*sizeof(double)); oout = (double *)malloc(CORES*sizeof(double)); pthread_t waster[CORES]; for (long i = 0; i < CORES; i++) { ttime[i] = NAN; pthread_create(&waster[i], NULL, (void *(*)(void *))waste, (void*)i); } while (1) { double avg = 0.0; double iavg = 0.0; for (int i = 0; i < CORES; i++) { avg += ttime[i]; iavg += 1/ttime[i]; printf("%4.2f ", ttime[i]); } double mb_per_sec = (16.*0x800000/(1024*1024))*sizeof(float32x4_t)*iavg; printf("-- %4.2f -- %.2f MB/s \n", avg/CORES, mb_per_sec); sleep(1); } }