waste can become memory bound
parent
580589b250
commit
7bfaf4cc4e
|
@ -1,4 +1,5 @@
|
|||
// gcc -O2 waste.c -lpthread -owaste
|
||||
// gcc -O2 waste.c -lpthread -owaste -DMEM
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <stdio.h>
|
||||
|
@ -19,24 +20,28 @@ void waste(int pid) {
|
|||
int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
|
||||
printf("set affinity to %d: %d\n", pid, ret);
|
||||
|
||||
float32x4_t *tmp = (float32x4_t *)malloc(0x1000008*sizeof(float32x4_t));
|
||||
// 256 MB
|
||||
float32x4_t *tmp = (float32x4_t *)malloc(0x1000001*sizeof(float32x4_t));
|
||||
|
||||
// comment out the memset for CPU only and not RAM
|
||||
// otherwise we need this to avoid the zero page
|
||||
#ifdef MEM
|
||||
memset(tmp, 0xaa, 0x1000001*sizeof(float32x4_t));
|
||||
#endif
|
||||
|
||||
float32x4_t out;
|
||||
|
||||
uint64_t i = 0;
|
||||
double sec = seconds_since_boot();
|
||||
while(1) {
|
||||
int j;
|
||||
for (j = 0; j < 0x1000000; j++) {
|
||||
out = vmlaq_f32(out, tmp[j], tmp[j+1]);
|
||||
while (1) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
for (int j = 0; j < 0x1000000; j++) {
|
||||
out = vmlaq_f32(out, tmp[j], tmp[j+1]);
|
||||
}
|
||||
}
|
||||
if (i == 0x8) {
|
||||
double nsec = seconds_since_boot();
|
||||
ttime[pid] = nsec-sec;
|
||||
oout[pid] = out[0] + out[1] + out[2] + out[3];
|
||||
i = 0;
|
||||
sec = nsec;
|
||||
}
|
||||
i++;
|
||||
double nsec = seconds_since_boot();
|
||||
ttime[pid] = nsec-sec;
|
||||
oout[pid] = out[0] + out[1] + out[2] + out[3];
|
||||
sec = nsec;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -50,10 +55,12 @@ int main() {
|
|||
pthread_create(&waster[i], NULL, waste, (void*)i);
|
||||
}
|
||||
while (1) {
|
||||
double avg = 0.0;
|
||||
for (int i = 0 ; i < CORES; i++) {
|
||||
avg += ttime[i];
|
||||
printf("%.2f ", ttime[i]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("-- %.2f\n", avg/CORES);
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue