1
0
Fork 0

Restrict avx2 hack to windows target

this workaround is possibly rather a windows & gcc specific problem. See e.g.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412#c25

on Linux with gcc 8 this patch brings roughly a 8% speedup.
However, probably needs some testing in the wild.

includes a workaround for an old msys make (3.81) installation (fixes #2984)

No functional change
pull/2986/head
Joost VandeVondele 2020-08-11 21:11:17 +02:00
parent ee06046412
commit 992f549ae7
2 changed files with 3 additions and 3 deletions

View File

@ -569,7 +569,7 @@ help:
build: config-sanity
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
profile-build: config-sanity objclean profileclean net
profile-build: net config-sanity objclean profileclean
@echo ""
@echo "Step 1/4. Building instrumented executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)

View File

@ -44,7 +44,7 @@
// compiled with older g++ crashes because the output memory is not aligned
// even though alignas is specified.
#if defined(USE_AVX2)
#if defined(__GNUC__ ) && (__GNUC__ < 9)
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32)
#define _mm256_loadA_si256 _mm256_loadu_si256
#define _mm256_storeA_si256 _mm256_storeu_si256
#else
@ -54,7 +54,7 @@
#endif
#if defined(USE_AVX512)
#if defined(__GNUC__ ) && (__GNUC__ < 9)
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32)
#define _mm512_loadA_si512 _mm512_loadu_si512
#define _mm512_storeA_si512 _mm512_storeu_si512
#else