Restrict avx2 hack to windows target
The problem this workaround addresses is possibly specific to Windows and GCC; see e.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412#c25. On Linux with GCC 8, this patch brings a speedup of roughly 8%. However, it probably needs some testing in the wild. This commit also includes a workaround for an old MSYS make (3.81) installation (fixes #2984). No functional change.
pull/2986/head
parent
ee06046412
commit
992f549ae7
|
@ -569,7 +569,7 @@ help:
|
|||
build: config-sanity
|
||||
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
|
||||
|
||||
profile-build: config-sanity objclean profileclean net
|
||||
profile-build: net config-sanity objclean profileclean
|
||||
@echo ""
|
||||
@echo "Step 1/4. Building instrumented executable ..."
|
||||
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
// compiled with older g++ crashes because the output memory is not aligned
|
||||
// even though alignas is specified.
|
||||
#if defined(USE_AVX2)
|
||||
#if defined(__GNUC__ ) && (__GNUC__ < 9)
|
||||
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32)
|
||||
#define _mm256_loadA_si256 _mm256_loadu_si256
|
||||
#define _mm256_storeA_si256 _mm256_storeu_si256
|
||||
#else
|
||||
|
@ -54,7 +54,7 @@
|
|||
#endif
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
#if defined(__GNUC__ ) && (__GNUC__ < 9)
|
||||
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32)
|
||||
#define _mm512_loadA_si512 _mm512_loadu_si512
|
||||
#define _mm512_storeA_si512 _mm512_storeu_si512
|
||||
#else
|
||||
|
|
Loading…
Reference in New Issue