buildroot/package/opencv3/0001-cmake-support-multiple-CPU-targets.patch
Thomas Petazzoni b6ea64aec8 package/opencv3: regenerate patch without renames
Patches using renames are not properly handled by patch < 2.7, which
some users (including autobuilder instances) might be using. Such
renames are silently ignored by old patch versions, causing strange
build failures due to the missed renames.

This patch fixes this by regenerating the patch without renames.

Fixes:

  http://autobuild.buildroot.net/results/347d8fd2e286b3e4e5e18743e64d862bdb66dbb9/

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
2017-05-12 11:48:00 +02:00

2565 lines
96 KiB
Diff

From 1c54b13cb29463af514a107c42946abd97b5ca41 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@intel.com>
Date: Wed, 7 Sep 2016 18:02:36 +0300
Subject: [PATCH] cmake: support multiple CPU targets
Backported from: https://github.com/opencv/opencv/commit/e16227b53cabab1caa4b7aba8ff59a630528348f
Signed-off-by: Samuel Martin <s.martin49@gmail.com>
---
CMakeLists.txt | 50 +-
cmake/OpenCVCompilerOptimizations.cmake | 651 +++++++++++++++++++++
cmake/OpenCVCompilerOptions.cmake | 161 +----
cmake/OpenCVGenHeaders.cmake | 4 +
cmake/OpenCVModule.cmake | 3 +
cmake/OpenCVPCHSupport.cmake | 5 +-
cmake/OpenCVUtils.cmake | 50 +-
cmake/checks/cpu_avx.cpp | 9 +
cmake/checks/cpu_avx2.cpp | 10 +
cmake/checks/cpu_avx512.cpp | 10 +
cmake/checks/cpu_fp16.cpp | 33 ++
cmake/checks/cpu_popcnt.cpp | 8 +
cmake/checks/cpu_sse.cpp | 2 +
cmake/checks/cpu_sse2.cpp | 2 +
cmake/checks/cpu_sse3.cpp | 7 +
cmake/checks/cpu_sse41.cpp | 6 +
cmake/checks/cpu_sse42.cpp | 5 +
cmake/checks/cpu_ssse3.cpp | 7 +
cmake/checks/fp16.cpp | 33 --
cmake/templates/cv_cpu_config.h.in | 5 +
cmake/templates/cvconfig.h.in | 13 +
.../core/include/opencv2/core/cv_cpu_dispatch.h | 166 ++++++
modules/core/include/opencv2/core/cv_cpu_helper.h | 133 +++++
modules/core/include/opencv2/core/cvdef.h | 145 +----
modules/core/include/opencv2/core/fast_math.hpp | 60 +-
modules/core/src/system.cpp | 301 ++++++++--
modules/highgui/CMakeLists.txt | 2 +-
modules/imgproc/src/imgwarp.cpp | 4 +-
modules/objdetect/src/haar.cpp | 5 +-
29 files changed, 1472 insertions(+), 418 deletions(-)
create mode 100644 cmake/OpenCVCompilerOptimizations.cmake
create mode 100644 cmake/checks/cpu_avx.cpp
create mode 100644 cmake/checks/cpu_avx2.cpp
create mode 100644 cmake/checks/cpu_avx512.cpp
create mode 100644 cmake/checks/cpu_fp16.cpp
create mode 100644 cmake/checks/cpu_popcnt.cpp
create mode 100644 cmake/checks/cpu_sse.cpp
create mode 100644 cmake/checks/cpu_sse2.cpp
create mode 100644 cmake/checks/cpu_sse3.cpp
create mode 100644 cmake/checks/cpu_sse41.cpp
create mode 100644 cmake/checks/cpu_sse42.cpp
create mode 100644 cmake/checks/cpu_ssse3.cpp
delete mode 100644 cmake/checks/fp16.cpp
create mode 100644 cmake/templates/cv_cpu_config.h.in
create mode 100644 modules/core/include/opencv2/core/cv_cpu_dispatch.h
create mode 100644 modules/core/include/opencv2/core/cv_cpu_helper.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cc45f6f..9c9971e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,6 +85,10 @@ if(POLICY CMP0042)
cmake_policy(SET CMP0042 NEW)
endif()
+if(POLICY CMP0051)
+ cmake_policy(SET CMP0051 NEW)
+endif()
+
include(cmake/OpenCVUtils.cmake)
# must go before the project command
@@ -274,16 +278,6 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov"
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" "${NEON}" IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
@@ -292,6 +286,9 @@ OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with n
OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF )
OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF )
OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
+OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON )
+OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF )
+
OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF )
@@ -492,6 +489,9 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL
set(CMAKE_BUILD_TYPE Release)
endif()
+# --- Python Support ---
+include(cmake/OpenCVDetectPython.cmake)
+
include(cmake/OpenCVCompilerOptions.cmake)
@@ -569,9 +569,6 @@ else()
unset(DOXYGEN_FOUND CACHE)
endif()
-# --- Python Support ---
-include(cmake/OpenCVDetectPython.cmake)
-
# --- Java Support ---
include(cmake/OpenCVDetectApacheAnt.cmake)
if(ANDROID)
@@ -860,6 +857,33 @@ if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio")
status(" Configuration:" ${CMAKE_BUILD_TYPE})
endif()
+
+# ========================= CPU code generation mode =========================
+status("")
+status(" CPU/HW features:")
+status(" Baseline:" "${CPU_BASELINE_FINAL}")
+if(NOT CPU_BASELINE STREQUAL CPU_BASELINE_FINAL)
+ status(" requested:" "${CPU_BASELINE}")
+endif()
+if(CPU_BASELINE_REQUIRE)
+ status(" required:" "${CPU_BASELINE_REQUIRE}")
+endif()
+if(CPU_BASELINE_DISABLE)
+ status(" disabled:" "${CPU_BASELINE_DISABLE}")
+endif()
+if(CPU_DISPATCH_FINAL OR CPU_DISPATCH)
+ status(" Dispatched code generation:" "${CPU_DISPATCH_FINAL}")
+ if(NOT CPU_DISPATCH STREQUAL CPU_DISPATCH_FINAL)
+ status(" requested:" "${CPU_DISPATCH}")
+ endif()
+ if(CPU_DISPATCH_REQUIRE)
+ status(" required:" "${CPU_DISPATCH_REQUIRE}")
+ endif()
+ foreach(OPT ${CPU_DISPATCH_FINAL})
+ status(" ${OPT} (${CPU_${OPT}_USAGE_COUNT} files):" "+ ${CPU_DISPATCH_${OPT}_INCLUDED}")
+ endforeach()
+endif()
+
# ========================== C/C++ options ==========================
if(CMAKE_CXX_COMPILER_VERSION)
set(OPENCV_COMPILER_STR "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1} (ver ${CMAKE_CXX_COMPILER_VERSION})")
diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake
new file mode 100644
index 0000000..b849f02
--- /dev/null
+++ b/cmake/OpenCVCompilerOptimizations.cmake
@@ -0,0 +1,651 @@
+# x86/x86-64 arch:
+# SSE / SSE2 (always available on 64-bit CPUs)
+# SSE3 / SSSE3
+# SSE4_1 / SSE4_2 / POPCNT
+# AVX / AVX2 / AVX512
+# FMA3
+
+# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
+# CPU_{opt}_IMPLIES=<list>
+# CPU_{opt}_FORCE=<list> - subset of "implies" list
+# CPU_{opt}_FLAGS_ON=""
+# CPU_{opt}_FEATURE_ALIAS - mapping to CV_CPU_* HWFeature enum
+
+# Input variables:
+# CPU_BASELINE=<list> - preferred list of baseline optimizations
+# CPU_DISPATCH=<list> - preferred list of dispatched optimizations
+
+# Advanced input variables:
+# CPU_BASELINE_REQUIRE=<list> - list of required baseline optimizations
+# CPU_DISPATCH_REQUIRE=<list> - list of required dispatched optimizations
+# CPU_BASELINE_DISABLE=<list> - list of disabled baseline optimizations
+
+# Output variables:
+# CPU_BASELINE_FINAL=<list> - final list of enabled compiler optimizations
+# CPU_DISPATCH_FINAL=<list> - final list of dispatched optimizations
+#
+# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (_opt_avx2.cpp)
+
+set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512
+list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
+list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
+
+ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
+
+
+set(HELP_CPU_BASELINE "Specify list of enabled baseline CPU optimizations")
+set(HELP_CPU_BASELINE_REQUIRE "Specify list of required baseline CPU optimizations")
+set(HELP_CPU_BASELINE_DISABLE "Specify list of forbidden baseline CPU optimizations")
+set(HELP_CPU_DISPATCH "Specify list of dispatched CPU optimizations")
+set(HELP_CPU_DISPATCH_REQUIRE "Specify list of required dispatched CPU optimizations")
+
+foreach(var CPU_BASELINE CPU_BASELINE_REQUIRE CPU_BASELINE_DISABLE CPU_DISPATCH CPU_DISPATCH_REQUIRE)
+ if(DEFINED ${var})
+ string(REPLACE "," ";" _list "${${var}}")
+ set(${var} "${_list}" CACHE STRING "${HELP_${var}}" FORCE)
+ endif()
+endforeach()
+
+# process legacy flags
+macro(ocv_optimization_process_obsolete_option legacy_flag OPT legacy_warn) # translate a legacy ENABLE_* option into CPU_BASELINE_REQUIRE / CPU_BASELINE_DISABLE entries for OPT
+ if(DEFINED ${legacy_flag}) # nothing to do unless the legacy option is defined
+ if(${legacy_warn}) # callers pass ON/OFF to choose whether the deprecation notice is printed
+ message(STATUS "WARNING: Option ${legacy_flag}='${${legacy_flag}}' is deprecated and should not be used anymore")
+ message(STATUS " Behaviour of this option is not backward compatible")
+ message(STATUS " Refer to 'CPU_BASELINE'/'CPU_DISPATCH' CMake options documentation")
+ endif()
+ if(${legacy_flag}) # legacy option is true: OPT becomes a required baseline optimization
+ if(NOT ";${CPU_BASELINE_REQUIRE};" MATCHES ";${OPT};") # append only if OPT is not already listed
+ set(CPU_BASELINE_REQUIRE "${CPU_BASELINE_REQUIRE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_REQUIRE}" FORCE)
+ endif()
+ else() # legacy option is false: OPT becomes a disabled baseline optimization
+ if(NOT ";${CPU_BASELINE_DISABLE};" MATCHES ";${OPT};") # append only if OPT is not already listed
+ set(CPU_BASELINE_DISABLE "${CPU_BASELINE_DISABLE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_DISABLE}" FORCE)
+ endif()
+ endif()
+ endif()
+endmacro()
+ocv_optimization_process_obsolete_option(ENABLE_SSE SSE ON)
+ocv_optimization_process_obsolete_option(ENABLE_SSE2 SSE2 ON)
+ocv_optimization_process_obsolete_option(ENABLE_SSE3 SSE3 ON)
+ocv_optimization_process_obsolete_option(ENABLE_SSSE3 SSSE3 ON)
+ocv_optimization_process_obsolete_option(ENABLE_SSE41 SSE4_1 ON)
+ocv_optimization_process_obsolete_option(ENABLE_SSE42 SSE4_2 ON)
+ocv_optimization_process_obsolete_option(ENABLE_POPCNT POPCNT ON)
+ocv_optimization_process_obsolete_option(ENABLE_AVX AVX ON)
+ocv_optimization_process_obsolete_option(ENABLE_AVX2 AVX2 ON)
+ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
+
+ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF)
+ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
+
+
+macro(ocv_is_optimization_in_list resultvar check_opt) # set ${resultvar} to 1 if check_opt is one of ARGN or is reachable from them through CPU_<opt>_IMPLIES, otherwise 0
+ set(__checked "") # optimizations already visited
+ set(__queue ${ARGN}) # optimizations to examine in the current pass
+ set(${resultvar} 0)
+ while(__queue AND NOT ${resultvar}) # stop when nothing is left to examine or a match was found
+ list(REMOVE_DUPLICATES __queue)
+ set(__queue_current ${__queue})
+ set(__queue "") # refilled below with the next level of implied optimizations
+ foreach(OPT ${__queue_current})
+ if("x${OPT}" STREQUAL "x${check_opt}")
+ set(${resultvar} 1)
+ break() # leaves the foreach; the while condition then ends the search
+ elseif(NOT ";${__checked};" MATCHES ";${OPT};") # expand each optimization only once
+ list(APPEND __queue ${CPU_${OPT}_IMPLIES})
+ endif()
+ list(APPEND __checked ${OPT})
+ endforeach()
+ endwhile()
+endmacro()
+
+macro(ocv_is_optimization_in_force_list resultvar check_opt) # set ${resultvar} to 1 if check_opt is one of ARGN or is reachable from them through CPU_<opt>_FORCE, otherwise 0
+  set(__checked "") # optimizations already visited
+  set(__queue ${ARGN}) # optimizations to examine in the current pass
+  set(${resultvar} 0)
+  while(__queue AND NOT ${resultvar}) # stop when nothing is left to examine or a match was found
+    list(REMOVE_DUPLICATES __queue)
+    set(__queue_current ${__queue})
+    set(__queue "") # refilled below with the next level of forced optimizations
+    foreach(OPT ${__queue_current})
+      if("x${OPT}" STREQUAL "x${check_opt}") # same guarded comparison as ocv_is_optimization_in_list: the "x" prefix keeps if() from re-reading an operand as a variable name
+        set(${resultvar} 1)
+        break() # leaves the foreach; the while condition then ends the search
+      elseif(NOT ";${__checked};" MATCHES ";${OPT};") # expand each optimization only once
+        list(APPEND __queue ${CPU_${OPT}_FORCE})
+      endif()
+      list(APPEND __checked ${OPT})
+    endforeach()
+  endwhile()
+endmacro()
+
+macro(ocv_append_optimization_flag var OPT) # append CPU_${OPT}_FLAGS_ON to ${var}, first removing any flags that match CPU_${OPT}_FLAGS_CONFLICT
+  if(CPU_${OPT}_FLAGS_CONFLICT)
+    string(REGEX REPLACE " (${CPU_${OPT}_FLAGS_CONFLICT})" "" ${var} " ${${var}}") # group the pattern so the leading space applies to every '|' alternative; input is space-prefixed so the first flag can match too
+    string(REGEX REPLACE "^ +" "" ${var} "${${var}}") # drop the leading space(s) left over from the prefix above
+  endif()
+  set(${var} "${${var}} ${CPU_${OPT}_FLAGS_ON}")
+endmacro()
+
+# Support GCC -march=native or Intel Compiler -xHost flags
+if(";${CPU_BASELINE};" MATCHES ";NATIVE;" OR ";${CPU_BASELINE};" MATCHES ";HOST;")
+ set(CPU_BASELINE_DETECT ON)
+ set(_add_native_flag ON)
+elseif(";${CPU_BASELINE};" MATCHES ";DETECT;")
+ set(CPU_BASELINE_DETECT ON)
+elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ")
+ if(DEFINED CPU_BASELINE)
+ message(STATUS "CPU: Detected '-march=native' or '-xHost' compiler flag. Force CPU_BASELINE=DETECT.")
+ endif()
+ set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
+ set(CPU_BASELINE_DETECT ON)
+endif()
+
+if(X86 OR X86_64)
+ ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512")
+
+ ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp")
+ ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp")
+ ocv_update(CPU_SSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse3.cpp")
+ ocv_update(CPU_SSSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_ssse3.cpp")
+ ocv_update(CPU_SSE4_1_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse41.cpp")
+ ocv_update(CPU_SSE4_2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse42.cpp")
+ ocv_update(CPU_POPCNT_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_popcnt.cpp")
+ ocv_update(CPU_AVX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx.cpp")
+ ocv_update(CPU_AVX2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx2.cpp")
+ ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp")
+ ocv_update(CPU_AVX512_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512.cpp")
+
+ if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE)
+ ocv_update(CPU_AVX512_IMPLIES "AVX2")
+ ocv_update(CPU_AVX512_FORCE "") # Don't force other optimizations
+ ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16")
+ ocv_update(CPU_FMA3_IMPLIES "AVX2")
+ ocv_update(CPU_FMA3_FORCE "") # Don't force other optimizations
+ ocv_update(CPU_FP16_IMPLIES "AVX")
+ ocv_update(CPU_FP16_FORCE "") # Don't force other optimizations
+ ocv_update(CPU_AVX_IMPLIES "SSE4_2")
+ ocv_update(CPU_SSE4_2_IMPLIES "SSE4_1;POPCNT")
+ ocv_update(CPU_POPCNT_IMPLIES "SSE4_1")
+ ocv_update(CPU_POPCNT_FORCE "") # Don't force other optimizations
+ ocv_update(CPU_SSE4_1_IMPLIES "SSE3;SSSE3")
+ ocv_update(CPU_SSSE3_IMPLIES "SSE3")
+ ocv_update(CPU_SSE3_IMPLIES "SSE2")
+ ocv_update(CPU_SSE2_IMPLIES "SSE")
+ endif()
+
+ if(CV_ICC)
+ macro(ocv_intel_compiler_optimization_option name unix_flags msvc_flags)
+ ocv_update(CPU_${name}_FLAGS_NAME "${name}")
+ if(MSVC)
+ set(enable_flags "${msvc_flags}")
+ set(flags_conflict "/arch:[^ ]+")
+ else()
+ set(enable_flags "${unix_flags}")
+ set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]+")
+ endif()
+ ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}")
+ if(flags_conflict)
+ ocv_update(CPU_${name}_FLAGS_CONFLICT "${flags_conflict}")
+ endif()
+ endmacro()
+ ocv_intel_compiler_optimization_option(AVX2 "-march=core-avx2" "/arch:CORE-AVX2")
+ ocv_intel_compiler_optimization_option(FP16 "-mavx" "/arch:AVX")
+ ocv_intel_compiler_optimization_option(AVX "-mavx" "/arch:AVX")
+ ocv_intel_compiler_optimization_option(FMA3 "" "")
+ ocv_intel_compiler_optimization_option(POPCNT "" "")
+ ocv_intel_compiler_optimization_option(SSE4_2 "-msse4.2" "/arch:SSE4.2")
+ ocv_intel_compiler_optimization_option(SSE4_1 "-msse4.1" "/arch:SSE4.1")
+ ocv_intel_compiler_optimization_option(SSE3 "-msse3" "/arch:SSE3")
+ ocv_intel_compiler_optimization_option(SSSE3 "-mssse3" "/arch:SSSE3")
+ ocv_intel_compiler_optimization_option(SSE2 "-msse2" "/arch:SSE2")
+ if(NOT X86_64) # x64 compiler doesn't support /arch:sse
+ ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE")
+ endif()
+ #ocv_intel_compiler_optimization_option(AVX512 "-march=core-avx512")
+ elseif(CMAKE_COMPILER_IS_GNUCXX)
+ ocv_update(CPU_AVX2_FLAGS_ON "-mavx2")
+ ocv_update(CPU_FP16_FLAGS_ON "-mf16c")
+ ocv_update(CPU_AVX_FLAGS_ON "-mavx")
+ ocv_update(CPU_FMA3_FLAGS_ON "-mfma")
+ ocv_update(CPU_POPCNT_FLAGS_ON "-mpopcnt")
+ ocv_update(CPU_SSE4_2_FLAGS_ON "-msse4.2")
+ ocv_update(CPU_SSE4_1_FLAGS_ON "-msse4.1")
+ ocv_update(CPU_SSE3_FLAGS_ON "-msse3")
+ ocv_update(CPU_SSSE3_FLAGS_ON "-mssse3")
+ ocv_update(CPU_SSE2_FLAGS_ON "-msse2")
+ ocv_update(CPU_SSE_FLAGS_ON "-msse")
+ if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
+ ocv_update(CPU_AVX512_FLAGS_ON "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi")
+ endif()
+ elseif(MSVC)
+ ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2")
+ ocv_update(CPU_AVX_FLAGS_ON "/arch:AVX")
+ if(NOT MSVC64)
+ # 64-bit MSVC compiler uses SSE/SSE2 by default
+ ocv_update(CPU_SSE_FLAGS_ON "/arch:SSE")
+ ocv_update(CPU_SSE_SUPPORTED ON)
+ ocv_update(CPU_SSE2_FLAGS_ON "/arch:SSE2")
+ ocv_update(CPU_SSE2_SUPPORTED ON)
+ else()
+ ocv_update(CPU_SSE_SUPPORTED ON)
+ ocv_update(CPU_SSE2_SUPPORTED ON)
+ endif()
+ # Other instruction sets are supported by default since MSVC 2008 at least
+ else()
+ message(WARNING "TODO: Unsupported compiler")
+ endif()
+
+ if(NOT DEFINED CPU_DISPATCH)
+ set(CPU_DISPATCH "SSE4_1;AVX;FP16;AVX2" CACHE STRING "${HELP_CPU_DISPATCH}")
+ endif()
+
+ if(NOT DEFINED CPU_BASELINE)
+ if(X86_64)
+ set(CPU_BASELINE "SSSE3" CACHE STRING "${HELP_CPU_BASELINE}")
+ else()
+ set(CPU_BASELINE "SSE2" CACHE STRING "${HELP_CPU_BASELINE}")
+ endif()
+ endif()
+
+elseif(ARM OR AARCH64)
+ ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp")
+ if(NOT AARCH64)
+ ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16")
+ ocv_update(CPU_NEON_FLAGS_ON "-mfpu=neon")
+ ocv_update(CPU_VFPV3_FLAGS_ON "-mfpu=vfpv3")
+ ocv_update(CPU_FP16_FLAGS_ON "-mfpu=neon-fp16")
+ set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
+ else()
+ ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16")
+ ocv_update(CPU_NEON_FLAGS_ON "")
+ set(CPU_BASELINE "NEON" CACHE STRING "${HELP_CPU_BASELINE}")
+ endif()
+endif()
+
+# Helper values for cmake-gui
+set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
+set(CPU_DISPATCH "" CACHE STRING "${HELP_CPU_DISPATCH}")
+set_property(CACHE CPU_BASELINE PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS})
+set_property(CACHE CPU_DISPATCH PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS})
+
+set(CPU_BASELINE_FLAGS "")
+
+set(CPU_BASELINE_FINAL "")
+set(CPU_DISPATCH_FINAL "")
+
+macro(ocv_check_compiler_optimization OPT)
+ if(NOT DEFINED CPU_${OPT}_SUPPORTED)
+ if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE)
+ set(_varname "")
+ if(CPU_${OPT}_TEST_FILE)
+ set(__available 0)
+ if(CPU_BASELINE_DETECT)
+ set(_varname "HAVE_CPU_${OPT}_SUPPORT")
+ ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
+ if(${_varname})
+ list(APPEND CPU_BASELINE_FINAL ${OPT})
+ set(__available 1)
+ endif()
+ endif()
+ if(NOT __available)
+ if(NOT "x${CPU_${OPT}_FLAGS_NAME}" STREQUAL "x")
+ set(_varname "HAVE_CPU_${CPU_${OPT}_FLAGS_NAME}")
+ set(_compile_flags "${CPU_BASELINE_FLAGS}")
+ ocv_append_optimization_flag(_compile_flags ${OPT})
+ ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
+ elseif(NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x")
+ ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "" "${CPU_${OPT}_TEST_FILE}")
+ else()
+ set(_varname "HAVE_CPU_${OPT}_SUPPORT")
+ set(_compile_flags "${CPU_BASELINE_FLAGS}")
+ ocv_append_optimization_flag(_compile_flags ${OPT})
+ ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
+ endif()
+ endif()
+ else()
+ ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "")
+ endif()
+ if(_varname AND ${_varname})
+ set(CPU_${OPT}_SUPPORTED ON)
+ elseif(NOT CPU_${OPT}_SUPPORTED)
+ message(STATUS "${OPT} is not supported by C++ compiler")
+ endif()
+ else()
+ set(CPU_${OPT}_SUPPORTED ON)
+ endif()
+ endif()
+endmacro()
+
+foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
+ set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE)
+ if(NOT DEFINED CPU_${OPT}_FORCE)
+ set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}")
+ endif()
+endforeach()
+
+if(_add_native_flag) # set earlier when CPU_BASELINE contains NATIVE or HOST: add the compiler's "optimize for the build machine" flag
+  set(_varname "HAVE_CPU_NATIVE_SUPPORT")
+  ocv_check_compiler_flag(CXX "-march=native" "${_varname}" "")
+  if(${_varname}) # test the check result; _varname itself is always a non-empty name and therefore always true
+    set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} -march=native")
+  else() # -march=native rejected: fall back to the Intel-style host flag
+    set(_varname "HAVE_CPU_HOST_SUPPORT")
+    if(MSVC)
+      set(_flag "/QxHost")
+    else()
+      set(_flag "-xHost")
+    endif()
+    ocv_check_compiler_flag(CXX "${_flag}" "${_varname}" "")
+    if(${_varname}) # again test the check result, not the name
+      set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} ${_flag}") # the variable set above is _flag (previously expanded ${flag})
+    endif()
+  endif()
+endif()
+
+foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
+ set(__is_disabled 0)
+ foreach(OPT2 ${CPU_BASELINE_DISABLE})
+ ocv_is_optimization_in_list(__is_disabled ${OPT2} ${OPT})
+ if(__is_disabled)
+ break()
+ endif()
+ endforeach()
+ if(__is_disabled)
+ set(__is_from_baseline 0)
+ else()
+ ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_REQUIRE})
+ if(NOT __is_from_baseline)
+ ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE})
+ endif()
+ endif()
+ ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH_REQUIRE})
+ if(NOT __is_from_dispatch)
+ ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH})
+ endif()
+ if(__is_from_dispatch OR __is_from_baseline OR CPU_BASELINE_DETECT)
+ ocv_check_compiler_optimization(${OPT})
+ endif()
+ if(CPU_BASELINE_DETECT AND NOT __is_from_baseline AND NOT __is_disabled)
+ ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_FINAL})
+ endif()
+ if(CPU_${OPT}_SUPPORTED)
+ if(";${CPU_DISPATCH};" MATCHES ";${OPT};" AND NOT __is_from_baseline)
+ list(APPEND CPU_DISPATCH_FINAL ${OPT})
+ elseif(__is_from_baseline AND NOT CPU_BASELINE_DETECT)
+ list(APPEND CPU_BASELINE_FINAL ${OPT})
+ ocv_append_optimization_flag(CPU_BASELINE_FLAGS ${OPT})
+ endif()
+ endif()
+endforeach()
+
+foreach(OPT ${CPU_BASELINE_REQUIRE})
+ if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
+ message(SEND_ERROR "Required baseline optimization is not supported: ${OPT} (CPU_BASELINE_REQUIRE=${CPU_BASELINE_REQUIRE})")
+ endif()
+endforeach()
+
+foreach(OPT ${CPU_BASELINE})
+ if(OPT STREQUAL "DETECT" OR OPT STREQUAL "HOST" OR OPT STREQUAL "NATIVE")
+ # nothing
+ elseif(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
+ message(STATUS "Optimization ${OPT} is not available, skipped")
+ endif()
+endforeach()
+
+foreach(OPT ${CPU_DISPATCH_REQUIRE})
+ if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};")
+ # OK
+ elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
+ message(SEND_ERROR "Dispatched optimization ${OPT} is in baseline list (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})")
+ else()
+ message(SEND_ERROR "Required dispatch optimization is not supported: ${OPT} (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})")
+ endif()
+endforeach()
+
+foreach(OPT ${CPU_DISPATCH})
+ if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};")
+ # OK
+ elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
+ # OK
+ else()
+ message(STATUS "Dispatch optimization ${OPT} is not available, skipped")
+ endif()
+endforeach()
+
+#message(STATUS "CPU_BASELINE_FINAL=${CPU_BASELINE_FINAL}")
+#message(STATUS "CPU_DISPATCH_FINAL=${CPU_DISPATCH_FINAL}")
+
+#if(CPU_DISPATCH_FINAL AND NOT PYTHON_DEFAULT_EXECUTABLE)
+# message(FATAL_ERROR "Python is required for CPU dispatched optimization support")
+#endif()
+
+macro(ocv_compiler_optimization_options)
+ set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${CPU_BASELINE_FLAGS}")
+ if(NOT __flags STREQUAL CACHED_CPU_BASELINE_FLAGS)
+ set(CACHED_CPU_BASELINE_FLAGS "${__flags}" CACHE INTERNAL "" FORCE)
+ ocv_clear_vars(HAVE_CPU_BASELINE_FLAGS)
+ endif()
+ ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_BASELINE_FLAGS)
+ if(NOT HAVE_CPU_BASELINE_FLAGS)
+ message(FATAL_ERROR "Compiler doesn't support baseline optimization flags: ${CPU_BASELINE_FLAGS}")
+ endif()
+ add_extra_compiler_option_force("${CPU_BASELINE_FLAGS}")
+
+ foreach(OPT ${CPU_DISPATCH_FINAL})
+ set(__dispatch_flags "")
+ set(__dispatch_definitions "")
+ set(__dispatch_opts "")
+ set(__dispatch_opts_force "")
+ foreach(OPT2 ${CPU_KNOWN_OPTIMIZATIONS})
+ if(NOT CPU_${OPT2}_SUPPORTED)
+ #continue()
+ else()
+ ocv_is_optimization_in_list(__is_from_baseline ${OPT2} ${CPU_BASELINE_FINAL})
+ if(NOT __is_from_baseline)
+ ocv_is_optimization_in_list(__is_active ${OPT2} ${OPT})
+ if(__is_active)
+ ocv_append_optimization_flag(__dispatch_flags ${OPT2})
+ list(APPEND __dispatch_definitions "CV_CPU_COMPILE_${OPT2}=1")
+ list(APPEND __dispatch_opts "${OPT2}")
+ endif()
+ ocv_is_optimization_in_force_list(__is_force ${OPT2} ${OPT})
+ if(__is_force)
+ list(APPEND __dispatch_opts_force "${OPT2}")
+ endif()
+ endif()
+ endif()
+ endforeach()
+ set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${__dispatch_flags}")
+ if(NOT __flags STREQUAL CACHED_CPU_DISPATCH_${OPT}_FLAGS)
+ set(CACHED_CPU_DISPATCH_${OPT}_FLAGS "${__flags}" CACHE INTERNAL "" FORCE)
+ ocv_clear_vars(HAVE_CPU_DISPATCH_FLAGS_${OPT})
+ endif()
+ ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_DISPATCH_FLAGS_${OPT})
+ if(NOT HAVE_CPU_DISPATCH_FLAGS_${OPT})
+ message(FATAL_ERROR "Compiler doesn't support optimization flags for ${OPT} dispatch mode: ${__dispatch_flags}")
+ endif()
+ set(CPU_DISPATCH_FLAGS_${OPT} "${__dispatch_flags}")
+ set(CPU_DISPATCH_DEFINITIONS_${OPT} "${__dispatch_definitions}")
+ set(CPU_DISPATCH_${OPT}_INCLUDED "${__dispatch_opts}")
+ set(CPU_DISPATCH_${OPT}_FORCED "${__dispatch_opts_force}")
+ endforeach()
+
+ if(ENABLE_POWERPC)
+ add_extra_compiler_option("-mcpu=G3 -mtune=G5")
+ endif()
+ if(ARM)
+ add_extra_compiler_option("-mfp16-format=ieee")
+ endif(ARM)
+ if(ENABLE_NEON)
+ add_extra_compiler_option("-mfpu=neon")
+ endif()
+ if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
+ add_extra_compiler_option("-mfpu=vfpv3")
+ endif()
+endmacro()
+
+macro(ocv_compiler_optimization_options_finalize) # add floating-point related compiler options that depend on the optimization flags/baseline already chosen
+ if(CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64))
+ if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) # non-Apple targets with 4-byte pointers only
+ if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") # choose the FP math unit based on whether an -msse2*/-mavx* flag is already present
+ add_extra_compiler_option(-mfpmath=sse) # !! important - be on the same wave with x64 compilers
+ else()
+ add_extra_compiler_option(-mfpmath=387)
+ endif()
+ endif()
+ endif()
+
+ if(MSVC)
+ # Generate Intrinsic Functions
+ set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
+
+ if((X86 OR X86_64) AND CMAKE_SIZEOF_VOID_P EQUAL 4 AND ";${CPU_BASELINE_FINAL};" MATCHES ";SSE;") # 4-byte-pointer x86 build whose final baseline list contains SSE
+ set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers
+ endif()
+ endif(MSVC)
+endmacro()
+
+macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME TARGET_BASE_NAME)
+ set(__result "")
+ set(__result_libs "")
+ foreach(OPT ${CPU_DISPATCH_FINAL})
+ set(__result_${OPT} "")
+ endforeach()
+ foreach(fname ${${SOURCES_VAR_NAME}})
+ string(TOLOWER "${fname}" fname_LOWER)
+ if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$")
+ if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
+ message(STATUS "Excluding from source files list: ${fname}")
+ #continue()
+ else()
+ set(__opt_found 0)
+ foreach(OPT ${CPU_BASELINE_FINAL})
+ string(TOLOWER "${OPT}" OPT_LOWER)
+ if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$")
+#message("${fname} BASELINE-${OPT}")
+ set(__opt_found 1)
+ list(APPEND __result "${fname}")
+ break()
+ endif()
+ endforeach()
+ foreach(OPT ${CPU_DISPATCH_FINAL})
+ foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED})
+ string(TOLOWER "${OPT2}" OPT2_LOWER)
+ if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$")
+ list(APPEND __result_${OPT} "${fname}")
+ math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1")
+ set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE)
+#message("${fname} ${OPT}")
+#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
+#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
+#message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
+ set(__opt_found 1)
+ break()
+ endif()
+ endforeach()
+ if(__opt_found)
+ set(__opt_found 1)
+ break()
+ endif()
+ endforeach()
+ if(NOT __opt_found)
+ message(STATUS "Excluding from source files list: ${fname}")
+ endif()
+ endif()
+ else()
+ list(APPEND __result "${fname}")
+ endif()
+ endforeach()
+
+ foreach(OPT ${CPU_DISPATCH_FINAL})
+ if(__result_${OPT})
+#message("${OPT}: ${__result_${OPT}}")
+ if(CMAKE_GENERATOR MATCHES "^Visual")
+ # extra flags are added before common flags, so switching between optimizations doesn't work correctly
+ # Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required)
+ add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}})
+ ocv_append_dependant_targets(${TARGET_BASE_NAME} ${TARGET_BASE_NAME}_${OPT})
+ set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
+ set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
+ #list(APPEND __result_libs ${TARGET_BASE_NAME}_${OPT})
+ list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>")
+ else()
+ foreach(fname ${__result_${OPT}})
+ set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
+ set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
+ endforeach()
+ list(APPEND __result ${__result_${OPT}})
+ endif()
+ endif()
+ endforeach()
+ set(${SOURCES_VAR_NAME} "${__result}")
+ list(APPEND ${LIBS_VAR_NAME} ${__result_libs})
+endmacro()
+
+macro(ocv_compiler_optimization_fill_cpu_config)
+ set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "")
+ foreach(OPT ${CPU_BASELINE_FINAL})
+ set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
+#define CV_CPU_COMPILE_${OPT} 1
+#define CV_CPU_BASELINE_COMPILE_${OPT} 1
+")
+ endforeach()
+
+ set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
+#define CV_CPU_BASELINE_FEATURES 0 \\")
+ foreach(OPT ${CPU_BASELINE_FINAL})
+ if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
+ set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
+ , CV_CPU_${OPT} \\")
+ endif()
+ endforeach()
+ set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}\n")
+
+ set(__dispatch_modes "")
+ foreach(OPT ${CPU_DISPATCH_FINAL})
+ list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT})
+ endforeach()
+ list(REMOVE_DUPLICATES __dispatch_modes)
+ set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "")
+ foreach(OPT ${__dispatch_modes})
+ set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
+#define CV_CPU_DISPATCH_COMPILE_${OPT} 1")
+ endforeach()
+
+ set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n")
+ foreach(OPT ${CPU_ALL_OPTIMIZATIONS})
+ if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
+ set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT}
+# define CV_CPU_HAS_SUPPORT_${OPT} 1
+# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT}
+# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT}))
+# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_${OPT} 0
+# define CV_CPU_CALL_${OPT}(...)
+#endif
+")
+ endif()
+ endforeach()
+
+ set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
+ if(EXISTS "${__file}")
+ file(READ "${__file}" __content)
+ endif()
+ if(__content STREQUAL OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE)
+ #message(STATUS "${__file} contains same content")
+ else()
+ file(WRITE "${__file}" "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}")
+ message(WARNING "${__file} is updated")
+ endif()
+endmacro()
+
+if(CV_DISABLE_OPTIMIZATION OR CV_ICC)
+ ocv_update(CV_ENABLE_UNROLLED 0)
+else()
+ ocv_update(CV_ENABLE_UNROLLED 1)
+endif()
diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index 5bb0479..0eb68b6 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -31,24 +31,21 @@ endif()
if(MINGW OR (X86 AND UNIX AND NOT APPLE))
# mingw compiler is known to produce unstable SSE code with -O3 hence we are trying to use -O2 instead
if(CMAKE_COMPILER_IS_GNUCXX)
- foreach(flags CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
- string(REPLACE "-O3" "-O2" ${flags} "${${flags}}")
- endforeach()
- endif()
-
- if(CMAKE_COMPILER_IS_GNUCC)
- foreach(flags CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG)
+ foreach(flags
+ CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG
+ CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG)
string(REPLACE "-O3" "-O2" ${flags} "${${flags}}")
endforeach()
endif()
endif()
if(MSVC)
- string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
- string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT}")
+ string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS)
+ string(STRIP "${CMAKE_CXX_FLAGS_INIT}" CMAKE_CXX_FLAGS_INIT)
if(CMAKE_CXX_FLAGS STREQUAL CMAKE_CXX_FLAGS_INIT)
# override cmake default exception handling option
- string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "/EHsc" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHa")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Flags used by the compiler during all build types." FORCE)
endif()
endif()
@@ -63,9 +60,6 @@ set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "")
set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "")
macro(add_extra_compiler_option option)
- if(CMAKE_BUILD_TYPE)
- set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
- endif()
ocv_check_flag_support(CXX "${option}" _varname "${OPENCV_EXTRA_CXX_FLAGS} ${ARGN}")
if(${_varname})
set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}")
@@ -77,6 +71,12 @@ macro(add_extra_compiler_option option)
endif()
endmacro()
+macro(add_extra_compiler_option_force option)
+ set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}")
+ set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} ${option}")
+endmacro()
+
+
# Gets environment variable and puts its value to the corresponding preprocessor definition
# Useful for WINRT that has no access to environment variables
macro(add_env_definitions option)
@@ -102,7 +102,11 @@ if(MINGW)
endif()
if(CV_ICC AND NOT ENABLE_FAST_MATH)
- add_extra_compiler_option("-fp-model precise")
+ if(MSVC)
+ add_extra_compiler_option("/fp:precise")
+ else()
+ add_extra_compiler_option("-fp-model precise")
+ endif()
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
@@ -141,7 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
endif()
# We need pthread's
- if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX))
+ if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) # TODO
add_extra_compiler_option(-pthread)
endif()
@@ -170,83 +174,6 @@ if(CMAKE_COMPILER_IS_GNUCXX)
if(ENABLE_FAST_MATH)
add_extra_compiler_option(-ffast-math)
endif()
- if(ENABLE_POWERPC)
- add_extra_compiler_option("-mcpu=G3 -mtune=G5")
- endif()
- if(ENABLE_SSE)
- add_extra_compiler_option(-msse)
- endif()
- if(ENABLE_SSE2)
- add_extra_compiler_option(-msse2)
- elseif(X86 OR X86_64)
- add_extra_compiler_option(-mno-sse2)
- endif()
- if(ARM)
- add_extra_compiler_option("-mfp16-format=ieee")
- endif(ARM)
- if(ENABLE_NEON)
- add_extra_compiler_option("-mfpu=neon")
- endif()
- if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
- add_extra_compiler_option("-mfpu=vfpv3")
- endif()
-
- # SSE3 and further should be disabled under MingW because it generates compiler errors
- if(NOT MINGW)
- if(ENABLE_AVX)
- add_extra_compiler_option(-mavx)
- elseif(X86 OR X86_64)
- add_extra_compiler_option(-mno-avx)
- endif()
- if(ENABLE_AVX2)
- add_extra_compiler_option(-mavx2)
-
- if(ENABLE_FMA3)
- add_extra_compiler_option(-mfma)
- endif()
- endif()
-
- # GCC depresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or AVX equivalence for all SSEx instructions when needed.
- if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx")
- if(ENABLE_SSE3)
- add_extra_compiler_option(-msse3)
- elseif(X86 OR X86_64)
- add_extra_compiler_option(-mno-sse3)
- endif()
-
- if(ENABLE_SSSE3)
- add_extra_compiler_option(-mssse3)
- elseif(X86 OR X86_64)
- add_extra_compiler_option(-mno-ssse3)
- endif()
-
- if(ENABLE_SSE41)
- add_extra_compiler_option(-msse4.1)
- elseif(X86 OR X86_64)
- add_extra_compiler_option(-mno-sse4.1)
- endif()
-
- if(ENABLE_SSE42)
- add_extra_compiler_option(-msse4.2)
- elseif(X86 OR X86_64)
- add_extra_compiler_option(-mno-sse4.2)
- endif()
-
- if(ENABLE_POPCNT)
- add_extra_compiler_option(-mpopcnt)
- endif()
- endif()
- endif(NOT MINGW)
-
- if(X86 OR X86_64)
- if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4)
- if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)")
- add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers
- else()
- add_extra_compiler_option(-mfpmath=387)
- endif()
- endif()
- endif()
# Profiling?
if(ENABLE_PROFILING)
@@ -257,7 +184,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
string(REPLACE "-fomit-frame-pointer" "" ${flags} "${${flags}}")
string(REPLACE "-ffunction-sections" "" ${flags} "${${flags}}")
endforeach()
- elseif(NOT APPLE AND NOT ANDROID)
+ elseif(NOT ((IOS OR ANDROID) AND NOT BUILD_SHARED_LIBS))
# Remove unreferenced functions: function level linking
add_extra_compiler_option(-ffunction-sections)
endif()
@@ -296,41 +223,6 @@ if(MSVC)
set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi")
endif()
- if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1800)
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2")
- endif()
- if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX")
- endif()
-
- if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1")
- endif()
-
- if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3")
- endif()
-
- if(NOT MSVC64)
- # 64-bit MSVC compiler uses SSE/SSE2 by default
- if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2")
- endif()
- if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE")
- endif()
- endif()
-
- if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX OR ENABLE_AVX2)
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
- endif()
-
- if(X86 OR X86_64)
- if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2)
- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers
- endif()
- endif()
-
if(OPENCV_WARNINGS_ARE_ERRORS)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX")
endif()
@@ -353,6 +245,16 @@ if(NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID)
set(OPENCV_EXTRA_FLAGS "-fPIC ${OPENCV_EXTRA_FLAGS}")
endif()
+include(cmake/OpenCVCompilerOptimizations.cmake)
+
+if(COMMAND ocv_compiler_optimization_options)
+ ocv_compiler_optimization_options()
+endif()
+
+if(COMMAND ocv_compiler_optimization_options_finalize)
+ ocv_compiler_optimization_options_finalize()
+endif()
+
# Add user supplied extra options (optimization, etc...)
# ==========================================================
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options")
@@ -370,6 +272,7 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
add_extra_compiler_option(-fvisibility-inlines-hidden)
endif()
+# TODO !!!!!
if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
if(ARM AND ENABLE_NEON)
set(FP16_OPTION "-mfpu=neon-fp16")
@@ -378,7 +281,7 @@ if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
endif()
try_compile(__VALID_FP16
"${OpenCV_BINARY_DIR}"
- "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
+ "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp"
COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
OUTPUT_VARIABLE TRY_OUT
)
diff --git a/cmake/OpenCVGenHeaders.cmake b/cmake/OpenCVGenHeaders.cmake
index 2988979..477b910 100644
--- a/cmake/OpenCVGenHeaders.cmake
+++ b/cmake/OpenCVGenHeaders.cmake
@@ -3,6 +3,10 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CO
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/cvconfig.h")
install(FILES "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2 COMPONENT dev)
+# platform-specific config file
+ocv_compiler_optimization_fill_cpu_config()
+configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cv_cpu_config.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cv_cpu_config.h")
+
# ----------------------------------------------------------------------------
# opencv_modules.hpp based on actual modules list
# ----------------------------------------------------------------------------
diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index 742a287..3e98bf5 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -65,6 +65,7 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD
unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE)
unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE)
unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE)
+ unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE)
endforeach()
# clean modules info which needs to be recalculated
@@ -641,6 +642,8 @@ macro(ocv_set_module_sources)
# use full paths for module to be independent from the module location
ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS)
+ ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
+
set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
endmacro()
diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake
index 90437cb..45968e7 100644
--- a/cmake/OpenCVPCHSupport.cmake
+++ b/cmake/OpenCVPCHSupport.cmake
@@ -326,7 +326,10 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
get_target_property(_sources ${_targetName} SOURCES)
foreach(src ${_sources})
- if(NOT "${src}" MATCHES "\\.mm$")
+ if(NOT "${src}" MATCHES "\\.mm$"
+ AND NOT "${src}" MATCHES "\\.h$" AND NOT "${src}" MATCHES "\\.hpp$" # header files
+ AND NOT "${src}" MATCHES "^\\$" # CMake generator expressions ("\\$" yields the regex \$, a literal '$'; "\$" would collapse to ^$ and only match an empty string)
+ )
get_source_file_property(oldProps "${src}" COMPILE_FLAGS)
if(NOT oldProps)
set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"")
diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
index cdf257d..8a5ee28 100644
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@@ -37,7 +37,11 @@ endmacro()
macro(ocv_update VAR)
if(NOT DEFINED ${VAR})
- set(${VAR} ${ARGN})
+ if("x${ARGN}" STREQUAL "x")
+ set(${VAR} "")
+ else()
+ set(${VAR} ${ARGN})
+ endif()
else()
#ocv_debug_message("Preserve old value for ${VAR}: ${${VAR}}")
endif()
@@ -151,8 +155,15 @@ function(ocv_append_target_property target prop)
endif()
endfunction()
+function(ocv_append_dependant_targets target)
+ #ocv_debug_message("ocv_append_dependant_targets(${target} ${ARGN})")
+ _ocv_fix_target(target)
+ set(OPENCV_DEPENDANT_TARGETS_${target} "${OPENCV_DEPENDANT_TARGETS_${target}};${ARGN}" CACHE INTERNAL "" FORCE)
+endfunction()
+
# adds include directories in such way that directories from the OpenCV source tree go first
function(ocv_target_include_directories target)
+ #ocv_debug_message("ocv_target_include_directories(${target} ${ARGN})")
_ocv_fix_target(target)
set(__params "")
if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND
@@ -173,6 +184,11 @@ function(ocv_target_include_directories target)
else()
if(TARGET ${target})
target_include_directories(${target} PRIVATE ${__params})
+ if(OPENCV_DEPENDANT_TARGETS_${target})
+ foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}})
+ target_include_directories(${t} PRIVATE ${__params})
+ endforeach()
+ endif()
else()
set(__new_inc "${OCV_TARGET_INCLUDE_DIRS_${target}};${__params}")
set(OCV_TARGET_INCLUDE_DIRS_${target} "${__new_inc}" CACHE INTERNAL "")
@@ -205,8 +221,11 @@ set(OCV_COMPILER_FAIL_REGEX
)
MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
+ set(_fname "${ARGN}")
if(NOT DEFINED ${RESULT})
- if("_${LANG}_" MATCHES "_CXX_")
+ if(_fname)
+ # nothing
+ elseif("_${LANG}_" MATCHES "_CXX_")
set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx")
if("${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror " OR "${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror=unknown-pragmas ")
FILE(WRITE "${_fname}" "int main() { return 0; }\n")
@@ -231,7 +250,13 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
unset(_fname)
endif()
if(_fname)
- MESSAGE(STATUS "Performing Test ${RESULT}")
+ if(NOT "x${ARGN}" STREQUAL "x")
+ file(RELATIVE_PATH __msg "${CMAKE_SOURCE_DIR}" "${ARGN}")
+ set(__msg " (check file: ${__msg})")
+ else()
+ set(__msg "")
+ endif()
+ MESSAGE(STATUS "Performing Test ${RESULT}${__msg}")
TRY_COMPILE(${RESULT}
"${CMAKE_BINARY_DIR}"
"${_fname}"
@@ -278,7 +303,11 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
endif()
ENDMACRO()
-macro(ocv_check_flag_support lang flag varname)
+macro(ocv_check_flag_support lang flag varname base_options)
+ if(CMAKE_BUILD_TYPE)
+ set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
+ endif()
+
if("_${lang}_" MATCHES "_CXX_")
set(_lang CXX)
elseif("_${lang}_" MATCHES "_C_")
@@ -293,7 +322,7 @@ macro(ocv_check_flag_support lang flag varname)
string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}")
string(REGEX REPLACE " -|-|=| |\\." "_" ${varname} "${${varname}}")
- ocv_check_compiler_flag("${_lang}" "${ARGN} ${flag}" ${${varname}})
+ ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN})
endmacro()
# turns off warnings
@@ -327,7 +356,7 @@ macro(ocv_warnings_disable)
string(REPLACE "${warning}" "" ${var} "${${var}}")
string(REPLACE "-W" "-Wno-" warning "${warning}")
endif()
- ocv_check_flag_support(${var} "${warning}" _varname)
+ ocv_check_flag_support(${var} "${warning}" _varname "")
if(${_varname})
set(${var} "${${var}} ${warning}")
endif()
@@ -342,7 +371,7 @@ macro(ocv_warnings_disable)
else()
string(REPLACE "-wd" "-Qwd" warning "${warning}")
endif()
- ocv_check_flag_support(${var} "${warning}" _varname)
+ ocv_check_flag_support(${var} "${warning}" _varname "")
if(${_varname})
set(${var} "${${var}} ${warning}")
endif()
@@ -357,7 +386,7 @@ macro(ocv_warnings_disable)
endmacro()
macro(add_apple_compiler_options the_module)
- ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS)
+ ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS "")
if(HAVE_OBJC_EXCEPTIONS)
foreach(source ${OPENCV_MODULE_${the_module}_SOURCES})
if("${source}" MATCHES "\\.mm$")
@@ -892,6 +921,11 @@ function(_ocv_append_target_includes target)
if (TARGET ${target}_object)
target_include_directories(${target}_object PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}})
endif()
+ if(OPENCV_DEPENDANT_TARGETS_${target})
+ foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}})
+ target_include_directories(${t} PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}})
+ endforeach()
+ endif()
unset(OCV_TARGET_INCLUDE_DIRS_${target} CACHE)
endif()
endfunction()
diff --git a/cmake/checks/cpu_avx.cpp b/cmake/checks/cpu_avx.cpp
new file mode 100644
index 0000000..05536f4
--- /dev/null
+++ b/cmake/checks/cpu_avx.cpp
@@ -0,0 +1,9 @@
+#if !defined __AVX__ // MSVC supports this flag since MSVS 2013
+#error "__AVX__ define is missing"
+#endif
+#include <immintrin.h>
+void test()
+{
+ __m256 a = _mm256_set1_ps(0.0f);
+}
+int main() { return 0; }
diff --git a/cmake/checks/cpu_avx2.cpp b/cmake/checks/cpu_avx2.cpp
new file mode 100644
index 0000000..3ab1143
--- /dev/null
+++ b/cmake/checks/cpu_avx2.cpp
@@ -0,0 +1,10 @@
+#if !defined __AVX2__ // MSVC supports this flag since MSVS 2013
+#error "__AVX2__ define is missing"
+#endif
+#include <immintrin.h>
+void test()
+{
+ int data[8] = {0,0,0,0, 0,0,0,0};
+ __m256i a = _mm256_loadu_si256((const __m256i *)data);
+}
+int main() { return 0; }
diff --git a/cmake/checks/cpu_avx512.cpp b/cmake/checks/cpu_avx512.cpp
new file mode 100644
index 0000000..d0898ab
--- /dev/null
+++ b/cmake/checks/cpu_avx512.cpp
@@ -0,0 +1,10 @@
+#if defined __AVX512__ || defined __AVX512F__
+#include <immintrin.h>
+void test()
+{
+ __m512i zmm = _mm512_setzero_si512();
+}
+#else
+#error "AVX512 is not supported"
+#endif
+int main() { return 0; }
diff --git a/cmake/checks/cpu_fp16.cpp b/cmake/checks/cpu_fp16.cpp
new file mode 100644
index 0000000..6951f1c
--- /dev/null
+++ b/cmake/checks/cpu_fp16.cpp
@@ -0,0 +1,33 @@
+#include <stdio.h>
+
+#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) || (defined __INTEL_COMPILER && defined __AVX__)
+#include <immintrin.h>
+int test()
+{
+ const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+ short dst[8];
+ __m128 v_src = _mm_load_ps(src);
+ __m128i v_dst = _mm_cvtps_ph(v_src, 0);
+ _mm_storel_epi64((__m128i*)dst, v_dst);
+ return (int)dst[0];
+}
+#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
+#include "arm_neon.h"
+int test()
+{
+ const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+ short dst[8];
+ float32x4_t v_src = *(float32x4_t*)src;
+ float16x4_t v_dst = vcvt_f16_f32(v_src);
+ *(float16x4_t*)dst = v_dst;
+ return (int)dst[0];
+}
+#else
+#error "FP16 is not supported"
+#endif
+
+int main()
+{
+ printf("%d\n", test());
+ return 0;
+}
diff --git a/cmake/checks/cpu_popcnt.cpp b/cmake/checks/cpu_popcnt.cpp
new file mode 100644
index 0000000..f55c9f3
--- /dev/null
+++ b/cmake/checks/cpu_popcnt.cpp
@@ -0,0 +1,8 @@
+#include <nmmintrin.h>
+#ifndef _MSC_VER
+#include <popcntintrin.h>
+#endif
+int main() {
+ int i = _mm_popcnt_u32(1); // 32-bit form on purpose: _mm_popcnt_u64 is only declared for x86-64, which made this probe fail on 32-bit x86
+ return 0;
+}
diff --git a/cmake/checks/cpu_sse.cpp b/cmake/checks/cpu_sse.cpp
new file mode 100644
index 0000000..c6269ac
--- /dev/null
+++ b/cmake/checks/cpu_sse.cpp
@@ -0,0 +1,2 @@
+#include <xmmintrin.h>
+int main() { return 0; }
diff --git a/cmake/checks/cpu_sse2.cpp b/cmake/checks/cpu_sse2.cpp
new file mode 100644
index 0000000..68a69f8
--- /dev/null
+++ b/cmake/checks/cpu_sse2.cpp
@@ -0,0 +1,2 @@
+#include <emmintrin.h>
+int main() { return 0; }
diff --git a/cmake/checks/cpu_sse3.cpp b/cmake/checks/cpu_sse3.cpp
new file mode 100644
index 0000000..98ce219
--- /dev/null
+++ b/cmake/checks/cpu_sse3.cpp
@@ -0,0 +1,7 @@
+#include <pmmintrin.h>
+int main() {
+ __m128 u, v;
+ u = _mm_set1_ps(0.0f);
+ v = _mm_moveldup_ps(u); // SSE3
+ return 0;
+}
diff --git a/cmake/checks/cpu_sse41.cpp b/cmake/checks/cpu_sse41.cpp
new file mode 100644
index 0000000..ddd835b
--- /dev/null
+++ b/cmake/checks/cpu_sse41.cpp
@@ -0,0 +1,6 @@
+#include <smmintrin.h>
+int main() {
+ __m128i a = _mm_setzero_si128(), b = _mm_setzero_si128();
+ __m128i c = _mm_packus_epi32(a, b);
+ return 0;
+}
diff --git a/cmake/checks/cpu_sse42.cpp b/cmake/checks/cpu_sse42.cpp
new file mode 100644
index 0000000..56f5665
--- /dev/null
+++ b/cmake/checks/cpu_sse42.cpp
@@ -0,0 +1,5 @@
+#include <nmmintrin.h>
+int main() {
+ unsigned int i = _mm_crc32_u8(1, 2); // CRC32 is an SSE4.2 instruction usable on 32- and 64-bit targets; _mm_popcnt_u64 is x86-64 only and belongs to the separate POPCNT probe
+ return 0;
+}
diff --git a/cmake/checks/cpu_ssse3.cpp b/cmake/checks/cpu_ssse3.cpp
new file mode 100644
index 0000000..e583199
--- /dev/null
+++ b/cmake/checks/cpu_ssse3.cpp
@@ -0,0 +1,7 @@
+#include <tmmintrin.h>
+const double v = 0;
+int main() {
+ __m128i a = _mm_setzero_si128();
+ __m128i b = _mm_abs_epi32(a);
+ return 0;
+}
diff --git a/cmake/checks/fp16.cpp b/cmake/checks/fp16.cpp
deleted file mode 100644
index c77c844..0000000
--- a/cmake/checks/fp16.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include <stdio.h>
-
-#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)
-#include <immintrin.h>
-int test()
-{
- const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- short dst[8];
- __m128 v_src = _mm_load_ps(src);
- __m128i v_dst = _mm_cvtps_ph(v_src, 0);
- _mm_storel_epi64((__m128i*)dst, v_dst);
- return (int)dst[0];
-}
-#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
-#include "arm_neon.h"
-int test()
-{
- const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- short dst[8];
- float32x4_t v_src = *(float32x4_t*)src;
- float16x4_t v_dst = vcvt_f16_f32(v_src);
- *(float16x4_t*)dst = v_dst;
- return (int)dst[0];
-}
-#else
-#error "FP16 is not supported"
-#endif
-
-int main()
-{
- printf("%d\n", test());
- return 0;
-}
diff --git a/cmake/templates/cv_cpu_config.h.in b/cmake/templates/cv_cpu_config.h.in
new file mode 100644
index 0000000..27b2731
--- /dev/null
+++ b/cmake/templates/cv_cpu_config.h.in
@@ -0,0 +1,5 @@
+// OpenCV CPU baseline features
+@OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE@
+
+// OpenCV supported CPU dispatched features
+@OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE@
diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in
index 05add9e..658d12c 100644
--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@@ -1,6 +1,15 @@
+#ifndef OPENCV_CVCONFIG_H_INCLUDED
+#define OPENCV_CVCONFIG_H_INCLUDED
+
/* OpenCV compiled as static or dynamic libs */
#cmakedefine BUILD_SHARED_LIBS
+/* OpenCV intrinsics optimized code */
+#cmakedefine CV_ENABLE_INTRINSICS
+
+/* OpenCV additional optimized code */
+#cmakedefine CV_DISABLE_OPTIMIZATION
+
/* Compile for 'real' NVIDIA GPU architectures */
#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
@@ -206,3 +215,7 @@
/* OpenVX */
#cmakedefine HAVE_OPENVX
+
+
+
+#endif // OPENCV_CVCONFIG_H_INCLUDED
diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
new file mode 100644
index 0000000..9a8537f
--- /dev/null
+++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
@@ -0,0 +1,166 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#if defined __OPENCV_BUILD \
+
+#include "cv_cpu_config.h"
+#include "cv_cpu_helper.h"
+
+#if defined CV_ENABLE_INTRINSICS \
+ && !defined CV_DISABLE_OPTIMIZATION \
+ && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
+
+#ifdef CV_CPU_COMPILE_SSE2
+# include <emmintrin.h>
+# define CV_MMX 1
+# define CV_SSE 1
+# define CV_SSE2 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE3
+# include <pmmintrin.h>
+# define CV_SSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSSE3
+# include <tmmintrin.h>
+# define CV_SSSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_1
+# include <smmintrin.h>
+# define CV_SSE4_1 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_2
+# include <nmmintrin.h>
+# define CV_SSE4_2 1
+#endif
+#ifdef CV_CPU_COMPILE_POPCNT
+# ifdef _MSC_VER
+# include <nmmintrin.h>
+# if defined(_M_X64)
+# define CV_POPCNT_U64 _mm_popcnt_u64
+# endif
+# define CV_POPCNT_U32 _mm_popcnt_u32
+# else
+# include <popcntintrin.h>
+# if defined(__x86_64__)
+# define CV_POPCNT_U64 __builtin_popcountll
+# endif
+# define CV_POPCNT_U32 __builtin_popcount
+# endif
+# define CV_POPCNT 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX
+# include <immintrin.h>
+# define CV_AVX 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX2
+# include <immintrin.h>
+# define CV_AVX2 1
+#endif
+#ifdef CV_CPU_COMPILE_FMA3
+# define CV_FMA3 1
+#endif
+
+#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+# include <arm_neon.h>
+# define CV_NEON 1
+#endif
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+# include <arm_neon.h>
+#endif
+
+#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
+
+#endif // __OPENCV_BUILD
+
+
+
+#if !defined __OPENCV_BUILD // Compatibility code
+
+#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
+# include <emmintrin.h>
+# define CV_MMX 1
+# define CV_SSE 1
+# define CV_SSE2 1
+#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM)
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+# include <arm_neon.h>
+# define CV_NEON 1
+#endif
+
+#endif // !__OPENCV_BUILD (Compatibility code)
+
+
+
+#ifndef CV_MMX
+# define CV_MMX 0
+#endif
+#ifndef CV_SSE
+# define CV_SSE 0
+#endif
+#ifndef CV_SSE2
+# define CV_SSE2 0
+#endif
+#ifndef CV_SSE3
+# define CV_SSE3 0
+#endif
+#ifndef CV_SSSE3
+# define CV_SSSE3 0
+#endif
+#ifndef CV_SSE4_1
+# define CV_SSE4_1 0
+#endif
+#ifndef CV_SSE4_2
+# define CV_SSE4_2 0
+#endif
+#ifndef CV_POPCNT
+# define CV_POPCNT 0
+#endif
+#ifndef CV_AVX
+# define CV_AVX 0
+#endif
+#ifndef CV_AVX2
+# define CV_AVX2 0
+#endif
+#ifndef CV_FMA3
+# define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+# define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+# define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+# define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+# define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+# define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA512
+# define CV_AVX_512IFMA512 0
+#endif
+#ifndef CV_AVX_512PF
+# define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+# define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+# define CV_AVX_512VL 0
+#endif
+
+#ifndef CV_NEON
+# define CV_NEON 0
+#endif
diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h
new file mode 100644
index 0000000..cb755d6
--- /dev/null
+++ b/modules/core/include/opencv2/core/cv_cpu_helper.h
@@ -0,0 +1,133 @@
+// AUTOGENERATED, DO NOT EDIT
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
+# define CV_CPU_HAS_SUPPORT_SSE 1
+# define CV_CPU_CALL_SSE(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
+# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
+# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_SSE 0
+# define CV_CPU_CALL_SSE(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
+# define CV_CPU_HAS_SUPPORT_SSE2 1
+# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
+# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
+# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_SSE2 0
+# define CV_CPU_CALL_SSE2(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
+# define CV_CPU_HAS_SUPPORT_SSE3 1
+# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
+# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
+# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_SSE3 0
+# define CV_CPU_CALL_SSE3(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
+# define CV_CPU_HAS_SUPPORT_SSSE3 1
+# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
+# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
+# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_SSSE3 0
+# define CV_CPU_CALL_SSSE3(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
+# define CV_CPU_HAS_SUPPORT_SSE4_1 1
+# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
+# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
+# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_SSE4_1 0
+# define CV_CPU_CALL_SSE4_1(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
+# define CV_CPU_HAS_SUPPORT_SSE4_2 1
+# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
+# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
+# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_SSE4_2 0
+# define CV_CPU_CALL_SSE4_2(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
+# define CV_CPU_HAS_SUPPORT_POPCNT 1
+# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
+# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
+# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_POPCNT 0
+# define CV_CPU_CALL_POPCNT(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
+# define CV_CPU_HAS_SUPPORT_AVX 1
+# define CV_CPU_CALL_AVX(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
+# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
+# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_AVX 0
+# define CV_CPU_CALL_AVX(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
+# define CV_CPU_HAS_SUPPORT_FP16 1
+# define CV_CPU_CALL_FP16(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
+# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
+# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_FP16 0
+# define CV_CPU_CALL_FP16(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
+# define CV_CPU_HAS_SUPPORT_AVX2 1
+# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
+# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
+# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_AVX2 0
+# define CV_CPU_CALL_AVX2(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
+# define CV_CPU_HAS_SUPPORT_FMA3 1
+# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
+# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
+# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_FMA3 0
+# define CV_CPU_CALL_FMA3(...)
+#endif
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
+# define CV_CPU_HAS_SUPPORT_NEON 1
+# define CV_CPU_CALL_NEON(...) return __VA_ARGS__
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
+# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
+# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
+#else
+# define CV_CPU_HAS_SUPPORT_NEON 0
+# define CV_CPU_CALL_NEON(...)
+#endif
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index 699b166..0a46e02 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -48,6 +48,10 @@
//! @addtogroup core_utils
//! @{
+#ifdef __OPENCV_BUILD
+#include "cvconfig.h"
+#endif
+
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
#endif
@@ -59,10 +63,6 @@
#undef abs
#undef Complex
-#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
-# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
-#endif
-
#include <limits.h>
#include "opencv2/core/hal/interface.h"
@@ -88,7 +88,7 @@
# endif
#endif
-#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
+#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
# define CV_ENABLE_UNROLLED 0
#else
# define CV_ENABLE_UNROLLED 1
@@ -161,142 +161,9 @@ enum CpuFeatures {
CPU_NEON = 100
};
-// do not include SSE/AVX/NEON headers for NVCC compiler
-#ifndef __CUDACC__
-
-#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
-# include <emmintrin.h>
-# define CV_MMX 1
-# define CV_SSE 1
-# define CV_SSE2 1
-# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# include <pmmintrin.h>
-# define CV_SSE3 1
-# endif
-# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# include <tmmintrin.h>
-# define CV_SSSE3 1
-# endif
-# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# include <smmintrin.h>
-# define CV_SSE4_1 1
-# endif
-# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# include <nmmintrin.h>
-# define CV_SSE4_2 1
-# endif
-# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# ifdef _MSC_VER
-# include <nmmintrin.h>
-# else
-# include <popcntintrin.h>
-# endif
-# define CV_POPCNT 1
-# endif
-# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
-// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
-// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
-# include <immintrin.h>
-# define CV_AVX 1
-# if defined(_XCR_XFEATURE_ENABLED_MASK)
-# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
-# else
-# define __xgetbv() 0
-# endif
-# endif
-# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
-# include <immintrin.h>
-# define CV_AVX2 1
-# if defined __FMA__
-# define CV_FMA3 1
-# endif
-# endif
-#endif
-
-#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
-# include <Intrin.h>
-# include <arm_neon.h>
-# define CV_NEON 1
-# define CPU_HAS_NEON_FEATURE (true)
-#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
-# include <arm_neon.h>
-# define CV_NEON 1
-#endif
-
-#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
-# define CV_VFP 1
-#endif
-
-#endif // __CUDACC__
-
-#ifndef CV_POPCNT
-#define CV_POPCNT 0
-#endif
-#ifndef CV_MMX
-# define CV_MMX 0
-#endif
-#ifndef CV_SSE
-# define CV_SSE 0
-#endif
-#ifndef CV_SSE2
-# define CV_SSE2 0
-#endif
-#ifndef CV_SSE3
-# define CV_SSE3 0
-#endif
-#ifndef CV_SSSE3
-# define CV_SSSE3 0
-#endif
-#ifndef CV_SSE4_1
-# define CV_SSE4_1 0
-#endif
-#ifndef CV_SSE4_2
-# define CV_SSE4_2 0
-#endif
-#ifndef CV_AVX
-# define CV_AVX 0
-#endif
-#ifndef CV_AVX2
-# define CV_AVX2 0
-#endif
-#ifndef CV_FMA3
-# define CV_FMA3 0
-#endif
-#ifndef CV_AVX_512F
-# define CV_AVX_512F 0
-#endif
-#ifndef CV_AVX_512BW
-# define CV_AVX_512BW 0
-#endif
-#ifndef CV_AVX_512CD
-# define CV_AVX_512CD 0
-#endif
-#ifndef CV_AVX_512DQ
-# define CV_AVX_512DQ 0
-#endif
-#ifndef CV_AVX_512ER
-# define CV_AVX_512ER 0
-#endif
-#ifndef CV_AVX_512IFMA512
-# define CV_AVX_512IFMA512 0
-#endif
-#ifndef CV_AVX_512PF
-# define CV_AVX_512PF 0
-#endif
-#ifndef CV_AVX_512VBMI
-# define CV_AVX_512VBMI 0
-#endif
-#ifndef CV_AVX_512VL
-# define CV_AVX_512VL 0
-#endif
-#ifndef CV_NEON
-# define CV_NEON 0
-#endif
+#include "cv_cpu_dispatch.h"
-#ifndef CV_VFP
-# define CV_VFP 0
-#endif
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp
index c76936a..31c1062 100644
--- a/modules/core/include/opencv2/core/fast_math.hpp
+++ b/modules/core/include/opencv2/core/fast_math.hpp
@@ -47,6 +47,12 @@
#include "opencv2/core/cvdef.h"
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+#include <emmintrin.h>
+#endif
+
+
//! @addtogroup core_utils
//! @{
@@ -66,7 +72,7 @@
# include "tegra_round.hpp"
#endif
-#if CV_VFP
+#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
// 1. general scheme
#define ARM_ROUND(_value, _asm_string) \
int res; \
@@ -82,7 +88,7 @@
#endif
// 3. version for float
#define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
-#endif // CV_VFP
+#endif
/** @brief Rounds floating-point number to the nearest integer
@@ -93,7 +99,7 @@ CV_INLINE int
cvRound( double value )
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
- && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+ && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86
@@ -108,7 +114,7 @@ cvRound( double value )
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_DBL(value);
#elif defined CV_ICC || defined __GNUC__
-# if CV_VFP
+# if defined ARM_ROUND_DBL
ARM_ROUND_DBL(value);
# else
return (int)lrint(value);
@@ -130,18 +136,8 @@ cvRound( double value )
*/
CV_INLINE int cvFloor( double value )
{
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- __m128d t = _mm_set_sd( value );
- int i = _mm_cvtsd_si32(t);
- return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
-#elif defined __GNUC__
int i = (int)value;
return i - (i > value);
-#else
- int i = cvRound(value);
- float diff = (float)(value - i);
- return i - (diff < 0);
-#endif
}
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
@@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value )
*/
CV_INLINE int cvCeil( double value )
{
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
- __m128d t = _mm_set_sd( value );
- int i = _mm_cvtsd_si32(t);
- return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
-#elif defined __GNUC__
int i = (int)value;
return i + (i < value);
-#else
- int i = cvRound(value);
- float diff = (float)(i - value);
- return i + (diff < 0);
-#endif
}
/** @brief Determines if the argument is Not A Number.
@@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value )
/** @overload */
CV_INLINE int cvRound(float value)
{
-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
- defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+ && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86
@@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value)
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_FLT(value);
#elif defined CV_ICC || defined __GNUC__
-# if CV_VFP
+# if defined ARM_ROUND_FLT
ARM_ROUND_FLT(value);
# else
return (int)lrintf(value);
@@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value )
/** @overload */
CV_INLINE int cvFloor( float value )
{
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- __m128 t = _mm_set_ss( value );
- int i = _mm_cvtss_si32(t);
- return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
-#elif defined __GNUC__
int i = (int)value;
return i - (i > value);
-#else
- int i = cvRound(value);
- float diff = (float)(value - i);
- return i - (diff < 0);
-#endif
}
/** @overload */
@@ -260,18 +236,8 @@ CV_INLINE int cvFloor( int value )
/** @overload */
CV_INLINE int cvCeil( float value )
{
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
- __m128 t = _mm_set_ss( value );
- int i = _mm_cvtss_si32(t);
- return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
-#elif defined __GNUC__
int i = (int)value;
return i + (i < value);
-#else
- int i = cvRound(value);
- float diff = (float)(i - value);
- return i + (diff < 0);
-#endif
}
/** @overload */
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index 3c8f39d..a983838 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -237,24 +237,81 @@ void Exception::formatMessage()
msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str());
}
+static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL };
+
+static const char* getHWFeatureName(int id) // name registered for feature `id`; NULL when the id is unnamed or outside the table
+{
+ return (id >= 0 && id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL; // negative ids are rejected too: they would index before the array
+}
+static const char* getHWFeatureNameSafe(int id)
+{
+ const char* name = getHWFeatureName(id);
+ return name ? name : "Unknown feature";
+}
+
struct HWFeatures
{
enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
- HWFeatures(void)
+ HWFeatures(bool run_initialize = false) // run_initialize=true runs initialize() right away; false leaves every flag cleared
{
- memset( have, 0, sizeof(have) );
- x86_family = 0;
+ memset( have, 0, sizeof(have) ); // clear the whole array: have[] is declared with MAX_FEATURE+1 slots, not MAX_FEATURE
+ if (run_initialize)
+ initialize();
}
- static HWFeatures initialize(void)
+ static void initializeNames()
{
- HWFeatures f;
+ for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
+ {
+ g_hwFeatureNames[i] = 0;
+ }
+ g_hwFeatureNames[CPU_MMX] = "MMX";
+ g_hwFeatureNames[CPU_SSE] = "SSE";
+ g_hwFeatureNames[CPU_SSE2] = "SSE2";
+ g_hwFeatureNames[CPU_SSE3] = "SSE3";
+ g_hwFeatureNames[CPU_SSSE3] = "SSSE3";
+ g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1";
+ g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2";
+ g_hwFeatureNames[CPU_POPCNT] = "POPCNT";
+ g_hwFeatureNames[CPU_FP16] = "FP16";
+ g_hwFeatureNames[CPU_AVX] = "AVX";
+ g_hwFeatureNames[CPU_AVX2] = "AVX2";
+ g_hwFeatureNames[CPU_FMA3] = "FMA3";
+
+ g_hwFeatureNames[CPU_AVX_512F] = "AVX512F";
+ g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW";
+ g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD";
+ g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ";
+ g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER";
+ g_hwFeatureNames[CPU_AVX_512IFMA512] = "AVX512IFMA";
+ g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF";
+ g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI";
+ g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL";
+
+ g_hwFeatureNames[CPU_NEON] = "NEON";
+ }
+
+ void initialize(void)
+ {
+#ifndef WINRT
+ if (getenv("OPENCV_DUMP_CONFIG"))
+ {
+ fprintf(stderr, "\nOpenCV build configuration is:\n%s\n",
+ cv::getBuildInformation().c_str());
+ }
+#endif
+
+ initializeNames();
+
int cpuid_data[4] = { 0, 0, 0, 0 };
+ int cpuid_data_ex[4] = { 0, 0, 0, 0 };
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
+ #define OPENCV_HAVE_X86_CPUID 1
__cpuid(cpuid_data, 1);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ #define OPENCV_HAVE_X86_CPUID 1
#ifdef __x86_64__
asm __volatile__
(
@@ -278,33 +335,36 @@ struct HWFeatures
#endif
#endif
- f.x86_family = (cpuid_data[0] >> 8) & 15;
- if( f.x86_family >= 6 )
+ #ifdef OPENCV_HAVE_X86_CPUID
+ int x86_family = (cpuid_data[0] >> 8) & 15;
+ if( x86_family >= 6 )
{
- f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0;
- f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
- f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
- f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
- f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
- f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
- f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
- f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
- f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
- f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
- f.have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
+ have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0;
+ have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
+ have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
+ have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
+ have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
+ have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
+ have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
+ have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
+ have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
+ have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0;
+ have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
// make the second call to the cpuid command in order to get
// information about extended features like AVX2
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
- __cpuidex(cpuid_data, 7, 0);
+ #define OPENCV_HAVE_X86_CPUID_EX 1
+ __cpuidex(cpuid_data_ex, 7, 0);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ #define OPENCV_HAVE_X86_CPUID_EX 1
#ifdef __x86_64__
asm __volatile__
(
"movl $7, %%eax\n\t"
"movl $0, %%ecx\n\t"
"cpuid\n\t"
- :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
+ :[eax]"=a"(cpuid_data_ex[0]),[ebx]"=b"(cpuid_data_ex[1]),[ecx]"=c"(cpuid_data_ex[2]),[edx]"=d"(cpuid_data_ex[3])
:
: "cc"
);
@@ -317,29 +377,76 @@ struct HWFeatures
"cpuid\n\t"
"movl %%ebx, %0\n\t"
"popl %%ebx\n\t"
- : "=r"(cpuid_data[1]), "=c"(cpuid_data[2])
+ : "=r"(cpuid_data_ex[1]), "=c"(cpuid_data_ex[2])
:
: "cc"
);
#endif
#endif
- f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
-
- f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
- f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
- f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
- f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
- f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
- f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
- f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
- f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
- f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
+
+ #ifdef OPENCV_HAVE_X86_CPUID_EX
+ have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0;
+
+ have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0;
+ have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0;
+ have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0;
+ have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0;
+ have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0;
+ have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0;
+ have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0;
+ have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0;
+ have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0;
+ #else
+ CV_UNUSED(cpuid_data_ex);
+ #endif
+
+ bool have_AVX_OS_support = true; // both flags are cleared below unless the OS reports that it saves the wider register state
+ bool have_AVX512_OS_support = true;
+ if (!(cpuid_data[2] & (1<<27)))
+ have_AVX_OS_support = false; // bit 27 is clear here: the OS does NOT use XSAVE/XRSTORE, so AVX must be treated as unavailable
+ else
+ {
+ int xcr0 = 0; // stays 0 (which fails both mask checks below) when neither xgetbv path is compiled in
+ #ifdef _XCR_XFEATURE_ENABLED_MASK // requires immintrin.h
+ xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+ #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); // ecx=0 selects XCR0; only the low 32 bits (eax) are kept
+ #endif
+ if ((xcr0 & 0x6) != 0x6)
+ have_AVX_OS_support = false; // YMM registers: XCR0 bits 1 and 2 must both be enabled by the OS
+ if ((xcr0 & 0xe6) != 0xe6)
+ have_AVX512_OS_support = false; // ZMM registers: additionally requires XCR0 bits 5, 6 and 7
+ }
+
+ if (!have_AVX_OS_support)
+ {
+ have[CV_CPU_AVX] = false;
+ have[CV_CPU_FP16] = false;
+ have[CV_CPU_AVX2] = false;
+ have[CV_CPU_FMA3] = false;
+ }
+ if (!have_AVX_OS_support || !have_AVX512_OS_support)
+ {
+ have[CV_CPU_AVX_512F] = false;
+ have[CV_CPU_AVX_512BW] = false;
+ have[CV_CPU_AVX_512CD] = false;
+ have[CV_CPU_AVX_512DQ] = false;
+ have[CV_CPU_AVX_512ER] = false;
+ have[CV_CPU_AVX_512IFMA512] = false;
+ have[CV_CPU_AVX_512PF] = false;
+ have[CV_CPU_AVX_512VBMI] = false;
+ have[CV_CPU_AVX_512VL] = false;
+ }
}
+ #else
+ CV_UNUSED(cpuid_data);
+ CV_UNUSED(cpuid_data_ex);
+ #endif // OPENCV_HAVE_X86_CPUID
#if defined ANDROID || defined __linux__
#ifdef __aarch64__
- f.have[CV_CPU_NEON] = true;
- f.have[CV_CPU_FP16] = true;
+ have[CV_CPU_NEON] = true;
+ have[CV_CPU_FP16] = true;
#elif defined __arm__
int cpufile = open("/proc/self/auxv", O_RDONLY);
@@ -352,8 +459,8 @@ struct HWFeatures
{
if (auxv.a_type == AT_HWCAP)
{
- f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
- f.have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
+ have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
+ have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
break;
}
}
@@ -363,21 +470,133 @@ struct HWFeatures
#endif
#elif (defined __clang__ || defined __APPLE__)
#if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
- f.have[CV_CPU_NEON] = true;
+ have[CV_CPU_NEON] = true;
#endif
#if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__))
- f.have[CV_CPU_FP16] = true;
+ have[CV_CPU_FP16] = true;
#endif
#endif
- return f;
+ int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
+ if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])))
+ {
+ fprintf(stderr, "\n"
+ "******************************************************************\n"
+ "* FATAL ERROR: *\n"
+ "* This OpenCV build doesn't support current CPU/HW configuration *\n"
+ "* *\n"
+ "* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n"
+ "******************************************************************\n");
+ fprintf(stderr, "\nRequired baseline features:\n");
+ checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true);
+ CV_ErrorNoReturn(cv::Error::StsAssert, "Missing support for required CPU baseline features. Check OpenCV build configuration and required CPU/HW setup.");
+ }
+
+ readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]));
+ }
+
+ bool checkFeatures(const int* features, int count, bool dump = false) // true iff every non-zero id in features[0..count) is set in have[]; dump=true also prints one status line per id to stderr
+ {
+ bool result = true;
+ for (int i = 0; i < count; i++)
+ {
+ int feature = features[i];
+ if (feature) // id 0 is skipped, so a zero entry never affects the result
+ {
+ if (have[feature])
+ {
+ if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature));
+ }
+ else
+ {
+ result = false; // no early exit: keep scanning so dump mode reports every missing feature
+ if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature));
+ }
+ }
+ }
+ return result;
+ }
+
+ static inline bool isSymbolSeparator(char c)
+ {
+ return c == ',' || c == ';' || c == '-';
+ }
+
+ void readSettings(const int* baseline_features, int baseline_count)
+ {
+ bool dump = true;
+ const char* disabled_features =
+#ifndef WINRT
+ getenv("OPENCV_CPU_DISABLE");
+#else
+ NULL;
+#endif
+ if (disabled_features && disabled_features[0] != 0)
+ {
+ const char* start = disabled_features;
+ for (;;)
+ {
+ while (start[0] != 0 && isSymbolSeparator(start[0]))
+ {
+ start++;
+ }
+ if (start[0] == 0)
+ break;
+ const char* end = start;
+ while (end[0] != 0 && !isSymbolSeparator(end[0]))
+ {
+ end++;
+ }
+ if (end == start)
+ continue;
+ cv::String feature(start, end);
+ start = end;
+
+ CV_Assert(feature.size() > 0);
+
+ bool found = false;
+ for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
+ {
+ if (!g_hwFeatureNames[i]) continue;
+ size_t len = strlen(g_hwFeatureNames[i]);
+ if (len != feature.size()) continue;
+ if (feature.compare(g_hwFeatureNames[i]) == 0)
+ {
+ bool isBaseline = false;
+ for (int k = 0; k < baseline_count; k++)
+ {
+ if (baseline_features[k] == i)
+ {
+ isBaseline = true;
+ break;
+ }
+ }
+ if (isBaseline)
+ {
+ if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i));
+ }
+ if (!have[i])
+ {
+ if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i));
+ }
+ have[i] = false;
+
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str());
+ }
+ }
+ }
}
- int x86_family;
bool have[MAX_FEATURE+1];
};
-static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures();
+static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false);
static HWFeatures* currentFeatures = &featuresEnabled;
bool checkHardwareSupport(int feature)
diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt
index eb56177..6d9c650 100644
--- a/modules/highgui/CMakeLists.txt
+++ b/modules/highgui/CMakeLists.txt
@@ -65,7 +65,7 @@ elseif(HAVE_QT)
list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES})
list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES})
- ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag)
+ ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag "")
if(${_have_flag})
set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations)
endif()
diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 0fa5202..dcf2e44 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -1649,7 +1649,7 @@ struct VResizeLanczos4
{
CastOp castOp;
VecOp vecOp;
- int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
+ int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
{
@@ -1657,7 +1657,7 @@ struct VResizeLanczos4
const WT* S = src[0];
WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
- for( k = 1; k < 8; k++ )
+ for( int k = 1; k < 8; k++ )
{
b = beta[k]; S = src[k];
s0 += S[x]*b; s1 += S[x+1]*b;
diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp
index 51843fa..bb37ee9 100644
--- a/modules/objdetect/src/haar.cpp
+++ b/modules/objdetect/src/haar.cpp
@@ -824,10 +824,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
CvPoint pt, double& stage_sum, int start_stage )
{
#ifdef CV_HAAR_USE_AVX
- bool haveAVX = false;
- if(cv::checkHardwareSupport(CV_CPU_AVX))
- if(__xgetbv()&0x6)// Check if the OS will save the YMM registers
- haveAVX = true;
+ bool haveAVX = cv::checkHardwareSupport(CV_CPU_AVX);
#else
# ifdef CV_HAAR_USE_SSE
bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
--
2.7.4