From d055128d5038ec81a084fda58977ffe7805a3313 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 8 Oct 2025 14:02:58 +0200 Subject: [PATCH 01/40] PATCH: Add support for CMAKE_OSX_ARCHITECTURES This commit introduces logic to ensure that CMAKE_OSX_ARCHITECTURES is always set on Apple targets. It can either be specified on the CLI (i.e., in case of cross-compilation) or left blank (in which case it is set to CMAKE_HOST_SYSTEM_PROCESSOR). This will be used throughout the project to determine for which architecture to compile on Apple targets. Signed-off-by: Philipp Remy --- CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bd72150ef..80ed419c96 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,18 @@ cmake_minimum_required(VERSION 3.25) project(aliceVision LANGUAGES C CXX) +# Initialize CMAKE_OSX_ARCHITECTURES, if not specified on the command line. +if(APPLE AND NOT CMAKE_OSX_ARCHITECTURES) + message(STATUS "Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}") + if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64") + set(CMAKE_OSX_ARCHITECTURES "arm64") + elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(CMAKE_OSX_ARCHITECTURES "x86_64") + else() + message(FATAL_ERROR "CMAKE_HOST_SYSTEM_PROCESSOR was neither arm64 nor x86_64 on an Apple platform and CMAKE_OSX_ARCHITECTURES was not specified!") + endif() +endif() + option(ALICEVISION_BUILD_DEPENDENCIES "Build all AliceVision dependencies" OFF) option(AV_BUILD_ALICEVISION "Enable building of AliceVision" ON) option(AV_EIGEN_MEMORY_ALIGNMENT "Enable Eigen memory alignment" ON) From 51bdccc8719c7ed8fc7a000b2fdd950cef3f2b9d Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Tue, 2 Sep 2025 11:57:54 +0200 Subject: [PATCH 02/40] FEAT: Update OptimizeForArchitecture The previous OptimizeForArchitecture only included proper support for x86(_64). The new implementation supports x86(_64), arm(64), ppc and adds newer CPU families. 
Furthermore, it adapts OFA to account for cross-compilation on Apple targets. When targeting arm64, it selects apple-m1 as the baseline architecture (making it compatible with all Mac Apple Silicon Chips). When targeting x86_64, it selects skylake as the baseline architecture (making it compatible with all Intel Macs not older than 2015). All of this can be manually overridden by specifying TARGET_ARCHITECTURE on the command line. The code is derived/copied from https://gitlab.inria.fr/gismo/gismo, which is licensed under MPL-2. Signed-off-by: Philipp Remy --- src/CMakeLists.txt | 16 +- src/cmake/OFA/AddCXXCompilerFlag.cmake | 204 ++++ src/cmake/OFA/AutodetectArm.cmake | 427 +++++++ src/cmake/OFA/AutodetectPpc.cmake | 57 + src/cmake/OFA/AutodetectX86.cmake | 461 ++++++++ src/cmake/OFA/CheckARM.txt | 176 +++ src/cmake/OFA/CheckCXXCompilerFlag.cmake | 72 ++ src/cmake/OFA/CheckX86.txt | 218 ++++ src/cmake/OFA/CommonMacros.cmake | 10 + src/cmake/OFA/HandleArmOptions.cmake | 1162 +++++++++++++++++++ src/cmake/OFA/HandlePpcOptions.cmake | 170 +++ src/cmake/OFA/HandleX86Options.cmake | 899 ++++++++++++++ src/cmake/OFA/License.txt | 373 ++++++ src/cmake/OFA/OptimizeForArchitecture.cmake | 159 +++ src/cmake/OFA/cpuinfo_arm.c | 47 + src/cmake/OFA/cpuinfo_x86.cxx | 732 ++++++++++++ src/cmake/OptimizeForArchitecture.cmake | 612 ---------- src/nonFree/sift/CMakeLists.txt | 5 +- 18 files changed, 5183 insertions(+), 617 deletions(-) create mode 100644 src/cmake/OFA/AddCXXCompilerFlag.cmake create mode 100644 src/cmake/OFA/AutodetectArm.cmake create mode 100644 src/cmake/OFA/AutodetectPpc.cmake create mode 100644 src/cmake/OFA/AutodetectX86.cmake create mode 100644 src/cmake/OFA/CheckARM.txt create mode 100644 src/cmake/OFA/CheckCXXCompilerFlag.cmake create mode 100644 src/cmake/OFA/CheckX86.txt create mode 100644 src/cmake/OFA/CommonMacros.cmake create mode 100644 src/cmake/OFA/HandleArmOptions.cmake create mode 100644 src/cmake/OFA/HandlePpcOptions.cmake create mode 100644 
src/cmake/OFA/HandleX86Options.cmake create mode 100644 src/cmake/OFA/License.txt create mode 100644 src/cmake/OFA/OptimizeForArchitecture.cmake create mode 100644 src/cmake/OFA/cpuinfo_arm.c create mode 100644 src/cmake/OFA/cpuinfo_x86.cxx delete mode 100644 src/cmake/OptimizeForArchitecture.cmake diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b7914bee51..575af581c6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -187,14 +187,21 @@ endmacro(add_target_properties) # ============================================================================== set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -include(OptimizeForArchitecture) +include(OFA/OptimizeForArchitecture) OptimizeForArchitecture() -set(ALICEVISION_HAVE_SSE 0) -if (SSE2_FOUND OR TARGET_ARCHITECTURE STREQUAL "native") +set(ALICEVISION_HAVE_SSE 0 CACHE BOOL "SSE2 is available" FORCE) +if (USE_SSE2 OR TARGET_ARCHITECTURE STREQUAL "native") if (MSVC AND NOT ${CMAKE_CL_64}) add_definitions(/arch:SSE2) endif() - set(ALICEVISION_HAVE_SSE 1) + set(ALICEVISION_HAVE_SSE 1 CACHE BOOL "SSE2 is available" FORCE) +endif() +set(ALICEVISION_HAVE_AVX 0 CACHE BOOL "AVX is available" FORCE) +if(USE_AVX OR TARGET_ARCHITECTURE STREQUAL "native") + if (MSVC AND NOT ${CMAKE_CL_64}) + add_definitions(/arch:AVX) + endif() + set(ALICEVISION_HAVE_AVX 1 CACHE BOOL "AVX is available" FORCE) endif() if (UNIX) @@ -222,6 +229,7 @@ endif() # allocation feature with a separate flag, so use it if alignment is enabled in Eigen. 
# See https://eigen.tuxfamily.org/dox/group__TopicUnalignedArrayAssert.html if (AV_EIGEN_MEMORY_ALIGNMENT) + include(AddCompilerFlag) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1) AddCompilerFlag("-faligned-new") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 6.0) diff --git a/src/cmake/OFA/AddCXXCompilerFlag.cmake b/src/cmake/OFA/AddCXXCompilerFlag.cmake new file mode 100644 index 0000000000..00e433535c --- /dev/null +++ b/src/cmake/OFA/AddCXXCompilerFlag.cmake @@ -0,0 +1,204 @@ +# Add a given compiler flag to flag variables. +# +# Usage: +# AddCXXCompilerFlag(<flag> +# [CODE <code>] +# [EXTRA_FLAGS <extra_flags>] +# [FLAGS <flags>] +# [HEADERS <headers>] +# [RESULT <result>] +# [TESTS <tests>]) +# +# Input argument: +# <flag> flag to be added after successful completion of all tests +# +# Optional input arguments: +# CODE variable holding the test code; this overrides the +# automatic generation of the test code +# EXTRA_FLAGS variable holding the list of extra compiler flags that +# are used without checks +# FLAGS variable holding the list of flags to which <flag> is +# added after successful completion of all tests +# HEADERS variable holding the list of header files prepended to +# the C++ test code's main function +# TESTS variable holding the list of tests to be included in +# the C++ test code's main function body +# +# Output argument: +# RESULT variable holding the result of all tests + +#============================================================================= +# This code is largely inspired by +# +# AddCompilerFlag.cmake +# Copyright 2010-2015 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the names of contributing organizations nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# and +# +# CheckCXXCompilerFlag.cmake +# Copyright 2006-2009 Kitware, Inc. +# Copyright 2006 Alexander Neundorf +# Copyright 2011-2013 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * The names of Kitware, Inc., the Insight Consortium, or the names of +# any consortium members, or of any contributors, may not be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================= + +include(CheckIncludeFileCXX) +include(OFA/CheckCXXCompilerFlag) + +macro(AddCXXCompilerFlag _flag) + set(state 0) + unset(_code) + unset(_extra_flags) + unset(_flags) + unset(_headers) + unset(_result) + unset(_tests) + + foreach(_arg ${ARGN}) + if("x${_arg}" STREQUAL "xCODE") + set(state 1) + elseif("x${_arg}" STREQUAL "xEXTRA_FLAGS") + set(state 2) + elseif("x${_arg}" STREQUAL "xFLAGS") + set(state 3) + elseif("x${_arg}" STREQUAL "xHEADERS") + set(state 4) + elseif("x${_arg}" STREQUAL "xRESULT") + set(state 5) + elseif("x${_arg}" STREQUAL "xTESTS") + set(state 6) + + elseif(state EQUAL 1) + set(_code ${_arg}) + elseif(state EQUAL 2) + set(_extra_flags ${_arg}) + elseif(state EQUAL 3) + set(_flags ${_arg}) + elseif(state EQUAL 4) + set(_headers ${_arg}) + elseif(state EQUAL 5) + set(_result ${_arg}) + elseif(state EQUAL 6) + set(_tests ${_arg}) + else() + message(FATAL_ERROR "[OptimizeForArchitecture] The argument ${_arg} is not supported by AddCXXCompilerFlag") + endif() + endforeach() + + set(_check_include_file_cxx TRUE) + set(_check_cxx_source_compiles TRUE) + + # Check availability of header file(s) + foreach(_header ${_headers}) + set(_resultVar "HAVE_${_header}") + string(REGEX REPLACE "[-.+/:= ]" "_" _resultVar "${_resultVar}") + check_include_file_cxx(${_header} ${_resultVar} "${_flag}${_extra_flags}") + + if(NOT ${_resultVar}) + set(_check_include_file_cxx 
FALSE) + endif() + endforeach() + + # Check if compiler supports flag and can compile code + set(_cxx_code) + foreach(_header ${_headers}) + set(_cxx_code "${_cxx_code}\n#include<${_header}>") + endforeach() + + if(_code) + set(_cxx_code "${_cxx_code}\n${_code}") + elseif(_tests) + set(_cxx_code "${_cxx_code}\nint main() {") + foreach(_test ${_tests}) + set(_cxx_code "${_cxx_code}\n${_test}") + endforeach() + set(_cxx_code "${_cxx_code}\nreturn 0; }") + else() + set(_cxx_code "${_cxx_code}\nint main() { return 0; }") + endif() + + set(_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + set(CMAKE_REQUIRED_FLAGS "${_flag}${_extra_flags}") + set(_resultVar "HAVE_${_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _resultVar "${_resultVar}") + check_cxx_source_compiles("${_cxx_code}" ${_resultVar} + # Some compilers do not fail with a bad flag + FAIL_REGEX "error: bad value (.*) for .* switch" # GNU + FAIL_REGEX "argument unused during compilation" # clang + FAIL_REGEX "warning: the flag .* has been deprecated" # clang + FAIL_REGEX "is valid for .* but not for C\\\\+\\\\+" # GNU + FAIL_REGEX "unrecognized .*option" # GNU + FAIL_REGEX "ignored for target" # GNU + FAIL_REGEX "ignoring unknown option" # MSVC + FAIL_REGEX "warning D9002" # MSVC + FAIL_REGEX "[Uu]nknown option" # HP + FAIL_REGEX "[Ww]arning: [Oo]ption" # SunPro + FAIL_REGEX "[Ww]arning: illegal use of -xarch option" # SunPro + FAIL_REGEX "command option .* is not recognized" # XL + FAIL_REGEX "WARNING: unknown flag:" # Open64 + FAIL_REGEX "command line error" # ICC + FAIL_REGEX "command line warning" # ICC + FAIL_REGEX "#10236:" # ICC: File not found + FAIL_REGEX " #10159: " # ICC + FAIL_REGEX " #10353: " # ICC: option '-mfma' ignored, suggest using '-march=core-avx2' + FAIL_REGEX " #10006: " # ICC: ignoring unknown option '-mavx512fp16' + ) + set(CMAKE_REQUIRED_FLAGS "${_CMAKE_REQUIRED_FLAGS}") + + if(NOT ${_resultVar}) + set(_check_cxx_source_compiles FALSE) + endif() + + if (DEFINED _result) + if 
(${_check_include_file_cxx} AND ${_check_cxx_source_compiles}) + set(${_result} TRUE) + else() + set(${_result} FALSE) + endif() + endif() + + if(DEFINED _flags AND ${_check_include_file_cxx} AND ${_check_cxx_source_compiles}) + list(APPEND ${_flags} "${_flag}") + endif() +endmacro(AddCXXCompilerFlag) diff --git a/src/cmake/OFA/AutodetectArm.cmake b/src/cmake/OFA/AutodetectArm.cmake new file mode 100644 index 0000000000..786798a5c8 --- /dev/null +++ b/src/cmake/OFA/AutodetectArm.cmake @@ -0,0 +1,427 @@ +#============================================================================= +# Autodetection of ARM / ARM64 CPUs +# +# This is a two-step process: +# +# 1. Get the CPUID from the system by reading /proc/cpuinfo (on +# Linux), the system registry (on Windows), or executing an +# OS-specific command (macOS, BSD, SunOS, ...) +# +# 2. Determine the specific CPU from the CPUID +#============================================================================= + +macro(OFA_AutodetectArm) + set(_cpu_implementer) + set(_cpu_architecture) + set(_cpu_variant) + set(_cpu_part) + set(_cpu_revision) + + # Get CPUID from system + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + + # Linux + file(READ "/proc/cpuinfo" _cpuinfo) + string(REGEX REPLACE ".*CPU implementer[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_implementer "${_cpuinfo}") + string(REGEX REPLACE ".*CPU architecture[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_architecture "${_cpuinfo}") + string(REGEX REPLACE ".*CPU variant[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_variant "${_cpuinfo}") + string(REGEX REPLACE ".*CPU part[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_part "${_cpuinfo}") + string(REGEX REPLACE ".*CPU revision[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_revision "${_cpuinfo}") + string(REGEX REPLACE ".*Features[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}") + + elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + + # macOS + execute_process(COMMAND /usr/sbin/sysctl -n hw.cputype hw.cputype hw.cpusubtype 
hw.cpufamily hw.cpusubfamily + OUTPUT_VARIABLE _sysctl_output_string RESULT_VARIABLE _error) + if(NOT _error) + string(REPLACE "\n" ";" _sysctl_output "${_sysctl_output_string}") + list(GET _sysctl_output 0 _cpu_implementer) + list(GET _sysctl_output 1 _cpu_architecture) + list(GET _sysctl_output 2 _cpu_variant) + list(GET _sysctl_output 3 _cpu_part) + list(GET _sysctl_output 4 _cpu_revision) + endif() + if(_error) + message(WARNING "[OptimizeForArchitecture] Auto-detection of optimization flags failed and will use the generic CPU settings.") + endif() + + else() + + # Try to retrieve CPUID directly by compiling and running cmake/OFA/cpuinfo_arm.c + try_run(_exit _ok + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/OFA/cpuinfo_arm.c + RUN_OUTPUT_VARIABLE _cpuinfo) + + if(_ok AND _exit EQUAL 0) + string(REGEX REPLACE ".*implementer[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_implementer "${_cpuinfo}") + string(REGEX REPLACE ".*architecture[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_architecture "${_cpuinfo}") + string(REGEX REPLACE ".*variant[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_variant "${_cpuinfo}") + string(REGEX REPLACE ".*part[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_part "${_cpuinfo}") + string(REGEX REPLACE ".*revision[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_revision "${_cpuinfo}") + + else() + + message(FATAL_ERROR "[OptimizeForArchitecture] OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") + endif() + endif() + + # Determine CPU from CPUID + # Taken from https://github.com/karelzak/util-linux/blob/master/sys-utils/lscpu-arm.c + # and https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html + + # ARM + if(_cpu_implementer STREQUAL "0x41") + if(_cpu_part STREQUAL "0x810") + set(TARGET_ARCHITECTURE "arm810") + elseif(_cpu_part STREQUAL "0x920") + set(TARGET_ARCHITECTURE "arm920t") + elseif(_cpu_part STREQUAL "0x922") + set(TARGET_ARCHITECTURE "arm922t") + elseif(_cpu_part STREQUAL "0x926") + set(TARGET_ARCHITECTURE "arm926ej-s") + elseif(_cpu_part 
STREQUAL "0x940") + set(TARGET_ARCHITECTURE "arm940t") + elseif(_cpu_part STREQUAL "0x946") + set(TARGET_ARCHITECTURE "arm946e-s") + elseif(_cpu_part STREQUAL "0x966") + set(TARGET_ARCHITECTURE "arm966e-s") + elseif(_cpu_part STREQUAL "0xa20") + set(TARGET_ARCHITECTURE "arm1020e") + elseif(_cpu_part STREQUAL "0xa22") + set(TARGET_ARCHITECTURE "arm1022e") + elseif(_cpu_part STREQUAL "0xa26") + set(TARGET_ARCHITECTURE "arm1026ej-s") + elseif(_cpu_part STREQUAL "0xb02") + set(TARGET_ARCHITECTURE "mpcore") + elseif(_cpu_part STREQUAL "0xb36") + set(TARGET_ARCHITECTURE "arm1136jf-s") + elseif(_cpu_part STREQUAL "0xb56") + set(TARGET_ARCHITECTURE "arm1156t2f-s") + elseif(_cpu_part STREQUAL "0xb76") + set(TARGET_ARCHITECTURE "arm1176jzf-s") + elseif(_cpu_part STREQUAL "0xc05") + set(TARGET_ARCHITECTURE "cortex-a5") + elseif(_cpu_part STREQUAL "0xc07") + set(TARGET_ARCHITECTURE "cortex-a7") + elseif(_cpu_part STREQUAL "0xc08") + set(TARGET_ARCHITECTURE "cortex-a8") + elseif(_cpu_part STREQUAL "0xc09") + set(TARGET_ARCHITECTURE "cortex-a9") + elseif(_cpu_part STREQUAL "0xc0d") + set(TARGET_ARCHITECTURE "cortex-a12") + elseif(_cpu_part STREQUAL "0xc0f") + set(TARGET_ARCHITECTURE "cortex-a15") + elseif(_cpu_part STREQUAL "0xc0e") + set(TARGET_ARCHITECTURE "cortex-a17") + elseif(_cpu_part STREQUAL "0xc14") + set(TARGET_ARCHITECTURE "cortex-r4f") + elseif(_cpu_part STREQUAL "0xc15") + set(TARGET_ARCHITECTURE "cortex-r5") + elseif(_cpu_part STREQUAL "0xc17") + set(TARGET_ARCHITECTURE "cortex-r7") + elseif(_cpu_part STREQUAL "0xc18") + set(TARGET_ARCHITECTURE "cortex-r8") + elseif(_cpu_part STREQUAL "0xc20") + set(TARGET_ARCHITECTURE "cortex-m0") + elseif(_cpu_part STREQUAL "0xc21") + set(TARGET_ARCHITECTURE "cortex-m1") + elseif(_cpu_part STREQUAL "0xc23") + set(TARGET_ARCHITECTURE "cortex-m3") + elseif(_cpu_part STREQUAL "0xc24") + set(TARGET_ARCHITECTURE "cortex-m4") + elseif(_cpu_part STREQUAL "0xc27") + set(TARGET_ARCHITECTURE "cortex-m7") + elseif(_cpu_part STREQUAL 
"0xc60") + set(TARGET_ARCHITECTURE "cortex-m0plus") + elseif(_cpu_part STREQUAL "0xd01") + set(TARGET_ARCHITECTURE "cortex-a32") + elseif(_cpu_part STREQUAL "0xd02") + set(TARGET_ARCHITECTURE "cortex-a34") + elseif(_cpu_part STREQUAL "0xd03") + set(TARGET_ARCHITECTURE "cortex-a53") + elseif(_cpu_part STREQUAL "0xd04") + set(TARGET_ARCHITECTURE "cortex-a35") + elseif(_cpu_part STREQUAL "0xd05") + set(TARGET_ARCHITECTURE "cortex-a55") + elseif(_cpu_part STREQUAL "0xd07") + set(TARGET_ARCHITECTURE "cortex-a57") + elseif(_cpu_part STREQUAL "0xd08") + set(TARGET_ARCHITECTURE "cortex-a72") + elseif(_cpu_part STREQUAL "0xd09") + set(TARGET_ARCHITECTURE "cortex-a73") + elseif(_cpu_part STREQUAL "0xd0a") + set(TARGET_ARCHITECTURE "cortex-a75") + elseif(_cpu_part STREQUAL "0xd0b") + set(TARGET_ARCHITECTURE "cortex-a76") + elseif(_cpu_part STREQUAL "0xd0c") + set(TARGET_ARCHITECTURE "neoverse-n1") + elseif(_cpu_part STREQUAL "0xd0d") + set(TARGET_ARCHITECTURE "cortex-a77") + elseif(_cpu_part STREQUAL "0xd0e") + set(TARGET_ARCHITECTURE "cortex-a76ae") + elseif(_cpu_part STREQUAL "0xd13") + set(TARGET_ARCHITECTURE "cortex-r52") + elseif(_cpu_part STREQUAL "0xd20") + set(TARGET_ARCHITECTURE "cortex-m23") + elseif(_cpu_part STREQUAL "0xd21") + set(TARGET_ARCHITECTURE "cortex-m33") + elseif(_cpu_part STREQUAL "0xd40") + set(TARGET_ARCHITECTURE "neoverse-v1") + elseif(_cpu_part STREQUAL "0xd41") + set(TARGET_ARCHITECTURE "cortex-a78") + elseif(_cpu_part STREQUAL "0xd42") + set(TARGET_ARCHITECTURE "cortex-a78ae") + elseif(_cpu_part STREQUAL "0xd44") + set(TARGET_ARCHITECTURE "cortex-x1") + elseif(_cpu_part STREQUAL "0xd46") + set(TARGET_ARCHITECTURE "cortex-a510") + elseif(_cpu_part STREQUAL "0xd47") + set(TARGET_ARCHITECTURE "cortex-a710") + elseif(_cpu_part STREQUAL "0xd48") + set(TARGET_ARCHITECTURE "cortex-x2") + elseif(_cpu_part STREQUAL "0xd49") + set(TARGET_ARCHITECTURE "neoverse-n2") + elseif(_cpu_part STREQUAL "0xd4a") + set(TARGET_ARCHITECTURE "neoverse-e1") + 
elseif(_cpu_part STREQUAL "0xd4b") + set(TARGET_ARCHITECTURE "cortex-a78c") + endif() + + # Broadcom + elseif(_cpu_implementer STREQUAL "0x42") + if(_cpu_part STREQUAL "0x0f") + set(TARGET_ARCHITECTURE "brahma-b15") + elseif(_cpu_part STREQUAL "0x100") + set(TARGET_ARCHITECTURE "brahma-b53") + elseif(_cpu_part STREQUAL "0x516") + set(TARGET_ARCHITECTURE "thunderx2") + endif() + + # Cavium + elseif(_cpu_implementer STREQUAL "0x43") + if(_cpu_part STREQUAL "0x0a0") + set(TARGET_ARCHITECTURE "thunderx") + elseif(_cpu_part STREQUAL "0x0a1") + set(TARGET_ARCHITECTURE "thunderxt88") + elseif(_cpu_part STREQUAL "0x0a2") + set(TARGET_ARCHITECTURE "thunderxt81") + elseif(_cpu_part STREQUAL "0x0a3") + set(TARGET_ARCHITECTURE "thunderxt83") + elseif(_cpu_part STREQUAL "0x0af") + set(TARGET_ARCHITECTURE "thunderx2t99") + endif() + + # DEC + elseif(_cpu_implementer STREQUAL "0x44") + if(_cpu_part STREQUAL "0xa10") + set(TARGET_ARCHITECTURE "strongarm110") + elseif(_cpu_part STREQUAL "0xa11") + set(TARGET_ARCHITECTURE "strongarm1100") + endif() + + # FUJITSU + elseif(_cpu_implementer STREQUAL "0x46") + if(_cpu_part STREQUAL "0x001") + set(TARGET_ARCHITECTURE "a64fx") + endif() + + # HiSilicon + elseif(_cpu_implementer STREQUAL "0x48") + if(_cpu_part STREQUAL "0xd01") + set(TARGET_ARCHITECTURE "tsv110") + endif() + + # Infineon + elseif(_cpu_implementer STREQUAL "0x49") + + # Motorola/Freescale + elseif(_cpu_implementer STREQUAL "0x4d") + + # Nvidia + elseif(_cpu_implementer STREQUAL "0x4e") + if(_cpu_part STREQUAL "0x000") + set(TARGET_ARCHITECTURE "denver") + elseif(_cpu_part STREQUAL "0x003") + set(TARGET_ARCHITECTURE "denver2") + elseif(_cpu_part STREQUAL "0x004") + set(TARGET_ARCHITECTURE "carmel") + endif() + + # APM + elseif(_cpu_implementer STREQUAL "0x50") + if(_cpu_part STREQUAL "0x000") + set(TARGET_ARCHITECTURE "xgene1") + endif() + + # Qualcomm + elseif(_cpu_implementer STREQUAL "0x51") + if(_cpu_part STREQUAL "0x00f") + set(TARGET_ARCHITECTURE "scorpion") + 
elseif(_cpu_part STREQUAL "0x02d") + set(TARGET_ARCHITECTURE "scorpion") + elseif(_cpu_part STREQUAL "0x04d") + set(TARGET_ARCHITECTURE "krait") + elseif(_cpu_part STREQUAL "0x06f") + set(TARGET_ARCHITECTURE "krait") + elseif(_cpu_part STREQUAL "0x201") + set(TARGET_ARCHITECTURE "kryo") + elseif(_cpu_part STREQUAL "0x205") + set(TARGET_ARCHITECTURE "kryo") + elseif(_cpu_part STREQUAL "0x211") + set(TARGET_ARCHITECTURE "kryo") + elseif(_cpu_part STREQUAL "0x800") + set(TARGET_ARCHITECTURE "falkor") + elseif(_cpu_part STREQUAL "0x801") + set(TARGET_ARCHITECTURE "kryo2") + elseif(_cpu_part STREQUAL "0xc00") + set(TARGET_ARCHITECTURE "falkor") + elseif(_cpu_part STREQUAL "0xc01") + set(TARGET_ARCHITECTURE "saphira") + endif() + + # Samsung + elseif(_cpu_implementer STREQUAL "0x53") + if(_cpu_part STREQUAL "0x001") + set(TARGET_ARCHITECTURE "exynos-m1") + endif() + + # Marvell + elseif(_cpu_implementer STREQUAL "0x56") + if(_cpu_part STREQUAL "0x131") + set(TARGET_ARCHITECTURE "marvell-f") + elseif(_cpu_part STREQUAL "0x581") + set(TARGET_ARCHITECTURE "marvell-pj4") + elseif(_cpu_part STREQUAL "0x584") + set(TARGET_ARCHITECTURE "marvell-pj4") + endif() + + # Apple + elseif(_cpu_implementer STREQUAL "0x61") + if(_cpu_part STREQUAL "0x022") + set(TARGET_ARCHITECTURE "icestorm") + elseif(_cpu_part STREQUAL "0x023") + set(TARGET_ARCHITECTURE "firestorm") + endif() + + # Faraday + elseif(_cpu_implementer STREQUAL "0x66") + if(_cpu_part STREQUAL "0x526") + set(TARGET_ARCHITECTURE "fa526") + elseif(_cpu_part STREQUAL "0x626") + set(TARGET_ARCHITECTURE "fa626") + endif() + + # Intel + elseif(_cpu_implementer STREQUAL "0x69") + if(_cpu_part STREQUAL "0x200") + set(TARGET_ARCHITECTURE "i80200") + elseif(_cpu_part STREQUAL "0x210") + set(TARGET_ARCHITECTURE "pxa250a") + elseif(_cpu_part STREQUAL "0x212") + set(TARGET_ARCHITECTURE "pxa210a") + elseif(_cpu_part STREQUAL "0x242") + set(TARGET_ARCHITECTURE "i80321-400") + elseif(_cpu_part STREQUAL "0x243") + set(TARGET_ARCHITECTURE 
"i80321-600") + elseif(_cpu_part STREQUAL "0x290") + set(TARGET_ARCHITECTURE "pxa250b") + elseif(_cpu_part STREQUAL "0x292") + set(TARGET_ARCHITECTURE "pxa210b") + elseif(_cpu_part STREQUAL "0x2c2") + set(TARGET_ARCHITECTURE "i80321-400-b0") + elseif(_cpu_part STREQUAL "0x2c3") + set(TARGET_ARCHITECTURE "i80321-600-b0") + elseif(_cpu_part STREQUAL "0x2d0") + set(TARGET_ARCHITECTURE "pxa250c") + elseif(_cpu_part STREQUAL "0x2d2") + set(TARGET_ARCHITECTURE "pxa210c") + elseif(_cpu_part STREQUAL "0x411") + set(TARGET_ARCHITECTURE "pxa27x") + elseif(_cpu_part STREQUAL "0x41c") + set(TARGET_ARCHITECTURE "ipx425-533") + elseif(_cpu_part STREQUAL "0x41d") + set(TARGET_ARCHITECTURE "ipx425-400") + elseif(_cpu_part STREQUAL "0x41f") + set(TARGET_ARCHITECTURE "ipx425-266") + elseif(_cpu_part STREQUAL "0x682") + set(TARGET_ARCHITECTURE "pxa32x") + elseif(_cpu_part STREQUAL "0x683") + set(TARGET_ARCHITECTURE "pxa930") + elseif(_cpu_part STREQUAL "0x688") + set(TARGET_ARCHITECTURE "pxa30x") + elseif(_cpu_part STREQUAL "0x689") + set(TARGET_ARCHITECTURE "pxa31x") + elseif(_cpu_part STREQUAL "0xb11") + set(TARGET_ARCHITECTURE "sa1110") + elseif(_cpu_part STREQUAL "0xc12") + set(TARGET_ARCHITECTURE "ipx1200") + endif() + + # Phytium + elseif(_cpu_implementer STREQUAL "0x70") + if(_cpu_part STREQUAL "0x662") + set(TARGET_ARCHITECTURE "ftc662") + elseif(_cpu_part STREQUAL "0x663") + set(TARGET_ARCHITECTURE "ftc663") + endif() + + # Ampere + elseif(_cpu_implementer STREQUAL "0xc0") + + # Taken from /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/machine.h + elseif(_cpu_implementer STREQUAL "16777228" OR _cpu_implementer STREQUAL "0x100000C") # Apple ARM64 + if( _cpu_part STREQUAL "0x1e2d6381" OR _cpu_part STREQUAL "506291073") # Swift (A6) + set(TARGET_ARCHITECTURE "apple-a6") + elseif(_cpu_part STREQUAL "0x37a09642" OR _cpu_part STREQUAL "933271106") # Cyclone (A7) + set(TARGET_ARCHITECTURE "apple-a7") + 
elseif(_cpu_part STREQUAL "0x2c91a47e" OR _cpu_part STREQUAL "747742334") # Typhoon (A8) + set(TARGET_ARCHITECTURE "apple-a8") + elseif(_cpu_part STREQUAL "0x92fb37c8" OR _cpu_part STREQUAL "2465937352") # Twister (A9) + set(TARGET_ARCHITECTURE "apple-a9") + elseif(_cpu_part STREQUAL "0x67ceee93" OR _cpu_part STREQUAL "1741614739") # Hurrican (A10) + set(TARGET_ARCHITECTURE "apple-a10") + elseif(_cpu_part STREQUAL "0xe81e7ef6" OR _cpu_part STREQUAL "3894312694") # Monsoon Mistral (A11) + set(TARGET_ARCHITECTURE "apple-a11") + elseif(_cpu_part STREQUAL "0x07d34b9f" OR _cpu_part STREQUAL "131287967") # Vortex Tempest (A12) + set(TARGET_ARCHITECTURE "apple-a12") + elseif(_cpu_part STREQUAL "0x462504d2" OR _cpu_part STREQUAL "1176831186") # Lightning Thunder (A13) + set(TARGET_ARCHITECTURE "apple-a13") + elseif(_cpu_part STREQUAL "0x1b588bb3" OR _cpu_part STREQUAL "458787763") # Firestorm Icestorm (A14 / M1 / M1 Pro / M1 Max / M1 Ultra) + set(TARGET_ARCHITECTURE "apple-m1") + elseif(_cpu_part STREQUAL "0xda33d83d" OR _cpu_part STREQUAL "3660830781") # Blizzard Avalanche (A15 / M2 / M2 Pro / M2 Max) + set(TARGET_ARCHITECTURE "apple-m2") + elseif(_cpu_part STREQUAL "0x8765edea" OR _cpu_part STREQUAL "2271604202") # Everest Sawtooth (A16) + set(TARGET_ARCHITECTURE "apple-a16") + elseif(_cpu_part STREQUAL "0x2876f5b5" OR _cpu_part STREQUAL "678884789") # Coll (A17) + set(TARGET_ARCHITECTURE "apple-a17") + elseif(_cpu_part STREQUAL "0x204526d0" OR _cpu_part STREQUAL "541402832") # Tupai (A18) + set(TARGET_ARCHITECTURE "apple-a18") + elseif(_cpu_part STREQUAL "0x75d4acb9" OR _cpu_part STREQUAL "1976872121") # Tahiti (A18 Pro) + set(TARGET_ARCHITECTURE "apple-a18") + elseif(_cpu_part STREQUAL "0xfa33415e" OR _cpu_part STREQUAL "4197663070") # Ibiza (M3) + set(TARGET_ARCHITECTURE "apple-m3") + elseif(_cpu_part STREQUAL "0x72015832" OR _cpu_part STREQUAL "1912690738") # Palma (M3 Pro) + set(TARGET_ARCHITECTURE "apple-m3") + elseif(_cpu_part STREQUAL "0x5f4dea93" OR _cpu_part 
STREQUAL "1598941843") # Lobos (M3 Max) + set(TARGET_ARCHITECTURE "apple-m3") + elseif(_cpu_part STREQUAL "0x6f5129ac" OR _cpu_part STREQUAL "1867590060") # Donan (M4) + set(TARGET_ARCHITECTURE "apple-m4") + elseif(_cpu_part STREQUAL "0x17d5b93a" OR _cpu_part STREQUAL "399882554") # Brava (M4 Pro) + set(TARGET_ARCHITECTURE "apple-m4") + endif() + + else() + message(WARNING "[OptimizeForArchitecture] Auto-detection of optimization flags failed and will use the generic CPU settings.") + # No return() here: macros are expanded in place, so return() would leave the caller's scope instead of this macro. + endif() + + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] CPU implementer: ${_cpu_implementer}") + message(STATUS "[OptimizeForArchitecture] CPU architecture: ${_cpu_architecture}") + message(STATUS "[OptimizeForArchitecture] CPU variant: ${_cpu_variant}") + message(STATUS "[OptimizeForArchitecture] CPU part: ${_cpu_part}") + message(STATUS "[OptimizeForArchitecture] CPU revision: ${_cpu_revision}") + endif() +endmacro(OFA_AutodetectArm) diff --git a/src/cmake/OFA/AutodetectPpc.cmake b/src/cmake/OFA/AutodetectPpc.cmake new file mode 100644 index 0000000000..70c3a70685 --- /dev/null +++ b/src/cmake/OFA/AutodetectPpc.cmake @@ -0,0 +1,57 @@ +#============================================================================= +# Autodetection of PPC / PPC64 CPUs +# +# This is a two-step process: +# +# 1. Get the CPUID from the system by reading /proc/cpuinfo (on +# Linux), the system registry (on Windows), or executing an +# OS-specific command (macOS, BSD, SunOS, ...) +# +# 2. 
Determine the specific CPU from the CPUID +#============================================================================= + +macro(OFA_AutodetectPpc) + set(_cpu) + + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + + # Linux + file(READ "/proc/cpuinfo" _cpuinfo) + string(REGEX REPLACE ".*cpu[ \t]*:[ \t]+([a-zA-Z0-9_+-]+).*" "\\1" _cpu "${_cpuinfo}") + if(_cpu STREQUAL "POWER3") + set(TARGET_ARCHITECTURE "power3") + elseif(_cpu STREQUAL "POWER4") + set(TARGET_ARCHITECTURE "power4") + elseif(_cpu STREQUAL "POWER5") + set(TARGET_ARCHITECTURE "power5") + elseif(_cpu STREQUAL "POWER5+") + set(TARGET_ARCHITECTURE "power5+") + elseif(_cpu STREQUAL "POWER6") + set(TARGET_ARCHITECTURE "power6") + elseif(_cpu STREQUAL "POWER6X") + set(TARGET_ARCHITECTURE "power6x") + elseif(_cpu STREQUAL "POWER7") + set(TARGET_ARCHITECTURE "power7") + elseif(_cpu STREQUAL "POWER8" OR _cpu STREQUAL "POWER8NVL") + set(TARGET_ARCHITECTURE "power8") + elseif(_cpu STREQUAL "POWER9" OR _cpu STREQUAL "POWER9NVL") + set(TARGET_ARCHITECTURE "power9") + elseif(_cpu STREQUAL "POWER10" OR _cpu STREQUAL "POWER10NVL") + set(TARGET_ARCHITECTURE "power10") + else() + message(WARNING "[OptimizeForArchitecture] Auto-detection of optimization flags failed and will use the generic CPU settings.") + endif() + + # TODO: AIX, FreeBSD, ... + + else() + + message(FATAL_ERROR "[OptimizeForArchitecture] OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") + # FATAL_ERROR already stops processing; return() is not needed and must not be used inside a macro. + + endif() + + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] CPU: ${_cpu}") + endif() +endmacro(OFA_AutodetectPpc) diff --git a/src/cmake/OFA/AutodetectX86.cmake b/src/cmake/OFA/AutodetectX86.cmake new file mode 100644 index 0000000000..337207bc32 --- /dev/null +++ b/src/cmake/OFA/AutodetectX86.cmake @@ -0,0 +1,461 @@ +#============================================================================= +# Autodetection of X86 / X86_64 CPUs +# +# This is a two-step process: +# +# 1. 
Get the CPUID from the system by reading /proc/cpuinfo (on +# Linux), the system registry (on Windows), or executing an +# OS-specific command (macOS, BSD, SunOS, ...) +# +# 2. Determine the specific CPU from the CPUID +#============================================================================= + +macro(OFA_AutodetectX86) + set(_vendor_id) + set(_cpu_family) + set(_cpu_model) + set(_cpu_stepping) + + # Get CPUID from system + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + + # Linux: extract vendor, family, model, stepping and flags from /proc/cpuinfo + file(READ "/proc/cpuinfo" _cpuinfo) + string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}") + string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}") + string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}") + string(REGEX REPLACE ".*stepping[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_stepping "${_cpuinfo}") + string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}") + + elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + + # macOS: query the same fields via sysctl; the output is split on newlines and read by position + execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.vendor machdep.cpu.family machdep.cpu.model machdep.cpu.stepping machdep.cpu.features + OUTPUT_VARIABLE _sysctl_output_string RESULT_VARIABLE _error) + if(NOT _error) + string(REPLACE "\n" ";" _sysctl_output "${_sysctl_output_string}") + list(GET _sysctl_output 0 _vendor_id) + list(GET _sysctl_output 1 _cpu_family) + list(GET _sysctl_output 2 _cpu_model) + list(GET _sysctl_output 3 _cpu_stepping) + list(GET _sysctl_output 4 _cpu_flags) + string(TOLOWER "${_cpu_flags}" _cpu_flags) + string(REPLACE "." "_" _cpu_flags "${_cpu_flags}") + else() + # Apple Silicon (ARM64) running in Rosetta 2 mode + # + # The regular detection mechanism for macOS-x86_64 does not work + # because the emulated CPU does not provide the required + # information via the sysctl command.
We therefore generate fake + # vendor, model, and stepping information based on the + # macOS-specific CPU codes. + execute_process(COMMAND /usr/sbin/sysctl -n hw.cputype machdep.cpu.family hw.cpufamily machdep.cpu.features + OUTPUT_VARIABLE _sysctl_output_string RESULT_VARIABLE _error) + if(NOT _error) + string(REPLACE "\n" ";" _sysctl_output ${_sysctl_output_string}) + list(GET _sysctl_output 0 _cpu_implementer) + list(GET _sysctl_output 1 _cpu_family) + list(GET _sysctl_output 2 _cpu_model) + list(GET _sysctl_output 3 _cpu_flags) + string(TOLOWER "${_cpu_flags}" _cpu_flags) + string(REPLACE "." "_" _cpu_flags "${_cpu_flags}") + + # Fake vendor + if(_cpu_implementer STREQUAL "0x7" OR _cpu_implementer STREQUAL "7") + set(_vendor_id "GenuineIntel") + else() + set(_vendor_id "Unknown") + endif() + + # Fake stepping + set(_cpu_stepping "Unknown") + + # Fake model + # Taken from /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/machine.h + if( _cpu_model STREQUAL "0x78ea4fbc" OR _cpu_model STREQUAL "2028621756") # Penryn + set(_cpu_model "23") + elseif(_cpu_model STREQUAL "0x6b5a4cd2" OR _cpu_model STREQUAL "1801080018") # Nehalem + set(_cpu_model "26") + elseif(_cpu_model STREQUAL "0x573b5eec" OR _cpu_model STREQUAL "1463508716") # Westmere + set(_cpu_model "37") + elseif(_cpu_model STREQUAL "0x5490b78c" OR _cpu_model STREQUAL "1418770316") # Sandybridge + set(_cpu_model "42") + elseif(_cpu_model STREQUAL "0x1f65e835" OR _cpu_model STREQUAL "526772277") # Ivybridge + set(_cpu_model "58") + elseif(_cpu_model STREQUAL "0x10b282dc" OR _cpu_model STREQUAL "280134364") # Haswell + set(_cpu_model "60") + elseif(_cpu_model STREQUAL "0x582ed09c" OR _cpu_model STREQUAL "1479463068") # Broadwell + set(_cpu_model "61") + elseif(_cpu_model STREQUAL "0x37fc219f" OR _cpu_model STREQUAL "939270559") # Skylake + set(_cpu_model "78") + elseif(_cpu_model STREQUAL "0x0f817246" OR _cpu_model STREQUAL "260141638") # 
Kabylake + set(_cpu_model "142") + elseif(_cpu_model STREQUAL "0x38435547" OR _cpu_model STREQUAL "943936839") # Icelake + set(_cpu_model "125") + elseif(_cpu_model STREQUAL "0x1cf8a03e" OR _cpu_model STREQUAL "486055998") # Cometlake + set(_cpu_model "142") + else() + set(_cpu_model "Unknown") + endif() + endif() + endif() + if(_error) + message(FATAL_ERROR "[OptimizeForArchitecture] OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") + endif() + + elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + + # Windows: read vendor and the "... Family F Model M Stepping S" identifier from the registry; the stepping may be the last token, so no trailing space is required after it + get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE) + get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE) + mark_as_advanced(_vendor_id _cpu_id) + string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}") + string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}") + string(REGEX REPLACE ".* Stepping ([0-9]+).*" "\\1" _cpu_stepping "${_cpu_id}") + + else() + + # Try to retrieve CPUID directly by compiling and running cpuinfo_x86.cxx. NOTE(review): confirm the source path below matches where cpuinfo_x86.cxx is installed in this tree (src/cmake/OFA) + try_run(_exit _ok + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/CMake/OFA/cpuinfo_x86.cxx + RUN_OUTPUT_VARIABLE _cpuinfo) + + if(_ok AND ${_exit} EQUAL 0) + string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}") + string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}") + string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}") + string(REGEX REPLACE ".*stepping[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_stepping "${_cpuinfo}") + string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}") + + else() + + message(FATAL_ERROR "[OptimizeForArchitecture] OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") + endif() +
endif() + + # Determine CPU from CPUID + if(_vendor_id STREQUAL "GenuineIntel") + if(_cpu_family EQUAL 19) + + # MIC architecture + if(_cpu_model EQUAL 1) + set(TARGET_ARCHITECTURE "diamondrapids") + elseif(_cpu_model EQUAL 0) + set(TARGET_ARCHITECTURE "novalake") + + else() + message(WARNING + " [OptimizeForArchitecture] Your CPU is not known.\n" + " \tAuto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.\n" + " \tPlease send an email to gismo@inria.fr with the following content so that we can update the OFA script:\n" + " \tVendor id: ${_vendor_id}\n" + " \tCPU family: ${_cpu_family}\n" + " \tCPU mode: ${_cpu_model}\n" + " \tCPU stepping: ${_cpu_stepping}\n" + " \tCPU flags: ${_cpu_flags}") + set(TARGET_ARCHITECTURE "merom") + endif() + + elseif(_cpu_family EQUAL 6) + # taken from the Intel ORM + # http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html + # CPUID Signature Values Of Recent Intel Microarchitectures + # 4E 5E | Skylake microarchitecture + # 3D 47 56 | Broadwell microarchitecture + # 3C 45 46 3F | Haswell microarchitecture + # 3A 3E | Ivy Bridge microarchitecture + # 2A 2D | Sandy Bridge microarchitecture + # 25 2C 2F | Intel microarchitecture Westmere + # 1A 1E 1F 2E | Intel microarchitecture Nehalem + # 17 1D | Enhanced Intel Core microarchitecture + # 0F | Intel Core microarchitecture + # + # Intel SDM Vol. 3C 35-1 / December 2016: + # 57 | Xeon Phi 3200, 5200, 7200 [Knights Landing] + # 85 | Future Xeon Phi + # 8E 9E | 7th gen. Core [Kaby Lake] + # 55 | Future Xeon [Skylake w/ AVX512] + # 4E 5E | 6th gen. Core / E3 v5 [Skylake w/o AVX512] + # 56 | Xeon D-1500 [Broadwell] + # 4F | Xeon E5 v4, E7 v4, i7-69xx [Broadwell] + # 47 | 5th gen. Core / Xeon E3 v4 [Broadwell] + # 3D | M-5xxx / 5th gen. [Broadwell] + # 3F | Xeon E5 v3, E7 v3, i7-59xx [Haswell-E] + # 3C 45 46 | 4th gen. Core, Xeon E3 v3 [Haswell] + # 3E | Xeon E5 v2, E7 v2, i7-49xx [Ivy Bridge-E] + # 3A | 3rd gen. 
Core, Xeon E3 v2 [Ivy Bridge] + # 2D | Xeon E5, i7-39xx [Sandy Bridge] + # 2F | Xeon E7 + # 2A | Xeon E3, 2nd gen. Core [Sandy Bridge] + # 2E | Xeon 7500, 6500 series + # 25 2C | Xeon 3600, 5600 series, Core i7, i5 and i3 + # + # Values from the Intel SDE: + # 5C | Goldmont + # 5A | Silvermont + # 57 | Knights Landing + # 66 | Cannonlake + # 55 | Skylake Server + # 4E | Skylake Client + # 3C | Broadwell (likely a bug in the SDE) + # 3C | Haswell + # + # Latest updates taken from https://en.wikichip.org/wiki/intel/cpuid + + # MIC architecture + if(_cpu_model EQUAL 133) + set(TARGET_ARCHITECTURE "knm") # Knights Mill + + elseif(_cpu_model EQUAL 87) + set(TARGET_ARCHITECTURE "knl") # Knights Landing + + # Small cores + elseif(_cpu_model EQUAL 138 OR _cpu_model EQUAL 150 OR _cpu_model EQUAL 156) + set(TARGET_ARCHITECTURE "tremont") + + elseif(_cpu_model EQUAL 122) + set(TARGET_ARCHITECTURE "goldmont-plus") + + elseif(_cpu_model EQUAL 92 OR _cpu_model EQUAL 95) + set(TARGET_ARCHITECTURE "goldmont") + + elseif(_cpu_model EQUAL 55 OR _cpu_model EQUAL 74 OR _cpu_model EQUAL 76 OR _cpu_model EQUAL 77 OR _cpu_model EQUAL 90 OR _cpu_model EQUAL 93) + set(TARGET_ARCHITECTURE "silvermont") + + elseif(_cpu_model EQUAL 28 OR _cpu_model EQUAL 38 OR _cpu_model EQUAL 39 OR _cpu_model EQUAL 53 OR _cpu_model EQUAL 54) + set(TARGET_ARCHITECTURE "bonnell") + + # Big cores (server) + elseif(_cpu_model EQUAL 221) + set(TARGET_ARCHITECTURE "clearwaterforest") + + elseif(_cpu_model EQUAL 175) + set(TARGET_ARCHITECTURE "sierraforest") + + elseif(_cpu_model EQUAL 173 OR _cpu_model EQUAL 174) + set(TARGET_ARCHITECTURE "graniterapids") + + elseif(_cpu_model EQUAL 207) + set(TARGET_ARCHITECTURE "emeraldrapids") + + elseif(_cpu_model EQUAL 143) + set(TARGET_ARCHITECTURE "sapphirerapids") + + elseif(_cpu_model EQUAL 106 OR _cpu_model EQUAL 108) + set(TARGET_ARCHITECTURE "icelake-avx512") + + elseif(_cpu_model EQUAL 85) + if(_cpu_stepping LESS 5) + set(TARGET_ARCHITECTURE "skylake-avx512") + 
elseif(_cpu_stepping LESS 8) + set(TARGET_ARCHITECTURE "cascadelake") + else() + set(TARGET_ARCHITECTURE "cooperlake") + endif() + + elseif(_cpu_model EQUAL 79 OR _cpu_model EQUAL 86) + set(TARGET_ARCHITECTURE "broadwell") + + elseif(_cpu_model EQUAL 63) + set(TARGET_ARCHITECTURE "haswell") + + elseif(_cpu_model EQUAL 62) + set(TARGET_ARCHITECTURE "ivybridge") + + elseif(_cpu_model EQUAL 45) + set(TARGET_ARCHITECTURE "sandybridge") + + elseif(_cpu_model EQUAL 44 OR _cpu_model EQUAL 47) + set(TARGET_ARCHITECTURE "westmere") + + elseif(_cpu_model EQUAL 26 OR _cpu_model EQUAL 30 OR _cpu_model EQUAL 46) + set(TARGET_ARCHITECTURE "nehalem") + + elseif(_cpu_model EQUAL 23 OR _cpu_model EQUAL 29) + set(TARGET_ARCHITECTURE "penryn") + + # Big cores (client) + elseif(_cpu_model EQUAL 204) + set(TARGET_ARCHITECTURE "pantherlake") + + elseif(_cpu_model EQUAL 188 OR _cpu_model EQUAL 189) + set(TARGET_ARCHITECTURE "lunarlake") + + elseif(_cpu_model EQUAL 181 OR _cpu_model EQUAL 197 OR _cpu_model EQUAL 198) + set(TARGET_ARCHITECTURE "arrowlake") + + elseif(_cpu_model EQUAL 170 OR _cpu_model EQUAL 171 OR _cpu_model EQUAL 172) + set(TARGET_ARCHITECTURE "meteorlake") + + elseif(_cpu_model EQUAL 183 OR _cpu_model EQUAL 186 OR _cpu_model EQUAL 190 OR _cpu_model EQUAL 191) + set(TARGET_ARCHITECTURE "raptorlake") # Raptor Lake refresh = Bartlett Lake + + elseif(_cpu_model EQUAL 151 OR _cpu_model EQUAL 154) + set(TARGET_ARCHITECTURE "alderlake") + + elseif(_cpu_model EQUAL 167) + set(TARGET_ARCHITECTURE "rocketlake") + + elseif(_cpu_model EQUAL 165 OR _cpu_model EQUAL 166) + set(TARGET_ARCHITECTURE "cometlake") + + elseif(_cpu_model EQUAL 140 OR _cpu_model EQUAL 141) + set(TARGET_ARCHITECTURE "tigerlake") + + elseif(_cpu_model EQUAL 125 OR _cpu_model EQUAL 126) + set(TARGET_ARCHITECTURE "icelake") + + elseif(_cpu_model EQUAL 102) + set(TARGET_ARCHITECTURE "cannonlake") + + elseif(_cpu_model EQUAL 142 OR _cpu_model EQUAL 158) + set(TARGET_ARCHITECTURE "kabylake") + + elseif(_cpu_model 
EQUAL 78 OR _cpu_model EQUAL 94) + set(TARGET_ARCHITECTURE "skylake") + + elseif(_cpu_model EQUAL 61 OR _cpu_model EQUAL 71) + set(TARGET_ARCHITECTURE "broadwell") + + elseif(_cpu_model EQUAL 60 OR _cpu_model EQUAL 69 OR _cpu_model EQUAL 70) + set(TARGET_ARCHITECTURE "haswell") + + elseif(_cpu_model EQUAL 58) + set(TARGET_ARCHITECTURE "ivybridge") + + elseif(_cpu_model EQUAL 42) + set(TARGET_ARCHITECTURE "sandybridge") + + elseif(_cpu_model EQUAL 37) + set(TARGET_ARCHITECTURE "westmere") + + elseif(_cpu_model EQUAL 30 OR _cpu_model EQUAL 31) + set(TARGET_ARCHITECTURE "nehalem") + + elseif(_cpu_model EQUAL 23 OR _cpu_model EQUAL 29) + set(TARGET_ARCHITECTURE "penryn") + + elseif(_cpu_model EQUAL 15 OR _cpu_model EQUAL 22) + set(TARGET_ARCHITECTURE "merom") + + elseif(_cpu_model EQUAL 28) + set(TARGET_ARCHITECTURE "atom") + + elseif(_cpu_model EQUAL 14) + set(TARGET_ARCHITECTURE "core") + + elseif(_cpu_model LESS 14) + message(WARNING + " [OptimizeForArchitecture] Your CPU is not known.\n" + " \tAuto-detection of optimization flags failed and will use the generic CPU settings with SSE2.\n" + " \tPlease send an email to gismo@inria.fr with the following content so that we can update the OFA script:\n" + " \tVendor id: ${_vendor_id}\n" + " \tCPU family: ${_cpu_family}\n" + " \tCPU mode: ${_cpu_model}\n" + " \tCPU stepping: ${_cpu_stepping}\n" + " \tCPU flags: ${_cpu_flags}") + set(TARGET_ARCHITECTURE "generic") + else() + message(WARNING + " [OptimizeForArchitecture] Your CPU is not known.\n" + " \tAuto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.\n" + " \tPlease send an email to gismo@inria.fr with the following content so that we can update the OFA script:\n" + " \tVendor id: ${_vendor_id}\n" + " \tCPU family: ${_cpu_family}\n" + " \tCPU mode: ${_cpu_model}\n" + " \tCPU stepping: ${_cpu_stepping}\n" + " \tCPU flags: ${_cpu_flags}") + set(TARGET_ARCHITECTURE "merom") + endif() + + elseif(_cpu_family EQUAL 7) # Itanium (not 
supported) + message(WARNING "[OptimizeForArchitecture] Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.") + + elseif(_cpu_family EQUAL 15) # NetBurst + list(APPEND _available_vector_units_list "sse" "sse2") + if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead + list(APPEND _available_vector_units_list "sse" "sse2" "sse3") + endif() + + endif() + + elseif(_vendor_id STREQUAL "AuthenticAMD") + # taken from the list of AMD CPU microarchitectures + # https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures + # CPUID Signature Values Of Recent AMD Microarchitectures + # 05 05h | K6 + # 06 06h | K7 + # 15 0Fh | K8 / Hammer + # 16 10h | K10 + # 17 11h | K8 & K10 "hybrid" + # 18 12h | K10 (Llano) / K12 (ARM based AMD cpu) + # 20 14h | Bobcat + # 21 15h | Bulldozer / Piledriver / Steamroller / Excavator + # 22 16h | Jaguar / Puma + # 23 17h | Zen / Zen+ / Zen 2 + # 24 18h | Hygon Dhyana + # 25 19h | Zen 3 / Zen 3+ / Zen 4 + # 26 1Ah | Zen 5 + + if(_cpu_family EQUAL 25) # 19h + set(TARGET_ARCHITECTURE "zen3") # Some newer models will be Zen 4 + + elseif(_cpu_family EQUAL 24) # 18h + set(TARGET_ARCHITECTURE "zen") + + elseif(_cpu_family EQUAL 23) # 17h + if(_cpu_model LESS 49) + set(TARGET_ARCHITECTURE "zen") + else() + set(TARGET_ARCHITECTURE "zen2") + endif() + + elseif(_cpu_family EQUAL 22) # 16h + set(TARGET_ARCHITECTURE "amd16h") + + elseif(_cpu_family EQUAL 21) # 15h + if(_cpu_model LESS 16) + set(TARGET_ARCHITECTURE "bulldozer") + elseif(_cpu_model LESS 32) + set(TARGET_ARCHITECTURE "piledriver") + elseif(_cpu_model LESS 64) + set(TARGET_ARCHITECTURE "steamroller") + else() + set(TARGET_ARCHITECTURE "excavator") + endif() + + elseif(_cpu_family EQUAL 20) # 14h + set(TARGET_ARCHITECTURE "amd14h") + + elseif(_cpu_family EQUAL 18) # 12h (K10 / K12) + + elseif(_cpu_family EQUAL 17) # 12h (K8 & K10 hybrid) + + elseif(_cpu_family EQUAL 16) # 10h (K10) + set(TARGET_ARCHITECTURE 
"barcelona") + + elseif(_cpu_family EQUAL 15) # 0Fh (K8 / Hammer) + if(_cpu_model LESS 39) + set(TARGET_ARCHITECTURE "k8") + else() + set(TARGET_ARCHITECTURE "k8-sse3") + endif() + + elseif(_cpu_family EQUAL 6) # 06h (K7) + elseif(_cpu_family EQUAL 5) # 05h (K6) + + endif() + + else() + message(WARNING "[OptimizeForArchitecture] Auto-detection of optimization flags failed and will use the generic CPU settings.") + return() + endif() + + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Vendor id: ${_vendor_id}") + message(STATUS "[OptimizeForArchitecture] CPU family: ${_cpu_family}") + message(STATUS "[OptimizeForArchitecture] CPU mode: ${_cpu_model}") + message(STATUS "[OptimizeForArchitecture] CPU stepping: ${_cpu_stepping}") + endif() +endmacro(OFA_AutodetectX86) diff --git a/src/cmake/OFA/CheckARM.txt b/src/cmake/OFA/CheckARM.txt new file mode 100644 index 0000000000..63540835bc --- /dev/null +++ b/src/cmake/OFA/CheckARM.txt @@ -0,0 +1,176 @@ +# List of arm/arm64 checks + +# FORMAT: +# [,];;;;[] +# +# lines starting with # are comments +# lines starting with push_enable: start a block of tests enabled for the given compilers only +# lines starting with pop_enable: ends a block of tests enabled for the given compilers only +# lines starting with push_disable: start a block of tests disabled for the given compilers +# lines starting with pop_disable: ends a block of tests disabled for the given compilers + +# DESCRIPTION: +# For each line of this file, HandleArmOptions generates the code snipped +# +# #include +# #include +# ... +# int main { +# name(parameter0, parameter1, ...); +# return 0; +# } +# +# and compiles it with, e.g. +# +# gcc -m -m +# +# if the extension should be enabled and +# +# gcc -m-no -m-no +# +# if the extension should be disabled. In the above example, the +# compiler name 'gcc' and the flag prefixes '-m' and '-mno-' will be +# set properly by HandleX86Options. 
+# +# EXTENSION ALIAS: +# By default, it is assumed that the name of the extension, e.g., +# avx512f coincides with the name of the compiler flag to be used to +# enable/disable it, e.g., -mno-avx512f. Some compilers like Oracle's +# SunPro have non-canonical naming conventions, +# cf. https://docs.oracle.com/cd/E77782_01/html/E77792/gqexw.html. +# +# In this case, the optional parameter <extension alias> can be used +# to specify the name of the extension as reported by the system, +# whereas the compiler-specific extension flag(s) are given in +# <extension flag> and [<list of extra extension flags>], respectively. +# +# ENABLING/DISABLING OF CHECKS: +# Checks can be explicitly disabled for particular compilers by placing +# them inside a push_disable/pop_disable block, e.g. +# +# push_disable:SunPro,IntelLLVM +# +# pop_disable:SunPro,IntelLLVM +# +# Similarly, checks can be explicitly enabled for particular compilers +# by placing them inside a push_enable/pop_enable block, e.g. +# +# push_enable:SunPro +# +# pop_enable:SunPro + +# ARM (aarch32) 32-bit + +# armv4 : no options +# armv4t : no options + +# armv5t : no options +# armv5te : no options +# armv5tej : no options + +# armv6 : fp vfpv2 +# armv6j : fp vfpv2 +# armv6k : fp vfpv2 +# armv6z : fp vfpv2 +# armv6kz : fp vfpv2 +# armv6zk : fp vfpv2 +# armv6t2 : fp vfpv2 +# armv6-m : no options +# armv6s-m : no options +fp;arm_neon.h;vcvt_f16_f32;float32x4_t() +vfpv2;cstdlib;exit;0 + +# armv7 : fp vfpv3-d16 +vfpv3-d16;cstdlib;exit;0;vfpv3_d16 + +# armv7-a : mp sec fp vfpv3 vfpv3-d16-fp16 vfpv3-fp16 vfpv4-d16 vfpv4 simd +# neon-fp16 neon-vfpv4 nosimd vfpv3-d16 neon neon-vfpv3 +# armv7ve : vfpv3-d16 vfpv3 vfpv3-d16-fp16 vfpv3-fp16 fp vfpv4 neon neon-fp16 +# simd nosimd vfpv4-d16 neon-vfpv3 neon-vfpv4 +mp;cstdlib;exit;0 +neon;cstdlib;exit;0 +neon-fp16;cstdlib;exit;0;neon_fp16 +neon-vfpv3;cstdlib;exit;0;neon_vfpv3 +neon-vfpv4;cstdlib;exit;0;neon_vfpv4 +sec;cstdlib;exit;0 +simd;cstdlib;exit;0 +vfpv3;cstdlib;exit;0 +vfpv3-d16-fp16;cstdlib;exit;0;vfpv3_d16_fp16
+vfpv3-fp16;cstdlib;exit;0;vfpv3_fp16 +vfpv4;cstdlib;exit;0 +vfpv4-d16;cstdlib;exit;0;vfpv4_d16 + +# armv7-r : fp.sp fp vfpv3xd-fp16 vfpv3-d16-fp16 idiv noidiv vfpv3xd vfpv3-d16 +fp.sp;cstdlib;exit;0;fp_sp +fp.dp;cstdlib;exit;0;fp_dp +idiv;cstdlib;exit;0 +vfpv3dx;cstdlib;exit;0 +vfpv3dx-fp16;cstdlib;exit;0;vfpv3dx_fp16 + +# armv7-m : no options +# armv7e-m : fp fpv5 fp.dp vfpv4-sp-d16 fpv5-d16 +fpv5;cstdlib;exit;0 +fpv5_d16;cstdlib;exit;0 +vfpv4-sp-d16;cstdlib;exit;0;vfpv4_sp_d16 + +# armv8-a : crc simd crypto nocrypto sb predres +crc;arm_acle.h;__crc32b;(uint32_t)0,(uint8_t)0 +crypto;arm_neon.h;vaesdq_u8;uint8x16_t(), uint8x16_t() +sb;cstdlib;exit;0 +predres;cstdlib;exit;0 + +# armv8-r : crc fp.sp simd crypto nocrypto +# armv8.1-a : simd crypto nocrypto sb predres +# armv8.2-a : simd fp16 fp16fml crypto nocrypto dotprod sb predres i8mm bf16 +bf16,sve;arm_sve.h;svbfdot;svfloat32_t(),svbfloat16_t(),svbfloat16_t() +dotprod;arm_neon.h;svdot;svint32_t(),svint8_t(),svint8_t() +fp16;arm_neon.h;vabdq_f16;float16x8_t(),float16x8_t() +fp16fml;arm_neon.h;vfmlalq_high_f16;float32x4_t(),float16x8_t(),float16x8_t() +i8mm,sve;arm_sve.h;svmmla;svint32_t(),svint8_t(),svint8_t() +simd;arm_neon.h;vaddq_u32;uint32x4_t(),uint32x4_t() + +# armv8.3-a : simd fp16 fp16fml crypto nocrypto dotprod sb predres i8mm bf16 +# armv8.4-a : simd fp16 crypto nocrypto sb predres i8mm bf16 +# armv8.5-a : simd fp16 crypto nocrypto i8mm bf16 +# armv8.6-a : simd fp16 crypto nocrypto i8mm bf16 + +# ARM64 (aarch64) 64-bit + +# armv8.x-a : fp simd crypto crc lse fp16 rcpc rdma dotprod aes sha2 sha3 sm4 fp16fml sve profile rng memtag sb ssbs predres sve2 sve2-sm4 sve2-aes sve2-sha3 sve2-bitperm tme i8mm f32mm f64mm bf16 flagm pauth asimd crc32 +crc32;arm_acle.h;__crc32b;(uint32_t)0,(uint8_t)0 +simd;cstdlib;exit;0;asimd +aes,crypto;arm_neon.h;vaesdq_u8;uint8x16_t(), uint8x16_t() +dsp,sve;arm_sve.h;svqadd_z;svbool_t(),svint8_t(),svint8_t() +f32mm,sve;arm_sve.h;svmmla;svfloat32_t(),svfloat32_t(),svfloat32_t() 
+f64mm,sve;arm_sve.h;svmmla;svfloat64_t(),svfloat64_t(),svfloat64_t() +flagm;cstdlib;exit;0 +lse;cstdlib;exit;0 +memtag;cstdlib;exit;0 +mve;cstdlib;exit;0 +mve_fp;cstdlib;exit;0 +pauth;cstdlib;exit;0 +profile;cstdlib;exit;0 +ras;cstdlib;exit;0 +rcpc;cstdlib;exit;0 +rdm;cstdlib;exit;0 +rdma;cstdlib;exit;0 +rng;cstdlib;exit;0 +sec;cstdlib;exit;0 +sha2,crypto;arm_neon.h;vsha256hq_u32;uint32x4_t(),uint32x4_t(),uint32x4_t() +sha3;arm_neon.h;vsha512hq_u64;uint64x2_t(),uint64x2_t(),uint64x2_t() +sm4;arm_neon.h;vsm4eq_u32;uint32x4_t(), uint32x4_t() +ssbs;cstdlib;exit;0 +tme;cstdlib;exit;0 +zcm;cstdlib;exit;0 +zcz;cstdlib;exit;0 + +# SVE +sve;arm_sve.h;svwhilelt_b64;0,1 + +# SVE2 +sve2;arm_sve.h;svaba;svint8_t(),svint8_t(),svint8_t() +sve2-aes;arm_sve.h;svaesd;svuint8_t(),svuint8_t() +sve2-bitperm;arm_sve.h;svbdep;svuint8_t(),svuint8_t() +sve2-sha3;arm_sve.h;svrax1;svint64_t(),svint64_t() +sve2-sm4;arm_sve.h;svsm4e;svuint32_t(),svuint32_t() + diff --git a/src/cmake/OFA/CheckCXXCompilerFlag.cmake b/src/cmake/OFA/CheckCXXCompilerFlag.cmake new file mode 100644 index 0000000000..eb16e22a3f --- /dev/null +++ b/src/cmake/OFA/CheckCXXCompilerFlag.cmake @@ -0,0 +1,72 @@ +# - Check whether the CXX compiler supports a given flag. +# CHECK_CXX_COMPILER_FLAG( ) +# - the compiler flag +# - variable to store the result +# This internally calls the check_cxx_source_compiles macro. See help +# for CheckCXXSourceCompiles for a listing of variables that can +# modify the build. + +#============================================================================= +# Copyright 2006-2009 Kitware, Inc. +# Copyright 2006 Alexander Neundorf +# Copyright 2011-2013 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * The names of Kitware, Inc., the Insight Consortium, or the names of +# any consortium members, or of any contributors, may not be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#============================================================================= + +INCLUDE(CheckCXXSourceCompiles) + +MACRO (CHECK_CXX_COMPILER_FLAG _FLAG _RESULT) + SET(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}") + SET(CMAKE_REQUIRED_DEFINITIONS "${_FLAG}") + if(${ARGC} GREATER 2) + SET(TEST_SOURCE "${ARGV2}") + else() + SET(TEST_SOURCE "int main() { return 0;}") + endif() + CHECK_CXX_SOURCE_COMPILES("${TEST_SOURCE}" ${_RESULT} + # Some compilers do not fail with a bad flag + FAIL_REGEX "error: bad value (.*) for .* switch" # GNU + FAIL_REGEX "argument unused during compilation" # clang + FAIL_REGEX "is valid for .* but not for C\\\\+\\\\+" # GNU + FAIL_REGEX "unrecognized .*option" # GNU + FAIL_REGEX "ignored for target" # GNU + FAIL_REGEX "ignoring unknown option" # MSVC + FAIL_REGEX "warning D9002" # MSVC + FAIL_REGEX "[Uu]nknown option" # HP + FAIL_REGEX "[Ww]arning: [Oo]ption" # SunPro + FAIL_REGEX "command option .* is not recognized" # XL + FAIL_REGEX "WARNING: unknown flag:" # Open64 + FAIL_REGEX "command line error" # ICC + FAIL_REGEX "command line warning" # ICC + FAIL_REGEX "#10236:" # ICC: File not found + FAIL_REGEX " #10159: " # ICC + FAIL_REGEX " #10353: " # ICC: option '-mfma' ignored, suggest using '-march=core-avx2' + ) + SET (CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}") +ENDMACRO (CHECK_CXX_COMPILER_FLAG) diff --git a/src/cmake/OFA/CheckX86.txt b/src/cmake/OFA/CheckX86.txt new file mode 100644 index 0000000000..66830b0a4f --- /dev/null +++ b/src/cmake/OFA/CheckX86.txt @@ -0,0 +1,218 @@ +# List of x86/x86_64 checks + +# FORMAT: +# [,];;;;[] +# +# lines starting with # are comments +# lines starting with push_enable: start a block of tests enabled for the given compilers only +# lines starting with pop_enable: ends a block of tests enabled for the given compilers only +# lines starting with push_disable: start a block of tests disabled for the given compilers +# lines starting with pop_disable: ends a block 
of tests disabled for the given compilers + +# DESCRIPTION: +# For each line of this file, HandleX86Options generates the code snippet +# +# #include <header0> +# #include <header1> +# ... +# int main() { +# name(parameter0, parameter1, ...); +# return 0; +# } +# +# and compiles it with, e.g. +# +# gcc -m<extension flag> -m<extra extension flag> +# +# if the extension should be enabled and +# +# gcc -mno-<extension flag> -mno-<extra extension flag> +# +# if the extension should be disabled. In the above example, the +# compiler name 'gcc' and the flag prefixes '-m' and '-mno-' will be +# set properly by HandleX86Options. +# +# EXTENSION ALIAS: +# By default, it is assumed that the name of the extension, e.g., +# avx512f coincides with the name of the compiler flag to be used to +# enable/disable it, e.g., -mno-avx512f. Some compilers like Oracle's +# SunPro have non-canonical naming conventions, +# cf. https://docs.oracle.com/cd/E77782_01/html/E77792/gqexw.html. +# +# In this case, the optional parameter <extension alias> can be used +# to specify the name of the extension as reported by the system, +# whereas the compiler-specific extension flag(s) are given in +# <extension flag> and [<list of extra extension flags>], respectively. +# +# ENABLING/DISABLING OF CHECKS: +# Checks can be explicitly disabled for particular compilers by placing +# them inside a push_disable/pop_disable block, e.g. +# +# push_disable:MSVC,SunPro +# +# pop_disable:MSVC,SunPro +# +# Similarly, checks can be explicitly enabled for particular compilers +# by placing them inside a push_enable/pop_enable block, e.g.
+# +# push_enable:SunPro +# +# pop_enable:SunPro + +# MSVC and Oracle's SunPro compiler fail these checks +push_disable:MSVC,SunPro + +# MMX +mmx;mmintrin.h;_mm_add_pi16;__m64(),__m64() + +# SSE/SSE2/SSE3/SSE4.1/SSE4.2/SSE4A/AVX/AVX2/FMA +avx;immintrin.h;_mm256_add_pd;_mm256_setzero_pd(),_mm256_setzero_pd() +avx2;immintrin.h;_mm256_hadd_epi16;_mm256_setzero_si256(),_mm256_setzero_si256() +fma;immintrin.h;_mm_fmadd_pd;_mm_setzero_pd(),_mm_setzero_pd(),_mm_setzero_pd() +sse2;emmintrin.h;_mm_add_epi16;_mm_setzero_si128(),_mm_setzero_si128() +sse3;pmmintrin.h;_mm_addsub_pd;_mm_setzero_pd(),_mm_setzero_pd() +sse4.1;smmintrin.h;_mm_max_epi32;_mm_setzero_si128(),_mm_setzero_si128() +sse4.2;nmmintrin.h;_mm_cmpgt_epi64;_mm_setzero_si128(),_mm_setzero_si128() +sse4a;ammintrin.h;_mm_extract_si64;_mm_setzero_si128(),_mm_setzero_si128() +sse;xmmintrin.h;_mm_add_ps;_mm_setzero_ps(),_mm_setzero_ps() +ssse3;tmmintrin.h;_mm_hadd_epi16;_mm_setzero_si128(),_mm_setzero_si128() + +# AVX-VNNI +avxvnni;immintrin.h;_mm_dpbusd_avx_epi32;_mm_setzero_si128(),_mm_setzero_si128(),_mm_setzero_si128() + +# AVX-512 +avx5124fmaps;immintrin.h;_mm_4fmadd_ss;_mm_setzero_ps(),_mm_setzero_ps(),_mm_setzero_ps(),_mm_setzero_ps(),_mm_setzero_ps(),new __m128[1] +avx5124vnniw;immintrin.h;_mm512_4dpwssd_epi32;_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512(),new __m128i[1] +avx512bf16,avx512vl;immintrin.h;_mm_cvtne2ps_pbh;_mm_setzero_ps(),_mm_setzero_ps() +avx512bitalg,avx512vl;immintrin.h;_mm_popcnt_epi16;_mm_setzero_si128() +avx512bw;immintrin.h;_mm512_abs_epi16;_mm512_setzero_si512() +avx512cd;immintrin.h;_mm512_broadcastmb_epi64;__mmask8() +avx512dq;immintrin.h;_mm512_and_pd;_mm512_setzero_pd(),_mm512_setzero_pd() +avx512er;immintrin.h;_mm512_exp2a23_pd;_mm512_setzero_pd() +avx512f;immintrin.h;_mm512_abs_epi32;_mm512_setzero_si512() +avx512fp16,avx512vl;immintrin.h;_mm_add_ph;_mm_setzero_ph(),_mm_setzero_ph() 
+avx512ifma;immintrin.h;_mm512_maskz_madd52hi_epu64;__mmask8(),_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512() +avx512pf;immintrin.h;_mm512_prefetch_i32scatter_pd;NULL,_mm256_setzero_si256(),(int)1,_MM_HINT_T0 +avx512vbmi2,avx512vl;immintrin.h;_mm_mask_compress_epi16;_mm_setzero_si128(),__mmask8(),_mm_setzero_si128() +avx512vbmi;immintrin.h;_mm512_permutex2var_epi8;_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512() +avx512vl,avx512f;immintrin.h;_mm_abs_epi64;_mm_setzero_si128() +avx512vnni,avx512vl;immintrin.h;_mm_dpbusd_epi32;_mm_setzero_si128(),_mm_setzero_si128(),_mm_setzero_si128() +avx512vp2intersect,avx512vl;immintrin.h;_mm_2intersect_epi32;_mm_setzero_si128(),_mm_setzero_si128(),new __mmask8[1],new __mmask8[1] +avx512vpopcntdq,avx512vl;immintrin.h;_mm_popcnt_epi64;_mm_setzero_si128() + +# AMX +amx-bf16;immintrin.h;_tile_dpbf16ps;0,1,2 +amx-int8;immintrin.h;_tile_dpbssd;0,1,2 +amx-tile;immintrin.h;_tile_zero;0 + +# Other +adx;immintrin.h;_addcarryx_u32;(unsigned char)0,(unsigned int)1,(unsigned int)1,new unsigned int[1] +aes;wmmintrin.h;_mm_aesdec_si128;_mm_setzero_si128(),_mm_setzero_si128() +bmi2;immintrin.h;_bzhi_u32;(unsigned int)1,(unsigned int)1 +enqcmd;immintrin.h;_enqcmd;(void*)NULL,(void const*)NULL +f16c;immintrin.h;_mm_cvtph_ps;_mm_setzero_si128() +fsgsbase;immintrin.h;_readfsbase_u32; +fxsr;immintrin.h;_fxrstor;(void*)NULL +gfni,avx512vl;immintrin.h;_mm_gf2p8mul_epi8;_mm_setzero_si128(),_mm_setzero_si128() +hreset;immintrin.h;_hreset;1 +invpcid;immintrin.h;_invpcid;(unsigned int)1,(void*)NULL +keylocker;immintrin.h;_mm_aesdec128kl_u8;new __m128i[1],_mm_setzero_si128(),(const void*)NULL +keylocker_wide;immintrin.h;_mm_aesdecwide128kl_u8;new __m128i[1],(const __m128i*)new __m128i[1], (const void*)NULL +lzcnt;immintrin.h;_lzcnt_u32;(unsigned int)1 +monitor;pmmintrin.h;_mm_monitor;(void const*)NULL,(unsigned)1,(unsigned)1 +movbe;immintrin.h;_loadbe_i16;(void const*)NULL 
+movdir64b;immintrin.h;_movdir64b;(void*)NULL,(const void*)NULL +movdiri;immintrin.h;_directstoreu_u32;(void*)NULL,(unsigned int)1 +mpx;immintrin.h;_bnd_chk_ptr_lbounds;(const void*)NULL +pclmul;wmmintrin.h;_mm_clmulepi64_si128;_mm_setzero_si128(),_mm_setzero_si128(),(const int)0;pclmul +pconfig;immintrin.h;_pconfig_u32;(const int)1,new size_t[1] +pku;cstdlib;exit;0 +popcnt;immintrin.h;_mm_popcnt_u32;(unsigned int)1 +prfchw;immintrin.h;_m_prefetchw;(void*)NULL +prefetchwt1;xmmintrin.h;_mm_prefetch;(char const*)NULL,(int)1 +ptwrite;immintrin.h;_ptwrite32;(unsigned int)0 +rdpid;immintrin.h;_rdpid_u32; +rdrnd;immintrin.h;_rdrand16_step;(unsigned short*)new unsigned short[1] +rdseed;immintrin.h;_rdseed16_step;(unsigned short*)new unsigned short[1] +rdtscp;immintrin.h;__rdtscp;(unsigned int*)NULL +rtm;immintrin.h;_xend; +serialize;immintrin.h;_serialize; +sha;immintrin.h;_mm_sha1msg1_epu32;_mm_setzero_si128(),_mm_setzero_si128() +tsc;immintrin.h;_rdtsc; +tsxldtrk;immintrin.h;_xresldtrk; +uintr;immintrin.h;_clui; +vaes,avx512vl;immintrin.h;_mm256_aesdec_epi128;_mm256_setzero_si256(),_mm256_setzero_si256() +vpclmulqdq,avx512vl;immintrin.h;_mm256_clmulepi64_epi128;_mm256_setzero_si256(),_mm256_setzero_si256(),(const int)1 +waitpkg;immintrin.h;_umonitor;(void*)NULL +wbnoinvd;immintrin.h;_wbnoinvd; +xsavec,xsave;immintrin.h;_xsavec;(void*)NULL,(unsigned long long)0 +xsaveopt,xsave;immintrin.h;_xsaveopt;(void*)NULL,(unsigned long long)0 +xsaves;immintrin.h;_xgetbv;(unsigned int)1 +xss,xsave;immintrin.h;_xrstors;(const void*)NULL,(unsigned long long)0 + +# GNU GCC fails the following tests ... +push_disable:GNU +abm;x86intrin.h;_bextri_u32;(unsigned int)0,(unsigned int)0 +bmi;immintrin.h;_andn_u32;(unsigned int)1,(unsigned int)1 +cldemote;immintrin.h;_mm_cldemote;(void const*)NULL +clflushopt;immintrin.h;_mm_clflushopt;(void const*)NULL +clwb;immintrin.h;_mm_clwb;(void const*)NULL +pop_disable:GNU + +# ... 
and needs a slightly modified implementation +push_enable:GNU +abm;x86intrin.h;__bextri_u32;(unsigned int)0,(unsigned int)0 +bmi;immintrin.h;__andn_u32;(unsigned int)1,(unsigned int)1 +cldemote;immintrin.h;_cldemote;(void*)NULL +clflushopt;immintrin.h;_mm_clflushopt;(void*)NULL +clwb;immintrin.h;_mm_clwb;(void*)NULL +pop_enable:GNU + +pop_disable:MSVC,SunPro + + +# Special checks for the MSVC compiler +push_enable:MSVC + +# SSE/SSE2/SSE3/SSE4.1/SSE4.2/SSE4A/AVX/AVX2/FMA +SSE;xmmintrin.h;_mm_add_ps;_mm_setzero_ps(),_mm_setzero_ps();sse +SSE2;emmintrin.h;_mm_add_epi16;_mm_setzero_si128(),_mm_setzero_si128();sse2 +AVX;immintrin.h;_mm256_add_pd;_mm256_setzero_pd(),_mm256_setzero_pd();avx +AVX2;immintrin.h;_mm256_hadd_epi16;_mm256_setzero_si256(),_mm256_setzero_si256();avx2 + +# AVX-512 +AVX512;immintrin.h;_mm512_abs_epi32;_mm512_setzero_si512();avx512f + +pop_enable:MSVC + + +# Special checks for Oracle's SunPro compiler +# https://docs.oracle.com/cd/E77782_01/html/E77792/gqexw.html +push_enable:SunPro + +# SSE/SSE2/SSE3/SSE4.1/SSE4.2/SSE4A/AVX/AVX2/FMA +avx;immintrin.h;_mm256_add_pd;_mm256_setzero_pd(),_mm256_setzero_pd() +avx2;immintrin.h;_mm256_hadd_epi16;_mm256_setzero_si256(),_mm256_setzero_si256() +sse2;emmintrin.h;_mm_add_epi16;_mm_setzero_si128(),_mm_setzero_si128() +sse3;pmmintrin.h;_mm_addsub_pd;_mm_setzero_pd(),_mm_setzero_pd() +sse4_1;smmintrin.h;_mm_max_epi32;_mm_setzero_si128(),_mm_setzero_si128();sse4.1 +sse4_2;nmmintrin.h;_mm_cmpgt_epi64;_mm_setzero_si128(),_mm_setzero_si128();sse4.2 +sse;xmmintrin.h;_mm_add_ps;_mm_setzero_ps(),_mm_setzero_ps() +ssse3;tmmintrin.h;_mm_hadd_epi16;_mm_setzero_si128(),_mm_setzero_si128() + +# AVX-512 +avx512;immintrin.h;_mm512_abs_epi32;_mm512_setzero_si512();avx512f +avx512;xmmintrin.h;_mm_prefetch;(char const*)NULL,(int)1;prefetchwt1 + +# Other +avx_i;emmintrin.h;_mm_cvtph_ps;_mm_setzero_si128();f16c +aes;wmmintrin.h;_mm_aesdec_si128;_mm_setzero_si128(),_mm_setzero_si128();aes 
+aes;wmmintrin.h;_mm_clmulepi64_si128;_mm_setzero_si128(),_mm_setzero_si128(),(const int)0;pclmul
+avx2;immintrin.h;_lzcnt_u32;(unsigned int)1;lzcnt
+sse4_2;immintrin.h;_mm_popcnt_u32;(unsigned int)1;popcnt
+avx_i;immintrin.h;_andn_u32;(unsigned int)1,(unsigned int)1;bmi
+avx_i;immintrin.h;_bzhi_u32;(unsigned int)1,(unsigned int)1;bmi2
+avx_i;immintrin.h;_readfsbase_u32;;fsgsbase
+avx_i;immintrin.h;_rdrand16_step;(unsigned short*)new unsigned short[1];rdrnd
+pop_enable:SunPro
+
diff --git a/src/cmake/OFA/CommonMacros.cmake b/src/cmake/OFA/CommonMacros.cmake
new file mode 100644
index 0000000000..89e911f6cf
--- /dev/null
+++ b/src/cmake/OFA/CommonMacros.cmake
@@ -0,0 +1,17 @@
+include(OFA/AddCXXCompilerFlag)
+
+# _ofa_find(<list-variable-name> <value> <result-variable-name>)
+#
+# Sets <result-variable-name> to TRUE if <value> is an element of the list
+# stored in <list-variable-name>, and to FALSE otherwise.
+#
+# This is a macro, so the result is written directly into the caller's scope.
+# Membership is tested with if(... IN_LIST ...) (requires policy CMP0057 NEW)
+# so that no helper variable is created or overwritten in the caller's scope.
+macro(_ofa_find _list _value _ret)
+  if("${_value}" IN_LIST ${_list})
+    set(${_ret} TRUE)
+  else()
+    set(${_ret} FALSE)
+  endif()
+endmacro()
diff --git a/src/cmake/OFA/HandleArmOptions.cmake b/src/cmake/OFA/HandleArmOptions.cmake
new file mode 100644
index 0000000000..b9f464f6b0
--- /dev/null
+++ b/src/cmake/OFA/HandleArmOptions.cmake
@@ -0,0 +1,1162 @@
+#=============================================================================
+# Handling of ARM / ARM64 options
+#
+# This is a three-step process:
+#
+# 1. Generate a list of available compiler flags for the specific CPU
+#
+# 2. Enable/disable feature flags based on available CPU features,
+# user-defined USE_ variables and the capabilities of the
+# host system's compiler and linker
+#
+# 3. 
Set compiler-specific flags (e.g., -m/-mno-) +#============================================================================= + +include(OFA/CommonMacros) + +macro(OFA_HandleArmOptions) + + # Special treatment for "native" flag + if(TARGET_ARCHITECTURE STREQUAL "native") + if(MSVC) + # MSVC (on Windows) + message(FATAL_ERROR "[OptimizeForArchitecture] MSVC does not support \"native\" flag.") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC" + OR CMAKE_CXX_COMPILER_ID MATCHES "PGI") + # NVidia HPC / PGI (on Linux/Windows) + AddCompilerFlag("-tp=native" CXX_FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + # Cray (on Linux) + message(FATAL_ERROR, "[OptimizeForArchitecture] Cray compiler does not support \"native\" flag.") + else() + # Others: GNU, Clang and variants + AddCXXCompilerFlag("-mcpu=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + endif() + + if(NOT _ok) + message(FATAL_ERROR "[OptimizeForArchitecture] An error occured while setting the \"native\" flag.") + endif() + + elseif(NOT TARGET_ARCHITECTURE STREQUAL "none") + + # Step 1: Generate a list of compiler flags for the specific CPU + set(_march_flag_list) + set(_mtune_flag_list) + set(_available_extension_list) + + # ARM + if(TARGET_ARCHITECTURE STREQUAL "strongarm") + list(APPEND _mtune_flag_list "strongarm") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "arm8") + list(APPEND _mtune_flag_list "arm8") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "arm810") + list(APPEND _mtune_flag_list "arm810") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "fa526") + list(APPEND _mtune_flag_list "fa526") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "fa626") + list(APPEND _mtune_flag_list "fa626") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "arm7tdmi") + list(APPEND _mtune_flag_list "arm7tdmi") + list(APPEND 
_march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm7tdmi-s") + list(APPEND _mtune_flag_list "arm7tdmi-s") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm710t") + list(APPEND _mtune_flag_list "arm710t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm720t") + list(APPEND _mtune_flag_list "arm720t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm740t") + list(APPEND _mtune_flag_list "arm740t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm9") + list(APPEND _mtune_flag_list "arm9") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm9tdmi") + list(APPEND _mtune_flag_list "arm9tdmi") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm920") + list(APPEND _mtune_flag_list "arm920") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm920t") + list(APPEND _mtune_flag_list "arm920t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm922t") + list(APPEND _mtune_flag_list "arm922t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm940t") + list(APPEND _mtune_flag_list "arm940t") + list(APPEND _march_flag_list "armv4t") + + elseif(TARGET_ARCHITECTURE STREQUAL "arm1020t") + list(APPEND _mtune_flag_list "arm1020t") + list(APPEND _march_flag_list "armv5t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm10tdmi") + list(APPEND _mtune_flag_list "arm10tdmi") + list(APPEND _march_flag_list "armv5t") + + elseif(TARGET_ARCHITECTURE STREQUAL "arm9e") + list(APPEND _mtune_flag_list "arm9e") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm946e-s") + list(APPEND _mtune_flag_list "arm946e-s") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + 
elseif(TARGET_ARCHITECTURE STREQUAL "arm966e-s") + list(APPEND _mtune_flag_list "arm966e-s") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm968e-s") + list(APPEND _mtune_flag_list "arm968e-s") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm10e") + list(APPEND _mtune_flag_list "arm10e") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1020e") + list(APPEND _mtune_flag_list "arm1020e") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1022e") + list(APPEND _mtune_flag_list "arm1022e") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "xscale") + list(APPEND _mtune_flag_list "xscale") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "iwmmxt") + list(APPEND _mtune_flag_list "iwmmxt") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "iwmmxt2") + list(APPEND _mtune_flag_list "iwmmxt2") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "fa606te") + list(APPEND _mtune_flag_list "fa606te") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "fa626te") + list(APPEND _mtune_flag_list "fa626te") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "fmp626") + list(APPEND _mtune_flag_list "fmp626") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "fa726te") + list(APPEND _mtune_flag_list "fa726te") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "arm926ej-s") + list(APPEND _mtune_flag_list "arm926ej-s") + list(APPEND _march_flag_list "armv5tej") + list(APPEND 
_available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1026ej-s") + list(APPEND _mtune_flag_list "arm1026ej-s") + list(APPEND _march_flag_list "armv5tej") + list(APPEND _available_extension_list "fp") + + elseif(TARGET_ARCHITECTURE STREQUAL "mpcore") + list(APPEND _mtune_flag_list "mpcore") + list(APPEND _march_flag_list "armv6k") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1136j-s") + list(APPEND _mtune_flag_list "arm1136j-s") + list(APPEND _march_flag_list "armv6j") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1136jf-s") + list(APPEND _mtune_flag_list "arm1136jf-s") + list(APPEND _march_flag_list "armv6j") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1156t2-s") + list(APPEND _mtune_flag_list "arm1156t2-s") + list(APPEND _march_flag_list "armv6t2") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1156t2f-s") + list(APPEND _mtune_flag_list "arm1156t2f-s") + list(APPEND _march_flag_list "armv6t2") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1176jz-s") + list(APPEND _mtune_flag_list "arm1176jz-s") + list(APPEND _march_flag_list "armv6kz") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1176jzf-s") + list(APPEND _mtune_flag_list "arm1176jzf-s") + list(APPEND _march_flag_list "armv6kz") + list(APPEND _available_extension_list "fp") + + elseif(TARGET_ARCHITECTURE STREQUAL "generic-armv7-a") + list(APPEND _mtune_flag_list "generic-armv7-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "mp" "sec" "vfpv3-d16" "vfpv3" "vfpv3-d16-fp16" "vfpv3-fp16" "vfpv4-d16" "vfpv4" "simd" "neon-fp16" "neon-vfpv4") + + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a78") + list(APPEND _mtune_flag_list "cortex-a78") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list 
"fp16" "dotprod") + + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a5") + list(APPEND _mtune_flag_list "cortex-a5") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "mp" "sec" "neon-fp16") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a7") + list(APPEND _mtune_flag_list "cortex-a7") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a8") + list(APPEND _mtune_flag_list "cortex-a8") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "sec" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a9") + list(APPEND _mtune_flag_list "cortex-a9") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "mp" "sec" "neon-fp16") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a12") + list(APPEND _mtune_flag_list "cortex-a12") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a15") + list(APPEND _mtune_flag_list "cortex-a15") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a15.cortex-a7") + list(APPEND _mtune_flag_list "cortex-a15.cortex-a7") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a17") + list(APPEND _mtune_flag_list "cortex-a17") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a17.cortex-a7") + list(APPEND _mtune_flag_list "cortex-a17.cortex-a7") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a32") + list(APPEND _mtune_flag_list "cortex-a32") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" 
"simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a34") + list(APPEND _mtune_flag_list "cortex-a34") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a35") + list(APPEND _mtune_flag_list "cortex-a35") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a53") + list(APPEND _mtune_flag_list "cortex-a53") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a55") + list(APPEND _mtune_flag_list "cortex-a55") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a57") + list(APPEND _mtune_flag_list "cortex-a57") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a57.cortext-a53") + list(APPEND _mtune_flag_list "cortex-a57.cortext-a53") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a72") + list(APPEND _mtune_flag_list "cortex-a72") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a72.cortext-a53") + list(APPEND _mtune_flag_list "cortex-a72.cortext-a53") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list 
"armv7-a") + list(APPEND _available_extension_list "crc" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a73") + list(APPEND _mtune_flag_list "cortex-a73") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a73.cortext-a35") + list(APPEND _mtune_flag_list "cortex-a73.cortext-a35") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a73.cortext-a53") + list(APPEND _mtune_flag_list "cortex-a73.cortext-a53") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a75") + list(APPEND _mtune_flag_list "cortex-a75") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a75.cortext-a55") + list(APPEND _mtune_flag_list "cortex-a75.cortext-a55") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a76") + list(APPEND _mtune_flag_list "cortex-a76") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a76.cortext-a55") + list(APPEND _mtune_flag_list "cortex-a76.cortext-a55") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") 
+ list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a76ae") + list(APPEND _mtune_flag_list "cortex-a76ae") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a77") + list(APPEND _mtune_flag_list "cortex-a77") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a78") + list(APPEND _mtune_flag_list "cortex-a78") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a78ae") + list(APPEND _mtune_flag_list "cortex-a78ae") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a78c") + list(APPEND _mtune_flag_list "cortex-a78c") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a710") + list(APPEND _mtune_flag_list "cortex-a510") + list(APPEND _march_flag_list "armv9-a") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND 
_march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a710") + list(APPEND _mtune_flag_list "cortex-a710") + list(APPEND _march_flag_list "armv9-a") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m0") + list(APPEND _mtune_flag_list "cortex-m0") + list(APPEND _march_flag_list "armv6s-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m0plus") + list(APPEND _mtune_flag_list "cortex-m0plus") + list(APPEND _march_flag_list "armv6s-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m1") + list(APPEND _mtune_flag_list "cortex-m1") + list(APPEND _march_flag_list "armv6s-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m3") + list(APPEND _mtune_flag_list "cortex-m3") + list(APPEND _march_flag_list "armv7-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m4") + list(APPEND _mtune_flag_list "cortex-m4") + list(APPEND _march_flag_list "armv7e-m") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m7") + list(APPEND _mtune_flag_list "cortex-m7") + list(APPEND _march_flag_list "armv7e-m") + list(APPEND _available_extension_list "fp.dp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m23") + list(APPEND _mtune_flag_list "cortex-m23") + list(APPEND _march_flag_list "armv8-m.base") + list(APPEND 
_march_flag_list "armv7-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m33") + list(APPEND _mtune_flag_list "cortex-m33") + list(APPEND _march_flag_list "armv8-m.main") + list(APPEND _march_flag_list "armv7-m") + list(APPEND _available_extension_list "dsp" "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m35p") + list(APPEND _mtune_flag_list "cortex-m35p") + list(APPEND _march_flag_list "armv8-m.main") + list(APPEND _march_flag_list "armv7-m") + list(APPEND _available_extension_list "dsp" "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m55") + list(APPEND _mtune_flag_list "cortex-m55") + list(APPEND _march_flag_list "armv8.1-m.main") + list(APPEND _march_flag_list "armv8-m") + list(APPEND _march_flag_list "armv7-m") + list(APPEND _available_extension_list "mve.fp" "fp.dp") + + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r4") + list(APPEND _mtune_flag_list "cortex-r4") + list(APPEND _march_flag_list "armv7-r") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r4f") + list(APPEND _mtune_flag_list "cortex-r4f") + list(APPEND _march_flag_list "armv7-r") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r5") + list(APPEND _mtune_flag_list "cortex-r5") + list(APPEND _march_flag_list "armv7-r") + list(APPEND _available_extension_list "idiv" "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r7") + list(APPEND _mtune_flag_list "cortex-r7") + list(APPEND _march_flag_list "armv7-r") + list(APPEND _available_extension_list "idiv" "vfpv3-d16-fp16") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r8") + list(APPEND _mtune_flag_list "cortex-r8") + list(APPEND _march_flag_list "armv7-r") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r52") + list(APPEND _mtune_flag_list "cortex-r52") + list(APPEND _march_flag_list "armv8-r") + list(APPEND _march_flag_list "armv7-r") + list(APPEND _available_extension_list "crc" "simd" "idiv" "vfpv3-d16-fp16") + + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-x1") + list(APPEND _mtune_flag_list 
"cortex-x1") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-x2") + list(APPEND _march_flag_list "armv9-a") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + + elseif(TARGET_ARCHITECTURE STREQUAL "neoverse-e1") + list(APPEND _mtune_flag_list "neoverse-e1") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "neoverse-n1") + list(APPEND _mtune_flag_list "neoverse-n1") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "neoverse-n2") + list(APPEND _mtune_flag_list "neoverse-n2") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + elseif(TARGET_ARCHITECTURE STREQUAL "neoverse-v1") + list(APPEND _mtune_flag_list "neoverse-v1") + list(APPEND 
_march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + + # Broadcom + elseif(TARGET_ARCHITECTURE STREQUAL "brahma-b15") + list(APPEND _mtune_flag_list "brahma-b15") + elseif(TARGET_ARCHITECTURE STREQUAL "brahma-b53") + list(APPEND _mtune_flag_list "brahma-b53") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderx2") + list(APPEND _mtune_flag_list "thunderx2") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crypto") + + # Cavium + elseif(TARGET_ARCHITECTURE STREQUAL "thunderx") + list(APPEND _mtune_flag_list "thunderx") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderxt88") + list(APPEND _mtune_flag_list "thunderxt88") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderxt81") + list(APPEND _mtune_flag_list "thunderxt81") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderxt83") + list(APPEND _mtune_flag_list "thunderxt83") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderx2t99") + list(APPEND _mtune_flag_list "thunderx2t99") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND 
_available_extension_list "fp" "asimd" "evtstrm" "aes" "pmull" "sha1" "sha2" "crc32" "atomics" "cpuid" "asimdrdm") + + # DEC + elseif(TARGET_ARCHITECTURE STREQUAL "strongarm110") + list(APPEND _mtune_flag_list "strongarm110") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "strongarm1100") + list(APPEND _mtune_flag_list "strongarm1100") + list(APPEND _march_flag_list "armv4") + + # FUJITSU + elseif(TARGET_ARCHITECTURE STREQUAL "a64fx") + list(APPEND _mtune_flag_list "a64fx") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp" "asimd" "evtstrm" "sha1" "sha2" "crc32" "atomics" "fphp" "asimdhp" "cpuid" "asimdrdm" "fcma" "dcpop" "sve") + + # HiSilicon + elseif(TARGET_ARCHITECTURE STREQUAL "tsv110") + list(APPEND _mtune_flag_list "tsv110") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "aes" "crypto" "fp16" "sha2") + + # Nvidia + elseif(TARGET_ARCHITECTURE STREQUAL "denver") + list(APPEND _mtune_flag_list "denver") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd" "vfpv3" "vfpv4") + elseif(TARGET_ARCHITECTURE STREQUAL "denver2") + list(APPEND _mtune_flag_list "denver2") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd" "vfpv3" "vfpv4") + elseif(TARGET_ARCHITECTURE STREQUAL "carmel") + list(APPEND _mtune_flag_list "denver") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND 
_available_extension_list "crc" "crypto" "simd" "vfpv3" "vfpv4") + + # APM + elseif(TARGET_ARCHITECTURE STREQUAL "xgene1") + list(APPEND _mtune_flag_list "xgene1") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + + # Qualcomm + elseif(TARGET_ARCHITECTURE STREQUAL "scorpion") + list(APPEND _mtune_flag_list "scorpion") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "krait") + list(APPEND _mtune_flag_list "krait") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "kryo") + list(APPEND _mtune_flag_list "kryo") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "kryo2") + list(APPEND _mtune_flag_list "kryo2") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "falkor") + list(APPEND _mtune_flag_list "falkor") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "saphira") + list(APPEND _mtune_flag_list "saphira") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd" "vfpv3" "vfpv4") + + # Samsung + elseif(TARGET_ARCHITECTURE STREQUAL "exynos-m1") + list(APPEND _mtune_flag_list "exynos-m1") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crypto" "simd") + + # Marvell + elseif(TARGET_ARCHITECTURE STREQUAL "marvell-f") + list(APPEND _mtune_flag_list "marvell-f") + list(APPEND _march_flag_list 
"armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "marvell-pj4") + list(APPEND _mtune_flag_list "marvell-pj4") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "mp" "sec" "fp") + + # Intel + elseif(TARGET_ARCHITECTURE STREQUAL "i80200") + list(APPEND _mtune_flag_list "i80200") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa250a") + list(APPEND _mtune_flag_list "pxa250a") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa210a") + list(APPEND _mtune_flag_list "pxa210a") + elseif(TARGET_ARCHITECTURE STREQUAL "i80321-400") + list(APPEND _mtune_flag_list "i80321-400") + elseif(TARGET_ARCHITECTURE STREQUAL "i80321-600") + list(APPEND _mtune_flag_list "i80321-600") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa250b") + list(APPEND _mtune_flag_list "pxa250b") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa210b") + list(APPEND _mtune_flag_list "pxa210b") + elseif(TARGET_ARCHITECTURE STREQUAL "i80321-400-b0") + list(APPEND _mtune_flag_list "i80321-400-b0") + elseif(TARGET_ARCHITECTURE STREQUAL "i80321-600-b0") + list(APPEND _mtune_flag_list "i80321-600-b0") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa250c") + list(APPEND _mtune_flag_list "pxa250c") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa210c") + list(APPEND _mtune_flag_list "pxa210c") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa27x") + list(APPEND _mtune_flag_list "pxa27x") + elseif(TARGET_ARCHITECTURE STREQUAL "ipx425-533") + list(APPEND _mtune_flag_list "ipx425-533") + elseif(TARGET_ARCHITECTURE STREQUAL "ipx425-400") + list(APPEND _mtune_flag_list "ipx425-400") + elseif(TARGET_ARCHITECTURE STREQUAL "ipx425-266") + list(APPEND _mtune_flag_list "ipx425-266") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa32x") + list(APPEND _mtune_flag_list "pxa32x") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa930") + list(APPEND _mtune_flag_list "pxa930") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa30x") + list(APPEND _mtune_flag_list "pxa30x") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa31x") + list(APPEND _mtune_flag_list "pxa31x") + 
elseif(TARGET_ARCHITECTURE STREQUAL "sa1110") + list(APPEND _mtune_flag_list "sa1110") + elseif(TARGET_ARCHITECTURE STREQUAL "ipx1200") + list(APPEND _mtune_flag_list "ipx1200") + + # Apple + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a6") + list(APPEND _mtune_flag_list "apple-a6") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a7") + list(APPEND _mtune_flag_list "apple-a7") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crypto" "fp" "simd" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a8") + list(APPEND _mtune_flag_list "apple-a8") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crypto" "fp" "simd" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a9") + list(APPEND _mtune_flag_list "apple-a9") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crypto" "fp" "simd" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a10") + list(APPEND _mtune_flag_list "apple-a10") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "simd" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a11") + list(APPEND _mtune_flag_list "apple-a11") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "lse" "simd" "ras" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a12") + list(APPEND _mtune_flag_list "apple-a12") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" 
"zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a13") + list(APPEND _mtune_flag_list "apple-a13") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "dotprod" "fp" "fp16" "fp16fml" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "sha3" "sm4" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a14") + list(APPEND _mtune_flag_list "apple-a14") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a15") + list(APPEND _mtune_flag_list "apple-a15") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a16") + list(APPEND _mtune_flag_list "apple-a16") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a17") + list(APPEND 
_mtune_flag_list "apple-a17") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-m1") + list(APPEND _mtune_flag_list "apple-m1" "vortext") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "altnzcv" "ccdp" "complxnum" "crc" "crypto" "dotprod" "fp" "fp-armv8" "fp16" "fp16fml" "fptoint" "fullfp16" "jsconv" "lse" "neon" "pauth" "perfmon" "predres" "ras" "rcpc" "rdm" "sb" "sha2" "sha3" "simd" "specrestrict" "ssbs" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-m2") + list(APPEND _mtune_flag_list "apple-m2" "apple-m1" "vortex") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "altnzcv" "bf16" "ccdp" "complxnum" "crc" "crypto" "dotprod" "fp" "fp-armv8" "fp16" "fp16fml" "fptoint" "fullfp16" "i8mm" "jsconv" "lse" "neon" "pauth" "perfmon" "predres" "ras" "rcpc" "rdm" "sb" "sha2" "sha3" "simd" "specrestrict" "ssbs" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-m3") + list(APPEND _mtune_flag_list "apple-m3" "apple-m2" "apple-m1" "vortex") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + 
list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "altnzcv" "bf16" "ccdp" "complxnum" "crc" "crypto" "dotprod" "fp" "fp-armv8" "fp16" "fp16fml" "fpac" "fptoint" "fullfp16" "i8mm" "jsconv" "lse" "neon" "pauth" "perfmon" "predres" "ras" "rcpc" "rdm" "sb" "sha2" "sha3" "simd" "specrestrict" "ssbs" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-m4") + list(APPEND _mtune_flag_list "apple-m4" "apple-m3" "apple-m2" "apple-m1" "vortex") + list(APPEND _march_flag_list "armv8.7-a") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "altnzcv" "bf16" "ccdp" "complxnum" "crc" "crypto" "dotprod" "fp" "fp-armv8" "fp16" "fp16fml" "fpac" "fptoint" "fullfp16" "i8mm" "jsconv" "lse" "neon" "pauth" "perfmon" "predres" "ras" "rcpc" "rdm" "sb" "sha2" "sha3" "simd" "sme" "sme-f64f64" "sme-i16i64" "sme2" "specrestrict" "ssbs" "wfxt" "zcm" "zcz") + + # Others + elseif(TARGET_ARCHITECTURE STREQUAL "generic") + list(APPEND _march_flag_list "generic") + elseif(TARGET_ARCHITECTURE STREQUAL "none") + # add this clause to remove it from the else clause + + else() + message(FATAL_ERROR "[OptimizeForArchitecture] Unknown target architecture: \"${TARGET_ARCHITECTURE}\". 
Please set TARGET_ARCHITECTURE to a supported value.") + endif() + + # Clean list of available extensions + list(SORT _available_extension_list) + list(REMOVE_DUPLICATES _available_extension_list) + + if(OFA_VERBOSE) + if(_march_flag_list) + string(REPLACE ";" ", " _str "${_march_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] CPU architectures (-march): " ${_str}) + endif() + if(_mtune_flag_list) + string(REPLACE ";" ", " _str "${_mtune_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] CPU microarchitectures (-mtune): " ${_str}) + endif() + if(_available_extension_list) + list(LENGTH _available_extension_list _len) + string(REPLACE ";" ", " _str "${_available_extension_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} available): ${_str}") + endif() + endif() + + # Following the recommendation from + # https://community.arm.com/developer/tools-software/ + # tools/b/tools-software-ides-blog/posts/ + # compiler-flags-across-architectures-march-mtune-and-mcpu we + # first try to use the -mcpu flag and set it a value from the + # list of -mtune flags. If that fails, e.g., if the compiler + # does not yet support the specified target, we try to set the + # -march and -mtune flags as fallback option. 
+ + # Set compiler-specific option names + set(_mcpu_flag "-mcpu=") + set(_march_flag "-march=") + set(_mtune_flag "-mtune") + + set(CAN_USE_MCPU FALSE) + foreach(_flag ${_mtune_flag_list}) + AddCXXCompilerFlag("${_mcpu_flag}${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + if(_ok) + set(CAN_USE_MCPU TRUE) + break() + endif() + endforeach() + + # Fallback: set -march and -mtune flags + set(_check_extension_list) + set(_check_extension_flag_list) + set(_disable_extension_flag_list) + set(_enable_extension_flag_list) + set(_ignore_extension_flag_list) + + foreach(_flag ${_march_flag_list}) + AddCXXCompilerFlag("${_march_flag}${_flag}" RESULT _ok) + if(_ok) + set(_march ${_flag}) + break() + endif() + endforeach() + + # Step 2: Enable/disable feature flags based on available CPU + # features, used-defined USE_ variables and + # the capabilities of the host system's compiler and linker + file(READ ${ALICEVISION_ROOT}/../src/cmake/OFA/CheckARM.txt _checks) + string(REGEX REPLACE "[:;]" "|" _checks "${_checks}") + string(REPLACE "\n" ";" _checks "${_checks}") + + set(_skip_check FALSE) + + # Iterate over the list of checks line by line + foreach (_check ${_checks}) + string(REPLACE "|" ";" _check "${_check}") + + # Parse for special lines + if ("${_check}" MATCHES "^#" ) # Skip comment + continue() + + elseif ("${_check}" MATCHES "^push_enable" ) # Start enable block + list(GET _check 1 _push_enable_list) + string(REPLACE "," ";" _push_enable_list "${_push_enable_list}") + _ofa_find(_push_enable_list "${CMAKE_CXX_COMPILER_ID}" _found) + if(_found) + list(INSERT _skip_check 0 FALSE) + else() + list(INSERT _skip_check 0 TRUE) + endif() + continue() + + elseif ("${_check}" MATCHES "^pop_enable" ) # End enable block + list(REMOVE_AT _skip_check 0) + continue() + + elseif ("${_check}" MATCHES "^push_disable" ) # Start disable block + list(GET _check 1 _push_disable_list) + string(REPLACE "," ";" _push_disable_list "${_push_disable_list}") + _ofa_find(_push_disable_list 
"${CMAKE_CXX_COMPILER_ID}" _found) + if(_found) + list(INSERT _skip_check 0 TRUE) + else() + # Compiler was not found in the list, so we keep its previous status + list(GET _skip_check 0 _skip) + list(INSERT _skip_check 0 ${_skip}) + endif() + continue() + + elseif ("${_check}" MATCHES "^pop_disable" ) # End disable block + list(REMOVE_AT _skip_check 0) + continue() + endif() + + # Skip test? + list(GET _skip_check 0 _skip) + if(_skip) + continue() + endif() + + # Extract extra CPU extensions, header files, function name, and parameters + list(GET _check 0 _check_extension_flags) + list(GET _check 1 _check_headers) + list(GET _check 2 _check_function) + list(GET _check 3 _check_params) + + # Convert list of extensions into compiler flags + string(REPLACE "," ";" _check_extension_flags "${_check_extension_flags}") + list(GET _check_extension_flags 0 _extension_flag) + list(APPEND _check_extension_flag_list "${_extension_flag}") + string(REPLACE ";" "+" _check_extra_flags "+${_check_extension_flags}") + + # Extract optional extension alias + list(LENGTH _check _len) + if(${_len} EQUAL 5) + list(GET _check 4 _extension) + else() + set(_extension "${_extension_flag}") + endif() + + list(APPEND _check_extension_list "${_extension}") + + # Define USE_<_extension_flag> variable + set(_useVar "USE_${_extension_flag}") + string(TOUPPER "${_useVar}" _useVar) + string(REPLACE "[-.+/:= ]" "_" _useVar "${_useVar}") + + # If not specified externally, set the value of the + # USE_<_extension_flag> variable to TRUE if it is found in the list + # of available extensions and FALSE otherwise + if(NOT DEFINED ${_useVar}) + _ofa_find(_available_extension_list "${_extension}" _found) + set(${_useVar} ${_found}) + endif() + + if(${_useVar}) + # Check if the compiler supports the -march=<_march>+<_extension_flag> + # flag and can compile the provided test code with it + set(_code "\nint main() { ${_check_function}(${_check_params})\; return 0\; }") + 
AddCXXCompilerFlag("${_march_flag}${_march}+${_extension_flag}" + EXTRA_FLAGS ${_check_extra_flags} + HEADERS ${_check_headers} + CODE "${_code}" + RESULT _ok) + if(NOT ${_ok}) + # Test failed + set(${_useVar} FALSE CACHE BOOL "Use ${_extension} extension.") + else() + # Test succeeded + set(${_useVar} TRUE CACHE BOOL "Use ${_extension} extension.") + endif() + else() + # Disable extension without running tests + set(${_useVar} FALSE CACHE BOOL "Use ${_extension} extension.") + endif() + mark_as_advanced(${_useVar}) + endforeach() + + # Generate lists of enabled/disabled flags + list(REMOVE_DUPLICATES _check_extension_flag_list) + foreach(_extension_flag ${_check_extension_flag_list}) + _ofa_find(_available_extension_list "${_extension_flag}" _found) + set(_useVar "USE_${_extension_flag}") + string(TOUPPER "${_useVar}" _useVar) + string(REPLACE "[-.+/:= ]" "_" _useVar "${_useVar}") + + if(${_useVar}) + # Add <_extension_flag> to list of enabled extensions (if supported) + set(_haveVar "HAVE_${_march_flag}${_march}+${_extension_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _haveVar "${_haveVar}") + if(NOT ${_haveVar}) + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Ignoring flag ${_march_flag}${_march}+${_extension_flag} because checks failed") + endif() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + continue() + endif() + list(APPEND _enable_extension_flag_list "${_extension_flag}") + else() + # Add <_extension_flag> to list of disabled extensions (if supported) + AddCXXCompilerFlag("${_march_flag}${_march}+no${_extension_flag}") + set(_haveVar "HAVE_${_march_flag}${_march}+no${_extension_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _haveVar "${_haveVar}") + if(NOT ${_haveVar}) + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Ignoring flag ${_march_flag}${_march}+no${_extension_flag} because checks failed") + endif() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + continue() + endif() + list(APPEND 
_disable_extension_flag_list "${_extension_flag}") + endif() + endforeach() + + if(OFA_VERBOSE) + # Print checked extension flags + if(_check_extension_flag_list) + list(LENGTH _check_extension_flag_list _len) + list(SORT _check_extension_flag_list) + string(REPLACE ";" ", " _str "${_check_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} checked): ${_str}") + endif() + # Print enabled extension flags + if(_enable_extension_flag_list) + list(LENGTH _enable_extension_flag_list _len) + list(SORT _enable_extension_flag_list) + string(REPLACE ";" ", " _str "${_enable_extension_flag_list}") + string(TOUPPER ${_str} _str) + if(CAN_USE_MCPU) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} available): ${_str}") + else() + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} enabled): ${_str}") + endif() + endif() + # Print disabled extension flags + if(_disable_extension_flag_list) + list(LENGTH _disable_extension_flag_list _len) + list(SORT _disable_extension_flag_list) + string(REPLACE ";" ", " _str "${_disable_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} disabled): ${_str}") + endif() + # Print ignored extension flags + if(_ignore_extension_flag_list) + list(LENGTH _ignore_extension_flag_list _len) + list(SORT _ignore_extension_flag_list) + string(REPLACE ";" ", " _str "${_ignore_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} ignored): ${_str}") + endif() + # Print unhandled extension flags + set(_unhandled_extension_list) + foreach(_extension ${_available_extension_list}) + _ofa_find(_check_extension_list "${_extension}" _found) + if(NOT _found) + list(APPEND _unhandled_extension_list ${_extension}) + endif() + endforeach() + if(_unhandled_extension_list) + list(LENGTH _unhandled_extension_list _len) + list(SORT 
_unhandled_extension_list) + string(REPLACE ";" ", " _str "${_unhandled_extension_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} unhandled): ${_str}") + endif() + endif() + + # Step 3: Set compiler-specific flags (e.g., -m/-mno-) + if(NOT CAN_USE_MCPU) + + if(MSVC AND MSVC_VERSION GREATER 1900) + _ofa_find(_enable_extension_flag_list "vfpv4" _found) + if(_found) + AddCompilerFlag("/arch:VFPv4" CXX_FLAGS ARCHITECTURE_CXX_FLAGS CXX_RESULT _found) + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "simd" _found) + if(_found) + AddCompilerFlag("/arch:ARMv7VE" CXX_FLAGS ARCHITECTURE_CXX_FLAGS CXX_RESULT _found) + endif() + endif() + foreach(_flag ${_enable_extension_flag_list}) + string(TOUPPER "${_flag}" _flag) + string(REPLACE "[-.+/:= ]" "_" _flag "__${_flag}__") + add_definitions("-D${_flag}") + endforeach(_flag) + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + + # TODO: Add Cray flags + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Fujitsu") + + # TODO: Add Fujitsu flags + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC") + + # TODO: Add NVHPC flags + + else() + # Others: GNU, Clang and variants + foreach(_march ${_march_flag_list}) + AddCXXCompilerFlag("-march=${_march}" RESULT _ok) + if(_ok) + set(_march_plus_extensions "${_march}") + foreach(_flag ${_enable_extension_flag_list}) + AddCXXCompilerFlag("-march=${_march_plus_extensions}+${_flag}" RESULT _ok) + if(_ok) + set(_march_plus_extensions "${_march_plus_extensions}+${_flag}") + endif(_ok) + endforeach() + foreach(_flag ${_disable_extension_flag_list}) + AddCXXCompilerFlag("-march=${_march_plus_extensions}+no${_flag}" RESULT _ok) + if(_ok) + set(_march_plus_extensions "${_march_plus_extensions}+no${_flag}") + endif(_ok) + endforeach() + AddCXXCompilerFlag("-march=${_march_plus_extensions}" FLAGS ARCHITECTURE_CXX_FLAGS) + break() + endif() + endforeach() + + # Set -mtune flag + foreach(_mtune ${_mtune_flag_list}) + 
AddCXXCompilerFlag("-mtune=${_mtune}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + if(_ok) + break() + endif() + endforeach() + endif() + + endif() # CAN_USE_MCPU + + endif() + + # Compile code with profiling instrumentation + if(TARGET_PROFILER STREQUAL "gprof") + AddCXXCompilerFlag("-pg" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + + # Remove duplicate flags + list(REMOVE_DUPLICATES ARCHITECTURE_CXX_FLAGS) + + if(OFA_VERBOSE) + string(REPLACE ";" ", " _str "${ARCHITECTURE_CXX_FLAGS}") + message(STATUS "[OptimizeForArchitecture] ARCHITECTURE_CXX_FLAGS: " ${_str}) + endif() + +endmacro(OFA_HandleArmOptions) diff --git a/src/cmake/OFA/HandlePpcOptions.cmake b/src/cmake/OFA/HandlePpcOptions.cmake new file mode 100644 index 0000000000..4def043583 --- /dev/null +++ b/src/cmake/OFA/HandlePpcOptions.cmake @@ -0,0 +1,170 @@ +#============================================================================= +# Handling of PPC / PPC64 options +# +# This is a three-step process: +# +# 1. Generate a list of available compiler flags for the specific CPU +# +# 2. Enable/disable feature flags based on available CPU features, +# used-defined USE_ variables and the capabilities of the +# host system's compiler and linker +# +# 3. 
Set compiler-specific flags (e.g., -m/-mno-)
+#=============================================================================
+
+include(OFA/CommonMacros)
+
+# OFA_HandlePpcOptions()
+#
+# Translates TARGET_ARCHITECTURE into compiler flags for PPC / PPC64 targets
+# and appends every accepted flag to ARCHITECTURE_CXX_FLAGS.
+#
+# Variables read from the calling scope:
+#   TARGET_ARCHITECTURE    "native", "none", "generic" or one of
+#                          power3 ... power10 (anything else is fatal)
+#   TARGET_PROFILER        "gprof" additionally requests -pg
+#   OFA_VERBOSE            when true, the resulting flag list is printed
+#   CMAKE_CXX_COMPILER_ID  selects the flag spelling (XL, NVHPC/PGI, others)
+#
+# Variables written in the calling scope (this is a macro, so nothing is
+# local): ARCHITECTURE_CXX_FLAGS plus the scratch variables _ok, _good,
+# _good_mcpu, _good_qarch, _flag, _str and _march_flag_list.
+#
+# Every flag is probed through AddCXXCompilerFlag(), so a flag the compiler
+# rejects is never added.
+macro(OFA_HandlePpcOptions)
+
+  # Special treatment for "native" flag
+  if(TARGET_ARCHITECTURE STREQUAL "native")
+    if(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC" OR
+        CMAKE_CXX_COMPILER_ID MATCHES "PGI")
+      # NVidia HPC / PGI
+      AddCXXCompilerFlag("-tp=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "XL")
+      # IBM XL (on Linux/AIX)
+      AddCXXCompilerFlag("-qarch=auto" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+    else()
+      # Others: GNU, Clang and variants.
+      # The PowerPC back ends select the CPU with -mcpu=; -march= is only
+      # kept as a fallback for compilers that accept that spelling instead.
+      AddCXXCompilerFlag("-mcpu=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+      if(NOT _ok)
+        AddCXXCompilerFlag("-march=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+      endif()
+    endif()
+
+    if(NOT _ok)
+      message(FATAL_ERROR "[OptimizeForArchitecture] An error occured while setting the \"native\" flag.")
+    endif()
+
+  elseif(NOT TARGET_ARCHITECTURE STREQUAL "none")
+
+    # Step 1: Generate a list of compiler flags for the specific CPU
+    set(_march_flag_list)
+
+    # Define macros for PowerPC64.
+    # Each macro appends its own CPU name(s) and then chains to its
+    # predecessor, so the list ends up ordered from the requested CPU down to
+    # power3. Step 3 walks the list front to back and stops at the first
+    # name the compiler accepts.
+    macro(_power3)
+      list(APPEND _march_flag_list "power3")
+    endmacro()
+    macro(_power4)
+      list(APPEND _march_flag_list "power4")
+      _power3()
+    endmacro()
+    macro(_power5)
+      list(APPEND _march_flag_list "power5")
+      _power4()
+    endmacro()
+    macro(_power5plus)
+      list(APPEND _march_flag_list "power5+")
+      _power5()
+    endmacro()
+    macro(_power6)
+      list(APPEND _march_flag_list "power6")
+      _power5()
+    endmacro()
+    macro(_power6x)
+      list(APPEND _march_flag_list "power6x")
+      _power6()
+    endmacro()
+    macro(_power7)
+      list(APPEND _march_flag_list "power7")
+      _power6()
+    endmacro()
+    # From power8 on both the "pwrN" and the "powerN" spelling are offered.
+    macro(_power8)
+      list(APPEND _march_flag_list "pwr8")
+      list(APPEND _march_flag_list "power8")
+      _power7()
+    endmacro()
+    macro(_power9)
+      list(APPEND _march_flag_list "pwr9")
+      list(APPEND _march_flag_list "power9")
+      _power8()
+    endmacro()
+    macro(_power10)
+      list(APPEND _march_flag_list "pwr10")
+      list(APPEND _march_flag_list "power10")
+      _power9()
+    endmacro()
+
+    # PowerPC64
+    if(TARGET_ARCHITECTURE STREQUAL "power3")
+      _power3()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power4")
+      _power4()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power5")
+      _power5()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power5+")
+      _power5plus()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power6")
+      _power6()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power6x")
+      _power6x()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power7")
+      _power7()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power8")
+      _power8()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power9")
+      _power9()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power10")
+      _power10()
+
+    # Others
+    elseif(TARGET_ARCHITECTURE STREQUAL "generic")
+      list(APPEND _march_flag_list "generic")
+
+    # "none" never reaches this chain (the enclosing elseif excludes it), so
+    # every remaining value is an unsupported architecture name.
+    else()
+      message(FATAL_ERROR "[OptimizeForArchitecture] Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
+    endif()
+
+    # Step 2: We do not enable/disable feature flags for PPC/PPC64 CPUs
+
+    # Step 3: Set compiler-specific flags (e.g., -m/-mno-)
+    if(CMAKE_CXX_COMPILER_ID MATCHES "XL")
+
+      # Set -mcpu/-qarch flag.
+      # Both spellings are probed for every candidate and each result is
+      # kept separately: stop at the first candidate for which at least one
+      # spelling was accepted, so no flags for older CPUs are appended
+      # behind it.
+      foreach(_flag ${_march_flag_list})
+        AddCXXCompilerFlag("-mcpu=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good_mcpu)
+        AddCXXCompilerFlag("-qarch=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good_qarch)
+        if(_good_mcpu OR _good_qarch)
+          break()
+        endif()
+      endforeach()
+
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC"
+        OR CMAKE_CXX_COMPILER_ID MATCHES "PGI")
+
+      # Set -tp flag
+      foreach(_flag ${_march_flag_list})
+        AddCXXCompilerFlag("-tp=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good)
+        if(_good)
+          break()
+        endif()
+      endforeach()
+
+    else()
+      # Others: GNU, Clang and variants
+
+      # Set -mcpu flag (the CPU selector of the PowerPC back ends); fall
+      # back to -march for compilers that only accept that spelling.
+      foreach(_flag ${_march_flag_list})
+        AddCXXCompilerFlag("-mcpu=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good)
+        if(NOT _good)
+          AddCXXCompilerFlag("-march=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good)
+        endif()
+        if(_good)
+          break()
+        endif()
+      endforeach()
+
+    endif()
+  endif()
+
+  # Compile code with profiling instrumentation
+  if(TARGET_PROFILER STREQUAL "gprof")
+    AddCXXCompilerFlag("-pg" FLAGS ARCHITECTURE_CXX_FLAGS)
+  endif()
+
+  # Remove duplicate flags
+  list(REMOVE_DUPLICATES ARCHITECTURE_CXX_FLAGS)
+
+  if(OFA_VERBOSE)
+    string(REPLACE ";" ", " _str "${ARCHITECTURE_CXX_FLAGS}")
+    message(STATUS "[OptimizeForArchitecture] ARCHITECTURE_CXX_FLAGS: " ${_str})
+  endif()
+
+endmacro()
diff --git a/src/cmake/OFA/HandleX86Options.cmake b/src/cmake/OFA/HandleX86Options.cmake
new file mode 100644
index 0000000000..d8f523a9d2
--- /dev/null
+++ b/src/cmake/OFA/HandleX86Options.cmake
@@ -0,0 +1,899 @@
+#=============================================================================
+# Handling of X86 / X86_64 options
+#
+# This is a three-step process:
+#
+# 1. Generate a list of available compiler flags for the specific CPU
+#
+# 2. Enable/disable feature flags based on available CPU features,
+#    user-defined USE_ variables and the capabilities of the
+#    host system's compiler and linker
+#
+# 3. 
Set compiler-specific flags (e.g., -m/-mno-) +#============================================================================= + +include(OFA/CommonMacros) + +#Requires CMake 3.2 + +macro(OFA_HandleX86Options) + + # Special treatment for "native" flag + if(TARGET_ARCHITECTURE STREQUAL "native") + if(MSVC) + # MSVC (on Windows) + message(FATAL_ERROR "[OptimizeForArchitecture] MSVC does not support \"native\" flag.") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel" + OR CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") + if(WIN32) + # Intel (on Windows) + AddCXXCompilerFlag("/QxHOST" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + else() + # Intel (on Linux) + AddCXXCompilerFlag("-xHOST" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + endif() + elseif(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC" + OR CMAKE_CXX_COMPILER_ID MATCHES "PGI") + # NVidia HPC / PGI (on Linux/Windows) + AddCXXCompilerFlag("-tp=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "SunPro") + # Sun/Oracle Studio (on Linux/Sun OS) + AddCXXCompilerFlag("-native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + # Cray (on Linux) + message(FATAL_ERROR, "[OptimizeForArchitecture] Cray compiler does not support \"native\" flag.") + else() + # Others: GNU, Clang and variants + AddCXXCompilerFlag("-march=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + endif() + + if(NOT _ok) + message(FATAL_ERROR "[OptimizeForArchitecture] An error occured while setting the \"native\" flag.") + endif() + + elseif(NOT TARGET_ARCHITECTURE STREQUAL "none") + + # Step 1: Generate a list of compiler flags for the specific CPU + set(_march_flag_list) + set(_available_extension_list) + + # Define macros for Intel + macro(_nehalem) + list(APPEND _march_flag_list "nehalem") + list(APPEND _march_flag_list "corei7") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "cx16" "fxsr" "sahf" "mmx" "popcnt" "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2") + 
endmacro() + macro(_westmere) + list(APPEND _march_flag_list "westmere") + _nehalem() + list(APPEND _available_extension_list "aes" "pclmul") + endmacro() + macro(_sandybridge) + list(APPEND _march_flag_list "sandybridge") + list(APPEND _march_flag_list "corei7-avx") + _westmere() + list(APPEND _available_extension_list "avx" "xsave") + endmacro() + macro(_ivybridge) + list(APPEND _march_flag_list "ivybridge") + list(APPEND _march_flag_list "core-avx-i") + _sandybridge() + list(APPEND _available_extension_list "f16c" "fsgsbase" "rdrnd" ) + endmacro() + macro(_haswell) + list(APPEND _march_flag_list "haswell") + list(APPEND _march_flag_list "core-avx2") + _ivybridge() + list(APPEND _available_extension_list "avx2" "bmi" "bmi2" "fma" "hle" "lzcnt" "movbe") + endmacro() + macro(_broadwell) + list(APPEND _march_flag_list "broadwell") + _haswell() + list(APPEND _available_extension_list "adx" "prfchw" "rdseed") + endmacro() + macro(_skylake) + list(APPEND _march_flag_list "skylake") + _broadwell() + list(APPEND _available_extension_list "clflushopt" "sgx" "xsavec" "xsaves") + endmacro() + macro(_skylake_avx512) + list(APPEND _march_flag_list "skylake-avx512") + _skylake() + list(APPEND _available_extension_list "avx512bw" "avx512cd" "avx512dq" "avx512f" "avx512vl" "clwb" "pku") + endmacro() + macro(_cascadelake) + list(APPEND _march_flag_list "cascadelake") + _skylake_avx512() + list(APPEND _available_extension_list "avx512vnni") + endmacro() + macro(_cooperlake) + list(APPEND _march_flag_list "cooperlake") + _cascadelake() + list(APPEND _available_extension_list "avx512bf16") + endmacro() + macro(_cannonlake) + list(APPEND _march_flag_list "cannonlake") + _skylake() + list(APPEND _available_extension_list "avx512bw" "avx512cd" "avx512dq" "avx512f" "avx512ifma" "avx512vbmi" "avx512vl" "evex512" "pku" "sha") + endmacro() + macro(_icelake) + list(APPEND _march_flag_list "icelake-client") + _cannonlake() + list(APPEND _available_extension_list "avx512bitalg" "avx512vbmi2" 
"avx512vnni" "avx512vpopcntdq" "gfni" "rdpid" "vaes" "vpclmulqdq") + endmacro() + macro(_icelake_avx512) + list(APPEND _march_flag_list "icelake-server") + _icelake() + list(APPEND _available_extension_list "clwb" "pconfig" "wbnoinvd") + endmacro() + macro(_tigerlake) + list(APPEND _march_flag_list "tigerlake") + _icelake() + list(APPEND _available_extension_list "avx512vp2intersect" "clwb" "kl" "movdir64b" "movdiri" "widekl") + endmacro() + macro(_cometlake) + list(APPEND _march_flag_list "cometlake") + _tigerlake() + endmacro() + macro(_alderlake) + list(APPEND _march_flag_list "alderlake") + _skylake() + list(APPEND _available_extension_list "avxvnni" "cldemote" "clwb" "gfni" "hreset" "kl" "movdir64b" "movdiri" "pconfig" "pku" "ptwrite" "rdpid" "serialize" "sha" "vaes" "vpclmulqdq" "waitpkg" "widekl") + endmacro() + macro(_rocketlake) + list(APPEND _march_flag_list "rocketlake") + _skylake_avx512() + list(APPEND _available_extension_list "avx512bitalg" "avx512ifma" "avx512vbmi" "avx512vbmi2" "avx512vnni" "avx512vpopcntdq" "gfni" "rdpid" "sgx" "sha" "vaes" "vpclmulqdq") + endmacro() + macro(_raptorlake) + list(APPEND _march_flag_list "raptorlake") + _skylake() + list(APPEND _available_extension_list "avxvnni" "cldemote" "clwb" "gfni" "hreset" "kl" "movdir64b" "movdiri" "pconfig" "pku" "ptwrite" "rdpid" "serialize" "sha" "vaes" "vpclmulqdq" "waitpkg" "widekl") + endmacro() + macro(_meteorlake) + list(APPEND _march_flag_list "meteorlake") + _raptorlake() + endmacro() + macro(_arrowlake) + list(APPEND _march_flag_list "arrowlake") + _meteorlake() + list(APPEND _available_extension_list "avxifma" "avxneconvert" "avxvnniint8" "cmpccxadd" "uintr") + endmacro() + macro(_lunarlake) + list(APPEND _march_flag_list "lunarlake") + _arrowlake() + list(APPEND _available_extension_list "avxvnniint16" "sha512" "sm3" "sm4") + endmacro() + macro(_pantherlake) + list(APPEND _march_flag_list "pantherlake") + _lunarlake() + list(APPEND _available_extension_list "prefetchi") + 
endmacro() + macro(_sapphirerapids) + list(APPEND _march_flag_list "sapphirerapids") + _skylake_avx512() + list(APPEND _available_extension_list "amx-bf16" "amx-int8" "amx-tile" "avx512bf16" "avx512bitalg" "avx512fp16" "avx512ifma" "avx512vbmi" "avx512vbmi2" "avx512vnni" "avx512vpopcntdq" "avxvnni" "cldemote" "enqcmd" "gfni" "movdir64b" "movdiri" "pconfig" "ptwrite" "rdpid" "serialize" "sha" "tsxldtrk" "uintr" "vaes" "vpclmulqdq" "waitpkg" "wbnoinvd") + endmacro() + macro(_emeraldrapids) + list(APPEND _march_flag_list "emeraldrapids") + _sapphirerapids() + endmacro() + macro(_graniterapids) + list(APPEND _march_flag_list "graniterapids") + _emeraldrapids() + list(APPEND _available_extension_list "amx-fp16" "prefetchi") + endmacro() + macro(_sierraforest) + list(APPEND _march_flag_list "sierraforest") + _skylake() + list(APPEND _available_extension_list "avxifma" "avxneconvert" "avxvnni" "avxvnniint8" "cldemote" "clwb" "cmpccxadd" "enqcmd" "gfni" "hreset" "kl" "movdir64b" "movdiri" "pconfig" "pku" "ptwrite" "rdpid" "serialize" "sha" "uintr" "vaes" "vpclmulqdq" "waitpkg" "widekl") + endmacro() + macro(_clearwaterforest) + list(APPEND _march_flag_list "clearwaterforest") + _sierraforest() + list(APPEND _available_extension_list "avxvnniint16" "prefetchi" "sha512" "sm3" "sm4" "usermsr") + endmacro() + macro(_knightslanding) + list(APPEND _march_flag_list "knl") + _broadwell() + list(APPEND _available_extension_list "avx512f" "avx512pf" "avx512er" "avx512cd") + endmacro() + macro(_knightsmill) + list(APPEND _march_flag_list "knm") + _broadwell() + list(APPEND _available_extension_list "avx512f" "avx512pf" "avx512er" "avx512cd" "avx5124fmaps" "avx5124vnni" "avx512vpopcntdq") + endmacro() + macro(_silvermont) + list(APPEND _march_flag_list "silvermont") + _westmere() + list(APPEND _available_extension_list "rdrnd") + endmacro() + macro(_goldmont) + list(APPEND _march_flag_list "goldmont") + _silvermont() + list(APPEND _available_extension_list "rdseed") + endmacro() + 
macro(_goldmont_plus) + list(APPEND _march_flag_list "goldmont-plus") + _goldmont() + list(APPEND _available_extension_list "rdpid") + endmacro() + macro(_tremont) + list(APPEND _march_flag_list "tremont") + _goldmont_plus() + endmacro() + + # Define macros for AMD + macro(_k8) + list(APPEND _march_flag_list "k8") + list(APPEND _available_extension_list "mmx" "3dnow" "sse" "sse2") + endmacro() + macro(_k8_sse3) + list(APPEND _march_flag_list "k8-sse3") + _k8() + list(APPEND _available_extension_list "sse3") + endmacro() + macro(_barcelona) # amd10h + list(APPEND _march_flag_list "barcelona") + _k8_sse3() + list(APPEND _available_extension_list "sse4a" "abm") + endmacro() + macro(_amd14h) + list(APPEND _march_flag_list "btver1") + _barcelona() + list(APPEND _available_extension_list "cx16" "ssse3") + endmacro() + macro(_bulldozer) # amd15h + list(APPEND _march_flag_list "bdver1") + _amd14h() + list(APPEND _available_extension_list "sse4.1" "sse4.2" "avx" "xop" "fma4" "lwp" "aes" "pclmul") + endmacro() + macro(_piledriver) + list(APPEND _march_flag_list "bdver2") + _bulldozer() + list(APPEND _available_extension_list "fma" "f16c" "bmi" "tbm") + endmacro() + macro(_steamroller) + list(APPEND _march_flag_list "bdver3") + _piledriver() + list(APPEND _available_extension_list "fsgsbase") + endmacro() + macro(_excavator) + list(APPEND _march_flag_list "bdver4") + _steamroller() + list(APPEND _available_extension_list "bmi2" "avx2" "movbe") + endmacro() + macro(_amd16h) + list(APPEND _march_flag_list "btver2") + _amd14h() + list(APPEND _available_extension_list "movbe" "sse4.1" "sse4.2" "avx" "f16c" "bmi" "pclmul" "aes") + endmacro() + macro(_zen) + list(APPEND _march_flag_list "znver1") + _amd16h() + list(APPEND _available_extension_list "bmi2" "fma" "fsgsbase" "avx2" "adcx" "rdseed" "mwaitx" "sha" "clzero" "xsavec" "xsaves" "clflushopt" "popcnt") + endmacro() + macro(_zen2) + list(APPEND _march_flag_list "znver2") + _zen() + list(APPEND _available_extension_list "clwb" 
"rdpid" "wbnoinvd") + endmacro() + macro(_zen3) + list(APPEND _march_flag_list "znver3") + _zen2() + list(APPEND _available_extension_list "pku" "vpclmulqdq" "vaes") + endmacro() + macro(_zen4) + list(APPEND _march_flag_list "znver4") + _zen3() + list(APPEND _available_extension_list "avx512f" "avx512cd" "avx512vl" "avx512dq" "avx512bw" "avx512ifma" "avx512vbmi" "avx512vpopcntdq" "avx512bitalg" "avx512vbmi2" "avx512vnni" "avx512bf16") + endmacro() + + # Intel + if(TARGET_ARCHITECTURE STREQUAL "core" OR TARGET_ARCHITECTURE STREQUAL "core2") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "mmx" "sse" "sse2" "sse3") + elseif(TARGET_ARCHITECTURE STREQUAL "merom") + list(APPEND _march_flag_list "merom") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "mmx" "sse" "sse2" "sse3" "ssse3") + elseif(TARGET_ARCHITECTURE STREQUAL "penryn") + list(APPEND _march_flag_list "penryn") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "mmx" "sse" "sse2" "sse3" "ssse3") + message(STATUS "[OptimizeForArchitecture] Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.") + if(_cpu_flags MATCHES "sse4_1") + message(STATUS "[OptimizeForArchitecture] SSE4.1: enabled (auto-detected from this computer's CPU flags)") + list(APPEND _available_extension_list "sse4.1") + else() + message(STATUS "[OptimizeForArchitecture] SSE4.1: disabled (auto-detected from this computer's CPU flags)") + endif() + elseif(TARGET_ARCHITECTURE STREQUAL "knm") + _knightsmill() + elseif(TARGET_ARCHITECTURE STREQUAL "knl") + _knightslanding() + elseif(TARGET_ARCHITECTURE STREQUAL "pantherlake") + _pantherlake() + elseif(TARGET_ARCHITECTURE STREQUAL "lunarlake") + _lunarlake() + elseif(TARGET_ARCHITECTURE STREQUAL "arrowlake") + _arrowlake() + elseif(TARGET_ARCHITECTURE STREQUAL "meteorlake") + _meteorlake() + elseif(TARGET_ARCHITECTURE STREQUAL "raptorlake") + _raptorlake() + 
elseif(TARGET_ARCHITECTURE STREQUAL "rocketlake") + _rocketlake() + elseif(TARGET_ARCHITECTURE STREQUAL "clearwaterforest") + _clearwaterforest() + elseif(TARGET_ARCHITECTURE STREQUAL "sierraforest") + _sierraforest() + elseif(TARGET_ARCHITECTURE STREQUAL "graniterapids") + _graniterapids() + elseif(TARGET_ARCHITECTURE STREQUAL "emeraldrapids") + _emeraldrapids() + elseif(TARGET_ARCHITECTURE STREQUAL "sapphirerapids") + _sapphirerapids() + elseif(TARGET_ARCHITECTURE STREQUAL "alderlake") + _alderlake() + elseif(TARGET_ARCHITECTURE STREQUAL "cometlake") + _cometlake() + elseif(TARGET_ARCHITECTURE STREQUAL "tigerlake") + _tigerlake() + elseif(TARGET_ARCHITECTURE STREQUAL "icelake") + _icelake() + elseif(TARGET_ARCHITECTURE STREQUAL "icelake-xeon" OR TARGET_ARCHITECTURE STREQUAL "icelake-avx512") + _icelake_avx512() + elseif(TARGET_ARCHITECTURE STREQUAL "cannonlake") + _cannonlake() + elseif(TARGET_ARCHITECTURE STREQUAL "cooperlake") + _cooperlake() + elseif(TARGET_ARCHITECTURE STREQUAL "cascadelake") + _cascadelake() + elseif(TARGET_ARCHITECTURE STREQUAL "kabylake") + _skylake() + elseif(TARGET_ARCHITECTURE STREQUAL "skylake-xeon" OR TARGET_ARCHITECTURE STREQUAL "skylake-avx512") + _skylake_avx512() + elseif(TARGET_ARCHITECTURE STREQUAL "skylake") + _skylake() + elseif(TARGET_ARCHITECTURE STREQUAL "broadwell") + _broadwell() + elseif(TARGET_ARCHITECTURE STREQUAL "haswell") + _haswell() + elseif(TARGET_ARCHITECTURE STREQUAL "ivybridge") + _ivybridge() + elseif(TARGET_ARCHITECTURE STREQUAL "sandybridge") + _sandybridge() + elseif(TARGET_ARCHITECTURE STREQUAL "westmere") + _westmere() + elseif(TARGET_ARCHITECTURE STREQUAL "nehalem") + _nehalem() + elseif(TARGET_ARCHITECTURE STREQUAL "tremont") + _tremont() + elseif(TARGET_ARCHITECTURE STREQUAL "goldmont-plus") + _goldmont_plus() + elseif(TARGET_ARCHITECTURE STREQUAL "goldmont") + _goldmont() + elseif(TARGET_ARCHITECTURE STREQUAL "silvermont") + _silvermont() + elseif(TARGET_ARCHITECTURE STREQUAL "bonnell") + list(APPEND 
_march_flag_list "bonnell") + list(APPEND _march_flag_list "atom") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "sse" "sse2" "sse3" "ssse3") + elseif(TARGET_ARCHITECTURE STREQUAL "atom") + list(APPEND _march_flag_list "atom") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "sse" "sse2" "sse3" "ssse3") + + # AMD + elseif(TARGET_ARCHITECTURE STREQUAL "k8") + _k8() + elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3") + _k8_sse3() + elseif(TARGET_ARCHITECTURE STREQUAL "barcelona" OR + TARGET_ARCHITECTURE STREQUAL "istanbul" OR + TARGET_ARCHITECTURE STREQUAL "magny-cours") + _barcelona() + elseif(TARGET_ARCHITECTURE STREQUAL "amd14h") + _amd14h() + elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer" OR + TARGET_ARCHITECTURE STREQUAL "interlagos") + _bulldozer() + elseif(TARGET_ARCHITECTURE STREQUAL "piledriver") + _piledriver() + elseif(TARGET_ARCHITECTURE STREQUAL "steamroller") + _steamroller() + elseif(TARGET_ARCHITECTURE STREQUAL "excavator") + _excavator() + elseif(TARGET_ARCHITECTURE STREQUAL "amd16h") + _amd16h() + elseif(TARGET_ARCHITECTURE STREQUAL "zen") + _zen() + elseif(TARGET_ARCHITECTURE STREQUAL "zen2") + _zen2() + elseif(TARGET_ARCHITECTURE STREQUAL "zen3") + _zen3() + elseif(TARGET_ARCHITECTURE STREQUAL "zen4") + _zen4() + + # Others + elseif(TARGET_ARCHITECTURE STREQUAL "generic") + list(APPEND _march_flag_list "generic") + list(APPEND _available_extension_list "sse") + list(APPEND _available_extension_list "sse2") + elseif(TARGET_ARCHITECTURE STREQUAL "none") + # add this clause to remove it from the else clause + + else() + message(FATAL_ERROR "[OptimizeForArchitecture] Unknown target architecture: \"${TARGET_ARCHITECTURE}\".
Please set TARGET_ARCHITECTURE to a supported value.") + endif() + + # Clean list of available extensions + list(SORT _available_extension_list) + list(REMOVE_DUPLICATES _available_extension_list) + + if(OFA_VERBOSE) + if(_march_flag_list) + string(REPLACE ";" ", " _str "${_march_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] CPU architectures: " ${_str}) + endif() + if(_available_extension_list) + list(LENGTH _available_extension_list _len) + string(REPLACE ";" ", " _str "${_available_extension_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} available): ${_str}") + endif() + endif() + + set(_check_extension_list) + set(_check_extension_flag_list) + set(_disable_extension_flag_list) + set(_enable_extension_flag_list) + set(_ignore_extension_flag_list) + + # Set compiler-specific option names + if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + set(_enable_flag "/arch:") + unset(_disable_flag) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "SunPro") + set(_enable_flag "-xarch=") + unset(_disable_flag) + else() + set(_enable_flag "-m") + set(_disable_flag "-mno-") + endif() + + # Step 2: Enable/disable feature flags based on available CPU + # features, user-defined USE_ variables and + # the capabilities of the host system's compiler and linker + file(READ ${ALICEVISION_ROOT}/../src/cmake/OFA/CheckX86.txt _checks) + string(REGEX REPLACE "[:;]" "|" _checks "${_checks}") + string(REPLACE "\n" ";" _checks "${_checks}") + + set(_skip_check FALSE) + + # Iterate over the list of checks line by line + foreach (_check ${_checks}) + string(REPLACE "|" ";" _check "${_check}") + + # Parse for special lines + if ("${_check}" MATCHES "^#" ) # Skip comment + continue() + + elseif ("${_check}" MATCHES "^push_enable" ) # Start enable block + list(GET _check 1 _push_enable_list) + string(REPLACE "," ";" _push_enable_list "${_push_enable_list}") + _ofa_find(_push_enable_list "${CMAKE_CXX_COMPILER_ID}" _found) +
if(_found) + list(INSERT _skip_check 0 FALSE) + else() + list(INSERT _skip_check 0 TRUE) + endif() + continue() + + elseif ("${_check}" MATCHES "^pop_enable" ) # End enable block + list(REMOVE_AT _skip_check 0) + continue() + + elseif ("${_check}" MATCHES "^push_disable" ) # Start disable block + list(GET _check 1 _push_disable_list) + string(REPLACE "," ";" _push_disable_list "${_push_disable_list}") + _ofa_find(_push_disable_list "${CMAKE_CXX_COMPILER_ID}" _found) + if(_found) + list(INSERT _skip_check 0 TRUE) + else() + # Compiler was not found in the list, so we keep its previous status + list(GET _skip_check 0 _skip) + list(INSERT _skip_check 0 ${_skip}) + endif() + continue() + + elseif ("${_check}" MATCHES "^pop_disable" ) # End disable block + list(REMOVE_AT _skip_check 0) + continue() + endif() + + # Skip test? + list(GET _skip_check 0 _skip) + if(_skip) + continue() + endif() + + # Extract extra CPU extensions, header files, function name, and parameters + list(GET _check 0 _check_extension_flags) + list(GET _check 1 _check_headers) + list(GET _check 2 _check_function) + list(GET _check 3 _check_params) + + # Convert list of extensions into compiler flags + string(REPLACE "," ";" _check_extension_flags "${_check_extension_flags}") + list(GET _check_extension_flags 0 _extension_flag) + list(APPEND _check_extension_flag_list "${_extension_flag}") + string(REPLACE ";" " ${_enable_flag}" _check_extra_flags " ${_enable_flag}${_check_extension_flags}") + + # Extract optional extension alias + list(LENGTH _check _len) + if(${_len} EQUAL 5) + list(GET _check 4 _extension) + else() + set(_extension "${_extension_flag}") + endif() + + list(APPEND _check_extension_list "${_extension}") + + # Define USE_<_extension_flag> variable + set(_useVar "USE_${_extension_flag}") + string(TOUPPER "${_useVar}" _useVar) + string(REPLACE "[-.+/:= ]" "_" _useVar "${_useVar}") + + # If not specified externally, set the value of the + # USE_<_extension_flag> variable to TRUE if it is 
found in the list + # of available extensions and FALSE otherwise + if(NOT DEFINED ${_useVar}) + _ofa_find(_available_extension_list "${_extension}" _found) + set(${_useVar} ${_found}) + endif() + + if(${_useVar}) + # Check if the compiler supports the -m<_extension_flag> + # flag and can compile the provided test code with it + set(_code "\nint main() { ${_check_function}(${_check_params})\; return 0\; }") + AddCXXCompilerFlag("${_enable_flag}${_extension_flag}" + EXTRA_FLAGS ${_check_extra_flags} + HEADERS ${_check_headers} + CODE "${_code}" + RESULT _ok) + if(NOT ${_ok}) + # Test failed + set(${_useVar} FALSE CACHE BOOL "Use ${_extension} extension.") + else() + # Test succeeded + set(${_useVar} TRUE CACHE BOOL "Use ${_extension} extension.") + endif() + else() + # Disable extension without running tests + set(${_useVar} FALSE CACHE BOOL "Use ${_extension} extension.") + endif() + mark_as_advanced(${_useVar}) + endforeach() + + # Generate lists of enabled/disabled flags + list(REMOVE_DUPLICATES _check_extension_flag_list) + foreach(_extension_flag ${_check_extension_flag_list}) + _ofa_find(_available_extension_list "${_extension_flag}" _found) + set(_useVar "USE_${_extension_flag}") + string(TOUPPER "${_useVar}" _useVar) + string(REPLACE "[-.+/:= ]" "_" _useVar "${_useVar}") + + if(${_useVar}) + # Add <_extension_flag> to list of enabled extensions (if supported) + set(_haveVar "HAVE_${_enable_flag}${_extension_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _haveVar "${_haveVar}") + if(NOT ${_haveVar}) + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Ignoring flag ${_enable_flag}${_extension_flag} because checks failed") + endif() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + continue() + endif() + list(APPEND _enable_extension_flag_list "${_extension_flag}") + elseif(DEFINED _disable_flag) + # Add <_extension_flag> to list of disabled extensions (if supported) + AddCXXCompilerFlag("${_disable_flag}${_extension_flag}") + 
set(_haveVar "HAVE_${_disable_flag}${_extension_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _haveVar "${_haveVar}") + if(NOT ${_haveVar}) + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Ignoring flag ${_disable_flag}${_extension_flag} because checks failed") + endif() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + continue() + endif() + list(APPEND _disable_extension_flag_list "${_extension_flag}") + else() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + endif() + endforeach() + + if(OFA_VERBOSE) + # Print checked extension flags + if(_check_extension_flag_list) + list(LENGTH _check_extension_flag_list _len) + list(SORT _check_extension_flag_list) + string(REPLACE ";" ", " _str "${_check_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} checked): ${_str}") + endif() + # Print enabled extension flags + if(_enable_extension_flag_list) + list(LENGTH _enable_extension_flag_list _len) + list(SORT _enable_extension_flag_list) + string(REPLACE ";" ", " _str "${_enable_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} enabled): ${_str}") + endif() + # Print disabled extension flags + if(_disable_extension_flag_list) + list(LENGTH _disable_extension_flag_list _len) + list(SORT _disable_extension_flag_list) + string(REPLACE ";" ", " _str "${_disable_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} disabled): ${_str}") + endif() + # Print ignored extension flags + if(_ignore_extension_flag_list) + list(LENGTH _ignore_extension_flag_list _len) + list(SORT _ignore_extension_flag_list) + string(REPLACE ";" ", " _str "${_ignore_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} ignored): ${_str}") + endif() + # Print unhandled extension flags + 
set(_unhandled_extension_list) + foreach(_extension ${_available_extension_list}) + _ofa_find(_check_extension_list "${_extension}" _found) + if(NOT _found) + list(APPEND _unhandled_extension_list ${_extension}) + endif() + endforeach() + if(_unhandled_extension_list) + list(LENGTH _unhandled_extension_list _len) + list(SORT _unhandled_extension_list) + string(REPLACE ";" ", " _str "${_unhandled_extension_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} unhandled): ${_str}") + endif() + endif() + + # Step 3: Set compiler-specific flags (e.g., -m/-mno-) + if(MSVC AND MSVC_VERSION GREATER 1700) + _ofa_find(_enable_extension_flag_list "avx512f" _found) + if(_found) + AddCXXCompilerFlag("/arch:AVX512" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _found) + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "avx2" _found) + if(_found) + AddCXXCompilerFlag("/arch:AVX2" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _found) + endif() + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "avx" _found) + if(_found) + AddCXXCompilerFlag("/arch:AVX" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _found) + endif() + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "sse2" _found) + if(_found) + AddCXXCompilerFlag("/arch:SSE2" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "sse" _found) + if(_found) + AddCXXCompilerFlag("/arch:SSE" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + endif() + foreach(_extension ${_enable_extension_flag_list}) + string(TOUPPER "${_extension}" _extension) + # REGEX REPLACE so that characters such as '.' and '-' become '_' and the macro name is a valid identifier + string(REGEX REPLACE "[-.+/:= ]" "_" _extension "__${_extension}__") + add_definitions("-D${_extension}") + endforeach(_extension) + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel" + OR CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") + + if(WIN32) + # Intel (on Windows) + set(OFA_map_knl "-QxKNL;-QxMIC-AVX512") + set(OFA_map_knm "-QxKNM;-QxMIC-AVX512") + set(OFA_map_clearwaterforest
"-QxCLEARWATERFOREST;-QxCORE-AVX512") + set(OFA_map_sierraforest "-QxSIERRAFOREST;-QxCORE-AVX512") + set(OFA_map_diamondrapids "-QxDIAMONDRAPIDS;-QxCORE-AVX512") + set(OFA_map_graniterapids "-QxGRANITERAPIDS;-QxCORE-AVX512") + set(OFA_map_emeraldrapids "-QxEMERALDRAPIDS;-QxCORE-AVX512") + set(OFA_map_sapphirerapids "-QxSAPPHIRERAPIDS;-QxCORE-AVX512") + set(OFA_map_pantherlake "-QxPANTHERLAKE;-QxCORE-AVX512") + set(OFA_map_lunarlake "-QxLUNARLAKE;-QxCORE-AVX512") + set(OFA_map_arrowlake "-QxARROWLAKE;-QxCORE-AVX512") + set(OFA_map_meteorlake "-QxMETEORLAKE;-QxCORE-AVX512") + set(OFA_map_raptorlake "-QxRAPTORLAKE;-QxCORE-AVX512") + set(OFA_map_alderlake "-QxALDERLAKE;-QxCORE-AVX512") + set(OFA_map_rocketlake "-QxROCKETLAKE;-QxCORE-AVX512") + set(OFA_map_tigerlake "-QxTIGERLAKE;-QxCORE-AVX512") + set(OFA_map_icelake-server "-QxICELAKE-SERVER;-QxCORE-AVX512") + set(OFA_map_icelake-avx512 "-QxICELAKE-SERVER;-QxCORE-AVX512") + set(OFA_map_icelake-client "-QxICELAKE-CLIENT;-QxCORE-AVX512") + set(OFA_map_icelake "-QxICELAKE-CLIENT;-QxCORE-AVX512") + set(OFA_map_cannonlake "-QxCANNONLAKE;-QxCORE-AVX512") + set(OFA_map_cooperlake "-QxCOOPERLAKE;-QxCORE-AVX512") + set(OFA_map_cascadelake "-QxCASCADELAKE;-QxCORE-AVX512") + set(OFA_map_skylake-avx512 "-QxSKYLAKE-AVX512;-QxCORE-AVX512") + set(OFA_map_coffeelake "-QxCOFFEELAKE;-QxCORE-AVX2") + set(OFA_map_kabylake "-QxKABYLAKE;-QxCORE-AVX2") + set(OFA_map_amberlake "-QxAMBERLAKE;-QxCORE-AVX2") + set(OFA_map_skylake "-QxSKYLAKE;-QxCORE-AVX2") + set(OFA_map_broadwell "-QxBROADWELL;-QxCORE-AVX2") + set(OFA_map_haswell "-QxHASWELL;-QxCORE-AVX2") + set(OFA_map_ivybridge "-QxIVYBRIDGE;-QxCORE-AVX-I") + set(OFA_map_sandybridge "-QxSANDYBRIDGE;-QxAVX") + set(OFA_map_westmere "-QxSSE4.2") + set(OFA_map_nehalem "-QxSSE4.2") + set(OFA_map_penryn "-QxSSSE3") + set(OFA_map_merom "-QxSSSE3") + set(OFA_map_core2 "-QxSSE3") + set(OFA_map_tremont "-QxTREMONT") + set(OFA_map_goldmont-plus "-QxGOLDMONT-PLUS") + set(OFA_map_goldmont "-QxGOLDMONT") +
set(OFA_map_silvermont "-QxSILVERMONT") + set(_ok FALSE) + else() + # Intel (in Linux) + set(OFA_map_knl "-xKNL;-xMIC-AVX512") + set(OFA_map_knm "-xKNM;-xMIC-AVX512") + set(OFA_map_clearwaterforest "-xCLEARWATERFOREST;-xCORE-AVX512") + set(OFA_map_sierraforest "-xSIERRAFOREST;-xCORE-AVX512") + set(OFA_map_diamondrapids "-xDIAMONDRAPIDS;-xCORE-AVX512") + set(OFA_map_graniterapids "-xGRANITERAPIDS;-xCORE-AVX512") + set(OFA_map_emeraldrapids "-xEMERALDRAPIDS;-xCORE-AVX512") + set(OFA_map_sapphirerapids "-xSAPPHIRERAPIDS;-xCORE-AVX512") + set(OFA_map_pantherlake "-xPANTHERLAKE;-xCORE-AVX512") + set(OFA_map_lunarlake "-xLUNARLAKE;-xCORE-AVX512") + set(OFA_map_arrowlake "-xARROWLAKE;-xCORE-AVX512") + set(OFA_map_meteorlake "-xMETEORLAKE;-xCORE-AVX512") + set(OFA_map_raptorlake "-xRAPTORLAKE;-xCORE-AVX512") + set(OFA_map_alderlake "-xALDERLAKE;-xCORE-AVX512") + set(OFA_map_rocketlake "-xROCKETLAKE;-xCORE-AVX512") + set(OFA_map_tigerlake "-xTIGERLAKE;-xCORE-AVX512") + set(OFA_map_icelake-server "-xICELAKE-SERVER;-xCORE-AVX512") + set(OFA_map_icelake-avx512 "-xICELAKE-SERVER;-xCORE-AVX512") + set(OFA_map_icelake-client "-xICELAKE-CLIENT;-xCORE-AVX512") + set(OFA_map_icelake "-xICELAKE-CLIENT;-xCORE-AVX512") + set(OFA_map_cannonlake "-xCANNONLAKE;-xCORE-AVX512") + set(OFA_map_cooperlake "-xCOOPERLAKE;-xCORE-AVX512") + set(OFA_map_cascadelake "-xCASCADELAKE;-xCORE-AVX512") + set(OFA_map_skylake-avx512 "-xSKYLAKE-AVX512;-xCORE-AVX512") + set(OFA_map_coffeelake "-xCOFFEELAKE;-xCORE-AVX2") + set(OFA_map_kabylake "-xKABYLAKE;-xCORE-AVX2") + set(OFA_map_amberlake "-xAMBERLAKE;-xCORE-AVX2") + set(OFA_map_skylake "-xSKYLAKE;-xCORE-AVX2") + set(OFA_map_broadwell "-xBROADWELL;-xCORE-AVX2") + set(OFA_map_haswell "-xHASWELL;-xCORE-AVX2") + set(OFA_map_ivybridge "-xIVYBRIDGE;-xCORE-AVX-I") + set(OFA_map_sandybridge "-xSANDYBRIDGE;-xAVX") + set(OFA_map_westmere "-xSSE4.2") + set(OFA_map_nehalem "-xSSE4.2") + set(OFA_map_penryn "-xSSSE3") + set(OFA_map_merom "-xSSSE3") + set(OFA_map_core2 
"-xSSE3") + set(OFA_map_tremont "-xTREMONT") + set(OFA_map_goldmont-plus "-xGOLDMONT-PLUS") + set(OFA_map_goldmont "-xGOLDMONT") + set(OFA_map_silvermont "-xSILVERMONT") + set(_ok FALSE) + endif() + + foreach(_arch ${_march_flag_list}) + if(DEFINED OFA_map_${_arch}) + foreach(_flag ${OFA_map_${_arch}}) + AddCXXCompilerFlag(${_flag} FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + if(_ok) + break() + endif() + endforeach() + if(_ok) + break() + endif() + endif() + endforeach() + if(NOT _ok) + # This is the Intel compiler, so SSE2 is a very reasonable baseline. + # The loop variable _arch is no longer set here, so report the requested target instead. + message(STATUS "[OptimizeForArchitecture] Did not recognize the requested architecture flag ${TARGET_ARCHITECTURE}, falling back to SSE2") + if(WIN32) + AddCXXCompilerFlag("-QxSSE2" FLAGS ARCHITECTURE_CXX_FLAGS) + else() + AddCXXCompilerFlag("-xSSE2" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + endif() + + # Set -m<_extension> flag for enabled features + foreach(_extension ${_enable_extension_flag_list}) + AddCXXCompilerFlag("${_enable_flag}${_extension}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_extension) + + # Set -mno-<_extension> flag for disabled features + if(DEFINED _disable_flag) + foreach(_extension ${_disable_extension_flag_list}) + AddCXXCompilerFlag("${_disable_flag}${_extension}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_extension) + endif() + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "SunPro") + + # Set -xtarget flag + foreach(_flag ${_march_flag_list}) + AddCXXCompilerFlag("-xtarget=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good) + if(_good) + break() + endif(_good) + endforeach(_flag) + + # Set -xarch= flag for enabled features + foreach(_flag ${_enable_extension_flag_list}) + AddCXXCompilerFlag("-xarch=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_flag) + + # TODO PGI/Cray ...
+ + else() + # Others: GNU, Clang and variants + + # Set -march flag + foreach(_flag ${_march_flag_list}) + AddCXXCompilerFlag("-march=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good) + if(_good) + break() + endif(_good) + endforeach(_flag) + + # Set -m flag for enabled features + foreach(_flag ${_enable_extension_flag_list}) + AddCXXCompilerFlag("-m${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_flag) + + # Set -mno-feature flag for disabled features + foreach(_flag ${_disable_extension_flag_list}) + AddCXXCompilerFlag("-mno-${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_flag) + endif() + endif() + + # Compile code with profiling instrumentation + if(TARGET_PROFILER STREQUAL "gprof") + AddCXXCompilerFlag("-pg" FLAGS ARCHITECTURE_CXX_FLAGS) + elseif(TARGET_PROFILER STREQUAL "vtune") + if (CMAKE_CXX_COMPILER_ID MATCHES "Intel") + # Need to check if this also works on Windows + AddCXXCompilerFlag("-g" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-debug inline-debug-info" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-D TBB_USE_THREADING_TOOLS" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-parallel-source-info=2" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-gline-tables-only" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-fdebug-info-for-profiling" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-Xsprofile" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + endif() + + # Remove duplicate flags + list(REMOVE_DUPLICATES ARCHITECTURE_CXX_FLAGS) + + if(OFA_VERBOSE) + string(REPLACE ";" ", " _str "${ARCHITECTURE_CXX_FLAGS}") + message(STATUS "[OptimizeForArchitecture] ARCHITECTURE_CXX_FLAGS: " ${_str}) + endif() + +endmacro(OFA_HandleX86Options) diff --git a/src/cmake/OFA/License.txt b/src/cmake/OFA/License.txt new file mode 100644 index 0000000000..a612ad9813 --- /dev/null +++ b/src/cmake/OFA/License.txt @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. 
Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. 
"Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. 
Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. 
Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. 
Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. 
However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. 
* +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. 
+Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. 
If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/src/cmake/OFA/OptimizeForArchitecture.cmake b/src/cmake/OFA/OptimizeForArchitecture.cmake new file mode 100644 index 0000000000..81fa9935bb --- /dev/null +++ b/src/cmake/OFA/OptimizeForArchitecture.cmake @@ -0,0 +1,159 @@ +# Determine the host CPU feature set and determine the best set of compiler +# flags to enable all supported SIMD relevant features. Alternatively, the +# target CPU can be explicitly selected (for generating more generic binaries +# or for targeting a different system). +# Compilers provide e.g. the -march=native flag to achieve a similar result. +# This fails to address the need for building for a different microarchitecture +# than the current host. +# The script tries to deduce all settings from the model and family numbers of +# the CPU instead of reading the CPUID flags from e.g. /proc/cpuinfo. This makes +# the detection more independent from the CPUID code in the kernel (e.g. avx2 is +# not listed on older kernels). 
+# +# Usage: +# OptimizeForArchitecture() +# +# Optional inputs: +# TARGET_ARCHITECTURE=<target> specifies the target architecture (default=auto) +# TARGET_PROFILER=<profiler> specifies the target profiler (default=none) +# OFA_VERBOSE=<bool> prints verbose output (default=off) +# +# If any of the <feature>_broken flags are defined and set to true, +# the OptimizeForArchitecture macro will consequently disable the +# relevant features via compiler flags. +# +# Output: +# ARCHITECTURE_CXX_FLAGS compiler flags optimized for the target architecture +# +# Internal variables: +# USE_<feature> boolean variable holding the status of <feature> +# HAVE_<feature> boolean variable holding the compiler's capability + +#============================================================================= +# Copyright 2010-2016 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the names of contributing organizations nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#============================================================================= + +#============================================================================= +# Extension of the original version by Matthias Moller +# +# Changelog: +# - Update of CPUIDs for latest Intel and AMD processors +# - Added support for PPC64 (Clang, GCC, IBM XLC) +# - Added Support for ARM (Clang, GCC, ARM Clang, Cray, Fujitsu) +# - Restructuring and splitting into multiple files +#============================================================================= + +#============================================================================= +# Autodetection of CPU +#=============================================================================
+
+# OFA_AutodetectHostArchitecture()
+#
+# Chooses TARGET_ARCHITECTURE for the machine the binaries are built for. The
+# value starts out as "none" and is then refined:
+#   * macOS universal binary (CMAKE_OSX_ARCHITECTURES is "arm64;x86_64" or
+#     "x86_64;arm64"): stays "none"; only a status message is printed.
+#   * Apple build whose CMAKE_OSX_ARCHITECTURES does not match
+#     CMAKE_SYSTEM_PROCESSOR (treated as a cross-compilation): a fixed baseline,
+#     "skylake" for x86_64 and "apple-m1" for arm64.
+#   * otherwise: delegates to OFA_AutodetectX86 / OFA_AutodetectArm /
+#     OFA_AutodetectPpc according to CMAKE_SYSTEM_PROCESSOR.
+#
+# Also makes sure the ARCHITECTURE_CXX_FLAGS cache entry exists.
+#
+# This is a macro: everything it sets lands in the caller's scope. For the same
+# reason it must never call return(), which would leave the scope of whoever
+# invoked the macro (the calling file or function) rather than the macro itself.
+macro(OFA_AutodetectHostArchitecture)
+
+  set(TARGET_ARCHITECTURE "none")
+  set(ARCHITECTURE_CXX_FLAGS CACHE STRING "CPU architecture compiler flags")
+
+  if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64;x86_64" OR "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "x86_64;arm64")
+    # Universal binary: a single set of -march style flags cannot serve both
+    # slices, so TARGET_ARCHITECTURE is left at "none". The remaining branches
+    # are skipped through the if/elseif chain instead of return().
+    message(STATUS "[OptimizeForArchitecture] CMAKE_OSX_ARCHITECTURES indicates the project will be compiled into a universal binary. Optimization via compiler flags is unsupported and will not be performed. This should usually only be done if a redistributable bundle is required.\n\t--> If building only for the current host machine, consider specifying CMAKE_OSX_ARCHITECTURES via the command line to enable optimizations.")
+  elseif(APPLE AND NOT "${CMAKE_OSX_ARCHITECTURES}" MATCHES "${CMAKE_SYSTEM_PROCESSOR}")
+    message(STATUS "[OptimizeForArchitecture] CMAKE_OSX_ARCHITECTURES indicates the project will be cross-compiled from ${CMAKE_HOST_SYSTEM_PROCESSOR} to ${CMAKE_OSX_ARCHITECTURES}.")
+    # With policy CMP0126 set to NEW (the project requires CMake 3.25),
+    # set(... CACHE ... FORCE) leaves the normal variable assigned above
+    # untouched, and that normal variable is what ${TARGET_ARCHITECTURE} reads.
+    # The baseline is therefore mirrored into the normal variable as well.
+    if("${CMAKE_OSX_ARCHITECTURES}" MATCHES "x86_64")
+      set(TARGET_ARCHITECTURE "skylake" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Setting the value to \"native\" bypasses all checks and uses \"-march=native\" or the compiler equivalent flag. Other supported values are: \"none\", \"generic\", \"core\", \"core2\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandybridge\", \"ivybridge\", \"haswell\", \"broadwell\", \"skylake\", \"skylake-xeon\", \"kabylake\", \"cannonlake\", \"cascadelake\", \"cooperlake\", \"icelake\", \"icelake-xeon\", \"tigerlake\", \"alderlake\", \"sapphirerapids\", \"rocketlake\", \"raptorlake\", \"bonnell\", \"silvermont\", \"goldmont\", \"goldmont-plus\", \"tremont\", \"knl\" (Knights Landing), \"knm\" (Knights Mill), \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \"steamroller\", \"excavator\", \"amd14h\", \"amd16h\", \"zen\", \"zen2\", \"zen3\", \"zen4\"." FORCE)
+      set(TARGET_ARCHITECTURE "skylake")
+    elseif("${CMAKE_OSX_ARCHITECTURES}" MATCHES "arm64")
+      set(TARGET_ARCHITECTURE "apple-m1" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Setting the value to \"native\" bypasses all checks and uses \"-march=native\" or the compiler equivalent flag. Other supported values are: \"none\", \"generic\", \"a64fx\", \"apple-a6\", \"apple-a7\", \"apple-a8\", \"apple-a9\", \"apple-a10\", \"apple-a11\", \"apple-a12\", \"apple-a13\", \"apple-a14\", \"apple-a15\", \"apple-a16\", \"apple-m1\", \"apple-m2\", \"apple-m3\", \"apple-m4\", \"arm1020e\", \"arm1020t\", \"arm1022e\", \"arm1026ej-s\", \"arm10e\", \"arm10tdmi\", \"arm1136j-s\", \"arm1136jf-s\", \"arm1156t2-s\", \"arm1156t2f-s\", \"arm1176jz-s\", \"arm1176jzf-s\", \"arm710t\", \"arm720t\", \"arm740t\", \"arm7tdmi-s\", \"arm7tdmi\", \"arm810\", \"arm8\", \"arm920\", \"arm920t\", \"arm922t\", \"arm926ej-s\", \"arm940t\", \"arm946e-s\", \"arm966e-s\", \"arm968e-s\", \"arm9\", \"arm9e\", \"arm9tdmi\", \"brahma-b15\", \"brahma-b53\", \"carmel\", \"cortex-a7\", \"cortex-a8\", \"cortex-a9\", \"cortex-a12\", \"cortex-a15.cortex-a7\", \"cortex-a15\", \"cortex-a17.cortex-a7\", \"cortex-a17\", \"cortex-a32\", \"cortex-a34\", \"cortex-a35\", \"cortex-a53\", \"cortex-a55\", \"cortex-a57.cortext-a53\", \"cortex-a57\", \"cortex-a5\", \"cortex-a72.cortext-a53\", \"cortex-a72\", \"cortex-a73.cortext-a35\", \"cortex-a73.cortext-a53\", \"cortex-a73\", \"cortex-a75.cortext-a55\", \"cortex-a75\", \"cortex-a76.cortext-a55\", \"cortex-a76\", \"cortex-a76ae\", \"cortex-a77\", \"cortex-a78\", \"cortex-a78ae\", \"cortex-a76c\", \"cortex-a510\", \"cortex-a710\", \"cortex-m0\", \"cortex-m0plus\", \"cortex-m1\", \"cortex-m23\", \"cortex-m33\", \"cortex-m35p\", \"cortex-m3\", \"cortex-m4\", \"cortex-m55\", \"cortex-m7\", \"cortex-r4\", \"cortex-r4f\", \"cortex-r52\", \"cortex-r5\", \"cortex-r7\", \"cortex-r8\", \"cortex-x1\", \"cortex-x2\", \"denver2\", \"denver\", \"exynos-m1\", \"fa526\", \"fa606te\", \"fa626\", \"fa626te\", \"fa726te\", \"falkor\", \"fmp626\", \"generic-armv7-a\", \"i80200\", \"i80321-400-b0\", \"i80321-400\", \"i80321-600-b0\", \"i80321-600\", \"ipx1200\", \"ipx425-266\", \"ipx425-400\", \"ipx425-533\", \"iwmmxt2\", \"iwmmxt\", \"krait\", \"kryo2\", \"kryo\", \"marvell-f\", \"marvell-pj4\", \"mpcore\", \"neoverse-e1\", \"neoverse-n1\", \"neoverse-n2\", \"neoverse-v1\", \"pxa210a\", \"pxa210b\", \"pxa210c\", \"pxa250a\", \"pxa250b\", \"pxa250c\", \"pxa27x\", \"pxa30x\", \"pxa31x\", \"pxa32x\", \"pxa930\", \"sa1110\", \"saphira\", \"scorpion\", \"strongarm1100\", \"strongarm110\", \"strongarm\", \"thunderx2\", \"thunderx2t99\", \"thunderx\", \"thunderxt81\", \"thunderxt83\", \"thunderxt88\", \"tsv110\", \"xgene1\", \"xscale\"." FORCE)
+      set(TARGET_ARCHITECTURE "apple-m1")
+    endif()
+    # Announce the baseline for both supported targets, but only when one was
+    # actually selected.
+    if(NOT "${TARGET_ARCHITECTURE}" STREQUAL "none")
+      message(STATUS "[OptimizeForArchitecture] Set baseline architecture for cross-compiling on Darwin to ${TARGET_ARCHITECTURE}. You can overwrite this value by setting TARGET_ARCHITECTURE on the CLI. To disable optimization set TARGET_ARCHITECTURE=none (note that this will significantly slow the down the resulting binaries and should only be done if targeting a redistributable build).")
+    endif()
+  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "i686.*|i386.*|x86.*|amd64.*|x86_64.*|AMD64.*")
+    include(OFA/AutodetectX86)
+    OFA_AutodetectX86()
+  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(arm.*|ARM.*|aarch64.*|AARCH64.*)")
+    include(OFA/AutodetectArm)
+    OFA_AutodetectArm()
+  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(powerpc|ppc)64.*")
+    include(OFA/AutodetectPpc)
+    OFA_AutodetectPpc()
+  else()
+    message(WARNING "[OptimizeForArchitecture] The CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture")
+  endif()
+endmacro()
+
+#============================================================================= +# Handling of CPU options +#============================================================================= + +macro(OptimizeForArchitecture) + if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "i686.*|i386.*|x86.*|amd64.*|x86_64.*|AMD64.*") + set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Setting the value to \"native\" bypasses all checks and uses \"-march=native\" or the compiler equivalent flag.
Other supported values are: \"none\", \"generic\", \"core\", \"core2\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandybridge\", \"ivybridge\", \"haswell\", \"broadwell\", \"skylake\", \"skylake-xeon\", \"kabylake\", \"cannonlake\", \"cascadelake\", \"cooperlake\", \"icelake\", \"icelake-xeon\", \"tigerlake\", \"alderlake\", \"sapphirerapids\", \"rocketlake\", \"raptorlake\", \"bonnell\", \"silvermont\", \"goldmont\", \"goldmont-plus\", \"tremont\", \"knl\" (Knights Landing), \"knm\" (Knights Mill), \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \"steamroller\", \"excavator\", \"amd14h\", \"amd16h\", \"zen\", \"zen2\", \"zen3\", \"zen4\"." ) + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(arm.*|ARM.*|aarch64.*|AARCH64.*)") + set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Setting the value to \"native\" bypasses all checks and uses \"-march=native\" or the compiler equivalent flag. 
Other supported values are: \"none\", \"generic\", \"a64fx\", \"apple-a6\", \"apple-a7\", \"apple-a8\", \"apple-a9\", \"apple-a10\", \"apple-a11\", \"apple-a12\", \"apple-a13\", \"apple-a14\", \"apple-a15\", \"apple-a16\", \"apple-m1\", \"apple-m2\", \"apple-m3\", \"apple-m4\", \"arm1020e\", \"arm1020t\", \"arm1022e\", \"arm1026ej-s\", \"arm10e\", \"arm10tdmi\", \"arm1136j-s\", \"arm1136jf-s\", \"arm1156t2-s\", \"arm1156t2f-s\", \"arm1176jz-s\", \"arm1176jzf-s\", \"arm710t\", \"arm720t\", \"arm740t\", \"arm7tdmi-s\", \"arm7tdmi\", \"arm810\", \"arm8\", \"arm920\", \"arm920t\", \"arm922t\", \"arm926ej-s\", \"arm940t\", \"arm946e-s\", \"arm966e-s\", \"arm968e-s\", \"arm9\", \"arm9e\", \"arm9tdmi\", \"brahma-b15\", \"brahma-b53\", \"carmel\", \"cortex-a7\", \"cortex-a8\", \"cortex-a9\", \"cortex-a12\", \"cortex-a15.cortex-a7\", \"cortex-a15\", \"cortex-a17.cortex-a7\", \"cortex-a17\", \"cortex-a32\", \"cortex-a34\", \"cortex-a35\", \"cortex-a53\", \"cortex-a55\", \"cortex-a57.cortext-a53\", \"cortex-a57\", \"cortex-a5\", \"cortex-a72.cortext-a53\", \"cortex-a72\", \"cortex-a73.cortext-a35\", \"cortex-a73.cortext-a53\", \"cortex-a73\", \"cortex-a75.cortext-a55\", \"cortex-a75\", \"cortex-a76.cortext-a55\", \"cortex-a76\", \"cortex-a76ae\", \"cortex-a77\", \"cortex-a78\", \"cortex-a78ae\", \"cortex-a76c\", \"cortex-a510\", \"cortex-a710\", \"cortex-m0\", \"cortex-m0plus\", \"cortex-m1\", \"cortex-m23\", \"cortex-m33\", \"cortex-m35p\", \"cortex-m3\", \"cortex-m4\", \"cortex-m55\", \"cortex-m7\", \"cortex-r4\", \"cortex-r4f\", \"cortex-r52\", \"cortex-r5\", \"cortex-r7\", \"cortex-r8\", \"cortex-x1\", \"cortex-x2\", \"denver2\", \"denver\", \"exynos-m1\", \"fa526\", \"fa606te\", \"fa626\", \"fa626te\", \"fa726te\", \"falkor\", \"fmp626\", \"generic-armv7-a\", \"i80200\", \"i80321-400-b0\", \"i80321-400\", \"i80321-600-b0\", \"i80321-600\", \"ipx1200\", \"ipx425-266\", \"ipx425-400\", \"ipx425-533\", \"iwmmxt2\", \"iwmmxt\", \"krait\", \"kryo2\", \"kryo\", \"marvell-f\", 
\"marvell-pj4\", \"mpcore\", \"neoverse-e1\", \"neoverse-n1\", \"neoverse-n2\", \"neoverse-v1\", \"pxa210a\", \"pxa210b\", \"pxa210c\", \"pxa250a\", \"pxa250b\", \"pxa250c\", \"pxa27x\", \"pxa30x\", \"pxa31x\", \"pxa32x\", \"pxa930\", \"sa1110\", \"saphira\", \"scorpion\", \"strongarm1100\", \"strongarm110\", \"strongarm\", \"thunderx2\", \"thunderx2t99\", \"thunderx\", \"thunderxt81\", \"thunderxt83\", \"thunderxt88\", \"tsv110\", \"xgene1\", \"xscale\".") + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(powerpc|ppc)64.*") + set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Other supported values are: \"none\", \"generic\", \"power8\", \"power9\", \"power10\".") + else() + message(WARNING "[OptimizeForArchitecture] The CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture") + endif() + + if(NOT OFA_VERBOSE) + set(CMAKE_REQUIRED_QUIET true) + endif() + + if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}") + string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE) + message(STATUS "[OptimizeForArchitecture] Target architecture changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"") + + if(TARGET_ARCHITECTURE STREQUAL "auto") + OFA_AutodetectHostArchitecture() + message(STATUS "[OptimizeForArchitecture] Detected Host CPU: ${TARGET_ARCHITECTURE}") + endif() + + set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE) + mark_as_advanced(_last_target_arch) + + message(STATUS "[OptimizeForArchitecture] Checking Host CPU features. 
This can take some time ...")
+    # Select the option handler from the architecture being compiled FOR.
+    # On Apple an explicit CMAKE_OSX_ARCHITECTURES names that architecture,
+    # while this module treats CMAKE_SYSTEM_PROCESSOR as the build host (see
+    # the cross-compile check in OFA_AutodetectHostArchitecture). Testing the
+    # host first would send an x86_64 host building for arm64 to the x86
+    # handler, so the Apple target takes precedence.
+    set(_ofa_target_processor "${CMAKE_SYSTEM_PROCESSOR}")
+    if(APPLE AND NOT "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "")
+      set(_ofa_target_processor "${CMAKE_OSX_ARCHITECTURES}")
+    endif()
+    if("${_ofa_target_processor}" MATCHES "i686.*|i386.*|x86.*|amd64.*|x86_64.*|AMD64.*")
+      include(OFA/HandleX86Options)
+      OFA_HandleX86Options()
+    elseif("${_ofa_target_processor}" MATCHES "^(arm.*|ARM.*|aarch64.*|AARCH64.*)")
+      include(OFA/HandleArmOptions)
+      OFA_HandleArmOptions()
+    elseif("${_ofa_target_processor}" MATCHES "^(powerpc|ppc)64.*")
+      include(OFA/HandlePpcOptions)
+      OFA_HandlePpcOptions()
+    endif()
+    # Macros share the caller's scope; do not leak the helper variable.
+    unset(_ofa_target_processor)
+
+ if(ARCHITECTURE_CXX_FLAGS) + if(APPLE) + message(STATUS "[OptimizeForArchitecture] (!) ${CMAKE_OSX_ARCHITECTURES} architecture optimization flags applied: ${ARCHITECTURE_CXX_FLAGS}.\nIn case of runtime errors re-compile with TARGET_ARCHITECTURE=none or TARGET_ARCHITECTURE=generic.") + else() + message(STATUS "[OptimizeForArchitecture] (!) ${CMAKE_SYSTEM_PROCESSOR} architecture optimization flags applied: ${ARCHITECTURE_CXX_FLAGS}.\nIn case of runtime errors re-compile with TARGET_ARCHITECTURE=none or TARGET_ARCHITECTURE=generic.") + endif() + endif() + endif() + +endmacro(OptimizeForArchitecture) diff --git a/src/cmake/OFA/cpuinfo_arm.c b/src/cmake/OFA/cpuinfo_arm.c new file mode 100644 index 0000000000..9988fe60c8 --- /dev/null +++ b/src/cmake/OFA/cpuinfo_arm.c @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + +sigjmp_buf go_here; + +void sigill_handler(int signum) +{ + (void)signum; + siglongjmp(go_here, 1); +} + +int main(void) +{ + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = sigill_handler; + if (sigaction(SIGILL, &sa, NULL) < 0) + { + perror("sigaction"); + exit(2); + } + + do + { + if (sigsetjmp(go_here, 1)) + { + exit(-1); + } + else + { + unsigned long ret; + asm("mrs %0, MIDR_EL1" : "=r"(ret)); + + printf("%s 0x%02lX\n", "[OptimizeForArchitecture] CPU implementer :", (ret >> 24) & 0xFF); + printf("%s 0x%01lX\n", "[OptimizeForArchitecture] CPU
architecture:", (ret >> 16) & 0xF); + printf("%s 0x%01lX\n", "[OptimizeForArchitecture] CPU variant :", (ret >> 20) & 0xF); + printf("%s 0x%03lX\n", "[OptimizeForArchitecture] CPU part :", (ret >> 4) & 0xFFF); + printf("%s %ld\n", "[OptimizeForArchitecture] CPU revision :", ret & 0xF); + } + } while (0); + + return 0; +} diff --git a/src/cmake/OFA/cpuinfo_x86.cxx b/src/cmake/OFA/cpuinfo_x86.cxx new file mode 100644 index 0000000000..190a2cc11e --- /dev/null +++ b/src/cmake/OFA/cpuinfo_x86.cxx @@ -0,0 +1,732 @@ +#include +#include +#include + +#define print_features(reg, features, n) \ + for (int i = 0; i < n; ++i) \ + printf("%s", (reg >> i & 0x1) && !features[i].empty() ? (features[i] + " ").c_str() : ""); + +// Get the vendor ID +void getVendorID() +{ + int a[3]; + for (int i = 0; i < 3; ++i) + a[i] = 0; + + // EAX=0x00000000: Vendor ID + __asm__("mov $0x00000000, %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%ebx, %0\n\t" : "=r"(a[0])); + __asm__("mov %%edx, %0\n\t" : "=r"(a[1])); + __asm__("mov %%ecx, %0\n\t" : "=r"(a[2])); + + char vendorID[13]; + vendorID[12] = 0; + memcpy(&vendorID[0], &a[0], 4); + memcpy(&vendorID[4], &a[1], 4); + memcpy(&vendorID[8], &a[2], 4); + + printf("[OptimizeForArchitecture] vendor_id : %s\n", vendorID); +} + +// Get processor information +void getProcInfo() +{ + int eax = 0; + + // EAX=0x00000001: Processor Info + __asm__("mov $0x00000001 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // gives model and family + + int stepping = eax >> 0 & 0xF; + int model = eax >> 4 & 0xF; + int family = eax >> 8 & 0xF; + if (family == 6 || family == 15) + model += (eax >> 16 & 0xF) << 4; + + printf("[OptimizeForArchitecture] cpu family : %d\n", family); + printf("[OptimizeForArchitecture] model : %d\n", model); + printf("[OptimizeForArchitecture] stepping : %d\n", stepping); +} + +// Get processor features +void getFeatures() +{ + int eax_max, ecx_max, eax, ebx, ecx, edx; + + // Note: If the comment begins 
with a quoted string, that string is + // used in /proc/cpuinfo instead of the macro name. If the string is + // "", this feature bit is not displayed in /proc/cpuinfo at all. + + // CPU flags + printf("flags : "); + + // EAX=0x00000000: largest value that EAX can be set to before calling CPUID + __asm__("mov $0x00000000, %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax_max)); + + if (eax_max >= 0x00000001) + { + // EAX=0x00000001: Processor Info and Feature Bits + __asm__("mov $0x00000001 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // feature flags + + // Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 + { + std::string features[] = { + "fpu", /* Onboard FPU */ + "vme", /* Virtual Mode Extensions */ + "de", /* Debugging Extensions */ + "pse", /* Page Size Extensions */ + "tsc", /* Time Stamp Counter */ + "msr", /* Model-Specific Registers */ + "pae", /* Physical Address Extensions */ + "mce", /* Machine Check Exception */ + "cx8", /* CMPXCHG8 instruction */ + "apic", /* Onboard APIC */ + "", /* Reserved */ + "sep", /* SYSENTER/SYSEXIT */ + "mtrr", /* Memory Type Range Registers */ + "pge", /* Page Global Enable */ + "mca", /* Machine Check Architecture */ + "cmov", /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ + "pat", /* Page Attribute Table */ + "pse36", /* 36-bit PSEs */ + "pn", /* Processor serial number */ + "clflush", /* CLFLUSH instruction */ + "", /* Reserved */ + "dts", /* "dts" Debug Store */ + "acpi", /* ACPI via MSR */ + "mmx", /* Multimedia Extensions */ + "fxsr", /* FXSAVE/FXRSTOR, CR4.OSFXSR */ + "sse", /* "sse" */ + "sse2", /* "sse2" */ + "ss", /* "ss" CPU self snoop */ + "ht", /* Hyper-Threading */ + "tm", /* "tm" Automatic clock control */ + "ia64", /* IA-64 processor */ + "pbe" /* Pending Break Enable */ + }; + print_features(edx, features, 32); + } + + // Intel-defined CPU features, CPUID level 
0x00000001 (ECX), word 4 + { + std::string features[] = { + "sse3", /* "pni" SSE-3 */ + "pclmulqdq", /* PCLMULQDQ instruction */ + "dtes64", /* 64-bit Debug Store */ + "monitor", /* "monitor" MONITOR/MWAIT support */ + "ds_cpl", /* "ds_cpl" CPL-qualified (filtered) Debug Store */ + "vmx", /* Hardware virtualization */ + "smx", /* Safer Mode eXtensions */ + "est", /* Enhanced SpeedStep */ + "tm2", /* Thermal Monitor 2 */ + "ssse3", /* Supplemental SSE-3 */ + "cid", /* Context ID */ + "sdbg", /* Silicon Debug */ + "fma", /* Fused multiply-add */ + "cx16", /* CMPXCHG16B instruction */ + "xtpr", /* Send Task Priority Messages */ + "pdcm", /* Perf/Debug Capabilities MSR */ + "", /* Reserved */ + "pcid", /* Process Context Identifiers */ + "dca", /* Direct Cache Access */ + "sse4_1", /* "sse4_1" SSE-4.1 */ + "sse4_2", /* "sse4_2" SSE-4.2 */ + "x2apic", /* X2APIC */ + "movbe", /* MOVBE instruction */ + "popcnt", /* POPCNT instruction */ + "tsc_deadline_timer", /* TSC deadline timer */ + "aes", /* AES instructions */ + "xsave", /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ + "", /* "" XSAVE instruction enabled in the OS */ + "avx", /* Advanced Vector Extensions */ + "f16c", /* 16-bit FP conversions */ + "rdrand", /* RDRAND instruction */ + "hypervisor" /* Running on a hypervisor */ + }; + print_features(ecx, features, 32); + } + } // EAX=0x00000001 + + // if (eax_max >=0x00000006) { + // // EAX=0x00000006: Extended Features + // __asm__("mov $0x00000006 , %eax\n\t"); + // __asm__("cpuid\n\t"); + // __asm__("mov %%eax, %0\n\t":"=r" (eax)); //extended feature flags + // __asm__("mov %%ebx, %0\n\t":"=r" (ebx)); //extended feature flags + // __asm__("mov %%ecx, %0\n\t":"=r" (ecx)); //extended feature flags + // __asm__("mov %%edx, %0\n\t":"=r" (edx)); //extended feature flags + + // // Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 + + // { + // std::string features[] = { "cxmmx", /* Cyrix MMX extensions */ + // "k6_mtrr", /* AMD K6 nonstandard MTRRs */ + // 
"cyrix_arr", /* Cyrix ARRs (= MTRRs) */ + // "centaur_mcr", /* Centaur MCRs (= MTRRs) */ + // "k8", /* "" Opteron, Athlon64 */ + // "", /* "" Athlon */ + // "", /* "" P3 */ + // "", /* "" P4 */ + // "constant_tsc", /* TSC ticks at a constant rate */ + // "up", /* SMP kernel running on UP */ + // "art", /* Always running timer (ART) */ + // "arch_perfmon", /* Intel Architectural PerfMon */ + // "pebs", /* Precise-Event Based Sampling */ + // "bts", /* Branch Trace Store */ + // "", /* "" syscall in IA32 userspace */ + // "", /* "" sysenter in IA32 userspace */ + // "rep_good", /* REP microcode works well */ + // "", /* Reserved */ + // "", /* "" LFENCE synchronizes RDTSC */ + // "acc_power", /* AMD Accumulated Power Mechanism */ + // "nopl", /* The NOPL (0F 1F) instructions */ + // "", /* "" Always-present feature */ + // "xtopology", /* CPU topology enum extensions */ + // "tsc_reliable", /* TSC is known to be reliable */ + // "nonstop_tsc", /* TSC does not stop in C states */ + // "cpuid", /* CPU has CPUID instruction itself */ + // "extd_apicid", /* Extended APICID (8 bits) */ + // "amd_dcm", /* AMD multi-node processor */ + // "aperfmperf", /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ + // "rapl", /* AMD/Hygon RAPL interface */ + // "nonstop_tsc_s3", /* TSC doesn't stop in S3 state */ + // "tsc_known_freq" /* TSC has known frequency */ + // }; + // print_features(ecx, features, 32); + // } + // } // EAX=0x00000006 + + if (eax_max >= 0x00000007) + { + // EAX=0x00000007, ECX=0x00000000: Extended Features + __asm__("mov $0x00000007 , %eax\n\t"); + __asm__("mov $0x00000000 , %ecx\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(ecx_max)); // gives maximum ECX value + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // Intel-defined CPU features, CPUID level 
0x00000007:0 (EBX), word 9 + { + std::string features[] = { + "fsgsbase", /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ + "tsc_adjust", /* TSC adjustment MSR 0x3B */ + "sgx", /* Software Guard Extensions */ + "bmi1", /* 1st group bit manipulation extensions */ + "hle", /* Hardware Lock Elision */ + "avx2", /* AVX2 instructions */ + "", /* "" FPU data pointer updated only on x87 exceptions */ + "smep", /* Supervisor Mode Execution Protection */ + "bmi2", /* 2nd group bit manipulation extensions */ + "erms", /* Enhanced REP MOVSB/STOSB instructions */ + "invpcid", /* Invalidate Processor Context ID */ + "rtm", /* Restricted Transactional Memory */ + "cqm", /* Cache QoS Monitoring */ + "", /* "" Zero out FPU CS and FPU DS */ + "mpx", /* Memory Protection Extension */ + "rdt_a", /* Resource Director Technology Allocation */ + "avx512f", /* AVX-512 Foundation */ + "avx512dq", /* AVX-512 DQ (Double/Quad granular) Instructions */ + "rdseed", /* RDSEED instruction */ + "adx", /* ADCX and ADOX instructions */ + "smap", /* Supervisor Mode Access Prevention */ + "avx512ifma", /* AVX-512 Integer Fused Multiply-Add instructions */ + "pcommit", "clflushopt", /* CLFLUSHOPT instruction */ + "clwb", /* CLWB instruction */ + "intel_pt", /* Intel Processor Trace */ + "avx512pf", /* AVX-512 Prefetch */ + "avx512er", /* AVX-512 Exponential and Reciprocal */ + "avx512cd", /* AVX-512 Conflict Detection */ + "sha_ni", /* SHA1/SHA256 Instruction Extensions */ + "avx512bw", /* AVX-512 BW (Byte/Word granular) Instructions */ + "avx512vl" /* AVX-512 VL (128/256 Vector Length) Extensions */ + }; + print_features(ebx, features, 32); + } + + // Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 + { + std::string features[] = {"prefetchwt1", + "avx512vbmi", /* AVX512 Vector Bit Manipulation instructions*/ + "umip", /* User Mode Instruction Protection */ + "pku", /* Protection Keys for Userspace */ + "ospke", /* OS Protection Keys Enable */ + "waitpkg", /* 
UMONITOR/UMWAIT/TPAUSE Instructions */ + "avx512vbmi2", /* Additional AVX512 Vector Bit Manipulation Instructions */ + "cetss", + "gfni", /* Galois Field New Instructions */ + "vaes", /* Vector AES */ + "vpclmulqdq", /* Carry-Less Multiplication Double Quadword */ + "avx512vnni", /* Vector Neural Network Instructions */ + "avx512bitalg", /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ + "tme", /* Intel Total Memory Encryption */ + "avx512vpopcntdq", /* POPCNT for vectors of DW/QW */ + "", /* Reserved */ + "la57", /* 5-level page tables */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "rdpid", /* RDPID instruction */ + "keylocker", + "bus_lock_detect", /* Bus Lock detect */ + "cldemote", /* CLDEMOTE instruction */ + "", /* Reserved */ + "movdiri", /* MOVDIRI instruction */ + "movdir64b", /* MOVDIR64B instruction */ + "enqcmd", /* ENQCMD and ENQCMDS instructions */ + "sgx_lc", /* Software Guard Extensions Launch Control */ + "pks"}; + print_features(ecx, features, 32); + } + + // Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 + { + std::string features[] = { + "", /* Reserved */ + "", /* Reserved */ + "avx5124vnniw", /* AVX-512 Neural Network Instructions */ + "avx5124fmaps", /* AVX-512 Multiply Accumulation Single precision */ + "fsrm", /* Fast Short Rep Mov */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "avx512vp2intersect", /* AVX-512 Intersect for D/Q */ + "srbds", /* "" SRBDS mitigation MSR available */ + "md_clear", /* VERW clears CPU buffers */ + "", /* "" RTM transaction always aborts */ + "", /* Reserved */ + "", /* "" TSX_FORCE_ABORT */ + "serialize", /* SERIALIZE instruction */ + "", /* "" This part has CPUs of more than one type */ + "tsxldtrk", /* TSX Suspend Load Address Tracking */ + "", /* Reserved */ + "pconfig", /* Intel PCONFIG */ + "arch_lbr", /* Intel ARCH LBR */ + "cet_ibt", + "", /* Reserved */ + "amx-bf16", /* AMX BFLOAT16 
Support */ + "avx512fp16", /* AVX512 FP16 */ + "amx-tile", /* AMX tile Support */ + "amx-int8", /* AMX int8 Support */ + "ibrs ibpb", /* "" Speculation Control (IBRS + IBPB) */ + "stibp", /* "" Single Thread Indirect Branch Predictors */ + "flush_l1d", /* Flush L1D cache */ + "arch_capabilities", /* IA32_ARCH_CAPABILITIES MSR (Intel) */ + "", /* "" IA32_CORE_CAPABILITIES MSR */ + "ssbd" /* "" Speculative Store Bypass Disable */ + }; + print_features(edx, features, 32); + } + + if (ecx_max >= 0x00000001) + { + // EAX=0x00000007, ECX=0x00000001: Extended Features + __asm__("mov $0x00000007 , %eax\n\t"); + __asm__("mov $0x00000001 , %ecx\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + + // Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 + { + std::string features[] = { + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "avx_vnni", /* AVX VNNI instructions */ + "avx512bf16", /* AVX512 BFLOAT16 instructions */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(eax, features, 32); + } + } // ECX=0x00000001 + } // EAX=0x00000007 + + if (eax_max >= 0x0000000d) + { + // EAX=0x0000000d, ECX=0x00000001: Extended Features + __asm__("mov $0x0000000d , %eax\n\t"); + __asm__("mov $0x00000001 , %ecx\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + + // Intel-defined CPU features, CPUID level 0x0000000d:1 (EAX), 
word 10 + { + std::string features[] = { + "xsaveopt", /* XSAVEOPT instruction */ + "xsavec", /* XSAVEC instruction */ + "xgetbv1", /* XGETBV with ECX = 1 instruction */ + "xsaves", /* XSAVES/XRSTORS instructions */ + "xfd", /* "" eXtended Feature Disabling */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(eax, features, 32); + } + } // EAX=0x0000000d + + // EAX=0x80000000: largest value that EAX can be set to before calling CPUID + __asm__("mov $0x80000000, %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax_max)); + + if (eax_max >= 0x80000001) + { + // EAX=80000001: Processor Info and Feature Bits + __asm__("mov $0x80000001 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // feature flags + + // AMD-defined CPU features, CPUID level 0x80000001 (EDX), word 1 + // Don't duplicate feature flags which are redundant with Intel! 
+ { + std::string features[] = { + "", /* Onboard FPU */ + "", /* Virtual Mode Extensions */ + "", /* Debugging Extensions */ + "", /* Page Size Extensions */ + "", /* Time Stamp Counter */ + "", /* Model-Specific Registers */ + "", /* Physical Address Extensions */ + "", /* Machine Check Exception */ + "", /* CMPXCHG8 instruction */ + "", /* Onboard APIC */ + "", /* Reserved */ + "syscall", /* SYSCALL/SYSRET */ + "", /* Memory Type Range Registers */ + "", /* Page Global Enable */ + "", /* Machine Check Architecture */ + "", /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ + "", /* Page Attribute Table */ + "", /* 36-bit PSEs */ + "", /* Reserved */ + "mp", /* MP Capable */ + "nx", /* Execute Disable */ + "", /* Reserved */ + "mmxext", /* AMD MMX extensions */ + "", /* Multimedia Extensions */ + "", /* FXSAVE/FXRSTOR, CR4.OSFXSR */ + "fxsr_opt", /* FXSAVE/FXRSTOR optimizations */ + "pdpe1gb", /* "pdpe1gb" GB pages */ + "rdtscp", /* RDTSCP */ + "", /* Reserved */ + "lm", /* Long Mode (x86-64, 64-bit support) */ + "3dnowext", /* AMD 3DNow extensions */ + "3dnow" /* 3DNow */ + }; + print_features(edx, features, 32); + } + + // AMD-defined CPU features, CPUID level 0x80000001 (ECX), word 6 + { + std::string features[] = { + "lahf_lm", /* LAHF/SAHF in long mode */ + "cmp_legacy", /* If yes HyperThreading not valid */ + "svm", /* Secure Virtual Machine */ + "extapic", /* Extended APIC space */ + "cr8_legacy", /* CR8 in 32-bit mode */ + "abm", /* Advanced bit manipulation */ + "sse4a", /* SSE-4A */ + "misalignsse", /* Misaligned SSE mode */ + "3dnowprefetch", /* 3DNow prefetch instructions */ + "osvw", /* OS Visible Workaround */ + "ibs", /* Instruction Based Sampling */ + "xop", /* extended AVX instructions */ + "skinit", /* SKINIT/STGI instructions */ + "wdt", /* Watchdog timer */ + "", /* Reserved */ + "lwp", /* Light Weight Profiling */ + "fma4", /* 4 operands MAC instructions */ + "tce", /* Translation Cache Extension */ + "", /* Reserved */ + "nodeid_msr", /* 
NodeId MSR */ + "", /* Reserved */ + "tbm", /* Trailing Bit Manipulations */ + "topoext", /* Topology extensions CPUID leafs */ + "perfctr_core", /* Core performance counter extensions */ + "perfctr_nb", /* NB performance counter extensions */ + "", /* Reserved */ + "bpext", /* Data breakpoint extension */ + "ptsc", /* Performance time-stamp counter */ + "perfctr_l2", /* Last Level Cache performance counter extensions */ + "mwaitx", /* MWAIT extension (MONITORX/MWAITX instructions) */ + "", /* Reserved */ + "" /* Reserved */ + + }; + print_features(ecx, features, 32); + } + } // EAX=0x80000001 + + if (eax_max >= 0x80000007) + { + // EAX=0x80000007: Extended Features + __asm__("mov $0x80000007 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 + { + std::string features[] = { + "overflow_recov", /* MCA overflow recovery support */ + "succor", /* Uncorrectable error containment and recovery */ + "", /* Reserved */ + "smca", /* Scalable MCA */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(ebx, features, 32); + } + } // EAX=0x80000007 + + if (eax_max >= 0x80000008) + { + // 
EAX=0x80000008: Extended Features + __asm__("mov $0x80000008 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 18 + { + std::string features[] = { + "clzero", /* CLZERO instruction */ + "irperf", /* Instructions Retired Count */ + "xsaveerptr", /* Always save/restore FP error pointers */ + "", /* Reserved */ + "rdpru", /* Read processor register at user level */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "wbnoinvd", /* WBNOINVD instruction */ + "", /* Reserved */ + "", /* Reserved */ + "", /* "" Indirect Branch Prediction Barrier */ + "", /* Reserved */ + "", /* "" Indirect Branch Restricted Speculation */ + "", /* "" Single Thread Indirect Branch Predictors */ + "", /* Reserved */ + "", /* "" Single Thread Indirect Branch Predictors always-on preferred */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "amd_ppin", /* Protected Processor Inventory Number */ + "", /* "" Speculative Store Bypass Disable */ + "virt_ssbd", /* Virtualized Speculative Store Bypass Disable */ + "", /* "" Speculative Store Bypass is fixed in hardware. 
*/ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(ebx, features, 32); + } + } // EAX=0x80000008 + + if (eax_max >= 0x8000000a) + { + // EAX=0x8000000a: Extended Features + __asm__("mov $0x8000000a , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // AMD-defined CPU features, CPUID level 0x8000000a (EDX), word 15 + { + std::string features[] = { + "npt", /* Nested Page Table support */ + "lbrv", /* LBR Virtualization support */ + "svm_lock", /* "svm_lock" SVM locking MSR */ + "nrip_save", /* "nrip_save" SVM next_rip save */ + "tsc_scale", /* "tsc_scale" TSC scaling support */ + "vmcb_clean", /* "vmcb_clean" VMCB clean bits support */ + "flushbyasid", /* flush-by-ASID support */ + "decodeassists", /* Decode Assists support */ + "", /* Reserved */ + "", /* Reserved */ + "pausefilter", /* filtered pause intercept */ + "", /* Reserved */ + "pfthreshold", /* pause filter threshold */ + "avic", /* Virtual Interrupt Controller */ + "", /* Reserved */ + "v_vmsave_vmload", /* Virtual VMSAVE VMLOAD */ + "vgif", /* Virtual GIF */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "v_spec_ctrl", /* Virtual SPEC_CTRL */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* "" SVME addr check */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(edx, features, 32); + } + } // EAX=0x8000000a + + if (eax_max >= 0x8000001f) + { + // EAX=0x8000001f: Extended Features + __asm__("mov $0x8000001f , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature 
flags + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // AMD-defined CPU features, CPUID level 0x8000001f (EAX), word 19 + { + std::string features[] = { + "sme", /* AMD Secure Memory Encryption */ + "sev", /* AMD Secure Encrypted Virtualization */ + "", /* "" VM Page Flush MSR is supported */ + "sev_es", /* AMD Secure Encrypted Virtualization - Encrypted State */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* "" AMD hardware-enforced cache coherency */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(eax, features, 32); + } + } // EAX=0x8000001f + + printf("\n"); +} + +int main() +{ + getVendorID(); + getProcInfo(); + getFeatures(); + return 0; +} diff --git a/src/cmake/OptimizeForArchitecture.cmake b/src/cmake/OptimizeForArchitecture.cmake deleted file mode 100644 index 89a2e9f2be..0000000000 --- a/src/cmake/OptimizeForArchitecture.cmake +++ /dev/null @@ -1,612 +0,0 @@ -# Determine the host CPU feature set and determine the best set of compiler -# flags to enable all supported SIMD relevant features. Alternatively, the -# target CPU can be explicitly selected (for generating more generic binaries -# or for targeting a different system). -# Compilers provide e.g. the -march=native flag to achieve a similar result. 
-# This fails to address the need for building for a different microarchitecture -# than the current host. -# The script tries to deduce all settings from the model and family numbers of -# the CPU instead of reading the CPUID flags from e.g. /proc/cpuinfo. This makes -# the detection more independent from the CPUID code in the kernel (e.g. avx2 is -# not listed on older kernels). -# -# Usage: -# OptimizeForArchitecture() -# If either of Vc_SSE_INTRINSICS_BROKEN, Vc_AVX_INTRINSICS_BROKEN, -# Vc_AVX2_INTRINSICS_BROKEN is defined and set, the OptimizeForArchitecture -# macro will consequently disable the relevant features via compiler flags. -# See https://github.com/VcDevel/Vc/blob/master/cmake/OptimizeForArchitecture.cmake - -#============================================================================= -# Copyright 2010-2016 Matthias Kretz -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the names of contributing organizations nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#============================================================================= - -get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH) -include("${_currentDir}/AddCompilerFlag.cmake") -include(CheckIncludeFileCXX) - -macro(_my_find _list _value _ret) - list(FIND ${_list} "${_value}" _found) - if(_found EQUAL -1) - set(${_ret} FALSE) - else(_found EQUAL -1) - set(${_ret} TRUE) - endif(_found EQUAL -1) -endmacro(_my_find) - -macro(OFA_AutodetectX86) - set(_vendor_id) - set(_cpu_family) - set(_cpu_model) - if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - file(READ "/proc/cpuinfo" _cpuinfo) - string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}") - string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}") - string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}") - string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}") - elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - exec_program("/usr/sbin/sysctl -n machdep.cpu.vendor machdep.cpu.model machdep.cpu.family machdep.cpu.features" OUTPUT_VARIABLE _sysctl_output_string) - string(REPLACE "\n" ";" _sysctl_output ${_sysctl_output_string}) - list(GET _sysctl_output 0 _vendor_id) - list(GET _sysctl_output 1 _cpu_model) - list(GET _sysctl_output 2 _cpu_family) - list(GET _sysctl_output 3 _cpu_flags) - - string(TOLOWER "${_cpu_flags}" _cpu_flags) - 
string(REPLACE "." "_" _cpu_flags "${_cpu_flags}") - elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") - get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE) - get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE) - mark_as_advanced(_vendor_id _cpu_id) - string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}") - string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}") - endif(CMAKE_SYSTEM_NAME STREQUAL "Linux") - if(_vendor_id STREQUAL "GenuineIntel") - if(_cpu_family EQUAL 6) - # taken from the Intel ORM - # http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html - # CPUID Signature Values of Of Recent Intel Microarchitectures - # 4E 5E | Skylake microarchitecture - # 3D 47 56 | Broadwell microarchitecture - # 3C 45 46 3F | Haswell microarchitecture - # 3A 3E | Ivy Bridge microarchitecture - # 2A 2D | Sandy Bridge microarchitecture - # 25 2C 2F | Intel microarchitecture Westmere - # 1A 1E 1F 2E | Intel microarchitecture Nehalem - # 17 1D | Enhanced Intel Core microarchitecture - # 0F | Intel Core microarchitecture - # - # Intel SDM Vol. 3C 35-1 / December 2016: - # 57 | Xeon Phi 3200, 5200, 7200 [Knights Landing] - # 85 | Future Xeon Phi - # 8E 9E | 7th gen. Core [Kaby Lake] - # 55 | Future Xeon [Skylake w/ AVX512] - # 4E 5E | 6th gen. Core / E3 v5 [Skylake w/o AVX512] - # 56 | Xeon D-1500 [Broadwell] - # 4F | Xeon E5 v4, E7 v4, i7-69xx [Broadwell] - # 47 | 5th gen. Core / Xeon E3 v4 [Broadwell] - # 3D | M-5xxx / 5th gen. [Broadwell] - # 3F | Xeon E5 v3, E7 v3, i7-59xx [Haswell-E] - # 3C 45 46 | 4th gen. Core, Xeon E3 v3 [Haswell] - # 3E | Xeon E5 v2, E7 v2, i7-49xx [Ivy Bridge-E] - # 3A | 3rd gen. Core, Xeon E3 v2 [Ivy Bridge] - # 2D | Xeon E5, i7-39xx [Sandy Bridge] - # 2F | Xeon E7 - # 2A | Xeon E3, 2nd gen. 
Core [Sandy Bridge] - # 2E | Xeon 7500, 6500 series - # 25 2C | Xeon 3600, 5600 series, Core i7, i5 and i3 - # - # Values from the Intel SDE: - # 5C | Goldmont - # 5A | Silvermont - # 57 | Knights Landing - # 66 | Cannonlake - # 55 | Skylake Server - # 4E | Skylake Client - # 3C | Broadwell (likely a bug in the SDE) - # 3C | Haswell - if(_cpu_model EQUAL 87) # 57 - set(TARGET_ARCHITECTURE "knl") # Knights Landing - elseif(_cpu_model EQUAL 92) - set(TARGET_ARCHITECTURE "goldmont") - elseif(_cpu_model EQUAL 90 OR _cpu_model EQUAL 76) - set(TARGET_ARCHITECTURE "silvermont") - elseif(_cpu_model EQUAL 102) - set(TARGET_ARCHITECTURE "cannonlake") - elseif(_cpu_model EQUAL 142 OR _cpu_model EQUAL 158) # 8E, 9E - set(TARGET_ARCHITECTURE "kaby-lake") - elseif(_cpu_model EQUAL 85) # 55 - set(TARGET_ARCHITECTURE "skylake-avx512") - elseif(_cpu_model EQUAL 78 OR _cpu_model EQUAL 94 OR _cpu_model EQUAL 165) # 4E, 5E - set(TARGET_ARCHITECTURE "skylake") - elseif(_cpu_model EQUAL 61 OR _cpu_model EQUAL 71 OR _cpu_model EQUAL 79 OR _cpu_model EQUAL 86) # 3D, 47, 4F, 56 - set(TARGET_ARCHITECTURE "broadwell") - elseif(_cpu_model EQUAL 60 OR _cpu_model EQUAL 69 OR _cpu_model EQUAL 70 OR _cpu_model EQUAL 63) - set(TARGET_ARCHITECTURE "haswell") - elseif(_cpu_model EQUAL 58 OR _cpu_model EQUAL 62) - set(TARGET_ARCHITECTURE "ivy-bridge") - elseif(_cpu_model EQUAL 42 OR _cpu_model EQUAL 45) - set(TARGET_ARCHITECTURE "sandy-bridge") - elseif(_cpu_model EQUAL 37 OR _cpu_model EQUAL 44 OR _cpu_model EQUAL 47) - set(TARGET_ARCHITECTURE "westmere") - elseif(_cpu_model EQUAL 26 OR _cpu_model EQUAL 30 OR _cpu_model EQUAL 31 OR _cpu_model EQUAL 46) - set(TARGET_ARCHITECTURE "nehalem") - elseif(_cpu_model EQUAL 23 OR _cpu_model EQUAL 29) - set(TARGET_ARCHITECTURE "penryn") - elseif(_cpu_model EQUAL 15) - set(TARGET_ARCHITECTURE "merom") - elseif(_cpu_model EQUAL 28) - set(TARGET_ARCHITECTURE "atom") - elseif(_cpu_model EQUAL 14) - set(TARGET_ARCHITECTURE "core") - elseif(_cpu_model LESS 14) - 
message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.") - set(TARGET_ARCHITECTURE "generic") - else() - message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.") - set(TARGET_ARCHITECTURE "merom") - endif() - elseif(_cpu_family EQUAL 7) # Itanium (not supported) - message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.") - elseif(_cpu_family EQUAL 15) # NetBurst - list(APPEND _available_vector_units_list "sse" "sse2") - if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead - list(APPEND _available_vector_units_list "sse" "sse2" "sse3") - endif(_cpu_model GREATER 2) - endif(_cpu_family EQUAL 6) - elseif(_vendor_id STREQUAL "AuthenticAMD") - if(_cpu_family EQUAL 23) - set(TARGET_ARCHITECTURE "zen") - elseif(_cpu_family EQUAL 22) # 16h - set(TARGET_ARCHITECTURE "AMD 16h") - elseif(_cpu_family EQUAL 21) # 15h - if(_cpu_model LESS 2) - set(TARGET_ARCHITECTURE "bulldozer") - else() - set(TARGET_ARCHITECTURE "piledriver") - endif() - elseif(_cpu_family EQUAL 20) # 14h - set(TARGET_ARCHITECTURE "AMD 14h") - elseif(_cpu_family EQUAL 18) # 12h - elseif(_cpu_family EQUAL 16) # 10h - set(TARGET_ARCHITECTURE "barcelona") - elseif(_cpu_family EQUAL 15) - set(TARGET_ARCHITECTURE "k8") - if(_cpu_model GREATER 64) # I don't know the right number to put here. 
This is just a guess from the hardware I have access to - set(TARGET_ARCHITECTURE "k8-sse3") - endif(_cpu_model GREATER 64) - endif() - endif(_vendor_id STREQUAL "GenuineIntel") -endmacro() - -macro(OFA_AutodetectArm) - message(WARNING "Architecture auto-detection for CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture.cmake") -endmacro() - -macro(OFA_AutodetectHostArchitecture) - set(TARGET_ARCHITECTURE "generic") - set(Vc_ARCHITECTURE_FLAGS) - if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(x86|AMD64)") - OFA_AutodetectX86() - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)") - OFA_AutodetectArm() - else() - message(FATAL_ERROR "OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") - endif() -endmacro() - -macro(OFA_HandleX86Options) - set(_march_flag_list) - set(_available_vector_units_list) - macro(_nehalem) - list(APPEND _march_flag_list "nehalem") - list(APPEND _march_flag_list "corei7") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2") - endmacro() - macro(_westmere) - list(APPEND _march_flag_list "westmere") - _nehalem() - endmacro() - macro(_sandybridge) - list(APPEND _march_flag_list "sandybridge") - list(APPEND _march_flag_list "corei7-avx") - _westmere() - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx") - endmacro() - macro(_ivybridge) - list(APPEND _march_flag_list "ivybridge") - list(APPEND _march_flag_list "core-avx-i") - _sandybridge() - list(APPEND _available_vector_units_list "rdrnd" "f16c") - endmacro() - macro(_haswell) - list(APPEND _march_flag_list "haswell") - list(APPEND _march_flag_list "core-avx2") - _ivybridge() - list(APPEND _available_vector_units_list "avx2" "fma" "bmi" "bmi2") - endmacro() - macro(_broadwell) - list(APPEND _march_flag_list "broadwell") - _haswell() - endmacro() - macro(_skylake) - 
list(APPEND _march_flag_list "skylake") - _broadwell() - endmacro() - macro(_skylake_avx512) - list(APPEND _march_flag_list "skylake-avx512") - _skylake() - list(APPEND _available_vector_units_list "avx512f" "avx512cd" "avx512dq" "avx512bw" "avx512vl") - endmacro() - macro(_cannonlake) - list(APPEND _march_flag_list "cannonlake") - _skylake_avx512() - list(APPEND _available_vector_units_list "avx512ifma" "avx512vbmi") - endmacro() - macro(_knightslanding) - list(APPEND _march_flag_list "knl") - _broadwell() - list(APPEND _available_vector_units_list "avx512f" "avx512pf" "avx512er" "avx512cd") - endmacro() - macro(_silvermont) - list(APPEND _march_flag_list "silvermont") - _westmere() - list(APPEND _available_vector_units_list "rdrnd") - endmacro() - macro(_goldmont) - list(APPEND _march_flag_list "goldmont") - _silvermont() - endmacro() - - if(TARGET_ARCHITECTURE STREQUAL "core") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3") - elseif(TARGET_ARCHITECTURE STREQUAL "merom") - list(APPEND _march_flag_list "merom") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") - elseif(TARGET_ARCHITECTURE STREQUAL "penryn") - list(APPEND _march_flag_list "penryn") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") - message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.") - if(_cpu_flags MATCHES "sse4_1") - message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)") - list(APPEND _available_vector_units_list "sse4.1") - else() - message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)") - endif() - elseif(TARGET_ARCHITECTURE STREQUAL "knl") - _knightslanding() - elseif(TARGET_ARCHITECTURE STREQUAL "cannonlake") - _cannonlake() - elseif(TARGET_ARCHITECTURE STREQUAL "kaby-lake") - _skylake() - elseif(TARGET_ARCHITECTURE 
STREQUAL "skylake-xeon" OR TARGET_ARCHITECTURE STREQUAL "skylake-avx512") - _skylake_avx512() - elseif(TARGET_ARCHITECTURE STREQUAL "skylake") - _skylake() - elseif(TARGET_ARCHITECTURE STREQUAL "broadwell") - _broadwell() - elseif(TARGET_ARCHITECTURE STREQUAL "haswell") - _haswell() - elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge") - _ivybridge() - elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge") - _sandybridge() - elseif(TARGET_ARCHITECTURE STREQUAL "westmere") - _westmere() - elseif(TARGET_ARCHITECTURE STREQUAL "nehalem") - _nehalem() - elseif(TARGET_ARCHITECTURE STREQUAL "goldmont") - _goldmont() - elseif(TARGET_ARCHITECTURE STREQUAL "silvermont") - _silvermont() - elseif(TARGET_ARCHITECTURE STREQUAL "atom") - list(APPEND _march_flag_list "atom") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") - elseif(TARGET_ARCHITECTURE STREQUAL "k8") - list(APPEND _march_flag_list "k8") - list(APPEND _available_vector_units_list "sse" "sse2") - elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3") - list(APPEND _march_flag_list "k8-sse3") - list(APPEND _march_flag_list "k8") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3") - elseif(TARGET_ARCHITECTURE STREQUAL "AMD 16h") - list(APPEND _march_flag_list "btver2") - list(APPEND _march_flag_list "btver1") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "f16c") - elseif(TARGET_ARCHITECTURE STREQUAL "AMD 14h") - list(APPEND _march_flag_list "btver1") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "zen") - list(APPEND _march_flag_list "znver1") - _skylake() - list(APPEND _available_vector_units_list "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "piledriver") - list(APPEND _march_flag_list "bdver2") - list(APPEND _march_flag_list "bdver1") - list(APPEND _march_flag_list "bulldozer") - list(APPEND _march_flag_list "barcelona") - 
list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4" "fma" "f16c") - elseif(TARGET_ARCHITECTURE STREQUAL "interlagos") - list(APPEND _march_flag_list "bdver1") - list(APPEND _march_flag_list "bulldozer") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4") - elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer") - list(APPEND _march_flag_list "bdver1") - list(APPEND _march_flag_list "bulldozer") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4") - elseif(TARGET_ARCHITECTURE STREQUAL "barcelona") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "istanbul") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "magny-cours") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "generic") - list(APPEND _march_flag_list "generic") - elseif(TARGET_ARCHITECTURE STREQUAL "none") - # add this clause to remove it from the else clause - else(TARGET_ARCHITECTURE STREQUAL "core") - message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". 
Please set TARGET_ARCHITECTURE to a supported value.") - endif(TARGET_ARCHITECTURE STREQUAL "core") - - if(NOT TARGET_ARCHITECTURE STREQUAL "none") - set(_disable_vector_unit_list) - set(_enable_vector_unit_list) - if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN) - message(STATUS "AVX disabled because of old/broken toolchain") - set(_avx_broken true) - set(_avx2_broken true) - set(_fma4_broken true) - set(_xop_broken true) - else() - set(_avx_broken false) - if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN) - message(STATUS "FMA4 disabled because of old/broken toolchain") - set(_fma4_broken true) - else() - set(_fma4_broken false) - endif() - if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN) - message(STATUS "XOP disabled because of old/broken toolchain") - set(_xop_broken true) - else() - set(_xop_broken false) - endif() - if(DEFINED Vc_AVX2_INTRINSICS_BROKEN AND Vc_AVX2_INTRINSICS_BROKEN) - message(STATUS "AVX2 disabled because of old/broken toolchain") - set(_avx2_broken true) - else() - set(_avx2_broken false) - endif() - endif() - - macro(_enable_or_disable _name _flag _documentation _broken) - if(_broken) - set(_found false) - else() - _my_find(_available_vector_units_list "${_flag}" _found) - endif() - set(USE_${_name} ${_found} CACHE BOOL "${documentation}" ${_force}) - mark_as_advanced(USE_${_name}) - if(USE_${_name}) - list(APPEND _enable_vector_unit_list "${_flag}") - else() - list(APPEND _disable_vector_unit_list "${_flag}") - endif() - endmacro() - _enable_or_disable(SSE2 "sse2" "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." false) - _enable_or_disable(SSE3 "sse3" "Use SSE3. If SSE3 instructions are not enabled they will be emulated." false) - _enable_or_disable(SSSE3 "ssse3" "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." false) - _enable_or_disable(SSE4_1 "sse4.1" "Use SSE4.1. 
If SSE4.1 instructions are not enabled they will be emulated." false) - _enable_or_disable(SSE4_2 "sse4.2" "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." false) - _enable_or_disable(SSE4a "sse4a" "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." false) - _enable_or_disable(AVX "avx" "Use AVX. This will all floating-point vector sizes relative to SSE." _avx_broken) - _enable_or_disable(FMA "fma" "Use FMA." _avx_broken) - _enable_or_disable(BMI2 "bmi2" "Use BMI2." _avx_broken) - _enable_or_disable(AVX2 "avx2" "Use AVX2. This will double all of the vector sizes relative to SSE." _avx2_broken) - _enable_or_disable(XOP "xop" "Use XOP." _xop_broken) - _enable_or_disable(FMA4 "fma4" "Use FMA4." _fma4_broken) - _enable_or_disable(AVX512F "avx512f" "Use AVX512F. This will double all floating-point vector sizes relative to AVX2." false) - _enable_or_disable(AVX512VL "avx512vl" "Use AVX512VL. This enables 128- and 256-bit vector length instructions with EVEX coding (improved write-masking & more vector registers)." _avx2_broken) - _enable_or_disable(AVX512PF "avx512pf" "Use AVX512PF. This enables prefetch instructions for gathers and scatters." false) - _enable_or_disable(AVX512ER "avx512er" "Use AVX512ER. This enables exponential and reciprocal instructions." false) - _enable_or_disable(AVX512CD "avx512cd" "Use AVX512CD." false) - _enable_or_disable(AVX512DQ "avx512dq" "Use AVX512DQ." false) - _enable_or_disable(AVX512BW "avx512bw" "Use AVX512BW." false) - _enable_or_disable(AVX512IFMA "avx512ifma" "Use AVX512IFMA." false) - _enable_or_disable(AVX512VBMI "avx512vbmi" "Use AVX512VBMI." 
false) - - if(MSVC) - # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX) - # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010) - _my_find(_enable_vector_unit_list "avx2" _found) - if(_found) - AddCompilerFlag("/arch:AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _found) - endif() - if(NOT _found) - _my_find(_enable_vector_unit_list "avx" _found) - if(_found) - AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _found) - endif() - endif() - if(NOT _found) - _my_find(_enable_vector_unit_list "sse2" _found) - if(_found) - AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - endif() - endif() - foreach(_flag ${_enable_vector_unit_list}) - string(TOUPPER "${_flag}" _flag) - string(REPLACE "." "_" _flag "__${_flag}__") - add_definitions("-D${_flag}") - endforeach(_flag) - elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux) - set(OFA_map_knl "-xMIC-AVX512") - set(OFA_map_cannonlake "-xCORE-AVX512") - set(OFA_map_skylake-avx512 "-xCORE-AVX512") - set(OFA_map_skylake "-xCORE-AVX2") - set(OFA_map_broadwell "-xCORE-AVX2") - set(OFA_map_haswell "-xCORE-AVX2") - set(OFA_map_ivybridge "-xCORE-AVX-I") - set(OFA_map_sandybridge "-xAVX") - set(OFA_map_westmere "-xSSE4.2") - set(OFA_map_nehalem "-xSSE4.2") - set(OFA_map_penryn "-xSSSE3") - set(OFA_map_merom "-xSSSE3") - set(OFA_map_core2 "-xSSE3") - set(_ok FALSE) - foreach(arch ${_march_flag_list}) - if(DEFINED OFA_map_${arch}) - AddCompilerFlag(${OFA_map_${arch}} CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _ok) - if(_ok) - break() - endif() - endif() - endforeach() - if(NOT _ok) - # This is the Intel compiler, so SSE2 is a very reasonable baseline. 
- message(STATUS "Did not recognize the requested architecture flag, falling back to SSE2") - AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - endif() - else() # not MSVC and not ICC => GCC, Clang, Open64 - foreach(_flag ${_march_flag_list}) - AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - if(_good) - break() - endif(_good) - endforeach(_flag) - foreach(_flag ${_enable_vector_unit_list}) - AddCompilerFlag("-m${_flag}" CXX_RESULT _result) - if(_result) - set(_header FALSE) - if(_flag STREQUAL "sse3") - set(_header "pmmintrin.h") - elseif(_flag STREQUAL "ssse3") - set(_header "tmmintrin.h") - elseif(_flag STREQUAL "sse4.1") - set(_header "smmintrin.h") - elseif(_flag STREQUAL "sse4.2") - set(_header "smmintrin.h") - elseif(_flag STREQUAL "sse4a") - set(_header "ammintrin.h") - elseif(_flag STREQUAL "avx") - set(_header "immintrin.h") - elseif(_flag STREQUAL "avx2") - set(_header "immintrin.h") - elseif(_flag STREQUAL "fma4") - set(_header "x86intrin.h") - elseif(_flag STREQUAL "xop") - set(_header "x86intrin.h") - endif() - set(_resultVar "HAVE_${_header}") - string(REPLACE "." "_" _resultVar "${_resultVar}") - if(_header) - CHECK_INCLUDE_FILE_CXX("${_header}" ${_resultVar} "-m${_flag}") - if(NOT ${_resultVar}) - set(_useVar "USE_${_flag}") - string(TOUPPER "${_useVar}" _useVar) - string(REPLACE "." 
"_" _useVar "${_useVar}") - message(STATUS "disabling ${_useVar} because ${_header} is missing") - set(${_useVar} FALSE) - list(APPEND _disable_vector_unit_list "${_flag}") - endif() - endif() - if(NOT _header OR ${_resultVar}) - list(APPEND Vc_ARCHITECTURE_FLAGS "-m${_flag}") - endif() - endif() - endforeach(_flag) - foreach(_flag ${_disable_vector_unit_list}) - AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - endforeach(_flag) - endif() - endif() -endmacro() - -macro(OFA_HandleArmOptions) - option(USE_NEON "Enable use of NEON instructions" ON) - if(USE_NEON) - AddCompilerFlag(-mfloat-abi=softfp CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - AddCompilerFlag(-mfpu=neon CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - endif() -endmacro() - -macro(OptimizeForArchitecture) - if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(x86|AMD64)") - set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. \ -Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. \ -Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. \ -Other supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \ -\"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \ -\"haswell\", \"broadwell\", \"skylake\", \"skylake-xeon\", \"kaby-lake\", \"cannonlake\", \"silvermont\", \ -\"goldmont\", \"knl\" (Knights Landing), \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \ -\"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \ -\"AMD 14h\", \"AMD 16h\", \"zen\".") - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)") - set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. \ -Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. \ -Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. 
\ -Other supported values are: \"none\", \"generic\", TODO...") - else() - message(WARNING "The CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture.cmake") - endif() - set(_force) - if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}") - message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"") - set(_force FORCE) - endif() - set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE) - mark_as_advanced(_last_target_arch) - string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE) - - if(TARGET_ARCHITECTURE STREQUAL "auto") - OFA_AutodetectHostArchitecture() - message(STATUS "Detected Host CPU: ${TARGET_ARCHITECTURE}") - endif() - - if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(x86|AMD64)") - OFA_HandleX86Options() - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)") - OFA_HandleArmOptions() - endif() -endmacro(OptimizeForArchitecture) - diff --git a/src/nonFree/sift/CMakeLists.txt b/src/nonFree/sift/CMakeLists.txt index c3b066fc11..123e9ab393 100644 --- a/src/nonFree/sift/CMakeLists.txt +++ b/src/nonFree/sift/CMakeLists.txt @@ -44,7 +44,10 @@ set(FEATS_H set_source_files_properties(${FEATS} ${FEATS_H} PROPERTIES LANGUAGE C) set_source_files_properties(${FEATS_H} PROPERTIES HEADER_FILE_ONLY TRUE) -set(SIMD_DEFINITIONS "-DVL_DISABLE_AVX") +set(SIMD_DEFINITIONS) +if (NOT ALICEVISION_HAVE_AVX) + list(APPEND SIMD_DEFINITIONS "-DVL_DISABLE_AVX") +endif() if (NOT ALICEVISION_HAVE_SSE) list(APPEND SIMD_DEFINITIONS "-DVL_DISABLE_SSE2") endif() From 159e6be66974c3b21e103c72a9d29a5aadaf0c8a Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Tue, 2 Sep 2025 12:11:04 +0200 Subject: [PATCH 03/40] FIX: Unified handling of OpenMP Instead of setting CFLAGS or CXXFLAGS manually, simply use the CMake targets OpenMP::OpenMP_C and OpenMP::OpenMP_CXX respectively. This applies to dependencies (MeshSD), nonfree (VLSift) and the AliceVision targets itself. 
Signed-off-by: Philipp Remy --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 12 ++---------- src/aliceVision/lensCorrectionProfile/CMakeLists.txt | 1 + src/aliceVision/system/CMakeLists.txt | 1 + src/dependencies/MeshSDFilter/CMakeLists.txt | 11 ++++------- src/nonFree/sift/CMakeLists.txt | 2 ++ 6 files changed, 11 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 80ed419c96..4e98b46016 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ option(AV_BUILD_ALICEVISION "Enable building of AliceVision" ON) option(AV_EIGEN_MEMORY_ALIGNMENT "Enable Eigen memory alignment" ON) option(ALICEVISION_BUILD_TESTS "Build AliceVision tests" OFF) option(AV_USE_CUDA "Enable CUDA" ON) -option(AV_USE_OPENMP "Enable OpenMP" $<$,OFF,ON>) # disable by default for AppleClang +option(AV_USE_OPENMP "Enable OpenMP" ON) # AppleClang now supports OpenMP, if installed as an external dependency (Homebrew, MacPorts, ...) option(BUILD_SHARED_LIBS "Build shared libraries" ON) option(ALICEVISION_INSTALL_MESHROOM_PLUGIN "Install AliceVision's plugin for Meshroom" ON) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 575af581c6..38916601df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -275,16 +275,8 @@ else() # ON OR AUTO endif() if (ALICEVISION_HAVE_OPENMP) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - if (NOT MSVC) - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - # for those using the clang with OpenMP support - list(APPEND ALICEVISION_LIBRARY_DEPENDENCIES omp) - else() - list(APPEND ALICEVISION_LIBRARY_DEPENDENCIES gomp) - endif() - endif() + set(ALICEVISION_OPENMP_CXX_TARGETS OpenMP::OpenMP_CXX) + list(APPEND ALICEVISION_LIBRARY_DEPENDENCIES OpenMP::OpenMP_CXX) endif() # ============================================================================== diff --git a/src/aliceVision/lensCorrectionProfile/CMakeLists.txt 
b/src/aliceVision/lensCorrectionProfile/CMakeLists.txt index ba9a5e9583..fc534a310f 100644 --- a/src/aliceVision/lensCorrectionProfile/CMakeLists.txt +++ b/src/aliceVision/lensCorrectionProfile/CMakeLists.txt @@ -13,4 +13,5 @@ alicevision_add_library(aliceVision_lensCorrectionProfile PRIVATE_LINKS Boost::log expat::expat + ${ALICEVISION_OPENMP_CXX_TARGETS} ) diff --git a/src/aliceVision/system/CMakeLists.txt b/src/aliceVision/system/CMakeLists.txt index 0139df4b62..625669f978 100644 --- a/src/aliceVision/system/CMakeLists.txt +++ b/src/aliceVision/system/CMakeLists.txt @@ -33,6 +33,7 @@ alicevision_add_library(aliceVision_system Boost::system Boost::date_time ${ALICEVISION_NVTX_LIBRARY} + ${ALICEVISION_OPENMP_CXX_TARGETS} PRIVATE_LINKS Boost::boost ) diff --git a/src/dependencies/MeshSDFilter/CMakeLists.txt b/src/dependencies/MeshSDFilter/CMakeLists.txt index 16dc98c4ce..0b65d50681 100644 --- a/src/dependencies/MeshSDFilter/CMakeLists.txt +++ b/src/dependencies/MeshSDFilter/CMakeLists.txt @@ -29,7 +29,8 @@ endif() # Detect OpenMP environment set(OPENMP ON CACHE BOOL "OpenMP") -if (OPENMP) +# Only use OpenMP if enabled at the top level +if (OPENMP AND ALICEVISION_USE_OPENMP) find_package(OpenMP QUIET) if (OPENMP_FOUND) message("OpenMP found. 
OpenMP activated in release.") @@ -64,7 +65,7 @@ add_executable(MeshSDFilter MeshNormalFilter.h MeshSDFilter.cpp ) -target_link_libraries(MeshSDFilter MeshSDLibrary) +target_link_libraries(MeshSDFilter MeshSDLibrary ${ALICEVISION_OPENMP_CXX_TARGETS}) # Executable for denoising @@ -76,7 +77,7 @@ add_executable(MeshDenoiser MeshNormalDenoising.h MeshDenoiser.cpp ) -target_link_libraries(MeshDenoiser MeshSDLibrary) +target_link_libraries(MeshDenoiser MeshSDLibrary ${ALICEVISION_OPENMP_CXX_TARGETS}) if (OPENMP_FOUND) @@ -84,11 +85,7 @@ if (OPENMP_FOUND) # target_compile_definitions(MeshSDLibrary PUBLIC "$<$:USE_OPENMP>") # target_link_libraries(MeshSDLibrary "$<$:${OpenMP_CXX_FLAGS}>") - target_compile_options(MeshSDFilter PUBLIC "$<$:${OpenMP_CXX_FLAGS}>") target_compile_definitions(MeshSDFilter PUBLIC "$<$:USE_OPENMP>") - target_link_libraries(MeshSDFilter "$<$:${OpenMP_CXX_FLAGS}>") - target_compile_options(MeshDenoiser PUBLIC "$<$:${OpenMP_CXX_FLAGS}>") target_compile_definitions(MeshDenoiser PUBLIC "$<$:USE_OPENMP>") - target_link_libraries(MeshDenoiser "$<$:${OpenMP_CXX_FLAGS}>") endif() diff --git a/src/nonFree/sift/CMakeLists.txt b/src/nonFree/sift/CMakeLists.txt index 123e9ab393..776978fd59 100644 --- a/src/nonFree/sift/CMakeLists.txt +++ b/src/nonFree/sift/CMakeLists.txt @@ -55,6 +55,8 @@ endif() alicevision_add_library(vlsift SOURCES ${FEATS} ${FEATS_H} + PRIVATE_LINKS + ${ALICEVISION_OPENMP_C_TARGETS} PUBLIC_DEFINITIONS ${SIMD_DEFINITIONS} PRIVATE_DEFINITIONS From f1aaa4abc58eab5fcd926806f819b397a002a5c1 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Tue, 2 Sep 2025 12:16:38 +0200 Subject: [PATCH 04/40] FEAT: Add support for building Apple Frameworks This commit adds support for compiling library targets into Apple Frameworks on macOS. Embedding resources into the bundle is supported as well. It is checked early if the user attempts to build a universal binary, which is currently unsupported due to missing support in the dependency building code.
Signed-off-by: Philipp Remy --- CMakeLists.txt | 9 +++++++++ src/cmake/FrameworkInfo.plist.in | 28 ++++++++++++++++++++++++++++ src/cmake/Helpers.cmake | 23 ++++++++++++++++++++++- 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 src/cmake/FrameworkInfo.plist.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e98b46016..d5a41435cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,9 @@ option(ALICEVISION_BUILD_TESTS "Build AliceVision tests" OFF) option(AV_USE_CUDA "Enable CUDA" ON) option(AV_USE_OPENMP "Enable OpenMP" ON) # AppleClang now supports OpenMP, if installed as an external dependency (Homebrew, MacPorts, ...) option(BUILD_SHARED_LIBS "Build shared libraries" ON) +if(APPLE AND BUILD_SHARED_LIBS) + option(BUILD_APPLE_FRAMEWORKS "Create Frameworks instead of plain dynamic libraries on macOS" ON) +endif() option(ALICEVISION_INSTALL_MESHROOM_PLUGIN "Install AliceVision's plugin for Meshroom" ON) # Global policy section @@ -29,6 +32,12 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type for AliceVision" FORCE) endif() +# Currently no universal binaries are supported. Fail early. +# FIXME: Enable universal builds by adapting dependency building accordingly. +if(APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64" AND CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + message(FATAL_ERROR "Building universal binaries is currently not supported. 
Please set 'CMAKE_OSX_ARCHITECTURES' to either arm64 (Apple Silicon) or x86_64 (Intel)!") +endif() + # set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type for AliceVision") set(DEPS_CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type for all external libraries (only used if ALICEVISION_BUILD_DEPENDENCIES=ON)") string(TOLOWER ${DEPS_CMAKE_BUILD_TYPE} DEPS_CMAKE_BUILD_TYPE_LOWERCASE) diff --git a/src/cmake/FrameworkInfo.plist.in b/src/cmake/FrameworkInfo.plist.in new file mode 100644 index 0000000000..bf24650841 --- /dev/null +++ b/src/cmake/FrameworkInfo.plist.in @@ -0,0 +1,28 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleExecutable + ${MACOSX_FRAMEWORK_NAME} + CFBundleIdentifier + ${MACOSX_FRAMEWORK_IDENTIFIER} + NSHumanReadableCopyright + Copyright © 2018 - Present AliceVision Contributors + CFBundleInfoDictionaryVersion + ${MACOSX_FRAMEWORK_BUNDLE_VERSION} + CFBundleName + ${MACOSX_FRAMEWORK_BUNDLE_NAME} + CFBundlePackageType + FMWK + CFBundleShortVersionString + ${MACOSX_FRAMEWORK_SHORT_VERSION_STRING} + CFBundleSupportedPlatforms + + MacOSX + + CFBundleVersion + ${MACOSX_FRAMEWORK_BUNDLE_VERSION} + + diff --git a/src/cmake/Helpers.cmake b/src/cmake/Helpers.cmake index ea07f9d293..0946e3c86a 100644 --- a/src/cmake/Helpers.cmake +++ b/src/cmake/Helpers.cmake @@ -5,7 +5,7 @@ function(alicevision_add_library library_name) set(options USE_CUDA) set(singleValues "") - set(multipleValues SOURCES PUBLIC_LINKS PRIVATE_LINKS PUBLIC_INCLUDE_DIRS PRIVATE_INCLUDE_DIRS PUBLIC_DEFINITIONS PRIVATE_DEFINITIONS) + set(multipleValues SOURCES PUBLIC_LINKS PRIVATE_LINKS PUBLIC_INCLUDE_DIRS PRIVATE_INCLUDE_DIRS PUBLIC_DEFINITIONS PRIVATE_DEFINITIONS RESOURCES) cmake_parse_arguments(LIBRARY "${options}" "${singleValues}" "${multipleValues}" ${ARGN}) @@ -111,6 +111,23 @@ function(alicevision_add_library library_name) target_compile_options(${library_name} PUBLIC "/Zc:__cplusplus") endif() + # If building Apple Frameworks, set metadata + if(APPLE AND BUILD_APPLE_FRAMEWORKS) 
+ target_sources(${library_name} PUBLIC ${LIBRARY_RESOURCES}) + set_target_properties(${library_name} PROPERTIES + INSTALL_NAME_DIR "@rpath" + FRAMEWORK TRUE + FRAMEWORK_VERSION A + MACOSX_FRAMEWORK_NAME "${library_name}" + MACOSX_FRAMEWORK_IDENTIFIER org.aliceVision.${library_name} + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "org.aliceVision.${library_name}" + MACOSX_FRAMEWORK_BUNDLE_VERSION "${ALICEVISION_VERSION_MAJOR}.${ALICEVISION_VERSION_MINOR}.${ALICEVISION_VERSION_REVISION}" + MACOSX_FRAMEWORK_SHORT_VERSION_STRING "${ALICEVISION_VERSION_MAJOR}.${ALICEVISION_VERSION_MINOR}" + RESOURCE "${LIBRARY_RESOURCES}" + MACOSX_FRAMEWORK_INFO_PLIST "${ALICEVISION_ROOT}/../src/cmake/FrameworkInfo.plist.in" + ) + endif() + install(TARGETS ${library_name} EXPORT aliceVision-targets ARCHIVE @@ -119,6 +136,10 @@ function(alicevision_add_library library_name) DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + FRAMEWORK + DESTINATION ${CMAKE_INSTALL_LIBDIR} + RESOURCE + DESTINATION ${CMAKE_INSTALL_DATADIR}/aliceVision ) endfunction() From 33af124231b05c7841fa27a71b4a1b59d50fe472 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Tue, 2 Sep 2025 18:13:57 +0200 Subject: [PATCH 05/40] FIX: Executable file names Executable file names are suffixed with their respective versions in the format "MAJOR.MINOR". This is fine as long as the executable (as on Windows) has a proper file extension. On macOS (and other Unix OSes as well), the appended version suffix could be falsely interpreted as a file extension. Change "MAJOR.MINOR" to "MAJOR_MINOR" formatting. 
Signed-off-by: Philipp Remy --- src/cmake/Helpers.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmake/Helpers.cmake b/src/cmake/Helpers.cmake index 0946e3c86a..f214fed08b 100644 --- a/src/cmake/Helpers.cmake +++ b/src/cmake/Helpers.cmake @@ -252,7 +252,7 @@ function(alicevision_add_software software_name) set_target_properties(${software_name}_exe PROPERTIES SOVERSION ${ALICEVISION_SOFTWARE_VERSION_MAJOR} - VERSION "${ALICEVISION_SOFTWARE_VERSION_MAJOR}.${ALICEVISION_SOFTWARE_VERSION_MINOR}" + VERSION "${ALICEVISION_SOFTWARE_VERSION_MAJOR}_${ALICEVISION_SOFTWARE_VERSION_MINOR}" ) install(TARGETS ${software_name}_exe From e816c995f9d3c48ea3f6590e4f2ce13369d7363f Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 12:56:34 +0200 Subject: [PATCH 06/40] FIX: Remove any CUDA logic on Apple targets CUDA has long been removed and even longer been deprecated on Apple platforms. Guard any CUDA functionality behind if(APPLE) statements by default so CUDA won't be used on Apple targets. Signed-off-by: Philipp Remy --- CMakeLists.txt | 6 +++++- src/CMakeLists.txt | 6 +++++- src/cmake/Dependencies.cmake | 4 +++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d5a41435cf..9fa5b9336d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,8 +17,12 @@ option(ALICEVISION_BUILD_DEPENDENCIES "Build all AliceVision dependencies" OFF) option(AV_BUILD_ALICEVISION "Enable building of AliceVision" ON) option(AV_EIGEN_MEMORY_ALIGNMENT "Enable Eigen memory alignment" ON) option(ALICEVISION_BUILD_TESTS "Build AliceVision tests" OFF) -option(AV_USE_CUDA "Enable CUDA" ON) option(AV_USE_OPENMP "Enable OpenMP" ON) # AppleClang now supports OpenMP, if installed as an external dependency (Homebrew, MacPorts, ...) 
+if(APPLE) + option(AV_USE_CUDA "Enable CUDA" OFF) +else() + option(AV_USE_CUDA "Enable CUDA" ON) +endif() option(BUILD_SHARED_LIBS "Build shared libraries" ON) if(APPLE AND BUILD_SHARED_LIBS) option(BUILD_APPLE_FRAMEWORKS "Create Frameworks instead of plain dynamic libraries on macOS" ON) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 38916601df..c657177232 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -67,7 +67,11 @@ trilean_option(ALICEVISION_USE_ALEMBIC "Enable Alembic I/O" AUTO) trilean_option(ALICEVISION_USE_UNCERTAINTYTE "Enable Uncertainty computation" AUTO) trilean_option(ALICEVISION_USE_ONNX "Enable ONNX Runtime" AUTO) option(ALICEVISION_USE_ONNX_GPU "Use CUDA with ONNX Runtime" ON) -trilean_option(ALICEVISION_USE_CUDA "Enable CUDA" ON) +if(APPLE) + trilean_option(ALICEVISION_USE_CUDA "Enable CUDA" OFF) +else() + trilean_option(ALICEVISION_USE_CUDA "Enable CUDA" ON) +endif() trilean_option(ALICEVISION_USE_OPENCV "Enable use of OpenCV algorithms" OFF) trilean_option(ALICEVISION_USE_OPENCV_CONTRIB "Enable use of OpenCV algorithms from extra modules" AUTO) option(ALICEVISION_USE_OCVSIFT "Add or not OpenCV SIFT in available features" OFF) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 5f3791fe6b..0699c42e9a 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -6,7 +6,9 @@ set(AV_BUILD_DEPENDENCIES_PARALLEL 1 ) set(AV_ONNX_APPLE_ARCH "arm64" CACHE STRING "Version to download OFF Apple [arm64, x86_64]") -option(AV_BUILD_CUDA "Enable building an embedded Cuda" OFF) +if(NOT APPLE) + option(AV_BUILD_CUDA "Enable building an embedded Cuda" OFF) +endif() option(AV_BUILD_ZLIB "Enable building an embedded ZLIB" OFF) option(AV_BUILD_ASSIMP "Enable building an embedded ASSIMP" ON) option(AV_BUILD_TIFF "Enable building an embedded Tiff" ON) From d19b4e5e0600a93958bb6f66581dc6b28498fda4 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 13:46:17 +0200 Subject: [PATCH 07/40] 
FIX: Dependencies: Use new behavior of policy CMP0135 Setting CMP0135 to NEW resolves a bunch of warnings and allows for proper rebuilds if the URL of a dependency changes. Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 0699c42e9a..e49894633a 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -1,6 +1,9 @@ #Build rules for all dependencies include(ExternalProject) +# Set CMP0135 to properly rebuild if URLs change +cmake_policy(SET CMP0135 NEW) + set(AV_BUILD_DEPENDENCIES_PARALLEL 1 CACHE STRING "Number of cores to use when building dependencies (0 - use the number of cores of the processor)" ) From 3c6ac8bf617e7422234ea1a86eb4cf448cd6ce4f Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 12:53:13 +0200 Subject: [PATCH 08/40] FEAT: Dependencies: Allow building an embedded OpenMP library This is mainly useful for Apple targets (and therefore only enabled for Apple targets by default) where the system compiler (aka AppleClang) supports OpenMP but does not ship the required library and headers.
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index e49894633a..f192a4ba3b 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -12,6 +12,11 @@ set(AV_ONNX_APPLE_ARCH "arm64" CACHE STRING "Version to download OFF Apple [arm6 if(NOT APPLE) option(AV_BUILD_CUDA "Enable building an embedded Cuda" OFF) endif() +if(APPLE) + option(AV_BUILD_OPENMP "Enable building an embedded OpenMP" ON) +else() + option(AV_BUILD_OPENMP "Enable building an embedded OpenMP" OFF) +endif() option(AV_BUILD_ZLIB "Enable building an embedded ZLIB" OFF) option(AV_BUILD_ASSIMP "Enable building an embedded ASSIMP" ON) option(AV_BUILD_TIFF "Enable building an embedded Tiff" ON) @@ -56,6 +61,7 @@ endif() ##########LOGGING#########"" message(STATUS "") +message(STATUS "AV_BUILD_OPENMP: ${AV_BUILD_OPENMP}") message(STATUS "AV_BUILD_CUDA: ${AV_BUILD_CUDA}") message(STATUS "AV_BUILD_ZLIB: ${AV_BUILD_ZLIB}") message(STATUS "AV_BUILD_ASSIMP: ${AV_BUILD_ASSIMP}") @@ -109,10 +115,35 @@ set(CMAKE_CORE_BUILD_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_STANDARD=20 + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} ) #### START EXTERNAL #### +if(AV_BUILD_OPENMP) + set(OPENMP_TARGET OpenMP) + ExternalProject_Add(${OPENMP_TARGET} + URL https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-20.1.8.zip + URL_HASH MD5=3b667447bfc7f17e34f91bbeab030e82 + DOWNLOAD_DIR ${BUILD_DIR}/download/${OPENMP_TARGET} + PREFIX ${BUILD_DIR} + BUILD_IN_SOURCE 0 + BUILD_ALWAYS 0 + UPDATE_COMMAND "" + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/${OPENMP_TARGET} + BINARY_DIR ${BUILD_DIR}/${OPENMP_TARGET}_build + INSTALL_DIR ${CMAKE_INSTALL_PREFIX} + CONFIGURE_COMMAND + ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} + -DCMAKE_INSTALL_PREFIX:PATH= + -DLIBOMP_ENABLE_SHARED=ON 
+ -DLIBOMP_ENABLE_STATIC=OFF + /openmp + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + ) +endif() + if(AV_BUILD_ZLIB) set(ZLIB_TARGET zlib) @@ -606,7 +637,7 @@ if(AV_BUILD_LIBRAW) -DINSTALL_CMAKE_MODULE_PATH:PATH=/cmake BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} - DEPENDS libraw_cmake ${ZLIB_TARGET} + DEPENDS libraw_cmake ${ZLIB_TARGET} ${OPENMP_TARGET} ) set(LIBRAW_CMAKE_FLAGS @@ -814,7 +845,7 @@ if(AV_BUILD_PCL) ${ZLIB_CMAKE_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} - DEPENDS ${FLANN_TARGET} ${LZ4_TARGET} ${EIGEN_TARGET} ${BOOST_TARGET} ${PNG_TARGET} ${CUDA_TARGET} ${ZLIB_TARGET} + DEPENDS ${FLANN_TARGET} ${LZ4_TARGET} ${EIGEN_TARGET} ${BOOST_TARGET} ${PNG_TARGET} ${CUDA_TARGET} ${ZLIB_TARGET} ${OPENMP_TARGET} ) set(PCL_CMAKE_FLAGS -DPCL_DIR:PATH=${CMAKE_INSTALL_PREFIX}/share/pcl-1.12/) @@ -1423,6 +1454,7 @@ if(AV_BUILD_OPENMESH) endif() set(AV_DEPS + ${OPENMP_TARGET} ${ZLIB_TARGET} ${ASSIMP_TARGET} ${GEOGRAM_TARGET} From 65ca963150dad45022d02f6a89835f413c5cc2dd Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 12:59:23 +0200 Subject: [PATCH 09/40] FIX: Dependencies: Only build PopSIFT if CUDA is available PopSIFT has a hard dependency on CUDA being available. Remove it from the target list if CUDA is disabled by the project (by setting AV_USE_CUDA to OFF). 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index f192a4ba3b..e988825021 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -23,7 +23,11 @@ option(AV_BUILD_TIFF "Enable building an embedded Tiff" ON) option(AV_BUILD_JPEG "Enable building an embedded Jpeg" ON) option(AV_BUILD_PNG "Enable building an embedded Png" ON) option(AV_BUILD_LIBRAW "Enable building an embedded libraw" ON) -option(AV_BUILD_POPSIFT "Enable building an embedded PopSift" ON) +if(AV_USE_CUDA) + option(AV_BUILD_POPSIFT "Enable building an embedded PopSift" ON) +else() + option(AV_BUILD_POPSIFT "Enable building an embedded PopSift" OFF) +endif() option(AV_BUILD_CCTAG "Enable building an embedded CCTag" ON) option(AV_BUILD_APRILTAG "Enable building an embedded AprilTag" ON) option(AV_BUILD_OPENCV "Enable building an embedded OpenCV" ON) From 38c52b128d59cb13e442032241c96a47a98d9652 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 13:01:19 +0200 Subject: [PATCH 10/40] PATCH: Dependencies: Do not build BLAS/LAPACK and SuiteSparse on Apple targets On Apple targets, optimized (=faster) implementations of BLAS/LAPACK and Sparse Solvers (since 2017) are available in Accelerate.framework (a system-provided Framework existent on every machine by default). BLAS and LAPACK are used by multiple dependencies, each of them being capable of handling Apple's Accelerate.framework. SuiteSparse is *one* possible backend for Sparse Solvers in Ceres, but it can use 'AccelerateSparse' (= Accelerate.framework) as an alternative. During testing, no performance difference was noticeable. This relieves Apple targets from needing to have a Fortran compiler installed and reduces final bundle size by ~160MB (GraphBLAS in SuiteSparse is *huge*). 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index e988825021..72c86470f0 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -32,8 +32,13 @@ option(AV_BUILD_CCTAG "Enable building an embedded CCTag" ON) option(AV_BUILD_APRILTAG "Enable building an embedded AprilTag" ON) option(AV_BUILD_OPENCV "Enable building an embedded OpenCV" ON) option(AV_BUILD_ONNXRUNTIME "Enable building an embedded ONNX runtime" ON) -option(AV_BUILD_LAPACK "Enable building an embedded Lapack" ON) -option(AV_BUILD_SUITESPARSE "Enable building an embedded SuiteSparse" ON) +if(APPLE) + option(AV_BUILD_LAPACK "Enable building an embedded Lapack" OFF) # On Darwin targets, BLAS/LAPACK is provided by Accelerate.framework + option(AV_BUILD_SUITESPARSE "Enable building an embedded SuiteSparse" OFF) # On Darwin targets, Sparse Solvers are provided by Accelerate.framework (only used by Ceres) +else() + option(AV_BUILD_LAPACK "Enable building an embedded Lapack" ON) + option(AV_BUILD_SUITESPARSE "Enable building an embedded SuiteSparse" ON) +endif() option(AV_BUILD_FFMPEG "Enable building an embedded FFMpeg" ON) option(AV_BUILD_VPX "Enable building an embedded libvpx required for ffmpeg" ON) option(AV_BUILD_COINUTILS "Enable building an embedded CoinUtils" ON) From 7f83d39c358b94087448f89f1c5d21ed6b00a596 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Mon, 22 Sep 2025 20:26:52 +0200 Subject: [PATCH 11/40] PATCH: Dependencies: Disable SuiteSparse when building an embedded Ceres on Apple targets Ceres can utilize the system Accelerate.framework instead of using SuiteSparse to enable fast Sparse Solvers. Therefore, don't require SuiteSparse on Apple platforms. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 72c86470f0..fa780ab449 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -1349,7 +1349,7 @@ if(AV_BUILD_CERES) ${CMAKE_COMMAND} ${CMAKE_CORE_BUILD_FLAGS} ${SUITESPARSE_CMAKE_FLAGS} - -DSUITESPARSE:BOOL=ON + -DSUITESPARSE:BOOL=$<IF:$<BOOL:${APPLE}>,OFF,ON> -DLAPACK:BOOL=ON ${EIGEN_CMAKE_FLAGS} -DMINIGLOG=ON From 4d887b084c71a0eb384e6a1b7814668c1c60ba0b Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 13:05:57 +0200 Subject: [PATCH 12/40] PATCH: Dependencies: Use CMAKE_OSX_ARCHITECTURES when building an embedded Geogram As we can now safely rely on CMAKE_OSX_ARCHITECTURES when determining which architecture to build on Apple targets, use CMAKE_OSX_ARCHITECTURES instead of CMAKE_SYSTEM_PROCESSOR when setting the Vorpaline Platforms. This enables cross-compiling Geogram on Apple targets from x86_64 to arm64 and vice-versa. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index fa780ab449..29f97babaa 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -232,12 +232,12 @@ if(AV_BUILD_GEOGRAM) if(WIN32) set(VORPALINE_PLATFORM_FLAGS -DVORPALINE_PLATFORM=Win-vs-dynamic-generic) elseif(APPLE) - if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") set(VORPALINE_PLATFORM_FLAGS -DVORPALINE_PLATFORM=Darwin-clang-dynamic) - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") set(VORPALINE_PLATFORM_FLAGS -DVORPALINE_PLATFORM=Darwin-aarch64-clang-dynamic) else() - message(FATAL_ERROR "Encountered unsupported CMAKE_SYSTEM_PROCESSOR value when trying to set VORPALINE_PLATFORM for Geogram! Supported architectures are x86_64 and aarch64/arm64.") + message(FATAL_ERROR "Encountered unsupported CMAKE_OSX_ARCHITECTURES value when trying to set VORPALINE_PLATFORM for Geogram! Supported architectures are x86_64 and aarch64/arm64.") endif() elseif(UNIX) # Assumes Linux if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") From 1a2020fb4fb5df5605cf27e2ac8bc534372bdc73 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Fri, 17 Oct 2025 14:32:44 +0200 Subject: [PATCH 13/40] FIX: Dependencies: Bump version of libtiff This commit introduces two changes: (1) It bumps libtiff from v4.5.0 to v4.7.1 (appears to be ABI and API compatible) (2) It switches to the CMake build system in the libtiff tree. This change was required as the configure script caused the install_name on Apple targets to be a hardcoded absolute path instead of the recommended @rpath approach, making this library completely unportable. With the new CMake build system @rpath is properly used. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 29f97babaa..aa966190ad 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -504,8 +504,8 @@ if(AV_BUILD_TIFF) set(TIFF_TARGET tiff) ExternalProject_Add(${TIFF_TARGET} - URL http://download.osgeo.org/libtiff/tiff-4.5.0.tar.gz - URL_HASH MD5=db9e220a1971acc64487f1d51a20dcaa + URL https://download.osgeo.org/libtiff/tiff-4.7.1.tar.xz + URL_HASH MD5=f1524d2d57d93e8a521c30e3a56b99e6 DOWNLOAD_DIR ${BUILD_DIR}/download/tiff PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 @@ -514,11 +514,14 @@ if(AV_BUILD_TIFF) SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/tiff BINARY_DIR ${BUILD_DIR}/tiff_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} - CONFIGURE_COMMAND /configure - --prefix= - --disable-tests - --disable-docs - --disable-tools + CONFIGURE_COMMAND + ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} + -Dtiff-tools=OFF + -Dtiff-tests=OFF + -Dtiff-docs=OFF + -DCMAKE_INSTALL_PREFIX= + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} INSTALL_COMMAND $(MAKE) install DEPENDS ${ZLIB_TARGET} From 4648eb8d751b7c1cede917d276daa7f780f7c6e9 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 14:19:28 +0200 Subject: [PATCH 14/40] FIX: Dependencies: Bump version for libPNG This commit introduces two changes: (1) Bumps the version of libPNG from 1.6.39 to 1.6.50 (appears to be ABI and API compatible), as 1.6.39 requires a non-existing fp16.h header on macOS. (2) Sets DPNG_ARM_NEON to ON, if an ARM processor is used. NEON is broadly supported on reasonably modern ARM CPUs. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index aa966190ad..85ed8b3a77 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -535,8 +535,10 @@ endif() if(AV_BUILD_PNG) # Add LibPng - if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") - set(AV_PNG_ARM_NEON OFF) + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") + # Enable ARM NEON on ARM CPUs, as it should be + # supported on any reasonably modern ARM CPU + set(AV_PNG_ARM_NEON on) else() set(AV_PNG_ARM_NEON off) endif() @@ -545,8 +547,8 @@ if(AV_BUILD_PNG) ExternalProject_Add( ${PNG_TARGET} - URL https://download.sourceforge.net/libpng/libpng-1.6.39.tar.gz - URL_HASH MD5=93b8e79a008747e70f7704f600349559 + URL https://github.com/pnggroup/libpng/archive/refs/tags/v1.6.50.tar.gz + URL_HASH MD5=4a6433f54317b8f0d4cb749c09d4eff2 DOWNLOAD_DIR ${BUILD_DIR}/download/libpng PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 From 298a07875053983f76bb8c42557fe75e2bd507ac Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 14:26:34 +0200 Subject: [PATCH 15/40] FIX: Dependencies: Bump version of libVPX Bump version of libVPX from v1.13.0 to v1.15.2 (appears to be ABI and API compatible), because v1.13.0 uses Linux specific linker flags on Apple targets. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 85ed8b3a77..251f6f3fea 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -704,7 +704,7 @@ if(AV_BUILD_FFMPEG) ExternalProject_add(${VPX_TARGET} GIT_REPOSITORY https://chromium.googlesource.com/webm/libvpx.git - GIT_TAG v1.13.0 + GIT_TAG v1.15.2 GIT_PROGRESS OFF PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 From d595dd60c8cd3334d97efadeae9c57a0ef73bdb2 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 10 Sep 2025 09:20:29 +0200 Subject: [PATCH 16/40] FIX: Dependencies: Bump ffmpeg version Bump ffmpeg version from v5.1.2 to v7.1.1. This should be fine, as OpenCV and OpenImageIO appear to be the only dependants of ffmpeg, with both having support for ffmpeg 7.X. This fixes an issue on x86_64 Linux where inline assembly failed to compile due to wrong syntax (at least with GCC 13). 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 251f6f3fea..cea8e8459d 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -720,8 +720,8 @@ if(AV_BUILD_FFMPEG) set(FFMPEG_TARGET ffmpeg) ExternalProject_add(${FFMPEG_TARGET} - URL http://ffmpeg.org/releases/ffmpeg-5.1.2.tar.bz2 - URL_HASH MD5=53ce2a391fe1db4b5ce5c43b9ea9a814 + URL https://www.ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz + URL_HASH MD5=26f2bd7d20c6c616f31d7130c88d7250 DOWNLOAD_DIR ${BUILD_DIR}/download/ffmpeg PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 From f48b07ae4583ccdad5af5e9b0e09d265983393ac Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 17 Sep 2025 18:15:54 +0200 Subject: [PATCH 17/40] PATCH: Dependencies: When fetching ONNXRuntime, match the specified OSX Architecture This patch automatically selects the correct ONNXRuntime binary, based on CMAKE_OSX_ARCHITECTURES (which is used throughout the whole CMake infrastructure) and allows for cross-compiling macOS for x86_64 and vice-versa. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index cea8e8459d..f77f403924 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -7,7 +7,7 @@ cmake_policy(SET CMP0135 NEW) set(AV_BUILD_DEPENDENCIES_PARALLEL 1 CACHE STRING "Number of cores to use when building dependencies (0 - use the number of cores of the processor)" ) -set(AV_ONNX_APPLE_ARCH "arm64" CACHE STRING "Version to download OFF Apple [arm64, x86_64]") +set(AV_ONNX_APPLE_ARCH ${CMAKE_OSX_ARCHITECTURES} CACHE STRING "Version to download OFF Apple [arm64, x86_64]") if(NOT APPLE) option(AV_BUILD_CUDA "Enable building an embedded Cuda" OFF) From 4656cf9c1df0e905d17f6bf25ba78777b127906c Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 14:30:54 +0200 Subject: [PATCH 18/40] FIX: Dependencies: FLANN pkgconfig directories Currently, pkgconfig only looks in */lib64/* directories, causing FLANN to not find lz4 on Apple targets. This might work on Linux, but on Apple platforms the differentiation between lib64 and lib does not exist by default. This commit allows pkgconfig to look in */lib/* as well, as this is the default installation directory on macOS. The path is appended, so on Linux targets */lib64/* should still take precedence. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index f77f403924..a0fec0a38f 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -781,7 +781,7 @@ if(AV_BUILD_FLANN) BINARY_DIR ${BUILD_DIR}/${FLANN_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND - ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/lib64/pkgconfig/ + ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH="${CMAKE_INSTALL_PREFIX}/lib64/pkgconfig:${CMAKE_INSTALL_PREFIX}/lib/pkgconfig" ${CMAKE_COMMAND} ${CMAKE_CORE_BUILD_FLAGS} -DBUILD_C_BINDINGS:BOOL=OFF From cbaa3af71a7ca153c87086bab142b4ac50394f7a Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 19:28:17 +0200 Subject: [PATCH 19/40] FEAT: Dependencies: Allow building an embedded XercesC library XercesC is a required hard dependency of libE57Format and it cannot build as an embedded project. Allow the user to rely on building a XercesC library as a dependency. Because it is *only* required by libE57Format, make it a cmake_dependent_option() based on whether libE57Format should be built. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index a0fec0a38f..deb2c3abe1 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -63,6 +63,11 @@ option(AV_BUILD_CERES "Enable building an embedded Ceres library" ON) option(AV_BUILD_SWIG "Enable building an embedded SWIG library" ON) option(AV_BUILD_PYBIND11 "Enable building of pybind11 library" OFF) option(AV_BUILD_OPENMESH "Enable building an embedded OpenMesh library" ON) +if(AV_BUILD_E57FORMAT) + option(AV_BUILD_XERCESC "Enable building an embedded XercesC library" ON) +else() + option(AV_BUILD_XERCESC "Enable building an embedded XercesC library" OFF) +endif() if(AV_BUILD_DEPENDENCIES_PARALLEL EQUAL 0) cmake_host_system_information(RESULT AV_BUILD_DEPENDENCIES_PARALLEL QUERY NUMBER_OF_LOGICAL_CORES) @@ -112,6 +117,7 @@ message(STATUS "AV_BUILD_OPENIMAGEIO ${AV_BUILD_OPENIMAGEIO}") message(STATUS "AV_BUILD_CERES ${AV_BUILD_CERES}") message(STATUS "AV_BUILD_SWIG ${AV_BUILD_SWIG}") message(STATUS "AV_BUILD_OPENMESH ${AV_BUILD_OPENMESH}") +message(STATUS "AV_BUILD_XERCESC ${AV_BUILD_XERCESC}") message(STATUS "AV_BUILD_DEPENDENCIES_PARALLEL: ${AV_BUILD_DEPENDENCIES_PARALLEL}") ##########END LOGGING#########" @@ -1416,6 +1422,29 @@ if(AV_BUILD_SWIG) ) endif() +if(AV_BUILD_XERCESC) + set(XERCESC_TARGET XercesC) + + ExternalProject_add(${XERCESC_TARGET} + URL https://dlcdn.apache.org//xerces/c/3/sources/xerces-c-3.3.0.tar.gz + URL_HASH MD5=1b7778f47d5eab1644f59c87ed06ac19 + DOWNLOAD_DIR ${BUILD_DIR}/download/${XERCESC_TARGET} + PREFIX ${BUILD_DIR} + BUILD_IN_SOURCE 0 + BUILD_ALWAYS 0 + UPDATE_COMMAND "" + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/${XERCESC_TARGET} + BINARY_DIR ${BUILD_DIR}/${XERCESC_TARGET}_build + INSTALL_DIR ${CMAKE_INSTALL_PREFIX} + CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} + 
-DCMAKE_INSTALL_PREFIX:PATH= + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + ) + + set(XERCESC_CMAKE_FLAGS -DXercesC_DIR:PATH=${CMAKE_INSTALL_PREFIX}/lib/XercesC) +endif() + if(AV_BUILD_E57FORMAT) # Add libE57Format set(E57FORMAT_TARGET E57Format) @@ -1434,8 +1463,10 @@ if(AV_BUILD_E57FORMAT) CONFIGURE_COMMAND ${CMAKE_COMMAND} -DE57_BUILD_TEST:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=ON + ${XERCESC_CMAKE_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + DEPENDS ${XERCESC_TARGET} ) set(E57FORMAT_CMAKE_FLAGS -DE57FORMAT_DIR:PATH=${CMAKE_INSTALL_PREFIX}/share/E57Format) From 26740c40a55d4f0f146d0c56ec7b22bec9ea3bf2 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 19:35:13 +0200 Subject: [PATCH 20/40] FIX: Dependencies: Bump version for libE57Format This commit introduces two changes: (1) Bump version from v3.1.1 to v3.2.0 (appears to be ABI and API compatible). v3.1.1 caused compilation errors on Apple targets because of missing documentation paragraphs. The project internally sets -Wall and -Werror, so these become compilation errors. (2) Explicitly tell libE57Format to install its CMake configuration files to /share/E57Format. These files are not installed to the standard location by using GNUInstallDirs. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index deb2c3abe1..cd711b2ecb 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -1451,7 +1451,7 @@ if(AV_BUILD_E57FORMAT) ExternalProject_add(${E57FORMAT_TARGET} GIT_REPOSITORY https://github.com/asmaloney/libE57Format.git - GIT_TAG v3.1.1 + GIT_TAG v3.2.0 DOWNLOAD_DIR ${BUILD_DIR}/download/${E57FORMAT_TARGET} PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 @@ -1464,6 +1464,7 @@ if(AV_BUILD_E57FORMAT) -DE57_BUILD_TEST:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=ON ${XERCESC_CMAKE_FLAGS} + -DE57_INSTALL_CMAKEDIR=/share/E57Format -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} DEPENDS ${XERCESC_TARGET} From 68164b1155152d62aae454cc4718ef2d8539fee6 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 20:33:08 +0200 Subject: [PATCH 21/40] FIX: Dependencies: Bump version of pcl Bump version of pcl from v1.13.0 to v1.15.1 (appears to be API and ABI compatible). This fixes compilation errors on Apple targets. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index cd711b2ecb..5b63770a00 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -838,8 +838,8 @@ if(AV_BUILD_PCL) set(PCL_TARGET pcl) ExternalProject_Add(${PCL_TARGET} - URL https://github.com/PointCloudLibrary/pcl/archive/refs/tags/pcl-1.13.0.tar.gz - URL_HASH MD5=987a5f6e440407a2bcae10c1022568b0 + URL https://github.com/PointCloudLibrary/pcl/archive/refs/tags/pcl-1.15.1.tar.gz + URL_HASH MD5=e29ad2147fbe2109233e2b3a0254dbab DOWNLOAD_DIR ${BUILD_DIR}/download/${PCL_TARGET} PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 From 719c3056a9243c7d5b610092f41d08c7f109d848 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 24 Sep 2025 13:55:35 +0200 Subject: [PATCH 22/40] FIX: Dependencies: Bump version of pxr and dynamically set Python executable This commit introduces two changes: (1) Bump version of pxr from v23.05 to v25.08. This fixes compilation errors with newer Boost versions on macOS. (2) Explicitly set the Python executable to "python3" (as system-installed on macOS), and keep "python" on other targets. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 5b63770a00..e4da732241 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -874,9 +874,15 @@ endif() if(AV_BUILD_USD) set(USD_TARGET pxr) + if(APPLE) + set(PYTHON_EXECUTABLE python3) + else() + set(PYTHON_EXECUTABLE python) + endif() + ExternalProject_Add(${USD_TARGET} GIT_REPOSITORY https://github.com/PixarAnimationStudios/USD.git - GIT_TAG v23.05 + GIT_TAG v25.08 PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 BUILD_ALWAYS 0 @@ -886,7 +892,7 @@ if(AV_BUILD_USD) SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/usd BINARY_DIR ${BUILD_DIR}/usd_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} - BUILD_COMMAND python ${CMAKE_CURRENT_BINARY_DIR}/usd/build_scripts/build_usd.py + BUILD_COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/usd/build_scripts/build_usd.py --build-shared --no-examples --no-tools From e068669210650f02e84fdec051396bde2e84f186 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 24 Sep 2025 13:56:13 +0200 Subject: [PATCH 23/40] FEAT: Dependencies: Allows building an embedded PCRE2 library This commit introduces the possibility to build an embedded PCRE2 library. This is required by SWIG, so only build it if SWIG should be build itself. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index e4da732241..5d24604431 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -68,6 +68,11 @@ if(AV_BUILD_E57FORMAT) else() option(AV_BUILD_XERCESC "Enable building an embedded XercesC library" OFF) endif() +if(AV_BUILD_SWIG) + option(AV_BUILD_PCRE2 "Enable building an embedded PCR2 library" ON) +else() + option(AV_BUILD_PCRE2 "Enable building an embedded PCR2 library" OFF) +endif() if(AV_BUILD_DEPENDENCIES_PARALLEL EQUAL 0) cmake_host_system_information(RESULT AV_BUILD_DEPENDENCIES_PARALLEL QUERY NUMBER_OF_LOGICAL_CORES) @@ -118,6 +123,7 @@ message(STATUS "AV_BUILD_CERES ${AV_BUILD_CERES}") message(STATUS "AV_BUILD_SWIG ${AV_BUILD_SWIG}") message(STATUS "AV_BUILD_OPENMESH ${AV_BUILD_OPENMESH}") message(STATUS "AV_BUILD_XERCESC ${AV_BUILD_XERCESC}") +message(STATUS "AV_BUILD_PCRE2 ${AV_BUILD_PCRE2}") message(STATUS "AV_BUILD_DEPENDENCIES_PARALLEL: ${AV_BUILD_DEPENDENCIES_PARALLEL}") ##########END LOGGING#########" @@ -1403,6 +1409,30 @@ if(AV_BUILD_LEMON) set(LEMON_CMAKE_FLAGS -DLEMON_DIR:PATH=${CMAKE_INSTALL_PREFIX}/share/lemon/cmake) endif() +if(AV_BUILD_PCRE2) + set(PCRE2_TARGET PCRE2) + + ExternalProject_Add(${PCRE2_TARGET} + URL https://github.com/PCRE2Project/pcre2/archive/refs/tags/pcre2-10.46.tar.gz + URL_HASH MD5=d23a93c740f6e53833835493835a769b + DOWNLOAD_DIR ${BUILD_DIR}/download/${PCRE2_TARGET} + PREFIX ${BUILD_DIR} + BUILD_IN_SOURCE 0 + BUILD_ALWAYS 0 + UPDATE_COMMAND "" + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/${PCRE2_TARGET} + BINARY_DIR ${BUILD_DIR}/${PCRE2_TARGET}_build + INSTALL_DIR ${CMAKE_INSTALL_PREFIX} + CONFIGURE_COMMAND ${CMAKE_COMMAND} + -DCMAKE_INSTALL_PREFIX:PATH= + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + ) + + set(PCRE2_CMAKE_FLAGS + -DPCRE2_DIR=${CMAKE_INSTALL_PREFIX}/lib/cmake/pcre2 + 
) +endif() + if(AV_BUILD_SWIG) set(SWIG_TARGET SWIG) @@ -1418,8 +1448,10 @@ if(AV_BUILD_SWIG) BINARY_DIR ${BUILD_DIR}/${SWIG_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${PCRE2_CMAKE_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + DEPENDS ${PCRE2_TARGET} ) set(SWIG_CMAKE_FLAGS From 91bd549c044a3e502e6a52457f7991cb8e71ef94 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 24 Sep 2025 18:06:13 +0200 Subject: [PATCH 24/40] PATCH: Dependencies: Build an embedded pybind11 if OpenImageIO is build As we currently set -DUSE_PYTHON=ON when building OpenImageIO, it has a hard dependency on pybind11. Building this currently defaults to OFF. If no external pybind11 is found, this will cause a CMake error. As the project should be able to be build without any external dependencies in its standard configuration, build an embedded pybind11 by default. Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 5d24604431..df0fab0cc1 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -61,7 +61,11 @@ option(AV_BUILD_OPENIMAGEIO "Enable building an embedded OpenImageIO library" ON option(AV_BUILD_BOOST "Enable building an embedded Boost library" ON) option(AV_BUILD_CERES "Enable building an embedded Ceres library" ON) option(AV_BUILD_SWIG "Enable building an embedded SWIG library" ON) -option(AV_BUILD_PYBIND11 "Enable building of pybind11 library" OFF) +if(AV_BUILD_OPENIMAGEIO) + option(AV_BUILD_PYBIND11 "Enable building of pybind11 library" ON) +else() + option(AV_BUILD_PYBIND11 "Enable building of pybind11 library" OFF) +endif() option(AV_BUILD_OPENMESH "Enable building an embedded OpenMesh library" ON) if(AV_BUILD_E57FORMAT) option(AV_BUILD_XERCESC "Enable building an embedded XercesC library" ON) From 
063258fbcf2a56343900d7599e603bfc0157835d Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 21:54:13 +0200 Subject: [PATCH 25/40] FIX: Dependencies: Explicitly set the install name directory of libVPX to be @rpath If ALICEVISION_USE_RPATH is set, tell libVPX to set its install name directory to @rpath using environment LDFLAGS, as the autoconf script does not have a native switch for that. Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index df0fab0cc1..d046e71dd5 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -718,6 +718,12 @@ if(AV_BUILD_FFMPEG) if(AV_BUILD_VPX) set(VPX_TARGET libvpx) + if(APPLE AND ALICEVISION_USE_RPATH) + set(VPX_APPLE_LDFLAGS ${CMAKE_COMMAND} -E env LDFLAGS=-Wl,-install_name,@rpath/libvpx.dylib) + else() + set(VPX_APPLE_LDFLAGS) + endif() + ExternalProject_add(${VPX_TARGET} GIT_REPOSITORY https://chromium.googlesource.com/webm/libvpx.git GIT_TAG v1.15.2 @@ -727,7 +733,9 @@ if(AV_BUILD_FFMPEG) BUILD_ALWAYS 0 UPDATE_COMMAND "" INSTALL_DIR ${CMAKE_INSTALL_PREFIX} - CONFIGURE_COMMAND /configure --prefix= + CONFIGURE_COMMAND + ${VPX_APPLE_LDFLAGS} + /configure --prefix= --enable-shared --disable-static --disable-examples BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} ) From 2a1f71b839a1f65bb77ddcdd499c76cdb437ee01 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 17 Sep 2025 14:52:33 +0200 Subject: [PATCH 26/40] FIX: Dependencies: Workaround in libVPX for Darwin > 24 This commit fixes a build logic issue in libVPX, where the configure script is only able to detect Darwin up to version 24.X.X (macOS Sequoia). macOS 26 Tahoe is Darwin 25.X.X, causing the configure script to fall back to generic-gnu, which assumes a Linux host OS. As a workaround in CMake, we detect (using CMAKE_SYSTEM_VERSION) if the host Darwin is > 24. 
In that case, we explicitly set the newest available toolchain for libVPX, which logically is [x86_64/arm64]-darwin24-gcc. To be removed when libVPX either changes its detection logic or a newer ABI- and API-compatible version with proper Darwin > 24 support is released. Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index d046e71dd5..8ff9bfc35d 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -718,6 +718,25 @@ if(AV_BUILD_FFMPEG) if(AV_BUILD_VPX) set(VPX_TARGET libvpx) + # This is currently required until libVPX properly supports macOS 26 + # Tahoe. + # The configure.sh script only detects Darwin versions up to 24.X.X, + # but macOS 26 Tahoe is version 25.X.X, causing the configure script + # to fall back to generic-gnu, which assumes Linux and therefore pulls + # in the wrong linker flags. + # If we detect macOS 26 or higher, we explicitly set the toolchain to + # be (x86_64/arm64)-darwin24-gcc. For this, use CMAKE_SYSTEM_VERSION. 
+ set(VPX_TOOLCHAIN_FLAG) + if(APPLE) + if(CMAKE_SYSTEM_VERSION VERSION_GREATER_EQUAL 25) # Tahoe and later + if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(VPX_TOOLCHAIN_FLAG --target=x86_64-darwin24-gcc) + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(VPX_TOOLCHAIN_FLAG --target=arm64-darwin24-gcc) + endif() + endif() + endif() + if(APPLE AND ALICEVISION_USE_RPATH) set(VPX_APPLE_LDFLAGS ${CMAKE_COMMAND} -E env LDFLAGS=-Wl,-install_name,@rpath/libvpx.dylib) else() @@ -735,7 +754,7 @@ if(AV_BUILD_FFMPEG) INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${VPX_APPLE_LDFLAGS} - /configure --prefix= + /configure --prefix= ${VPX_TOOLCHAIN_FLAG} --enable-shared --disable-static --disable-examples BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} ) From 540057b4430bbb3f3201eabbca8504525e24a4cc Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 21:55:51 +0200 Subject: [PATCH 27/40] FIX: Dependencies: Explicitly set ffmpegs install name directory to @rpath If ALICEVISION_USE_RPATH is set, explicitly set the install name directory for the ffmpeg libraries to @rpath using the available b2 CLI argument. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 8ff9bfc35d..67116446e9 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -762,6 +762,12 @@ if(AV_BUILD_FFMPEG) set(FFMPEG_TARGET ffmpeg) + if(APPLE AND ALICEVISION_USE_RPATH) + set(FFMPEG_APPLE_LDFLAGS --install-name-dir=@rpath) + else() + set(FFMPEG_APPLE_LDFLAGS) + endif() + ExternalProject_add(${FFMPEG_TARGET} URL https://www.ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz URL_HASH MD5=26f2bd7d20c6c616f31d7130c88d7250 @@ -781,6 +787,7 @@ if(AV_BUILD_FFMPEG) --disable-gpl --enable-nonfree --enable-libvpx + ${FFMPEG_APPLE_LDFLAGS} BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} DEPENDS ${VPX_TARGET} ) From 7f591363ce39f76a4e7b4a60f39a934794a3baa6 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 24 Sep 2025 17:50:13 +0200 Subject: [PATCH 28/40] PATCH: Dependencies: Do not build tests for Ceres As for other dependencies, do not build the tests for Ceres (as they are not needed and not tested). Besides from saving compilation time, this allows for cross-compiling Ceres on macOS. Otherwise, Ceres will build an embedded glog but does not pass through CMAKE_OSX_ARCHITECTURES, causing linking errors because of mismatched architectures. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 67116446e9..b3c41910e3 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -1415,6 +1415,7 @@ if(AV_BUILD_CERES) ${EIGEN_CMAKE_FLAGS} -DMINIGLOG=ON -DBUILD_EXAMPLES:BOOL=OFF + -DBUILD_TESTING:BOOL=OFF -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} From a5a38158a3f85abbf6a711496a2a7ea27e4aced9 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 24 Sep 2025 14:02:14 +0200 Subject: [PATCH 29/40] PATCH: Dependencies: Add rpath handling to CMAKE_CORE_BUILD_FLAGS If the user explicitly disables rpath handling when building AliceVision by setting ALICEVISION_USE_RPATH=OFF, respect this in the embedded dependencies. This mainly fixes some older dependencies, where this is not set to ON by default and therefore results in a library with absolute install names (which drastically reduces portability of the resulting binary). 
Signed-off-by: Philipp Remy --- CMakeLists.txt | 1 + src/cmake/Dependencies.cmake | 1 + 2 files changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fa5b9336d..7db8e0ed6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ if(APPLE AND NOT CMAKE_OSX_ARCHITECTURES) endif() option(ALICEVISION_BUILD_DEPENDENCIES "Build all AliceVision dependencies" OFF) +option(ALICEVISION_USE_RPATH "Add RPATH on software with relative paths to libraries" ON) option(AV_BUILD_ALICEVISION "Enable building of AliceVision" ON) option(AV_EIGEN_MEMORY_ALIGNMENT "Enable Eigen memory alignment" ON) option(ALICEVISION_BUILD_TESTS "Build AliceVision tests" OFF) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index b3c41910e3..9f5bf173e6 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -141,6 +141,7 @@ set(CMAKE_CORE_BUILD_FLAGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_STANDARD=20 -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + -DCMAKE_MACOSX_RPATH=${ALICEVISION_USE_RPATH} ) From b705105baf6da8a6b53a0bc3baaf70a6bf31d356 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 24 Sep 2025 17:47:51 +0200 Subject: [PATCH 30/40] FIX: Dependencies: Ensure that all dependencies are built with CMAKE_CORE_BUILD_FLAGS Some dependencies were missing these flags. This ensures a predictable build with matching options for all dependencies. FIXME: LEMON currently does not support C++20. Therefore, CMAKE_CORE_BUILD_FLAGS is not applied to LEMON. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 9f5bf173e6..04b75832f9 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -379,6 +379,7 @@ if(AV_BUILD_EIGEN) BINARY_DIR ${BUILD_DIR}/eigen_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} -DCMAKE_CXX_STANDARD=20 ${EIGEN_CMAKE_ALIGNMENT_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= @@ -1463,7 +1464,7 @@ if(AV_BUILD_PCRE2) SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/${PCRE2_TARGET} BINARY_DIR ${BUILD_DIR}/${PCRE2_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} - CONFIGURE_COMMAND ${CMAKE_COMMAND} + CONFIGURE_COMMAND ${CMAKE_COMMAND} ${CMAKE_CORE_BUILD_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} ) @@ -1488,6 +1489,7 @@ if(AV_BUILD_SWIG) BINARY_DIR ${BUILD_DIR}/${SWIG_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} ${PCRE2_CMAKE_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} @@ -1539,6 +1541,7 @@ if(AV_BUILD_E57FORMAT) BINARY_DIR ${BUILD_DIR}/${E57FORMAT_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} -DE57_BUILD_TEST:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=ON ${XERCESC_CMAKE_FLAGS} @@ -1567,6 +1570,7 @@ if(AV_BUILD_OPENMESH) BINARY_DIR ${BUILD_DIR}/${OPENMESH_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH= -DBUILD_APPS=OFF From 5a52eed560cb3ab9a0dea83ff479cfce169d0d11 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 20:38:00 +0200 Subject: [PATCH 31/40] FIX: AliceVision configure variables Explicitly pass through top-level CMake variables when 
building AliceVision as an external project in Dependencies.cmake. This change allows for the following configure step to respect any choices the user made at the top level. FIXME: We should ideally only have one place to set the options - the very top-level CMakeLists.txt. But that would be a breaking change, as we would need to remove all "AV_" prefixed variables and replace them with the src/CMakeLists.txt "ALICEVISION_" prefix style. Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 146 +++++++++++++++++++++++++++++++---- 1 file changed, 133 insertions(+), 13 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 04b75832f9..7869d3c0a0 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -1620,6 +1620,119 @@ set(AV_DEPS ) if(AV_BUILD_ALICEVISION) + + # Build the required flags to pass through + set(AV_BUILD_FLAGS + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DALICEVISION_USE_RPATH=${ALICEVISION_USE_RPATH} + -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} + -DBUILD_APPLE_FRAMEWORKS=${BUILD_APPLE_FRAMEWORKS} + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + -DALICEVISION_ROOT=${ALICEVISION_ROOT} + ) + set(AV_COMPONENT_FLAGS + -DAV_BUILD_ALICEVISION=${AV_BUILD_ALICEVISION} + -DALICEVISION_BUILD_DOC=OFF + -DALICEVISION_BUILD_TESTS=${ALICEVISION_BUILD_TESTS} + -DALICEVISION_INSTALL_MESHROOM_PLUGIN=${ALICEVISION_INSTALL_MESHROOM_PLUGIN} + ) + set(AV_TOPLEVEL_FLAGS + -DAV_EIGEN_MEMORY_ALIGNMENT=${AV_EIGEN_MEMORY_ALIGNMENT} + -DALICEVISION_USE_CCTAG=${AV_BUILD_CCTAG} + -DALICEVISION_USE_APRILTAG=${AV_BUILD_APRILTAG} + -DALICEVISION_USE_OPENCV=${AV_BUILD_OPENCV} + -DALICEVISION_USE_POPSIFT=${AV_BUILD_POPSIFT} + -DALICEVISION_USE_CUDA=${AV_USE_CUDA} + ) + set(AV_MISC_FLAGS + -DMINIGLOG=ON + ) + + # FIXME: Ideally we should move all options to the top-level CMakeLists.txt + # file. 
However, this approach provides backwards-compatibility and sets + # the flags as always, but enables the user to override them manually. + # As the last option specified takes precedence on the CLI, we only pass + # these flags if they are defined (= the user specified them on the CLI). + # The following flags are only added if specified for now + if(DEFINED ALICEVISION_REMOVE_ABSOLUTE) + list(APPEND AV_BUILD_FLAGS -DALICEVISION_REMOVE_ABSOLUTE=${ALICEVISION_REMOVE_ABSOLUTE}) + endif() + if(DEFINED ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE) + list(APPEND AV_MISC_FLAGS -DALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE=${ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE}) + endif() + if(DEFINED ALICEVISION_USE_MESHSDFILTER) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_MESHSDFILTER=${ALICEVISION_USE_MESHSDFILTER}) + endif() + if(DEFINED ALICEVISION_USE_OCVSIFT) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_OCVSIFT=${ALICEVISION_USE_OCVSIFT}) + endif() + if(DEFINED ALICEVISION_USE_OPENCV_CONTRIB) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_OPENCV_CONTRIB=${ALICEVISION_USE_OPENCV_CONTRIB}) + endif() + if(DEFINED ALICEVISION_USE_OPENCV) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_OPENCV=${ALICEVISION_USE_OPENCV}) + endif() + if(DEFINED ALICEVISION_USE_ONNX_GPU) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_ONNX_GPU=${ALICEVISION_USE_ONNX_GPU}) + endif() + if(DEFINED ALICEVISION_USE_ONNX) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_ONNX=${ALICEVISION_USE_ONNX}) + endif() + if(DEFINED ALICEVISION_USE_UNCERTAINTYTE) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_UNCERTAINTYTE=${ALICEVISION_USE_UNCERTAINTYTE}) + endif() + if(DEFINED ALICEVISION_USE_ALEMBIC) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_ALEMBIC=${ALICEVISION_USE_ALEMBIC}) + endif() + if(DEFINED ALICEVISION_USE_POPSIFT) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_POPSIFT=${ALICEVISION_USE_POPSIFT}) + endif() + if(DEFINED ALICEVISION_USE_APRILTAG) + list(APPEND 
AV_TOPLEVEL_FLAGS -DALICEVISION_USE_APRILTAG=${ALICEVISION_USE_APRILTAG}) + endif() + if(DEFINED ALICEVISION_USE_CCTAG) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_CCTAG=${ALICEVISION_USE_CCTAG}) + endif() + if(DEFINED ALICEVISION_USE_OPENMP) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_OPENMP=${ALICEVISION_USE_OPENMP}) + endif() + if(DEFINED ALICEVISION_USE_CUDA) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_CUDA=${ALICEVISION_USE_CUDA}) + endif() + if(DEFINED ALICEVISION_BUILD_LIDAR) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_LIDAR=${ALICEVISION_BUILD_LIDAR}) + endif() + if(DEFINED ALICEVISION_BUILD_SWIG_BINDING) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_SWIG_BINDING=${ALICEVISION_BUILD_SWIG_BINDING}) + endif() + if(DEFINED ALICEVISION_BUILD_DOC) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_DOC=${ALICEVISION_BUILD_DOC}) + endif() + if(DEFINED ALICEVISION_BUILD_COVERAGE) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_COVERAGE=${ALICEVISION_BUILD_COVERAGE}) + endif() + if(DEFINED ALICEVISION_BUILD_SOFTWARE) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_SOFTWARE=${ALICEVISION_BUILD_SOFTWARE}) + endif() + if(DEFINED ALICEVISION_BUILD_PANORAMA) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_PANORAMA=${ALICEVISION_BUILD_PANORAMA}) + endif() + if(DEFINED ALICEVISION_BUILD_PHOTOMETRICSTEREO) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_PHOTOMETRICSTEREO=${ALICEVISION_BUILD_PHOTOMETRICSTEREO}) + endif() + if(DEFINED ALICEVISION_BUILD_SEGMENTATION) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_SEGMENTATION=${ALICEVISION_BUILD_SEGMENTATION}) + endif() + if(DEFINED ALICEVISION_BUILD_HDR) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_HDR=${ALICEVISION_BUILD_HDR}) + endif() + if(DEFINED ALICEVISION_BUILD_MVS) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_MVS=${ALICEVISION_BUILD_MVS}) + endif() + if(DEFINED ALICEVISION_BUILD_SFM) + list(APPEND AV_TOPLEVEL_FLAGS 
-DALICEVISION_BUILD_SFM=${ALICEVISION_BUILD_SFM}) + endif() + # Build Alicevision super build mode ExternalProject_Add(aliceVision PREFIX ${CMAKE_CURRENT_SOURCE_DIR} @@ -1629,18 +1742,20 @@ if(AV_BUILD_ALICEVISION) BINARY_DIR ${BUILD_DIR}/aliceVision_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_SHARED_LIBS:BOOL=ON -DTARGET_ARCHITECTURE=core - -DALICEVISION_ROOT=${ALICEVISION_ROOT} - -DALICEVISION_USE_ALEMBIC=ON - -DMINIGLOG=ON - -DALICEVISION_USE_CCTAG=${AV_BUILD_CCTAG} - -DALICEVISION_USE_APRILTAG=${AV_BUILD_APRILTAG} - -DALICEVISION_USE_OPENCV=${AV_BUILD_OPENCV} - -DALICEVISION_USE_POPSIFT=${AV_BUILD_POPSIFT} - -DALICEVISION_USE_CUDA=${AV_USE_CUDA} - -DALICEVISION_BUILD_SWIG_BINDING=${AV_USE_SWIG} - -DALICEVISION_BUILD_DOC=OFF + # Build Flags + ${AV_BUILD_FLAGS} + + # Misc Flags + ${AV_MISC_FLAGS} + + # Top-level flags + ${AV_TOPLEVEL_FLAGS} + + # Component Flags + ${AV_COMPONENT_FLAGS} + + # Dependency flags ${ZLIB_CMAKE_FLAGS} ${ASSIMP_CMAKE_FLAGS} ${EIGEN_CMAKE_FLAGS} @@ -1657,7 +1772,9 @@ if(AV_BUILD_ALICEVISION) ${CCTAG_CMAKE_FLAGS} ${APRILTAG_CMAKE_FLAGS} ${EXPAT_CMAKE_FLAGS} - ${COINUTILS_CMAKE_FLAGS} ${OSI_CMAKE_FLAGS} ${CLP_CMAKE_FLAGS} + ${COINUTILS_CMAKE_FLAGS} + ${OSI_CMAKE_FLAGS} + ${CLP_CMAKE_FLAGS} ${LZ4_CMAKE_FLAGS} ${FLANN_CMAKE_FLAGS} ${NANOFLANN_CMAKE_FLAGS} @@ -1667,7 +1784,10 @@ if(AV_BUILD_ALICEVISION) ${E57FORMAT_CMAKE_FLAGS} ${OPENMESH_CMAKE_FLAGS} - -DCMAKE_INSTALL_PREFIX:PATH= + # Install Prefix and Source Folder + -DCMAKE_INSTALL_PREFIX:PATH= + + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} DEPENDS ${AV_DEPS} ) endif() From 2bcc31f84896c1c5d46b9d4d62b98db0b6f17d7b Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 20:45:01 +0200 Subject: [PATCH 32/40] FIX: Utils: Remove wrong link from aliceVision_selectConnectedViews aliceVision_selectConnectedViews linked to aliceVision_depthMap although it does not use the library. 
This caused a linker issue on macOS, as the target was not guarded behind an ALICEVISION_HAVE_CUDA check. Remove the unnecessary link completely. Signed-off-by: Philipp Remy --- src/software/utils/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/software/utils/CMakeLists.txt b/src/software/utils/CMakeLists.txt index c6d9ed9161..2e9527b0ba 100644 --- a/src/software/utils/CMakeLists.txt +++ b/src/software/utils/CMakeLists.txt @@ -615,7 +615,6 @@ if (ALICEVISION_BUILD_MVS) aliceVision_cmdline aliceVision_mvsData aliceVision_mvsUtils - aliceVision_depthMap aliceVision_sfmData aliceVision_sfmDataIO Boost::program_options From 80e9ecc587da969eaa430c96dcff01220d83054d Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sun, 7 Sep 2025 21:48:12 +0200 Subject: [PATCH 33/40] FIX: Use common rpaths on Apple targets Adapt the install rpaths on Apple targets to accommodate the typical library paths. This includes the standard Unix paths (lib, next-to-loading-binary) and the Apple bundle structures (Libraries, Frameworks). Add every possibility for @loader_path (for dynamic libraries) and @executable_path (for executables) to ensure proper lookup based on the context. 
Signed-off-by: Philipp Remy --- src/CMakeLists.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c657177232..343b6a8463 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -124,7 +124,17 @@ endif() if (ALICEVISION_USE_RPATH) if (APPLE) set(CMAKE_MACOSX_RPATH 1) - set(CMAKE_INSTALL_RPATH "@loader_path/../${CMAKE_INSTALL_LIBDIR};@loader_path") + set(CMAKE_INSTALL_RPATH + "@loader_path/../${CMAKE_INSTALL_LIBDIR}" + "@loader_path" + "@loader_path/../lib" + "@loader_path/../Libraries" + "@loader_path/../Frameworks" + "@executable_path/../${CMAKE_INSTALL_LIBDIR}" + "@executable_path/../lib" + "@executable_path/../Libraries" + "@executable_path/../Frameworks" + ) elseif (UNIX) set(CMAKE_INSTALL_RPATH "\\$ORIGIN/../${CMAKE_INSTALL_LIBDIR};\\$ORIGIN") endif() From 2eda1c24aeb73b46c583f074ccf37eeff046931b Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Sat, 18 Oct 2025 14:49:46 +0200 Subject: [PATCH 34/40] FEAT: Allow cross-compilation on macOS This commit introduces several changes to the general build infrastructure. Given CMAKE_OSX_ARCHITECTURES, it is now possible to cross-compile the whole project and its dependencies from x86_64 to aarch64 and vice versa on macOS. This is easily achievable because Apple can compile for both architectures with the same sysroot. Choosing the correct flags depends on the underlying build system: (1) CMake: In CMake, one can just set CMAKE_OSX_ARCHITECTURES to either arm64 or x86_64. (2) Autoconf/Automake: Usually involves setting the --host flag and passing additional CFLAGS, CXXFLAGS and LDFLAGS (with "-arch x86_64/arm64"). (3) libVPX: A target must be specified using the --target flag (includes the major Darwin version number). (4) Boost: The architecture for b2 must be set using the "architecture=X" flag - additionally pass CFLAGS, CXXFLAGS and LDFLAGS (with "-arch x86_64/arm64"). 
(5) ffmpeg: Must pass the additional CFLAGS, CXXFLAGS and LDFLAGS (with "-arch x86_64/arm64") and the --arch (with x86_64 or aarch64(!)), --enable-cross-compile and --sysroot (xcrun --sdk macosx --show-sdk-path) flags. Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 67 +++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 7869d3c0a0..c53531492d 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -145,6 +145,30 @@ set(CMAKE_CORE_BUILD_FLAGS ) +# Set additional reusable flags for cross-compiling on macOS +# Supports x86_64/arm64 cross-compilation +if(APPLE) + # Get the current sysroot + execute_process(COMMAND xcrun --sdk macosx --show-sdk-path + OUTPUT_VARIABLE APPLE_SYSROOT + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(APPLE_ARCH_FLAGS -arch\ arm64) + set(APPLE_ARCH_CFLAGS CFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_CXXFLAGS CXXFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_LDFLAGS LDFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_HOST --host=aarch64-apple-darwin) + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(APPLE_ARCH_FLAGS -arch\ x86_64) + set(APPLE_ARCH_CFLAGS CFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_CXXFLAGS CXXFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_LDFLAGS LDFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_HOST --host=x86_64-apple-darwin) + endif() +endif() + #### START EXTERNAL #### if(AV_BUILD_OPENMP) set(OPENMP_TARGET OpenMP) @@ -690,6 +714,16 @@ if(AV_BUILD_BOOST) set(SCRIPT_EXTENSION sh) endif() + # Allow cross-compiling on Apple targets + if(APPLE) + if(${CMAKE_OSX_ARCHITECTURES} MATCHES "arm64") + set(BOOST_APPLE_ARCH arm) + elseif(${CMAKE_OSX_ARCHITECTURES} MATCHES "x86_64") + set(BOOST_APPLE_ARCH x86) + endif() + set(APPLE_B2_FLAGS toolset=clang-darwin target-os=darwin architecture=${BOOST_APPLE_ARCH} 
cxxflags=${APPLE_ARCH_FLAGS} cflags=${APPLE_ARCH_FLAGS} linkflags=${APPLE_ARCH_FLAGS}) + endif() + ExternalProject_Add(${BOOST_TARGET} URL https://archives.boost.io/release/1.86.0/source/boost_1_86_0.tar.bz2 URL_HASH MD5=2d098ba2e1457708a02de996857c2b10 @@ -706,10 +740,10 @@ if(AV_BUILD_BOOST) ./bootstrap.${SCRIPT_EXTENSION} --prefix= --with-libraries=atomic,container,date_time,exception,graph,iostreams,json,log,math,program_options,regex,serialization,system,test,thread,stacktrace,timer BUILD_COMMAND cd && - ./b2 --prefix= variant=${DEPS_CMAKE_BUILD_TYPE_LOWERCASE} cxxstd=20 link=shared threading=multi -j8 + ./b2 --prefix= variant=${DEPS_CMAKE_BUILD_TYPE_LOWERCASE} cxxstd=20 link=shared threading=multi ${APPLE_B2_FLAGS} -j${AV_BUILD_DEPENDENCIES_PARALLEL} INSTALL_COMMAND cd && - ./b2 variant=${DEPS_CMAKE_BUILD_TYPE_LOWERCASE} cxxstd=20 link=shared threading=multi install + ./b2 variant=${DEPS_CMAKE_BUILD_TYPE_LOWERCASE} cxxstd=20 link=shared threading=multi ${APPLE_B2_FLAGS} install DEPENDS ${ZLIB_TARGET} ) @@ -736,6 +770,14 @@ if(AV_BUILD_FFMPEG) elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") set(VPX_TOOLCHAIN_FLAG --target=arm64-darwin24-gcc) endif() + else() # Lower + # Extract major Darwin Version + string(REGEX MATCH "^[0-9]+" DARWIN_VERSION_MAJOR "${CMAKE_SYSTEM_VERSION}") + if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(VPX_TOOLCHAIN_FLAG --target=x86_64-darwin${DARWIN_VERSION_MAJOR}-gcc) + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(VPX_TOOLCHAIN_FLAG --target=arm64-darwin${DARWIN_VERSION_MAJOR}-gcc) + endif() endif() endif() @@ -770,6 +812,19 @@ if(AV_BUILD_FFMPEG) set(FFMPEG_APPLE_LDFLAGS) endif() + if(APPLE) + if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(APPLE_FFMPEG_ARCH_FLAGS --arch=x86_64 --enable-cross-compile --sysroot=${APPLE_SYSROOT}) + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(APPLE_FFMPEG_ARCH_FLAGS --arch=aarch64 --enable-cross-compile --sysroot=${APPLE_SYSROOT}) + endif() + set(FFMPEG_CFLAGS -I/include\ 
${APPLE_ARCH_FLAGS}) + set(FFMPEG_LDFLAGS -L/lib\ ${APPLE_ARCH_FLAGS}) + else() + set(FFMPEG_CFLAGS -I/include) + set(FFMPEG_LDFLAGS -L/lib) + endif() + ExternalProject_add(${FFMPEG_TARGET} URL https://www.ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz URL_HASH MD5=26f2bd7d20c6c616f31d7130c88d7250 @@ -782,13 +837,14 @@ if(AV_BUILD_FFMPEG) INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND /configure --prefix= - --extra-cflags="-I/include" - --extra-ldflags="-L/lib" + --extra-cflags=${FFMPEG_CFLAGS} + --extra-ldflags=${FFMPEG_LDFLAGS} --enable-shared --disable-static --disable-gpl --enable-nonfree --enable-libvpx + ${APPLE_FFMPEG_ARCH_FLAGS} ${FFMPEG_APPLE_LDFLAGS} BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} DEPENDS ${VPX_TARGET} @@ -922,6 +978,7 @@ if(AV_BUILD_USD) if(APPLE) set(PYTHON_EXECUTABLE python3) + set(APPLE_ARCH_TARGET_FLAG --build-target ${CMAKE_OSX_ARCHITECTURES}) else() set(PYTHON_EXECUTABLE python) endif() @@ -953,6 +1010,7 @@ if(AV_BUILD_USD) --no-tests --no-docs --no-python + ${APPLE_ARCH_TARGET_FLAG} ) @@ -1443,6 +1501,7 @@ if(AV_BUILD_LEMON) BINARY_DIR ${BUILD_DIR}/${LEMON_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} # FIXME: Use CMAKE_CORE_BUILD_FLAGS, as soon as LEMON supports C++20. -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} ) From 53652e66f7388d59f07a8e2d7c57fbf3fd1dd6d8 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Thu, 16 Oct 2025 17:05:21 +0200 Subject: [PATCH 35/40] FIX: Dependencies: Explicitly use "-headerpad_max_install_names" on Apple targets where necessary As it might be desirable to later change the embedded rpaths of a binary, "install_name_tool" expects enough padding between the header and the first data section of a Mach-O file. 
This caused issues when changing the rpaths of the following dependencies, which added little to no padding to the Mach-O header: - libtiff - ffmpeg Pass "-headerpad_max_install_names" explicitly as linker flags to these dependencies when building them on Apple platforms. This adds the maximum amount of padding to the header (per ld man page: "Automatically adds space for future expansion of load commands such that all paths could expand to MAXPATHLEN."). Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index c53531492d..abbd5223c9 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -818,8 +818,8 @@ if(AV_BUILD_FFMPEG) elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") set(APPLE_FFMPEG_ARCH_FLAGS --arch=aarch64 --enable-cross-compile --sysroot=${APPLE_SYSROOT}) endif() - set(FFMPEG_CFLAGS -I/include\ ${APPLE_ARCH_FLAGS}) - set(FFMPEG_LDFLAGS -L/lib\ ${APPLE_ARCH_FLAGS}) + set(FFMPEG_CFLAGS -I/include\ ${APPLE_ARCH_FLAGS}\ -Wl,-headerpad_max_install_names) # ffmpeg does not include padding in the Mach-O header + set(FFMPEG_LDFLAGS -L/lib\ ${APPLE_ARCH_FLAGS}\ -headerpad_max_install_names) # ffmpeg does not include padding in the Mach-O header else() set(FFMPEG_CFLAGS -I/include) set(FFMPEG_LDFLAGS -L/lib) From f4c4b21f6340456a94e8a52b3d80ddcd94043026 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 24 Sep 2025 17:57:04 +0200 Subject: [PATCH 36/40] PATCH: Ceres components on Apple platforms As we do not build Ceres with SuiteSparse support (Ceres can utilize Accelerate.framework), do not ask for SuiteSparse on Apple platforms. As an equivalent, make sure to find a Ceres with AccelerateSparse (can be disabled in the same manner as Ceres with SuiteSparse). 
Signed-off-by: Philipp Remy --- src/CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 343b6a8463..ae175c9b67 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -79,7 +79,11 @@ mark_as_advanced(FORCE ALICEVISION_USE_OCVSIFT) option(ALICEVISION_USE_MESHSDFILTER "Use MeshSDFilter library (enable MeshDenoising and MeshDecimate)" ON) -option(ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE "Require Ceres with SuiteSparse (ensure best performances)" ON) +if(APPLE) + option(ALICEVISION_REQUIRE_CERES_WITH_ACCELERATESPARSE "Require Ceres with AccelerateSparse (ensure best performances, macOS only)" ON) +else() + option(ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE "Require Ceres with SuiteSparse (ensure best performances)" ON) +endif() option(ALICEVISION_USE_RPATH "Add RPATH on software with relative paths to libraries" ON) option(ALICEVISION_REMOVE_ABSOLUTE "Remove absolute paths in dependencies" OFF) @@ -437,6 +441,9 @@ if (ALICEVISION_BUILD_SFM) if (ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE) message(STATUS "By default, Ceres required SuiteSparse to ensure best performances. if you explicitly need to build without it, you can use the option: -DALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE=OFF") find_package(Ceres QUIET REQUIRED COMPONENTS SuiteSparse CONFIG) + elseif (ALICEVISION_REQUIRE_CERES_WITH_ACCELERATESPARSE) + message(STATUS "By default, Ceres required AccelerateSparse to ensure best performances. 
if you explicitly need to build without it, you can use the option: -DALICEVISION_REQUIRE_CERES_WITH_ACCELERATESPARSE=OFF") + find_package(Ceres QUIET REQUIRED COMPONENTS AccelerateSparse CONFIG) else() find_package(Ceres CONFIG QUIET CONFIG) endif() From 90696ebb82ab2ff561d85d1fd3615e4a54c33174 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Wed, 8 Oct 2025 18:41:59 +0200 Subject: [PATCH 37/40] PATCH: Respect CMAKE_OSX_ARCHITECTURES when setting the build directories for AliceVision Currently, the name is set by CMAKE_SYSTEM_PROCESSOR. However, this does not always match the target architecture, as users can cross-compile on Apple targets. On Apple, use CMAKE_OSX_ARCHITECTURES instead. Signed-off-by: Philipp Remy --- src/CMakeLists.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ae175c9b67..d823499403 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -146,8 +146,13 @@ endif() # Set build path -set(EXECUTABLE_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") -set(LIBRARY_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") +if(APPLE) + set(EXECUTABLE_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_OSX_ARCHITECTURES}") + set(LIBRARY_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_OSX_ARCHITECTURES}") +else() + set(EXECUTABLE_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") + set(LIBRARY_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") +endif() # Windows specific defines if (WIN32) From 3bfd94cc4f110b78fcd13611752d21c2b7782e9b Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Fri, 17 Oct 2025 16:03:36 +0200 Subject: [PATCH 38/40] PATCH: Build: Add a bundling solution for Apple targets This commit adds a Python script (darwin_bundle.py) which takes several Mach-O files (or Framework folders) as input and attempts to resolve all 
required dependencies to create a standalone (i.e, self-contained) bundle. A CMake target for use after the AliceVision build is added to automatically invoke the script with all targets of the project (can be invoked with make darwin-bundle). This works somewhat similar to what the regular bundle target does, but the CMake implementation did not appear to work for Apple targets (out of the box). Signed-off-by: Philipp Remy --- src/CMakeLists.txt | 28 ++ src/cmake/Dependencies.cmake | 8 + src/cmake/Helpers.cmake | 6 + src/cmake/darwin_bundle.py | 560 +++++++++++++++++++++++++++++++++++ 4 files changed, 602 insertions(+) create mode 100644 src/cmake/darwin_bundle.py diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d823499403..d870ec0dff 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1036,6 +1036,9 @@ add_subdirectory(dependencies) # AliceVision modules # ============================================================================== +# Initial global target list is empty, will be added by target helper functions +set(ALICEVISION_GLOBAL_TARGET_LIST "" CACHE INTERNAL "Global list of all AliceVision targets enabled") + # software(s) under patent or commercial licence # Included for research purpose only if (ALICEVISION_BUILD_SFM) @@ -1109,3 +1112,28 @@ if (ALICEVISION_BUILD_SWIG_BINDING) endif() add_custom_target(uninstall "${CMAKE_COMMAND}" -P "${cmakeUninstallFile}") + +# ============================================================================== +# Bundling on Apple targets +# ============================================================================== +# Get output file name for each target +if(APPLE) + find_program(PYTHON_EXECUTABLE python3) + set(ALICEVISION_BUILD_ARTIFACTS) + foreach(AV_TARGET ${ALICEVISION_GLOBAL_TARGET_LIST}) + # Different handling if Framework + get_target_property(IS_FRAMEWORK ${AV_TARGET} FRAMEWORK) + if(IS_FRAMEWORK) + list(APPEND ALICEVISION_BUILD_ARTIFACTS "$") + else() + list(APPEND 
ALICEVISION_BUILD_ARTIFACTS "$") + endif() + endforeach() + # Add a custom target + add_custom_target(darwin-bundle + ${PYTHON_EXECUTABLE} ${ALICEVISION_ROOT}/../src/cmake/darwin_bundle.py -o ${ALICEVISION_BUNDLE_PREFIX} ${ALICEVISION_BUILD_ARTIFACTS} + DEPENDS ${ALICEVISION_GLOBAL_TARGET_LIST} + COMMENT "Creating Darwin Bundle" + COMMAND_EXPAND_LISTS + ) +endif() diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index abbd5223c9..01f0d5943d 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -1688,6 +1688,7 @@ if(AV_BUILD_ALICEVISION) -DBUILD_APPLE_FRAMEWORKS=${BUILD_APPLE_FRAMEWORKS} -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DALICEVISION_ROOT=${ALICEVISION_ROOT} + -DALICEVISION_BUNDLE_PREFIX=${ALICEVISION_BUNDLE_PREFIX} ) set(AV_COMPONENT_FLAGS -DAV_BUILD_ALICEVISION=${AV_BUILD_ALICEVISION} @@ -1849,4 +1850,11 @@ if(AV_BUILD_ALICEVISION) BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} DEPENDS ${AV_DEPS} ) + + # Pipe through the darwin-bundle target on Apple targets + if(APPLE) + add_custom_target(darwin-bundle + ${CMAKE_MAKE_PROGRAM} -C ${BUILD_DIR}/aliceVision_build darwin-bundle + ) + endif() endif() diff --git a/src/cmake/Helpers.cmake b/src/cmake/Helpers.cmake index f214fed08b..1c1739f30e 100644 --- a/src/cmake/Helpers.cmake +++ b/src/cmake/Helpers.cmake @@ -128,6 +128,9 @@ function(alicevision_add_library library_name) ) endif() + # Add to global target list + set(ALICEVISION_GLOBAL_TARGET_LIST "${ALICEVISION_GLOBAL_TARGET_LIST};${library_name}" CACHE INTERNAL "Global list of all AliceVision targets enabled") + install(TARGETS ${library_name} EXPORT aliceVision-targets ARCHIVE @@ -255,6 +258,9 @@ function(alicevision_add_software software_name) VERSION "${ALICEVISION_SOFTWARE_VERSION_MAJOR}_${ALICEVISION_SOFTWARE_VERSION_MINOR}" ) + # Add to global target list + set(ALICEVISION_GLOBAL_TARGET_LIST "${ALICEVISION_GLOBAL_TARGET_LIST};${software_name}_exe" CACHE INTERNAL "Global list of all 
AliceVision targets enabled") + install(TARGETS ${software_name}_exe RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
diff --git a/src/cmake/darwin_bundle.py b/src/cmake/darwin_bundle.py
new file mode 100644
index 0000000000..98791878b6
--- /dev/null
+++ b/src/cmake/darwin_bundle.py
@@ -0,0 +1,560 @@
+#
+# Darwin Bundle
+#
+# Creates a standalone AliceVision bundle for use in Meshroom for Apple targets
+#
+# Usage: python3 darwin_bundle.py [-o <output_dir>] <input_file> ...
+#
+
+import argparse
+import shutil
+import subprocess
+
+from collections import deque
+from functools import partial
+from multiprocessing import Pool, cpu_count, Manager
+from multiprocessing.managers import DictProxy
+from pathlib import Path
+from re import sub
+from typing import Optional
+
+# Rpaths written into every Mach-O that ends up in the bundle
+# (see fixup_macho_with_predefined_rpaths).
+TARGET_RPATHS: list[str] = [
+    "@executable_path",
+    "@executable_path/../lib",
+    "@loader_path",
+    "@loader_path/../lib",
+]
+
+
+# Returns the path of the actual Mach-O file: for a Framework bundle
+# (Foo.framework) this is the inner binary (Foo.framework/Foo), for anything
+# else it is the path itself.
+def _inner_macho(path: Path) -> Path:
+    if path.suffix == ".framework":
+        return path.joinpath(path.stem)
+    return path
+
+
+# Extracts the value of every load command of type `command` from the output
+# of `otool -l`. A matching command is printed as three consecutive lines:
+#     cmd <command>
+#     cmdsize XX
+#     <key> <value> (offset XX)
+def _load_command_values(otoolOutput: str, command: str, key: str) -> list[str]:
+    values: list[str] = []
+    lines = iter(otoolOutput.splitlines())
+    for line in lines:
+        if command in line:
+            _ = next(lines, None)  # This is "cmdsize XX"
+            values.append(
+                sub(
+                    r"\(offset \d+\)",
+                    "",
+                    next(lines, "").strip().removeprefix(key + " "),
+                ).strip()
+            )  # This is the value
+    return values
+
+
+# Runs an external tool quietly. Returns True on exit status 0, otherwise
+# prints a warning with the captured stderr and returns False.
+def _run_tool(cmd: list) -> bool:
+    result = subprocess.run(
+        cmd,
+        universal_newlines=True,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.PIPE,
+    )
+    if result.returncode != 0:
+        print(
+            f"[ WARN ] Command failed ({result.returncode}): {' '.join(str(arg) for arg in cmd)}: {result.stderr.strip()}"
+        )
+        return False
+    return True
+
+
+# Checks with file(1) whether path is a Mach-O file
+def _is_macho(path: Path) -> bool:
+    fileCmd = subprocess.run(
+        ["file", path], universal_newlines=True, stdout=subprocess.PIPE
+    )
+    return "Mach-O" in fileCmd.stdout
+
+
+# Returns a tuple of
+# (1) The name of the Mach-O
+# (2) The required dependencies
+# (3) The available rpaths
+def get_deps_and_rpaths(macho: Path) -> tuple[Path, list[Path], list[Path]]:
+    # If Framework, we need to check the inner Mach-O
+    innerMacho = _inner_macho(macho)
+
+    depsCmd = subprocess.run(
+        ["otool", "-L", innerMacho], universal_newlines=True, stdout=subprocess.PIPE
+    )
+    loadCmd = subprocess.run(
+        ["otool", "-l", innerMacho], universal_newlines=True, stdout=subprocess.PIPE
+    )
+
+    # Only libraries/Frameworks carry an LC_ID_DYLIB and therefore list
+    # themselves in `otool -L`; executables do not. Dropping a fixed number of
+    # lines would lose the first real dependency of every executable, so the
+    # own install name is filtered out explicitly instead.
+    ownIds = {
+        Path(name)
+        for name in _load_command_values(loadCmd.stdout, "LC_ID_DYLIB", "name")
+    }
+
+    deps: list[Path] = []
+    for line in depsCmd.stdout.splitlines():
+        # Dependency entries are indented, header lines ("<file>:" or
+        # "<file> (architecture <arch>):") are not
+        if not line[:1].isspace():
+            continue
+        dep = Path(
+            sub(r"\(compatibility version [^)]+\)", "", line.strip()).strip()
+        )  # Remove the compatibility stuff
+        # Universal binaries list their dependencies once per architecture
+        if dep in ownIds or dep in deps:
+            continue
+        deps.append(dep)
+
+    rpaths: list[Path] = [
+        Path(rpath)
+        for rpath in _load_command_values(loadCmd.stdout, "LC_RPATH", "path")
+    ]
+
+    return (
+        macho,
+        deps,
+        rpaths,
+    )  # Return the outer path, so we still have .framework (not .framework/Mach-O)
+
+
+# Extracts the architectures of a Mach-O file
+# (empty set if lipo cannot read the file)
+def get_archs(path: Path) -> set[str]:
+    result = subprocess.run(
+        ["lipo", "-archs", str(_inner_macho(path))],
+        universal_newlines=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.DEVNULL,
+    )
+    return set(result.stdout.strip().split())
+
+
+# Checks if lh and rh share the same architecture
+def check_arch_match(lh: Path, rh: Path) -> bool:
+    lh_archs = get_archs(lh)
+    rh_archs = get_archs(rh)
+    # True if either is universal (more than one arch)
+    # or if they share at least one arch
+    if len(lh_archs) > 1 or len(rh_archs) > 1:
+        return True
+    return not lh_archs.isdisjoint(rh_archs)
+
+
+# Looks for relativeDep below every lookup path and returns all existing
+# candidates whose architecture matches the one of dependant. Candidates that
+# exist but do not match are reported with a warning and skipped.
+def _find_in_lookup_paths(
+    dependant: Path, relativeDep: Path, lookupPaths: set[Path]
+) -> list[Path]:
+    matches: list[Path] = []
+    for lookupPath in lookupPaths:
+        candidate = lookupPath.joinpath(relativeDep)
+        if not candidate.exists():
+            continue
+        if check_arch_match(dependant, candidate):
+            matches.append(candidate)
+        else:
+            print(
+                f"[ WARN ] Resolved dependency exists, but the architectures do not match: Dependant: {dependant}, resolved dependency: {candidate}."
+            )
+    return matches
+
+
+# Resolves the direct dependencies of one Mach-O.
+#
+# input is the tuple returned by get_deps_and_rpaths. Its rpath list is
+# rewritten in place with the resolved rpaths.
+# globalCache maps already resolved paths to their architectures and is
+# shared between all worker processes.
+# additionalLookupPaths are lookup paths inherited from the dependants.
+#
+# Returns a tuple of
+# (1) Whether op was successful
+# (2) Resolved paths
+# (3) Tuple of reasons and unresolved paths (must be empty on success)
+# (4) Tuple of conflicting paths per dependency
+# (5) The resolved rpaths to pass through to subdependencies
+def try_and_match_deps(
+    input: tuple[Path, list[Path], list[Path]],
+    globalCache,
+    additionalLookupPaths: Optional[set[Path]] = None,
+) -> tuple[
+    bool,
+    list[Path],
+    list[tuple[list[str], Path]],
+    list[tuple[list[Path], Path]],
+    set[Path],
+]:
+    possibleReasonsForErr: list[str] = []
+
+    # If the input file has no extension, we assume it is an executable and resolve any @executable_paths
+    isExecutable = input[0].suffix == ""
+
+    # We properly create paths from @executable_path (if possible), @loader_path and relative paths
+    for i, rpath in enumerate(input[2]):
+        if "@executable_path" in rpath.parts:
+            if isExecutable:
+                # @executable_path is the directory that contains the
+                # executable, exactly like @loader_path for the executable itself
+                input[2][i] = (
+                    input[0].parent.joinpath(Path(*rpath.parts[1:])).resolve()
+                )
+            else:
+                possibleReasonsForErr.append(
+                    f"Could not resolve {rpath}! Input file is not an executable: {input[0]}."
+                )
+        elif "@loader_path" in rpath.parts:
+            input[2][i] = input[0].parent.joinpath(Path(*rpath.parts[1:])).resolve()
+        elif not rpath.is_absolute():
+            input[2][i] = input[0].parent.joinpath(rpath).resolve()
+
+    # Remove duplicates
+    uniqueLookupPaths = set(input[2])
+    # Add additional lookup paths from parents
+    if additionalLookupPaths:
+        uniqueLookupPaths.update(additionalLookupPaths)
+
+    # Architectures of the input file; looked up at most once, and only if
+    # the cache check actually needs them
+    inputArchs: Optional[set[str]] = None
+
+    # Try to resolve the required libraries with the available rpaths
+    resolvedPaths: list[Path] = []
+    unresolvedPaths: list[tuple[list[str], Path]] = []
+    conflicitingPaths: list[tuple[list[Path], Path]] = []
+    for dep in input[1]:
+        # Skip dependencies that were already resolved (possibly by another
+        # worker) for a compatible architecture
+        isCached = False
+        for cachedPath, cachedArchs in globalCache.items():
+            if dep.stem != cachedPath.stem:
+                continue
+            if inputArchs is None:
+                inputArchs = get_archs(input[0])
+            if inputArchs.issubset(cachedArchs):
+                isCached = True
+                break
+        if isCached:
+            continue
+
+        # Filter system libraries
+        if "/usr/lib" in str(dep) or "/System" in str(dep):
+            continue
+
+        # If not @rpath prefixed, check if absolute
+        if "@rpath" not in dep.parts and dep.is_absolute():
+            if not dep.exists():
+                unresolvedPaths.append(
+                    (["Absolute path of dependency does not exist!"], dep)
+                )
+            elif check_arch_match(input[0], dep):
+                resolvedPaths.append(dep)
+            else:
+                print(
+                    f"[ WARN ] Resolved dependency exists, but the architectures do not match: Dependant: {input[0]}, resolved dependency: {dep}."
+                )
+            continue
+
+        if "@rpath" in dep.parts:
+            relativeDep = Path(*dep.parts[1:])
+            reasons: list[str] = [
+                f"No exctracted rpaths were able to resolve the dependency! Required by: {input[0]}."
+            ]
+            if not isExecutable:
+                reasons += possibleReasonsForErr
+        elif not dep.is_absolute():
+            relativeDep = dep
+            reasons = [
+                "The relative path of the dependency did not resolve to an existing dependency!"
+            ]
+        else:
+            unresolvedPaths.append(
+                (["Encountered unknown dependency path scheme!"], dep)
+            )
+            continue
+
+        resolvedPathsInner = _find_in_lookup_paths(
+            input[0], relativeDep, uniqueLookupPaths
+        )
+        if len(resolvedPathsInner) == 0:
+            unresolvedPaths.append((reasons, dep))
+        elif len(resolvedPathsInner) > 1:
+            # Report the conflict, but continue with the first match
+            conflicitingPaths.append((resolvedPathsInner, dep))
+            resolvedPaths.append(resolvedPathsInner[0])
+        else:
+            resolvedPaths += resolvedPathsInner
+
+    for resolvedPath in resolvedPaths:
+        globalCache[resolvedPath] = get_archs(resolvedPath)
+    return (
+        len(unresolvedPaths) == 0,
+        resolvedPaths,
+        unresolvedPaths,
+        conflicitingPaths,
+        uniqueLookupPaths,
+    )
+
+
+# Resolves the dependencies of one input file and, breadth first, the
+# dependencies of every dependency that was resolved on the way.
+#
+# Returns a tuple of
+# (1) Whether every dependency on every level could be resolved
+# (2) All resolved paths
+# (3) Tuple of reasons and unresolved paths
+# (4) Tuple of conflicting paths per dependency
+def traverse_deps_and_resolve(
+    input: tuple[Path, list[Path], list[Path]], globalCache
+) -> tuple[
+    bool, list[Path], list[tuple[list[str], Path]], list[tuple[list[Path], Path]]
+]:
+    # Initial state
+    successTop, resolvedTop, unresolvedTop, conflictingTop, lookupPathsCombined = (
+        try_and_match_deps(input, globalCache)
+    )
+
+    # Use sets for quick membership tests
+    resolved_set = set(resolvedTop)
+    processed: set[Path] = set()  # things we have already pulled deps for
+    queue: deque[Path] = deque(resolvedTop)  # things we still need to process
+
+    # BFS-style traversal through dependencies
+    while queue:
+        subDep = queue.popleft()
+        if subDep in processed:
+            continue
+        processed.add(subDep)
+
+        ok, resolved, unresolved, conflicting, uniqueLookupPaths = try_and_match_deps(
+            get_deps_and_rpaths(subDep), globalCache, lookupPathsCombined
+        )
+
+        # Update overall success flag
+        if not ok:
+            successTop = False
+
+        # Update lookup paths
+        lookupPathsCombined.update(uniqueLookupPaths)
+
+        # Add newly resolved dependencies:
+        for newDep in resolved:
+            if newDep not in resolved_set:
+                resolved_set.add(newDep)
+                resolvedTop.append(newDep)
+                queue.append(newDep)
+
+        # Update unresolved and conflicting lists
+        unresolvedTop += unresolved
+        conflictingTop += conflicting
+
+    return (successTop, resolvedTop, unresolvedTop, conflictingTop)
+
+
+# Copies src into dst_dir and returns the destination path. Nothing is copied
+# if the destination already exists. Directories (Frameworks) are copied as a
+# tree; for a symlink its target is copied and the link is recreated next to it.
+def copy_safe(src: Path, dst_dir: Path) -> Path:
+    dst = dst_dir / src.name
+
+    if dst.exists():
+        return dst
+
+    if src.is_dir():
+        _ = shutil.copytree(
+            src,
+            dst,
+            symlinks=True,
+        )
+    elif src.is_symlink():
+        # Resolve the target of the symlink
+        target = src.resolve()
+        # Copy the target file first
+        if not (dst_dir / target.name).exists():
+            _ = shutil.copy2(target, dst_dir / target.name)
+        # Recreate the symlink in the target dir
+        dst.symlink_to(target.name)
+    else:
+        # Regular file
+        _ = shutil.copy2(src, dst)
+
+    return dst
+
+
+# Replaces all rpaths of a copied Mach-O (or Framework) with TARGET_RPATHS and
+# re-signs it. Returns False if the file could not be re-signed; failures of
+# the individual rpath edits are only reported as warnings.
+def fixup_macho_with_predefined_rpaths(macho: Path) -> bool:
+    success = True
+
+    # Special case Framework
+    machoInner = _inner_macho(macho)
+
+    # Extract existing rpaths
+    rpathCmd = subprocess.run(
+        ["otool", "-l", machoInner],
+        universal_newlines=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.DEVNULL,
+    )
+    rpaths: set[str] = set(_load_command_values(rpathCmd.stdout, "LC_RPATH", "path"))
+
+    # Remove existing rpaths
+    for rpath in rpaths:
+        _ = _run_tool(["install_name_tool", "-delete_rpath", rpath, machoInner])
+
+    # Add predefined set of target rpaths
+    for rpath in TARGET_RPATHS:
+        _ = _run_tool(["install_name_tool", "-add_rpath", rpath, machoInner])
+
+    # Re-sign the binary with ad-hoc signature
+    # Otherwise it will be terminated by SIGABRT
+    if not _run_tool(["codesign", "--force", "--deep", "--sign", "-", macho]):
+        success = False
+
+    return success
+
+
+# Performs steps 1 to 5 of the bundling process with an already running worker
+# pool and the shared cache of resolved dependencies. Exits the interpreter
+# with status -1 if a dependency cannot be resolved or the fixup fails.
+def _make_bundle(
+    outputDir: Path, inputFiles: list[Path], concurrentPool, globalCache
+) -> None:
+    # Get all binaries, libraries and Frameworks. For a Framework the inner
+    # Mach-O is checked. Every input is listed only once, so that no file is
+    # resolved twice.
+    inputMachOs: list[Path] = list(
+        dict.fromkeys(file for file in inputFiles if _is_macho(_inner_macho(file)))
+    )
+
+    print("[ INFO ] (1 / 5) Looking up required dependencies and embedded rpaths...")
+
+    # Extract required dependencies and rpaths
+    depsAndRpathsPerInput: list[tuple[Path, list[Path], list[Path]]] = (
+        concurrentPool.map(get_deps_and_rpaths, inputMachOs)
+    )
+
+    print("[ INFO ] (2 / 5) Attempting to resolve all dependencies...")
+
+    # Resolve per input and subdependency
+    resolveFunc = partial(traverse_deps_and_resolve, globalCache=globalCache)
+    resolverResultPerInput: list[
+        tuple[
+            bool,
+            list[Path],
+            list[tuple[list[str], Path]],
+            list[tuple[list[Path], Path]],
+        ]
+    ] = concurrentPool.map(resolveFunc, depsAndRpathsPerInput)
+
+    # Check for Results
+    isSuccessful = True
+    for result in resolverResultPerInput:
+        if not result[0]:
+            print("[ ERROR ] An error occured during the resolving process:")
+            for unresolved in result[2]:
+                print(
+                    f"[ ERROR ] \tDependency: {str(unresolved[1])}, failed with: {str(unresolved[0])}"
+                )
+            isSuccessful = False
+        for conflictingDep in result[3]:
+            print(
+                f"[ WARN ] Multiple paths were found to resolve {str(conflictingDep[1])}:"
+            )
+            for conflictingPath in conflictingDep[0]:
+                print(f"[ WARN ] \tFound suitable: {str(conflictingPath)}")
+
+    if not isSuccessful:
+        print("[ ERROR ] Errors occured! Refusing to build bundle.")
+        raise SystemExit(-1)
+
+    print("[ INFO ] (3 / 5) Making bundle structure...")
+
+    # Create the output directory
+    shutil.rmtree(outputDir, ignore_errors=True)
+    outputDir.mkdir(parents=True, exist_ok=True)
+    (outputDir / "lib").mkdir(parents=True, exist_ok=True)
+    (outputDir / "bin").mkdir(parents=True, exist_ok=True)
+
+    # Copy input files
+    # Determine if they are dylibs/Frameworks or executables
+    inputBins: set[Path] = set()
+    inputLibs: set[Path] = set()
+    for inputFile in inputFiles:
+        if inputFile.suffix == "":
+            inputBins.add(inputFile)
+        else:
+            inputLibs.add(inputFile)
+
+    # Copy bins
+    dstBin = partial(copy_safe, dst_dir=outputDir / "bin")
+    destBins = set(concurrentPool.map(dstBin, inputBins))
+
+    # Copy libs
+    dstLib = partial(copy_safe, dst_dir=outputDir / "lib")
+    destLibs = set(concurrentPool.map(dstLib, inputLibs))
+
+    print("[ INFO ] (4 / 5) Copying required files...")
+
+    # Create set for files to copy
+    filesToCopy: set[Path] = set()
+    # Must handle special cases of Frameworks
+    for result in resolverResultPerInput:
+        for resolvedPath in result[1]:
+            if resolvedPath.suffix == "":
+                # We want to get the actual .framework folder, i.e. the
+                # closest parent with the suffix .framework. A suffix-less
+                # file outside of any Framework is copied as it is.
+                frameworkFolder = next(
+                    (
+                        parent
+                        for parent in resolvedPath.parents
+                        if parent.suffix == ".framework"
+                    ),
+                    None,
+                )
+                if frameworkFolder is None:
+                    filesToCopy.add(resolvedPath)
+                else:
+                    filesToCopy.add(frameworkFolder)
+            else:
+                filesToCopy.add(resolvedPath)
+
+    # Copy all into new bundle
+    dstResolvedLibs = set(concurrentPool.map(dstLib, filesToCopy))
+
+    # Create destination list
+    allDstFiles = dstResolvedLibs.union(destLibs).union(destBins)
+
+    print("[ INFO ] (5 / 5) Fixing up copied files...")
+
+    # Fixup all destination files
+    successList: list[bool] = concurrentPool.map(
+        fixup_macho_with_predefined_rpaths, allDstFiles
+    )
+
+    # Done
+    if False in successList:
+        print("[ ERROR ] Errors occured during fixup. Bundle will be unfunctional.")
+        raise SystemExit(-1)
+    else:
+        print(
+            f"\n[ INFO ] ### Successfully created self-contained bundle at {outputDir.resolve()}. ###"
+        )
+
+
+# Command line entry point: parses the arguments, sets up the worker pool and
+# the shared cache and creates the bundle.
+def entry():
+    # Create the parser
+    parser = argparse.ArgumentParser(
+        description="Make a self-contained AliceVision bundle on Darwin"
+    )
+
+    # Optional output directory
+    _ = parser.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        default=Path.cwd().joinpath("bundle"),
+        help="Output directory",
+    )
+
+    # Positional arguments: arbitrary number of file paths
+    _ = parser.add_argument(
+        "input_files", type=Path, nargs="+", help="Input files to process"
+    )
+
+    # Parse args
+    args = parser.parse_args()
+
+    # Access them
+    outputDir: Path = args.output
+    inputFiles: list[Path] = args.input_files
+
+    # Log Info about input
+    print(f"[ INFO ] Placing bundle at: {outputDir}.")
+    print("[ INFO ] Attempting for files:")
+    for inputFile in inputFiles:
+        print("[ INFO ] \t" + str(inputFile))
+    print("\n[ INFO ] ### Patience... ###\n")
+
+    # Create concurrent interpreters and the cache shared between them.
+    # Both are released again when the block is left, also on errors.
+    with Manager() as manager, Pool(cpu_count()) as concurrentPool:
+        globalCache: DictProxy[Path, set[str]] = manager.dict()
+        _make_bundle(outputDir, inputFiles, concurrentPool, globalCache)
+
+
+# Only launch when called directly
+if __name__ == "__main__":
+    entry()
From 649b74a1b30e598596e9899749a52c15915bb7a1 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Thu, 9 Oct 2025 17:26:21 +0200 Subject: [PATCH 39/40] PATCH: Add build instructions for macOS Due to the additional requirements and the unique build process of this platform (cross-compiling, environment, ...) move the build instructions to a custom file. This commit also introduces a list of available target architectures in the OFA system, available at src/cmake/OFA/SupportedArchitectures.md. Signed-off-by: Philipp Remy --- INSTALL.md | 21 +-- INSTALL_macOS.md | 176 ++++++++++++++++++++++++ src/cmake/OFA/SupportedArchitectures.md | 138 +++++++++++++++++++ 3 files changed, 316 insertions(+), 19 deletions(-) create mode 100644 INSTALL_macOS.md create mode 100644 src/cmake/OFA/SupportedArchitectures.md diff --git a/INSTALL.md b/INSTALL.md index ce81658f95..6590522fa0 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,5 +1,7 @@ # AliceVision +For specific build instructions for macOS see [INSTALL_macOS.md](./INSTALL_macOS.md). + ## Build instructions Required tools: @@ -357,25 +359,6 @@ make test * Change the target to Release. * Compile the libraries and binaries samples. - -### Mac OSX compilation - -```bash -git clone --recursive https://github.com/alicevision/AliceVision.git -mkdir build && cd build -cmake -DCMAKE_BUILD_TYPE=Release -G "Xcode" ../AliceVision -``` - -If you want to enable the build of the unit tests: -```bash -cmake -DCMAKE_BUILD_TYPE=Release \ - -DALICEVISION_BUILD_TESTS=ON \ - -G "Xcode" \ - ../AliceVision -xcodebuild -configuration Release -``` - - ## Using AliceVision as a third party library dependency in CMake AliceVision can be used as a third party library once it has been installed.
diff --git a/INSTALL_macOS.md b/INSTALL_macOS.md new file mode 100644 index 0000000000..9a00586801 --- /dev/null +++ b/INSTALL_macOS.md @@ -0,0 +1,176 @@ +# Building the project on macOS + +Make sure to read the whole document at least *once* before compiling the project! If you find any bugs in the build infrastructure, consider opening a bug report on the [GitHub Issue page](https://github.com/alicevision/AliceVision/issues). + +## Platform Support + +AliceVision is supported for both `arm64` and `x86_64` based Macs. Cross-compiling between the two architectures is supported (see following remarks and the build instructions for cross-compilations below). + +Note that support for `x86_64` based Macs might be removed at some point in the future, as Apple is slowly reaching EOL for Intel Macs. + +Any CUDA-related functionality is disabled and unavailable on macOS. NVIDIA has long stopped shipping the CUDA toolkit for macOS, and using a Mac compatible with one of the last supported cards is too unlikely to justify hacking Apple+CUDA support into the project. This currently implies the following: + +- PopSIFT is completely unavailable and attempting to build it will cause a CMake error. +- CCTag won't use its CUDA implementation and instead uses the CPU backend. +- ONNX Runtime with GPU support is unavailable. +- The DepthMap library from AliceVision (and therefore all its dependants) is unavailable because it is implemented only in CUDA. This might change in the future if new DepthMap backends are added. + +## Building from Source + +Building on macOS has a few important remarks: + +- It is *highly* recommended to build with `ALICEVISION_BUILD_DEPENDENCIES=ON`. This ensures that all required dependencies are available and fulfill the API and ABI requirements. Relying on dependencies provided by a package manager is not supported by the project (see the following note). 
+ +- It is *highly* recommended to have a clean build environment when compiling AliceVision for the host architecture (only the required dependencies and tools should be installed). Due to the amount of dependencies involved we cannot guarantee that package managers are always shipping the required components and that the versions available are API and ABI compatible[^1]. While it might be harmless for one user, we cannot test every possible combination and must therefore declare a build with package-manager dependencies _unsupported_. + +- It is *mandatory* to have a clean build system when you are cross-compiling from `arm64` to `x86_64` and vice-versa. We simply cannot force any subprojects and dependencies to not look for libraries provided by your package manager[^2], which *will* cause linker errors in the build process[^3]. + +- It is *mandatory* to have a clean build system if you want to compile a package that is redistributable (including all dependencies). This ensures that there are no external dependencies which one subproject silently pulled in. The danger in this case stems from the fact that this could go completely unnoticed, because on the build machine all dependencies are available and the package therefore 'just works'™[^4]. + +- Besides some external build tools, the project can build *all* required dependencies from source. + +- At this time, the project *only* supports building on macOS with the `Unix Makefiles` CMake generator (this is the default). + +- By default, the project optimizes the build for the CPU architecture of the build machine. This is usually desirable to ensure proper usage of SIMD instructions. If you need broader support, especially for redistribution, consider specifying the target architecture when configuring the project with CMake: If you want to disable optimization completely, pass `TARGET_ARCHITECTURE=none`. In the same manner you can explicitly set the architecture you want to optimize for[^5]. 
You can enable verbose output of this process by passing `OFA_VERBOSE=ON` on the CMake CLI. Only change this if you know what you are doing: Tampering with these options will produce binaries that in the worst case will fault at runtime and in the best case are many times slower than expected. + +- For some other Apple-specific CMake options see this section: [CMake Options for Apple](#cmake-options-for-apple). + +## Required external tools + +These differ slightly depending on the target architecture you want to build for. Note that if you are cross-compiling, you still need the additional tools for the target architecture. As these are only build-time tools, they don't have to be compiled *for* the target architecture, they only need to be available on your `$PATH` on the build machine. + +### arm64 (Apple Silicon Macs, M-CPUs) + +- [x] A working C/C++ compiler with C++20 support (e.g., Xcode/Xcode Command Line Tools)[^6] +- [x] cmake >= 3.25 but < 4.0 (some dependencies do not support CMake 4) +- [x] make (included in Xcode/Xcode Command Line Tools) +- [x] autoconf (Homebrew, MacPorts, Nix) +- [x] automake (Homebrew, MacPorts, Nix) +- [x] pkgconfig (Homebrew, MacPorts, Nix) +- [x] gettext (Homebrew, MacPorts, Nix) +- [x] m4 (Homebrew, MacPorts, Nix) +- [x] BISON[^7] (Homebrew, MacPorts, Nix) +- [x] NumPy[^8] (Homebrew, pip) + +### x86_64 (Intel Macs, Core-i-CPUs) + +- [x] ALL FROM ABOVE +- [x] nasm[^9] (Homebrew, MacPorts, Nix) + +## Build Instructions for native compilation + +1. Create a build directory (in-source builds are unsupported): + +`mkdir build && cd build` + +2. Configure the project from the build directory: + ```bash + cmake \ + -DCMAKE_BUILD_TYPE=<build type> \ + -DCMAKE_INSTALL_PREFIX=<install prefix> \ # Omit this if you want a system-wide install + -DALICEVISION_BUILD_DEPENDENCIES=ON \ # Mandatory for Apple targets + -DAV_BUILD_DEPENDENCIES_PARALLEL=<number of jobs> \ # Setting this to 0 will use all threads available + ... 
\ # To enable or disable other options + <path/to/AliceVision> # Can be relative and will usually just be '..' + ``` + +3. Start build: + +You might see *a lot* of warning messages, especially from the embedded dependencies' build process. This is expected and as long as no errors occur, you shouldn't care. + +`make # Note the missing -j option: Omit it to avoid build issues. This is handled internally by AV_BUILD_DEPENDENCIES_PARALLEL.` + +4. Install project: + +`(sudo) make install # Use sudo for a system-wide install` + +[OPTIONAL: 5. Create a bundle] + +This target creates a self-contained bundle (i.e., a folder containing a `lib` and a `bin` folder, with no external dependencies besides any system libraries/Frameworks)[^10]. Any additional required resources (e.g., `share` folder) must be copied manually. This is mainly useful if you want to create a redistributable bundle, especially for use in Meshroom. + +`(sudo) make darwin-bundle # Use sudo if the bundle should be created system-wide` + +## Build Instructions for cross compilation + +Enabling cross-compilation is done by setting `CMAKE_OSX_ARCHITECTURES` on the CMake CLI to *either* `arm64` *or* `x86_64`. Compiling universal binaries is *not* supported at this point. +When cross-compiling, the Optimize-For-Architecture logic will set some reasonable defaults: +- For `arm64`: The default target architecture will be `apple-m1`, making the resulting binaries compatible with all Apple Silicon Macs. +- For `x86_64`: The default target architecture will be `skylake`, making the resulting binaries compatible with all Intel Macs not older than 2015. +If you want to target a different machine, consider setting `TARGET_ARCHITECTURE` on the CMake CLI to a supported value[^5]. + +1. Create a build directory (in-source builds are unsupported): + +`mkdir build-cross && cd build-cross` + +2. 
Configure the project from the build directory: + ```bash + cmake \ + -DCMAKE_BUILD_TYPE=<build type> \ + -DCMAKE_OSX_ARCHITECTURES=<arm64|x86_64> \ # Sets the target architecture + -DCMAKE_INSTALL_PREFIX=<install prefix> \ # Omit this if you want a system-wide install + -DALICEVISION_BUILD_DEPENDENCIES=ON \ # Mandatory for Apple targets + -DAV_BUILD_DEPENDENCIES_PARALLEL=<number of jobs> \ # Setting this to 0 will use all threads available + ... \ # To enable or disable other options + <path/to/AliceVision> # Can be relative and will usually just be '..' + ``` + +3. Start build: + +You might see *a lot* of warning messages, especially from the embedded dependencies' build process. This is expected and as long as no errors occur, you shouldn't care. + +`make # Note the missing -j option: Omit it to avoid build issues. This is handled internally by AV_BUILD_DEPENDENCIES_PARALLEL.` + +4. Install project: + +`(sudo) make install # Use sudo for a system-wide install` + +[OPTIONAL: 5. Create a bundle] + +This target creates a self-contained bundle (i.e., a folder containing a `lib` and a `bin` folder, with no external dependencies besides any system libraries/Frameworks)[^10]. Any additional required resources (e.g., `share` folder) must be copied manually. This is mainly useful if you want to create a redistributable bundle, especially for use in Meshroom. 
+ +`(sudo) make darwin-bundle # Use sudo if the bundle should be created system-wide` + +## CMake Options for Apple + +These are some influential CMake options specific to Apple: + +| Option | Description | Available Values | Default Value | +| --------- | ----------- | ---------------- | ------------- | +| `CMAKE_OSX_ARCHITECTURES` | Sets the target architecture to compile for | Either `arm64` or `x86_64` | `${CMAKE_HOST_SYSTEM_PROCESSOR}` | +| `ALICEVISION_USE_RPATH` | Whether to use @rpath instead of absolute paths for resolving dependencies (highly recommended) | `ON` / `OFF` | `ON` | +| `BUILD_APPLE_FRAMEWORKS` | Whether to build Framework bundles instead of plain dynamic libraries | `ON` / `OFF` | `ON` | +| `AV_ONNX_APPLE_ARCH` | What architecture to download for the ONNX Runtime (only active if `AV_BUILD_ONNXRUNTIME=ON`) | Either `arm64` or `x86_64` | `${CMAKE_OSX_ARCHITECTURES}` | +| `AV_BUILD_OPENMP` | Whether to build an embedded OpenMP (only active if `ALICEVISION_BUILD_DEPENDENCIES=ON`, highly recommended when using AppleClang) | `ON` / `OFF` | `ON` | +| `AV_BUILD_LAPACK` | Whether to build an embedded BLAS/LAPACK (not recommended, Apple provides it through `Accelerate.framework`, requires a Fortran compiler to be available on `$PATH`) | `ON` / `OFF` | `OFF` | +| `AV_BUILD_SUITESPARSE` | Whether to build an embedded Suitesparse (not recommended, Apple provides an equivalent for Sparse Solvers through `Accelerate.framework`, will massively increase final bundle size) | `ON` / `OFF` | `OFF` | +| `AV_BUILD_ZLIB` | Whether to build an embedded zlib (might be needed for older versions of macOS, especially when redistributing) | `ON` / `OFF` | `OFF` | +| `AV_BUILD_PCL` | Whether to build an embedded PointCloudLibrary (only required if you plan to build an embedded USD and use the `aliceVision_exportUSD` software) | `ON` / `OFF` | `OFF` | +| `AV_BUILD_USD` | Whether to build an embedded UniversalSceneDescription library (only required if you plan to use the 
`aliceVision_exportUSD` software) | `ON` / `OFF` | `OFF` | +| `ALICEVISION_REQUIRE_CERES_WITH_ACCELERATESPARSE` | Whether to require the Ceres dependency to be built with `AccelerateSparse`/`Accelerate.framework` (highly recommended to match SuiteSparse speeds) | `ON` / `OFF` | `ON` | +| `ALICEVISION_BUNDLE_PREFIX` | Where to place the bundle created by `make darwin-bundle` | Any path | `${CMAKE_INSTALL_PREFIX}/bundle` | + +[^1]: If you see linker errors, this should be the very first thing to check! Do the headers match the package of the library that is linked in? Are any non-project dependencies included from package manager directories (e.g., `/opt/homebrew`, `/usr/local`, `/opt/local`)? If so, clean your *whole* build folder, remove the offending packages and try again. + +[^2]: Take a look at `CMAKE_IGNORE_PATH` for example: While we can pass that to dependencies, it only affects `find_package()` calls that use `CONFIG` mode. If a subproject provides its own `Find-X.cmake` module, there is no way for us to exclude certain prefixes that could cause trouble. + +[^3]: If CMake would actually check for architecture compatibility in the configure step, we could just emit a nice and clear error to give some hint to the user. But as this is not the case, CMake will happily accept *any* architecture and only the final link step will tell that there was an architecture mismatch (if you can make out the one line in between the thousand lines of messages). + +[^4]: When being transferred to a different machine there are essentially three ways this could go: + (1) The other machine has all and compatible dependencies in the right location: It works. Lucky you. + (2) The other machine has all but non-compatible dependencies in the right location: It might work, if they are ABI and API compatible. If not, the worst case would be a SIGSEGV or a SIGABRT and you have no idea by what or why they were caused. 
Maybe `dyld` will complain about missing libraries, if it cannot find the correct version (similar to (3)). + (3) The other machine is missing dependencies or they are in the wrong location: This might be the best case scenario, because the error will be relatively clear: You will see something like `dyld: Library not loaded: <library>. Referenced from: <binary>. Reason: tried <paths>`. + +[^5]: For a list of supported values see: [Supported Architectures](src/cmake/OFA/SupportedArchitectures.md). + +[^6]: WARNING: Note that *any* package-manager provided LLVM/Clang is currently unable to build the Boost libraries with `b2` (see [this issue](https://github.com/Homebrew/homebrew-core/issues/235411)). That would not be a problem if we could use the CMake build system for Boost, but we can't do that because CCTag depends on `Boost::math_c99`, which the CMake build system *cannot build at all* :^(. Even worse, the required `bootstrap.sh` for Boost does not allow overriding the C++ compiler or adding the required LDFLAGS to fix this (e.g., by using `CXX`, `CXXFLAGS` or `LDFLAGS`). So AppleClang is practically the only option right now; you need to `completely` uninstall external LLVM/Clang packages. Work on removing the `Boost::math_c99` in CCTag is WIP. Once lifted, this warning no longer applies. + +[^7]: This is only required if you intend to build SWIG from source. It requires a BISON newer than the one provided by Apple and therefore the external binary must be on your `$PATH` *first*. Look at the documentation of your shell on how to do this. + +[^8]: This is only required if you plan to build the AliceVision SWIG bindings. + +[^9]: Alternatively you can also use `yasm`. These are mainly required by libVPX and ffmpeg. 
+ +[^10]: The underlying Python script performs the following steps: + (1) Extracts all required dependencies and available rpaths + (2) Recursively checks if all dependencies can be resolved (and if the architectures match) on a per-file basis + (3) Copies all input files and resolved dependencies into the respective folders diff --git a/src/cmake/OFA/SupportedArchitectures.md b/src/cmake/OFA/SupportedArchitectures.md new file mode 100644 index 0000000000..dad7c87003 --- /dev/null +++ b/src/cmake/OFA/SupportedArchitectures.md @@ -0,0 +1,138 @@ +# Supported Architectures + +This document lists all supported target architectures that can be specified by setting `TARGET_ARCHITECTURE` on the CMake CLI. Only use this explicitly if you know what you are doing! + +## x86/x86_64 + +| Vendor | Codename / CPU Microarchitecture | Family | Name | CMake Flag | +|:--|:--|:--|:--|:--| +| Intel | Core | x86 / x86_64 | core | `TARGET_ARCHITECTURE=core` | +| Intel | Core 2 | x86 / x86_64 | core2 | `TARGET_ARCHITECTURE=core2` | +| Intel | Merom (65nm Core2) | x86 / x86_64 | merom | `TARGET_ARCHITECTURE=merom` | +| Intel | Penryn (45nm Core2) | x86 / x86_64 | penryn | `TARGET_ARCHITECTURE=penryn` | +| Intel | Nehalem | x86 / x86_64 | nehalem | `TARGET_ARCHITECTURE=nehalem` | +| Intel | Westmere | x86 / x86_64 | westmere | `TARGET_ARCHITECTURE=westmere` | +| Intel | Sandy Bridge | x86 / x86_64 | sandybridge | `TARGET_ARCHITECTURE=sandybridge` | +| Intel | Ivy Bridge | x86 / x86_64 | ivybridge | `TARGET_ARCHITECTURE=ivybridge` | +| Intel | Haswell | x86 / x86_64 | haswell | `TARGET_ARCHITECTURE=haswell` | +| Intel | Broadwell | x86 / x86_64 | broadwell | `TARGET_ARCHITECTURE=broadwell` | +| Intel | Skylake | x86 / x86_64 | skylake | `TARGET_ARCHITECTURE=skylake` | +| Intel | Skylake-X (Xeon) | x86 / x86_64 | skylake-xeon | `TARGET_ARCHITECTURE=skylake-xeon` | +| Intel | Kaby Lake | x86 / x86_64 | kabylake | `TARGET_ARCHITECTURE=kabylake` | +| Intel | Cannon Lake | x86 / x86_64 | 
cannonlake | `TARGET_ARCHITECTURE=cannonlake` | +| Intel | Cascade Lake | x86 / x86_64 | cascadelake | `TARGET_ARCHITECTURE=cascadelake` | +| Intel | Cooper Lake | x86 / x86_64 | cooperlake | `TARGET_ARCHITECTURE=cooperlake` | +| Intel | Ice Lake | x86 / x86_64 | icelake | `TARGET_ARCHITECTURE=icelake` | +| Intel | Ice Lake Xeon | x86 / x86_64 | icelake-xeon | `TARGET_ARCHITECTURE=icelake-xeon` | +| Intel | Tiger Lake | x86 / x86_64 | tigerlake | `TARGET_ARCHITECTURE=tigerlake` | +| Intel | Alder Lake | x86 / x86_64 | alderlake | `TARGET_ARCHITECTURE=alderlake` | +| Intel | Sapphire Rapids | x86 / x86_64 | sapphirerapids | `TARGET_ARCHITECTURE=sapphirerapids` | +| Intel | Rocket Lake | x86 / x86_64 | rocketlake | `TARGET_ARCHITECTURE=rocketlake` | +| Intel | Raptor Lake | x86 / x86_64 | raptorlake | `TARGET_ARCHITECTURE=raptorlake` | +| Intel | Bonnell | x86 / x86_64 | bonnell | `TARGET_ARCHITECTURE=bonnell` | +| Intel | Silvermont | x86 / x86_64 | silvermont | `TARGET_ARCHITECTURE=silvermont` | +| Intel | Goldmont | x86 / x86_64 | goldmont | `TARGET_ARCHITECTURE=goldmont` | +| Intel | Goldmont Plus | x86 / x86_64 | goldmont-plus | `TARGET_ARCHITECTURE=goldmont-plus` | +| Intel | Tremont | x86 / x86_64 | tremont | `TARGET_ARCHITECTURE=tremont` | +| Intel | Knights Landing | x86 / x86_64 | knl | `TARGET_ARCHITECTURE=knl` | +| Intel | Knights Mill | x86 / x86_64 | knm | `TARGET_ARCHITECTURE=knm` | +| Intel | Atom (generic) | x86 / x86_64 | atom | `TARGET_ARCHITECTURE=atom` | +| AMD | K8 | x86 / x86_64 | k8 | `TARGET_ARCHITECTURE=k8` | +| AMD | K8 SSE3 | x86 / x86_64 | k8-sse3 | `TARGET_ARCHITECTURE=k8-sse3` | +| AMD | Barcelona | x86 / x86_64 | barcelona | `TARGET_ARCHITECTURE=barcelona` | +| AMD | Istanbul | x86 / x86_64 | istanbul | `TARGET_ARCHITECTURE=istanbul` | +| AMD | Magny-Cours | x86 / x86_64 | magny-cours | `TARGET_ARCHITECTURE=magny-cours` | +| AMD | Bulldozer | x86 / x86_64 | bulldozer | `TARGET_ARCHITECTURE=bulldozer` | +| AMD | Interlagos | x86 / 
x86_64 | interlagos | `TARGET_ARCHITECTURE=interlagos` | +| AMD | Piledriver | x86 / x86_64 | piledriver | `TARGET_ARCHITECTURE=piledriver` | +| AMD | Steamroller | x86 / x86_64 | steamroller | `TARGET_ARCHITECTURE=steamroller` | +| AMD | Excavator | x86 / x86_64 | excavator | `TARGET_ARCHITECTURE=excavator` | +| AMD | Family 14h | x86 / x86_64 | amd14h | `TARGET_ARCHITECTURE=amd14h` | +| AMD | Family 16h | x86 / x86_64 | amd16h | `TARGET_ARCHITECTURE=amd16h` | +| AMD | Zen | x86 / x86_64 | zen | `TARGET_ARCHITECTURE=zen` | +| AMD | Zen 2 | x86 / x86_64 | zen2 | `TARGET_ARCHITECTURE=zen2` | +| AMD | Zen 3 | x86 / x86_64 | zen3 | `TARGET_ARCHITECTURE=zen3` | +| AMD | Zen 4 | x86 / x86_64 | zen4 | `TARGET_ARCHITECTURE=zen4` | +| Generic | Generic | x86 / x86_64 | generic | `TARGET_ARCHITECTURE=generic` | +| Generic | None (no optimization) | x86 / x86_64 | none | `TARGET_ARCHITECTURE=none` | +| Generic | Auto-detect host CPU | x86 / x86_64 | auto | `TARGET_ARCHITECTURE=auto` | +| Generic | Compiler “native” | x86 / x86_64 | native | `TARGET_ARCHITECTURE=native` | + +## ARM/ARM64 + +| Vendor | Codename / CPU Microarchitecture | Family | Name | CMake Flag | +|:--|:--|:--|:--|:--| +| Fujitsu | A64FX | arm64 | a64fx | `TARGET_ARCHITECTURE=a64fx` | +| Apple | A6 | arm64 | apple-a6 | `TARGET_ARCHITECTURE=apple-a6` | +| Apple | A7 | arm64 | apple-a7 | `TARGET_ARCHITECTURE=apple-a7` | +| Apple | A8 | arm64 | apple-a8 | `TARGET_ARCHITECTURE=apple-a8` | +| Apple | A9 | arm64 | apple-a9 | `TARGET_ARCHITECTURE=apple-a9` | +| Apple | A10 | arm64 | apple-a10 | `TARGET_ARCHITECTURE=apple-a10` | +| Apple | A11 | arm64 | apple-a11 | `TARGET_ARCHITECTURE=apple-a11` | +| Apple | A12 | arm64 | apple-a12 | `TARGET_ARCHITECTURE=apple-a12` | +| Apple | A13 | arm64 | apple-a13 | `TARGET_ARCHITECTURE=apple-a13` | +| Apple | A14 | arm64 | apple-a14 | `TARGET_ARCHITECTURE=apple-a14` | +| Apple | A15 | arm64 | apple-a15 | `TARGET_ARCHITECTURE=apple-a15` | +| Apple | A16 | arm64 | apple-a16 | 
`TARGET_ARCHITECTURE=apple-a16` | +| Apple | M1 | arm64 | apple-m1 | `TARGET_ARCHITECTURE=apple-m1` | +| Apple | M2 | arm64 | apple-m2 | `TARGET_ARCHITECTURE=apple-m2` | +| Apple | M3 | arm64 | apple-m3 | `TARGET_ARCHITECTURE=apple-m3` | +| Apple | M4 | arm64 | apple-m4 | `TARGET_ARCHITECTURE=apple-m4` | +| ARM | Cortex-A5 | arm / arm64 | cortex-a5 | `TARGET_ARCHITECTURE=cortex-a5` | +| ARM | Cortex-A7 | arm / arm64 | cortex-a7 | `TARGET_ARCHITECTURE=cortex-a7` | +| ARM | Cortex-A8 | arm / arm64 | cortex-a8 | `TARGET_ARCHITECTURE=cortex-a8` | +| ARM | Cortex-A9 | arm / arm64 | cortex-a9 | `TARGET_ARCHITECTURE=cortex-a9` | +| ARM | Cortex-A15 | arm / arm64 | cortex-a15 | `TARGET_ARCHITECTURE=cortex-a15` | +| ARM | Cortex-A17 | arm / arm64 | cortex-a17 | `TARGET_ARCHITECTURE=cortex-a17` | +| ARM | Cortex-A32 | arm / arm64 | cortex-a32 | `TARGET_ARCHITECTURE=cortex-a32` | +| ARM | Cortex-A35 | arm / arm64 | cortex-a35 | `TARGET_ARCHITECTURE=cortex-a35` | +| ARM | Cortex-A53 | arm / arm64 | cortex-a53 | `TARGET_ARCHITECTURE=cortex-a53` | +| ARM | Cortex-A55 | arm / arm64 | cortex-a55 | `TARGET_ARCHITECTURE=cortex-a55` | +| ARM | Cortex-A57 | arm / arm64 | cortex-a57 | `TARGET_ARCHITECTURE=cortex-a57` | +| ARM | Cortex-A72 | arm / arm64 | cortex-a72 | `TARGET_ARCHITECTURE=cortex-a72` | +| ARM | Cortex-A73 | arm / arm64 | cortex-a73 | `TARGET_ARCHITECTURE=cortex-a73` | +| ARM | Cortex-A75 | arm / arm64 | cortex-a75 | `TARGET_ARCHITECTURE=cortex-a75` | +| ARM | Cortex-A76 | arm / arm64 | cortex-a76 | `TARGET_ARCHITECTURE=cortex-a76` | +| ARM | Cortex-A76AE | arm / arm64 | cortex-a76ae | `TARGET_ARCHITECTURE=cortex-a76ae` | +| ARM | Cortex-A77 | arm / arm64 | cortex-a77 | `TARGET_ARCHITECTURE=cortex-a77` | +| ARM | Cortex-A78 | arm / arm64 | cortex-a78 | `TARGET_ARCHITECTURE=cortex-a78` | +| ARM | Cortex-A78AE | arm / arm64 | cortex-a78ae | `TARGET_ARCHITECTURE=cortex-a78ae` | +| ARM | Cortex-A510 | arm / arm64 | cortex-a510 | `TARGET_ARCHITECTURE=cortex-a510` | +| ARM | 
Cortex-A710 | arm / arm64 | cortex-a710 | `TARGET_ARCHITECTURE=cortex-a710` | +| ARM | Cortex-X1 | arm / arm64 | cortex-x1 | `TARGET_ARCHITECTURE=cortex-x1` | +| ARM | Cortex-X2 | arm / arm64 | cortex-x2 | `TARGET_ARCHITECTURE=cortex-x2` | +| ARM | Neoverse E1 | arm64 | neoverse-e1 | `TARGET_ARCHITECTURE=neoverse-e1` | +| ARM | Neoverse N1 | arm64 | neoverse-n1 | `TARGET_ARCHITECTURE=neoverse-n1` | +| ARM | Neoverse N2 | arm64 | neoverse-n2 | `TARGET_ARCHITECTURE=neoverse-n2` | +| ARM | Neoverse V1 | arm64 | neoverse-v1 | `TARGET_ARCHITECTURE=neoverse-v1` | +| Qualcomm | Krait | arm / arm64 | krait | `TARGET_ARCHITECTURE=krait` | +| Qualcomm | Kryo | arm64 | kryo | `TARGET_ARCHITECTURE=kryo` | +| Qualcomm | Kryo 2 | arm64 | kryo2 | `TARGET_ARCHITECTURE=kryo2` | +| Cavium | ThunderX | arm64 | thunderx | `TARGET_ARCHITECTURE=thunderx` | +| Cavium | ThunderX2 | arm64 | thunderx2 | `TARGET_ARCHITECTURE=thunderx2` | +| Cavium | ThunderX2T99 | arm64 | thunderx2t99 | `TARGET_ARCHITECTURE=thunderx2t99` | +| Cavium | ThunderXT81 | arm64 | thunderxt81 | `TARGET_ARCHITECTURE=thunderxt81` | +| Cavium | ThunderXT83 | arm64 | thunderxt83 | `TARGET_ARCHITECTURE=thunderxt83` | +| Cavium | ThunderXT88 | arm64 | thunderxt88 | `TARGET_ARCHITECTURE=thunderxt88` | +| Marvell | PJ4 | arm / arm64 | marvell-pj4 | `TARGET_ARCHITECTURE=marvell-pj4` | +| Marvell | F | arm / arm64 | marvell-f | `TARGET_ARCHITECTURE=marvell-f` | +| Marvell | XScale | arm / arm64 | xscale | `TARGET_ARCHITECTURE=xscale` | +| Broadcom | Brahma B15 | arm / arm64 | brahma-b15 | `TARGET_ARCHITECTURE=brahma-b15` | +| Broadcom | Brahma B53 | arm / arm64 | brahma-b53 | `TARGET_ARCHITECTURE=brahma-b53` | +| Applied Micro | X-Gene 1 | arm64 | xgene1 | `TARGET_ARCHITECTURE=xgene1` | +| Generic | Generic | arm / arm64 | generic | `TARGET_ARCHITECTURE=generic` | +| Generic | None (no optimization) | arm / arm64 | none | `TARGET_ARCHITECTURE=none` | +| Generic | Auto-detect host CPU | arm / arm64 | auto | 
`TARGET_ARCHITECTURE=auto` | +| Generic | Compiler “native” | arm / arm64 | native | `TARGET_ARCHITECTURE=native` | + +## PPC + +| Vendor | Codename / CPU Microarchitecture | Family | Name | CMake Flag | +|:--|:--|:--|:--|:--| +| IBM | POWER8 | PPC | power8 | `TARGET_ARCHITECTURE=power8` | +| IBM | POWER9 | PPC | power9 | `TARGET_ARCHITECTURE=power9` | +| IBM | POWER10 | PPC | power10 | `TARGET_ARCHITECTURE=power10` | +| Generic | Generic | PPC | generic | `TARGET_ARCHITECTURE=generic` | +| Generic | None (no optimization) | PPC | none | `TARGET_ARCHITECTURE=none` | +| Generic | Auto-detect host CPU | PPC | auto | `TARGET_ARCHITECTURE=auto` | +| Generic | Compiler “native” | PPC | native | `TARGET_ARCHITECTURE=native` | From 5e73b0155ba6d83d8f35d00b54b0c8f2f0eaf479 Mon Sep 17 00:00:00 2001 From: Philipp Remy Date: Fri, 17 Oct 2025 15:55:57 +0200 Subject: [PATCH 40/40] FIX: SWIG Bindings: Use correct SWIG executable and adapt for Apple ld This commit introduces two changes: (1) It correctly sets the SWIG executable for Apple targets. (2) It adapts to the Apple linker (ld), which does not allow undefined symbols in shared objects (.so) by default. Regarding (2), see SWIG issue 2469. 
Signed-off-by: Philipp Remy --- src/cmake/Dependencies.cmake | 15 +++++++++++---- src/cmake/Helpers.cmake | 9 +++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 01f0d5943d..54195fb148 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -1555,10 +1555,17 @@ if(AV_BUILD_SWIG) DEPENDS ${PCRE2_TARGET} ) - set(SWIG_CMAKE_FLAGS - -DSWIG_DIR=${CMAKE_INSTALL_PREFIX}/share/swig/4.3.0 - -DSWIG_EXECUTABLE=${CMAKE_INSTALL_PREFIX}/bin-deps - ) + if(APPLE) + set(SWIG_CMAKE_FLAGS + -DSWIG_DIR=${CMAKE_INSTALL_PREFIX}/share/swig/4.3.0 + -DSWIG_EXECUTABLE=${CMAKE_INSTALL_PREFIX}/bin/swig + ) + else() + set(SWIG_CMAKE_FLAGS + -DSWIG_DIR=${CMAKE_INSTALL_PREFIX}/share/swig/4.3.0 + -DSWIG_EXECUTABLE=${CMAKE_INSTALL_PREFIX}/bin-deps + ) + endif() endif() if(AV_BUILD_XERCESC) diff --git a/src/cmake/Helpers.cmake b/src/cmake/Helpers.cmake index 1c1739f30e..c1873758d6 100644 --- a/src/cmake/Helpers.cmake +++ b/src/cmake/Helpers.cmake @@ -365,6 +365,15 @@ function(alicevision_swig_add_library module_name) TARGET ${module_name} PROPERTY COMPILE_OPTIONS -std=c++20 ) + if(APPLE) + # The ld on macOS does not allow undefined symbols for shared objects, + # so this must be explicitly passed to the linker. + # See: https://github.com/swig/swig/issues/2469 + set_property( + TARGET ${module_name} + PROPERTY LINK_OPTIONS -undefined dynamic_lookup + ) + endif() target_link_libraries(${module_name} PUBLIC ${SWIG_MODULE_PUBLIC_LINKS}