diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bd72150ef..7db8e0ed6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,13 +1,33 @@ cmake_minimum_required(VERSION 3.25) project(aliceVision LANGUAGES C CXX) +# Initialize CMAKE_OSX_ARCHITECTURES, if not specified on the command line. +if(APPLE AND NOT CMAKE_OSX_ARCHITECTURES) + message(STATUS "Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}") + if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64") + set(CMAKE_OSX_ARCHITECTURES "arm64") + elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(CMAKE_OSX_ARCHITECTURES "x86_64") + else() + message(FATAL_ERROR "CMAKE_HOST_SYSTEM_PROCESSOR was neither arm64 nor x86_64 on an Apple platform and CMAKE_OSX_ARCHITECTURES was not specified!") + endif() +endif() + option(ALICEVISION_BUILD_DEPENDENCIES "Build all AliceVision dependencies" OFF) +option(ALICEVISION_USE_RPATH "Add RPATH on software with relative paths to libraries" ON) option(AV_BUILD_ALICEVISION "Enable building of AliceVision" ON) option(AV_EIGEN_MEMORY_ALIGNMENT "Enable Eigen memory alignment" ON) option(ALICEVISION_BUILD_TESTS "Build AliceVision tests" OFF) -option(AV_USE_CUDA "Enable CUDA" ON) -option(AV_USE_OPENMP "Enable OpenMP" $<$,OFF,ON>) # disable by default for AppleClang +option(AV_USE_OPENMP "Enable OpenMP" ON) # AppleClang now supports OpenMP, if installed as an external dependency (Homebrew, MacPorts, ...) +if(APPLE) + option(AV_USE_CUDA "Enable CUDA" OFF) +else() + option(AV_USE_CUDA "Enable CUDA" ON) +endif() option(BUILD_SHARED_LIBS "Build shared libraries" ON) +if(APPLE AND BUILD_SHARED_LIBS) + option(BUILD_APPLE_FRAMEWORKS "Create Frameworks instead of plain dynamic libraries on macOS" ON) +endif() option(ALICEVISION_INSTALL_MESHROOM_PLUGIN "Install AliceVision's plugin for Meshroom" ON) # Global policy section @@ -17,6 +37,12 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type for AliceVision" FORCE) endif() +# Currently no universal binaries are supported. 
Fail early. +# FIXME: Enable universal builds by adapting dependency building accordingly. +if(APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64" AND CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + message(FATAL_ERROR "Building universal binaries is currently not supported. Please set 'CMAKE_OSX_ARCHITECTURES' to either arm64 (Apple Silicon) or x86_64 (Intel)!") +endif() + # set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type for AliceVision") set(DEPS_CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type for all external libraries (only used if ALICEVISION_BUILD_DEPENDENCIES=ON)") string(TOLOWER ${DEPS_CMAKE_BUILD_TYPE} DEPS_CMAKE_BUILD_TYPE_LOWERCASE) diff --git a/INSTALL.md b/INSTALL.md index ce81658f95..6590522fa0 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,5 +1,7 @@ # AliceVision +For specific build instructions for macOS see [INSTALL_macOS.md](./INSTALL_macOS.md). + ## Build instructions Required tools: @@ -357,25 +359,6 @@ make test * Change the target to Release. * Compile the libraries and binaries samples. - -### Mac OSX compilation - -```bash -git clone --recursive https://github.com/alicevision/AliceVision.git -mkdir build && cd build -cmake -DCMAKE_BUILD_TYPE=Release -G "Xcode" ../AliceVision -``` - -If you want to enable the build of the unit tests: -```bash -cmake -DCMAKE_BUILD_TYPE=Release \ - -DALICEVISION_BUILD_TESTS=ON \ - -G "Xcode" \ - ../AliceVision -xcodebuild -configuration Release -``` - - ## Using AliceVision as a third party library dependency in CMake AliceVision can be used as a third party library once it has been installed. diff --git a/INSTALL_macOS.md b/INSTALL_macOS.md new file mode 100644 index 0000000000..9a00586801 --- /dev/null +++ b/INSTALL_macOS.md @@ -0,0 +1,176 @@ +# Building the project on macOS + +Make sure to read the whole document at least *once* before compiling the project! 
If you find any bugs in the build infrastructure, consider opening a bug report on the [GitHub Issue page](https://github.com/alicevision/AliceVision/issues). + +## Platform Support + +AliceVision is supported for both `arm64` and `x86_64` based Macs. Cross-compiling between the two architectures is supported (see following remarks and the build instructions for cross-compilations below). + +Note that support for `x86_64` based Macs might be removed at some point in the future, as Apple is slowly reaching EOL for Intel Macs. + +Any CUDA-related functionality is disabled and unavailable on macOS. NVIDIA has long stopped shipping the CUDA toolkit for macOS and using a Mac compatible with one of the last supported cards is too unlikely to even try hacking in Apple+CUDA support into the project. This currently implies the following: + +- PopSIFT is completely unavailable and attempting to build it will cause a CMake error. +- CCTag won't use its CUDA implementation and instead uses the CPU backend. +- ONNX Runtime with GPU support is unavailable. +- The DepthMap library from AliceVision (and therefore all its dependants) are unavailable due to them being implemented only in CUDA. This might change in the future if new DepthMap backends are added. + +## Building from Source + +Building on macOS has a few important remarks: + +- It is *highly* recommended to build with `ALICEVISION_BUILD_DEPENDENCIES=ON`. This ensures that all required dependencies are available and are fulfilling API and ABI requirements. Relying on dependencies provided by a package manager is not supported by the project (see the following note). + +- It is *highly* recommended to have a clean build environment when compiling AliceVision for the host architecture (only the required dependencies and tools should be installed). 
Due to the amount of dependencies involved we cannot guarantee that package managers are always shipping the required components and that the versions available are API and ABI compatible[^1]. While it might be harmless for one user, we cannot test every possible combination and must therefore declare a build with package-manager dependencies _unsupported_. + +- It is *mandatory* to have a clean build system when you are cross-compiling from `arm64` to `x86_64` and vice-versa. We simply cannot force any subprojects and dependencies to not look for libraries provided by your package manager[^2], which *will* cause linker errors in the build process[^3]. + +- It is *mandatory* to have a clean build system if you want to compile a package that is redistributable (including all dependencies). This ensures that there are no external dependencies which one subproject silently pulled in. The danger in this case stems from the fact that this could go completely unnoticed, because on the build machine all dependencies are available and the package therefore 'just works'™[^4]. + +- Besides some external build tools, the project can build *all* required dependencies from source. + +- At this time the project *only* supports building on macOS with the `Unix Makefiles` CMake generator (this is the default). + +- By default, the project optimizes the build for the CPU architecture of the build machine. This is usually desirable to ensure proper usage of SIMD instructions. If you need broader support, especially for redistribution, consider specifying the target architecture when configuring the project with CMake: If you want to disable optimization completely, pass `TARGET_ARCHITECTURE=none`. In the same manner you can explicitly set the architecture you want to optimize for[^5]. You can enable verbose output of this process by passing `OFA_VERBOSE=ON` on the CMake CLI. 
Only change this if you know what you are doing: Tampering with these options will produce binaries that in the worst case will fault at runtime and in the best case are many times slower than expected. + +- For some other Apple-specific CMake options see this section: [CMake Options for Apple](#cmake-options-for-apple). + +## Required external tools + +These differ slightly depending on the target architecture you want to build for. Note that if you are cross-compiling, you still need the additional tools for the target architecture. As these are only build-time tools, they don't have to be compiled *for* the target architecture, they only need to be available on your `$PATH` on the build machine. + +### arm64 (Apple Silicon Macs, M-CPUs) + +- [x] A working C/C++ compiler with C++20 support (e.g., Xcode/Xcode Command Line Tools)[^6] +- [x] cmake >= 3.25 but < 4.0 (some dependencies do not support CMake 4) +- [x] make (included in Xcode/Xcode Command Line Tools) +- [x] autoconf (Homebrew, MacPorts, Nix) +- [x] automake (Homebrew, MacPorts, Nix) +- [x] pkgconfig (Homebrew, MacPorts, Nix) +- [x] gettext (Homebrew, MacPorts, Nix) +- [x] m4 (Homebrew, MacPorts, Nix) +- [x] BISON[^7] (Homebrew, MacPorts, Nix) +- [x] NumPy[^8] (Homebrew, pip) + +### x86_64 (Intel Macs, Core-i-CPUs) + +- [x] ALL FROM ABOVE +- [x] nasm[^9] (Homebrew, MacPorts, Nix) + +## Build Instructions for native compilation + +1. Create a build directory (in-source builds are unsupported): + +`mkdir build && cd build` + +2. Configure the project from the build directory: + ```bash + cmake \ + -DCMAKE_BUILD_TYPE=<Release|Debug> \ + -DCMAKE_INSTALL_PREFIX=<install-prefix> \ # Omit this if you want a system-wide install + -DALICEVISION_BUILD_DEPENDENCIES=ON \ # Mandatory for Apple targets + -DAV_BUILD_DEPENDENCIES_PARALLEL=<num-threads> \ # Setting this to 0 will use all threads available + ... \ # To enable or disable other options + <path-to-source> # Can be relative and will usually just be '..' + ``` + +3. 
Start build: + +You might see *a lot* of warning messages, especially from the embedded dependencies' build process. This is expected and as long as no errors occur, you shouldn't care. + +`make # Note the missing -j option: Omit it to avoid build issues. This is handled internally by AV_BUILD_DEPENDENCIES_PARALLEL.` + +4. Install project: + +`(sudo) make install # Use sudo for a system-wide install` + +[OPTIONAL: 5. Create a bundle] + +This target creates a self-contained bundle (i.e., a folder containing a `lib` and a `bin` folder, with no external dependencies besides any system libraries/Frameworks)[^10]. Any additional required resources (e.g., `share` folder) must be copied manually. This is mainly useful if you want to create a redistributable bundle, especially for use in Meshroom. + +`(sudo) make darwin-bundle # Use sudo if the bundle should be created system-wide` + +## Build Instructions for cross compilation + +Enabling cross-compilation is done by setting `CMAKE_OSX_ARCHITECTURES` on the CMake CLI to *either* `arm64` *or* `x86_64`. Compiling universal binaries is *not* supported at this point. +When cross-compiling, the Optimize-For-Architecture logic will set some reasonable defaults: +- For `arm64`: The default target architecture will be `apple-m1`, making the resulting binaries compatible with all Apple Silicon Macs. +- For `x86_64`: The default target architecture will be `skylake`, making the resulting binaries compatible with all Intel Macs not older than 2015. +If you want to target a different machine, consider setting `TARGET_ARCHITECTURE` on the CMake CLI to a supported value[^5]. + +1. Create a build directory (in-source builds are unsupported): + +`mkdir build-cross && cd build-cross` + +2. 
Configure the project from the build directory: + ```bash + cmake \ + -DCMAKE_BUILD_TYPE=<Release|Debug> \ + -DCMAKE_OSX_ARCHITECTURES=<arm64|x86_64> \ # Sets the target architecture + -DCMAKE_INSTALL_PREFIX=<install-prefix> \ # Omit this if you want a system-wide install + -DALICEVISION_BUILD_DEPENDENCIES=ON \ # Mandatory for Apple targets + -DAV_BUILD_DEPENDENCIES_PARALLEL=<num-threads> \ # Setting this to 0 will use all threads available + ... \ # To enable or disable other options + <path-to-source> # Can be relative and will usually just be '..' + ``` + +3. Start build: + +You might see *a lot* of warning messages, especially from the embedded dependencies' build process. This is expected and as long as no errors occur, you shouldn't care. + +`make # Note the missing -j option: Omit it to avoid build issues. This is handled internally by AV_BUILD_DEPENDENCIES_PARALLEL.` + +4. Install project: + +`(sudo) make install # Use sudo for a system-wide install` + +[OPTIONAL: 5. Create a bundle] + +This target creates a self-contained bundle (i.e., a folder containing a `lib` and a `bin` folder, with no external dependencies besides any system libraries/Frameworks)[^10]. Any additional required resources (e.g., `share` folder) must be copied manually. This is mainly useful if you want to create a redistributable bundle, especially for use in Meshroom. 
+ +`(sudo) make darwin-bundle # Use sudo if the bundle should be created system-wide` + +## CMake Options for Apple + +These are some influential CMake options specific to Apple: + +| Option | Description | Available Values | Default Value | +| --------- | ----------- | ---------------- | ------------- | +| `CMAKE_OSX_ARCHITECTURES` | Sets the target architecture to compile for | Either `arm64` or `x86_64` | `${CMAKE_HOST_SYSTEM_PROCESSOR}` | +| `ALICEVISION_USE_RPATH` | Whether to use @rpath instead of absolute paths for resolving dependencies (highly recommended) | `ON` / `OFF` | `ON` | +| `BUILD_APPLE_FRAMEWORKS` | Whether to build Framework bundles instead of plain dynamic libraries | `ON` / `OFF` | `ON` | +| `AV_ONNX_APPLE_ARCH` | What architecture to download for the ONNX Runtime (only active if `AV_BUILD_ONNXRUNTIME=ON`) | Either `arm64` or `x86_64` | `${CMAKE_OSX_ARCHITECTURES}` | +| `AV_BUILD_OPENMP` | Whether to build an embedded OpenMP (only active if `ALICEVISION_BUILD_DEPENDENCIES=ON`, highly recommended when using AppleClang) | `ON` / `OFF` | `ON` | +| `AV_BUILD_LAPACK` | Whether to build an embedded BLAS/LAPACK (not recommended, Apple provides it through `Accelerate.framework`, requires a Fortran compiler to be available on `$PATH`) | `ON` / `OFF` | `OFF` | +| `AV_BUILD_SUITESPARSE` | Whether to build an embedded SuiteSparse (not recommended, Apple provides an equivalent for Sparse Solvers through `Accelerate.framework`, will massively increase final bundle size) | `ON` / `OFF` | `OFF` | +| `AV_BUILD_ZLIB` | Whether to build an embedded zlib (might be needed for older versions of macOS, especially when redistributing) | `ON` / `OFF` | `OFF` | +| `AV_BUILD_PCL` | Whether to build an embedded PointCloudLibrary (only required if you plan to build an embedded USD and use the `aliceVision_exportUSD` software) | `ON` / `OFF` | `OFF` | +| `AV_BUILD_USD` | Whether to build an embedded UniversalSceneDescription library (only required if you plan to use the 
`aliceVision_exportUSD` software) | `ON` / `OFF` | `OFF` | +| `ALICEVISION_REQUIRE_CERES_WITH_ACCELERATESPARSE` | Whether to require the Ceres dependency to be built with `AccelerateSparse`/`Accelerate.framework` (highly recommended to match SuiteSparse speeds) | `ON` / `OFF` | `ON` | +| `ALICEVISION_BUNDLE_PREFIX` | Where to place the bundle created by `make darwin-bundle` | Any path | `${CMAKE_INSTALL_PREFIX}/bundle` | + +[^1]: If you see linker errors, this should be the very first thing to check! Do the headers match the package of the library that is linked in? Are any non-project dependencies included from package manager directories (e.g., `/opt/homebrew`, `/usr/local`, `/opt/local`)? If so, clean your *whole* build folder, remove the offending packages and try again. + +[^2]: Take a look at `CMAKE_IGNORE_PATH` for example: While we can pass that to dependencies, it only affects `find_package()` calls that use `CONFIG` mode. If a subproject provides its own `Find-X.cmake` module, there is no way for us to exclude certain prefixes that could cause trouble. + +[^3]: If CMake actually checked for architecture compatibility in the configure step, we could just emit a nice and clear error to give some hint to the user. But as this is not the case, CMake will happily accept *any* architecture and only the final link step will tell that there was an architecture mismatch (if you can make out the one line in between the thousand lines of messages). + +[^4]: When being transferred to a different machine there are essentially three ways this could go: + (1) The other machine has all and compatible dependencies in the right location: It works. Lucky you. + (2) The other machine has all but non-compatible dependencies in the right location: It might work, if they are ABI and API compatible. If not, the worst case would be a SIGSEGV or a SIGABRT and you have no idea by what or why they were caused. 
Maybe `dyld` will complain about missing libraries, if it cannot find the correct version (similar to (3)). + (3) The other machine is missing dependencies or they are in the wrong location: This might be the best case scenario, because the error will be relatively clear: You will see something like `dyld: Library not loaded: <library>. Referenced from: <binary>. Reason: tried <list of paths>`. + +[^5]: For a list of supported values see: [Supported Architectures](src/cmake/OFA/SupportedArchitectures.md). + +[^6]: WARNING: Note that *any* package-manager provided LLVM/Clang is currently unable to build the Boost libraries with `b2` (see [this issue](https://github.com/Homebrew/homebrew-core/issues/235411)). That would not be a problem if we could use the CMake build system for Boost, but we can't do that because CCTag depends on `Boost::math_c99`, which the CMake build system *cannot build at all* :^(. Even worse, the required `bootstrap.sh` for Boost does not allow overriding the C++ compiler or adding the required LDFLAGS to fix this (e.g., by using `CXX`, `CXXFLAGS` or `LDFLAGS`). So AppleClang is practically the only option right now; you need to *completely* uninstall external LLVM/Clang packages. Work on removing the `Boost::math_c99` dependency in CCTag is WIP. Once lifted, this warning will no longer apply. + +[^7]: This is only required if you intend to build SWIG from source. It requires a BISON newer than the one provided by Apple and therefore the external binary must be on your `$PATH` *first*. Look at the documentation of your shell on how to do this. + +[^8]: This is only required if you plan to build the AliceVision SWIG bindings. + +[^9]: Alternatively you can also use `yasm`. These are mainly required by libVPX and ffmpeg. 
+ +[^10]: The underlying Python script performs the following steps: + (1) Extracts all required dependencies and available rpaths + (2) Recursively checks if all dependencies can be resolved (and if the architectures match) on a per-file basis + (3) Copies all input files and resolved dependencies into the respective folders diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b7914bee51..d870ec0dff 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -67,7 +67,11 @@ trilean_option(ALICEVISION_USE_ALEMBIC "Enable Alembic I/O" AUTO) trilean_option(ALICEVISION_USE_UNCERTAINTYTE "Enable Uncertainty computation" AUTO) trilean_option(ALICEVISION_USE_ONNX "Enable ONNX Runtime" AUTO) option(ALICEVISION_USE_ONNX_GPU "Use CUDA with ONNX Runtime" ON) -trilean_option(ALICEVISION_USE_CUDA "Enable CUDA" ON) +if(APPLE) + trilean_option(ALICEVISION_USE_CUDA "Enable CUDA" OFF) +else() + trilean_option(ALICEVISION_USE_CUDA "Enable CUDA" ON) +endif() trilean_option(ALICEVISION_USE_OPENCV "Enable use of OpenCV algorithms" OFF) trilean_option(ALICEVISION_USE_OPENCV_CONTRIB "Enable use of OpenCV algorithms from extra modules" AUTO) option(ALICEVISION_USE_OCVSIFT "Add or not OpenCV SIFT in available features" OFF) @@ -75,7 +79,11 @@ mark_as_advanced(FORCE ALICEVISION_USE_OCVSIFT) option(ALICEVISION_USE_MESHSDFILTER "Use MeshSDFilter library (enable MeshDenoising and MeshDecimate)" ON) -option(ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE "Require Ceres with SuiteSparse (ensure best performances)" ON) +if(APPLE) + option(ALICEVISION_REQUIRE_CERES_WITH_ACCELERATESPARSE "Require Ceres with AccelerateSparse (ensure best performances, macOS only)" ON) +else() + option(ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE "Require Ceres with SuiteSparse (ensure best performances)" ON) +endif() option(ALICEVISION_USE_RPATH "Add RPATH on software with relative paths to libraries" ON) option(ALICEVISION_REMOVE_ABSOLUTE "Remove absolute paths in dependencies" OFF) @@ -120,7 +128,17 @@ endif() if 
(ALICEVISION_USE_RPATH) if (APPLE) set(CMAKE_MACOSX_RPATH 1) - set(CMAKE_INSTALL_RPATH "@loader_path/../${CMAKE_INSTALL_LIBDIR};@loader_path") + set(CMAKE_INSTALL_RPATH + "@loader_path/../${CMAKE_INSTALL_LIBDIR}" + "@loader_path" + "@loader_path/../lib" + "@loader_path/../Libraries" + "@loader_path/../Frameworks" + "@executable_path/../${CMAKE_INSTALL_LIBDIR}" + "@executable_path/../lib" + "@executable_path/../Libraries" + "@executable_path/../Frameworks" + ) elseif (UNIX) set(CMAKE_INSTALL_RPATH "\\$ORIGIN/../${CMAKE_INSTALL_LIBDIR};\\$ORIGIN") endif() @@ -128,8 +146,13 @@ endif() # Set build path -set(EXECUTABLE_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") -set(LIBRARY_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") +if(APPLE) + set(EXECUTABLE_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_OSX_ARCHITECTURES}") + set(LIBRARY_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_OSX_ARCHITECTURES}") +else() + set(EXECUTABLE_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") + set(LIBRARY_OUTPUT_PATH "${ALICEVISION_ROOT}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") +endif() # Windows specific defines if (WIN32) @@ -187,14 +210,21 @@ endmacro(add_target_properties) # ============================================================================== set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -include(OptimizeForArchitecture) +include(OFA/OptimizeForArchitecture) OptimizeForArchitecture() -set(ALICEVISION_HAVE_SSE 0) -if (SSE2_FOUND OR TARGET_ARCHITECTURE STREQUAL "native") +set(ALICEVISION_HAVE_SSE 0 CACHE BOOL "SSE2 is available" FORCE) +if (USE_SSE2 OR TARGET_ARCHITECTURE STREQUAL "native") if (MSVC AND NOT ${CMAKE_CL_64}) add_definitions(/arch:SSE2) endif() - set(ALICEVISION_HAVE_SSE 1) + set(ALICEVISION_HAVE_SSE 1 CACHE BOOL "SSE2 is available" FORCE) +endif() +set(ALICEVISION_HAVE_AVX 0 CACHE BOOL "AVX is 
available" FORCE) +if(USE_AVX OR TARGET_ARCHITECTURE STREQUAL "native") + if (MSVC AND NOT ${CMAKE_CL_64}) + add_definitions(/arch:AVX) + endif() + set(ALICEVISION_HAVE_AVX 1 CACHE BOOL "AVX is available" FORCE) endif() if (UNIX) @@ -222,6 +252,7 @@ endif() # allocation feature with a separate flag, so use it if alignment is enabled in Eigen. # See https://eigen.tuxfamily.org/dox/group__TopicUnalignedArrayAssert.html if (AV_EIGEN_MEMORY_ALIGNMENT) + include(AddCompilerFlag) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1) AddCompilerFlag("-faligned-new") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 6.0) @@ -267,16 +298,8 @@ else() # ON OR AUTO endif() if (ALICEVISION_HAVE_OPENMP) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - if (NOT MSVC) - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - # for those using the clang with OpenMP support - list(APPEND ALICEVISION_LIBRARY_DEPENDENCIES omp) - else() - list(APPEND ALICEVISION_LIBRARY_DEPENDENCIES gomp) - endif() - endif() + set(ALICEVISION_OPENMP_CXX_TARGETS OpenMP::OpenMP_CXX) + list(APPEND ALICEVISION_LIBRARY_DEPENDENCIES OpenMP::OpenMP_CXX) endif() # ============================================================================== @@ -423,6 +446,9 @@ if (ALICEVISION_BUILD_SFM) if (ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE) message(STATUS "By default, Ceres required SuiteSparse to ensure best performances. if you explicitly need to build without it, you can use the option: -DALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE=OFF") find_package(Ceres QUIET REQUIRED COMPONENTS SuiteSparse CONFIG) + elseif (ALICEVISION_REQUIRE_CERES_WITH_ACCELERATESPARSE) + message(STATUS "By default, Ceres required AccelerateSparse to ensure best performances. 
if you explicitly need to build without it, you can use the option: -DALICEVISION_REQUIRE_CERES_WITH_ACCELERATESPARSE=OFF") + find_package(Ceres QUIET REQUIRED COMPONENTS AccelerateSparse CONFIG) else() find_package(Ceres CONFIG QUIET CONFIG) endif() @@ -1010,6 +1036,9 @@ add_subdirectory(dependencies) # AliceVision modules # ============================================================================== +# Initial global target list is empty, will be added by target helper functions +set(ALICEVISION_GLOBAL_TARGET_LIST "" CACHE INTERNAL "Global list of all AliceVision targets enabled") + # software(s) under patent or commercial licence # Included for research purpose only if (ALICEVISION_BUILD_SFM) @@ -1083,3 +1112,28 @@ if (ALICEVISION_BUILD_SWIG_BINDING) endif() add_custom_target(uninstall "${CMAKE_COMMAND}" -P "${cmakeUninstallFile}") + +# ============================================================================== +# Bundling on Apple targets +# ============================================================================== +# Collect the build artifact path of every target in ALICEVISION_GLOBAL_TARGET_LIST +if(APPLE) + find_program(PYTHON_EXECUTABLE python3) + set(ALICEVISION_BUILD_ARTIFACTS) + foreach(AV_TARGET ${ALICEVISION_GLOBAL_TARGET_LIST}) + # Frameworks are passed as their bundle directory, everything else as the target file + get_target_property(IS_FRAMEWORK ${AV_TARGET} FRAMEWORK) + if(IS_FRAMEWORK) + list(APPEND ALICEVISION_BUILD_ARTIFACTS "$<TARGET_BUNDLE_DIR:${AV_TARGET}>") + else() + list(APPEND ALICEVISION_BUILD_ARTIFACTS "$<TARGET_FILE:${AV_TARGET}>") + endif() + endforeach() + # Custom target that runs darwin_bundle.py on all collected artifacts + add_custom_target(darwin-bundle + ${PYTHON_EXECUTABLE} ${ALICEVISION_ROOT}/../src/cmake/darwin_bundle.py -o ${ALICEVISION_BUNDLE_PREFIX} ${ALICEVISION_BUILD_ARTIFACTS} + DEPENDS ${ALICEVISION_GLOBAL_TARGET_LIST} + COMMENT "Creating Darwin Bundle" + COMMAND_EXPAND_LISTS + ) +endif() diff --git a/src/aliceVision/lensCorrectionProfile/CMakeLists.txt b/src/aliceVision/lensCorrectionProfile/CMakeLists.txt index ba9a5e9583..fc534a310f 100644 --- a/src/aliceVision/lensCorrectionProfile/CMakeLists.txt +++ 
b/src/aliceVision/lensCorrectionProfile/CMakeLists.txt @@ -13,4 +13,5 @@ alicevision_add_library(aliceVision_lensCorrectionProfile PRIVATE_LINKS Boost::log expat::expat + ${ALICEVISION_OPENMP_CXX_TARGETS} ) diff --git a/src/aliceVision/system/CMakeLists.txt b/src/aliceVision/system/CMakeLists.txt index 0139df4b62..625669f978 100644 --- a/src/aliceVision/system/CMakeLists.txt +++ b/src/aliceVision/system/CMakeLists.txt @@ -33,6 +33,7 @@ alicevision_add_library(aliceVision_system Boost::system Boost::date_time ${ALICEVISION_NVTX_LIBRARY} + ${ALICEVISION_OPENMP_CXX_TARGETS} PRIVATE_LINKS Boost::boost ) diff --git a/src/cmake/Dependencies.cmake b/src/cmake/Dependencies.cmake index 5f3791fe6b..54195fb148 100644 --- a/src/cmake/Dependencies.cmake +++ b/src/cmake/Dependencies.cmake @@ -1,25 +1,44 @@ #Build rules for all dependencies include(ExternalProject) +# Set CMP0135 to properly rebuild if URLs change +cmake_policy(SET CMP0135 NEW) + set(AV_BUILD_DEPENDENCIES_PARALLEL 1 CACHE STRING "Number of cores to use when building dependencies (0 - use the number of cores of the processor)" ) -set(AV_ONNX_APPLE_ARCH "arm64" CACHE STRING "Version to download OFF Apple [arm64, x86_64]") +set(AV_ONNX_APPLE_ARCH ${CMAKE_OSX_ARCHITECTURES} CACHE STRING "Version to download OFF Apple [arm64, x86_64]") -option(AV_BUILD_CUDA "Enable building an embedded Cuda" OFF) +if(NOT APPLE) + option(AV_BUILD_CUDA "Enable building an embedded Cuda" OFF) +endif() +if(APPLE) + option(AV_BUILD_OPENMP "Enable building an embedded OpenMP" ON) +else() + option(AV_BUILD_OPENMP "Enable building an embedded OpenMP" OFF) +endif() option(AV_BUILD_ZLIB "Enable building an embedded ZLIB" OFF) option(AV_BUILD_ASSIMP "Enable building an embedded ASSIMP" ON) option(AV_BUILD_TIFF "Enable building an embedded Tiff" ON) option(AV_BUILD_JPEG "Enable building an embedded Jpeg" ON) option(AV_BUILD_PNG "Enable building an embedded Png" ON) option(AV_BUILD_LIBRAW "Enable building an embedded libraw" ON) 
-option(AV_BUILD_POPSIFT "Enable building an embedded PopSift" ON) +if(AV_USE_CUDA) + option(AV_BUILD_POPSIFT "Enable building an embedded PopSift" ON) +else() + option(AV_BUILD_POPSIFT "Enable building an embedded PopSift" OFF) +endif() option(AV_BUILD_CCTAG "Enable building an embedded CCTag" ON) option(AV_BUILD_APRILTAG "Enable building an embedded AprilTag" ON) option(AV_BUILD_OPENCV "Enable building an embedded OpenCV" ON) option(AV_BUILD_ONNXRUNTIME "Enable building an embedded ONNX runtime" ON) -option(AV_BUILD_LAPACK "Enable building an embedded Lapack" ON) -option(AV_BUILD_SUITESPARSE "Enable building an embedded SuiteSparse" ON) +if(APPLE) + option(AV_BUILD_LAPACK "Enable building an embedded Lapack" OFF) # On Darwin targets, BLAS/LAPACK is provided by Accelerate.framework + option(AV_BUILD_SUITESPARSE "Enable building an embedded SuiteSparse" OFF) # On Darwin targets, Sparse Solvers are provided by Accelerate.framework (only used by Ceres) +else() + option(AV_BUILD_LAPACK "Enable building an embedded Lapack" ON) + option(AV_BUILD_SUITESPARSE "Enable building an embedded SuiteSparse" ON) +endif() option(AV_BUILD_FFMPEG "Enable building an embedded FFMpeg" ON) option(AV_BUILD_VPX "Enable building an embedded libvpx required for ffmpeg" ON) option(AV_BUILD_COINUTILS "Enable building an embedded CoinUtils" ON) @@ -42,8 +61,22 @@ option(AV_BUILD_OPENIMAGEIO "Enable building an embedded OpenImageIO library" ON option(AV_BUILD_BOOST "Enable building an embedded Boost library" ON) option(AV_BUILD_CERES "Enable building an embedded Ceres library" ON) option(AV_BUILD_SWIG "Enable building an embedded SWIG library" ON) -option(AV_BUILD_PYBIND11 "Enable building of pybind11 library" OFF) +if(AV_BUILD_OPENIMAGEIO) + option(AV_BUILD_PYBIND11 "Enable building of pybind11 library" ON) +else() + option(AV_BUILD_PYBIND11 "Enable building of pybind11 library" OFF) +endif() option(AV_BUILD_OPENMESH "Enable building an embedded OpenMesh library" ON) +if(AV_BUILD_E57FORMAT) + 
option(AV_BUILD_XERCESC "Enable building an embedded XercesC library" ON) +else() + option(AV_BUILD_XERCESC "Enable building an embedded XercesC library" OFF) +endif() +if(AV_BUILD_SWIG) + option(AV_BUILD_PCRE2 "Enable building an embedded PCRE2 library" ON) +else() + option(AV_BUILD_PCRE2 "Enable building an embedded PCRE2 library" OFF) +endif() if(AV_BUILD_DEPENDENCIES_PARALLEL EQUAL 0) cmake_host_system_information(RESULT AV_BUILD_DEPENDENCIES_PARALLEL QUERY NUMBER_OF_LOGICAL_CORES) @@ -51,6 +84,7 @@ endif() ##########LOGGING#########"" message(STATUS "") +message(STATUS "AV_BUILD_OPENMP: ${AV_BUILD_OPENMP}") message(STATUS "AV_BUILD_CUDA: ${AV_BUILD_CUDA}") message(STATUS "AV_BUILD_ZLIB: ${AV_BUILD_ZLIB}") message(STATUS "AV_BUILD_ASSIMP: ${AV_BUILD_ASSIMP}") @@ -92,6 +126,8 @@ message(STATUS "AV_BUILD_OPENIMAGEIO ${AV_BUILD_OPENIMAGEIO}") message(STATUS "AV_BUILD_CERES ${AV_BUILD_CERES}") message(STATUS "AV_BUILD_SWIG ${AV_BUILD_SWIG}") message(STATUS "AV_BUILD_OPENMESH ${AV_BUILD_OPENMESH}") +message(STATUS "AV_BUILD_XERCESC ${AV_BUILD_XERCESC}") +message(STATUS "AV_BUILD_PCRE2 ${AV_BUILD_PCRE2}") message(STATUS "AV_BUILD_DEPENDENCIES_PARALLEL: ${AV_BUILD_DEPENDENCIES_PARALLEL}") ##########END LOGGING#########" @@ -104,10 +140,60 @@ set(CMAKE_CORE_BUILD_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_STANDARD=20 + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + -DCMAKE_MACOSX_RPATH=${ALICEVISION_USE_RPATH} ) +# Set additional reusable flags for cross-compiling on macOS +# Supports x86_64/arm64 cross-compilation +if(APPLE) + # Get the current sysroot + execute_process(COMMAND xcrun --sdk macosx --show-sdk-path + OUTPUT_VARIABLE APPLE_SYSROOT + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(APPLE_ARCH_FLAGS -arch\ arm64) + set(APPLE_ARCH_CFLAGS CFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_CXXFLAGS CXXFLAGS=${APPLE_ARCH_FLAGS}) + 
set(APPLE_ARCH_LDFLAGS LDFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_HOST --host=aarch64-apple-darwin) + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(APPLE_ARCH_FLAGS -arch\ x86_64) + set(APPLE_ARCH_CFLAGS CFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_CXXFLAGS CXXFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_LDFLAGS LDFLAGS=${APPLE_ARCH_FLAGS}) + set(APPLE_ARCH_HOST --host=x86_64-apple-darwin) + endif() +endif() + #### START EXTERNAL #### +if(AV_BUILD_OPENMP) + set(OPENMP_TARGET OpenMP) + ExternalProject_Add(${OPENMP_TARGET} + URL https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-20.1.8.zip + URL_HASH MD5=3b667447bfc7f17e34f91bbeab030e82 + DOWNLOAD_DIR ${BUILD_DIR}/download/${OPENMP_TARGET} + PREFIX ${BUILD_DIR} + BUILD_IN_SOURCE 0 + BUILD_ALWAYS 0 + UPDATE_COMMAND "" + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/${OPENMP_TARGET} + BINARY_DIR ${BUILD_DIR}/${OPENMP_TARGET}_build + INSTALL_DIR ${CMAKE_INSTALL_PREFIX} + CONFIGURE_COMMAND + ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} + -DCMAKE_INSTALL_PREFIX:PATH= + -DLIBOMP_ENABLE_SHARED=ON + -DLIBOMP_ENABLE_STATIC=OFF + /openmp + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + ) +endif() + if(AV_BUILD_ZLIB) set(ZLIB_TARGET zlib) @@ -187,12 +273,12 @@ if(AV_BUILD_GEOGRAM) if(WIN32) set(VORPALINE_PLATFORM_FLAGS -DVORPALINE_PLATFORM=Win-vs-dynamic-generic) elseif(APPLE) - if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") set(VORPALINE_PLATFORM_FLAGS -DVORPALINE_PLATFORM=Darwin-clang-dynamic) - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") set(VORPALINE_PLATFORM_FLAGS -DVORPALINE_PLATFORM=Darwin-aarch64-clang-dynamic) else() - message(FATAL_ERROR "Encountered unsupported CMAKE_SYSTEM_PROCESSOR value when trying to set VORPALINE_PLATFORM for Geogram! 
Supported architectures are x86_64 and aarch64/arm64.") + message(FATAL_ERROR "Encountered unsupported CMAKE_OSX_ARCHITECTURES value when trying to set VORPALINE_PLATFORM for Geogram! Supported architectures are x86_64 and aarch64/arm64.") endif() elseif(UNIX) # Assumes Linux if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") @@ -317,6 +403,7 @@ if(AV_BUILD_EIGEN) BINARY_DIR ${BUILD_DIR}/eigen_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} -DCMAKE_CXX_STANDARD=20 ${EIGEN_CMAKE_ALIGNMENT_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= @@ -459,8 +546,8 @@ if(AV_BUILD_TIFF) set(TIFF_TARGET tiff) ExternalProject_Add(${TIFF_TARGET} - URL http://download.osgeo.org/libtiff/tiff-4.5.0.tar.gz - URL_HASH MD5=db9e220a1971acc64487f1d51a20dcaa + URL https://download.osgeo.org/libtiff/tiff-4.7.1.tar.xz + URL_HASH MD5=f1524d2d57d93e8a521c30e3a56b99e6 DOWNLOAD_DIR ${BUILD_DIR}/download/tiff PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 @@ -469,11 +556,14 @@ if(AV_BUILD_TIFF) SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/tiff BINARY_DIR ${BUILD_DIR}/tiff_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} - CONFIGURE_COMMAND /configure - --prefix= - --disable-tests - --disable-docs - --disable-tools + CONFIGURE_COMMAND + ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} + -Dtiff-tools=OFF + -Dtiff-tests=OFF + -Dtiff-docs=OFF + -DCMAKE_INSTALL_PREFIX= + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} INSTALL_COMMAND $(MAKE) install DEPENDS ${ZLIB_TARGET} @@ -487,8 +577,10 @@ endif() if(AV_BUILD_PNG) # Add LibPng - if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") - set(AV_PNG_ARM_NEON OFF) + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") + # Enable ARM NEON on ARM CPUs, as it should be + # supported on any reasonably modern ARM CPU + set(AV_PNG_ARM_NEON on) else() set(AV_PNG_ARM_NEON off) endif() @@ -497,8 +589,8 @@ if(AV_BUILD_PNG) ExternalProject_Add( ${PNG_TARGET} - URL https://download.sourceforge.net/libpng/libpng-1.6.39.tar.gz - URL_HASH 
MD5=93b8e79a008747e70f7704f600349559 + URL https://github.com/pnggroup/libpng/archive/refs/tags/v1.6.50.tar.gz + URL_HASH MD5=4a6433f54317b8f0d4cb749c09d4eff2 DOWNLOAD_DIR ${BUILD_DIR}/download/libpng PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 @@ -601,7 +693,7 @@ if(AV_BUILD_LIBRAW) -DINSTALL_CMAKE_MODULE_PATH:PATH=/cmake BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} - DEPENDS libraw_cmake ${ZLIB_TARGET} + DEPENDS libraw_cmake ${ZLIB_TARGET} ${OPENMP_TARGET} ) set(LIBRAW_CMAKE_FLAGS @@ -622,6 +714,16 @@ if(AV_BUILD_BOOST) set(SCRIPT_EXTENSION sh) endif() + # Allow cross-compiling on Apple targets + if(APPLE) + if(${CMAKE_OSX_ARCHITECTURES} MATCHES "arm64") + set(BOOST_APPLE_ARCH arm) + elseif(${CMAKE_OSX_ARCHITECTURES} MATCHES "x86_64") + set(BOOST_APPLE_ARCH x86) + endif() + set(APPLE_B2_FLAGS toolset=clang-darwin target-os=darwin architecture=${BOOST_APPLE_ARCH} cxxflags=${APPLE_ARCH_FLAGS} cflags=${APPLE_ARCH_FLAGS} linkflags=${APPLE_ARCH_FLAGS}) + endif() + ExternalProject_Add(${BOOST_TARGET} URL https://archives.boost.io/release/1.86.0/source/boost_1_86_0.tar.bz2 URL_HASH MD5=2d098ba2e1457708a02de996857c2b10 @@ -638,10 +740,10 @@ if(AV_BUILD_BOOST) ./bootstrap.${SCRIPT_EXTENSION} --prefix= --with-libraries=atomic,container,date_time,exception,graph,iostreams,json,log,math,program_options,regex,serialization,system,test,thread,stacktrace,timer BUILD_COMMAND cd && - ./b2 --prefix= variant=${DEPS_CMAKE_BUILD_TYPE_LOWERCASE} cxxstd=20 link=shared threading=multi -j8 + ./b2 --prefix= variant=${DEPS_CMAKE_BUILD_TYPE_LOWERCASE} cxxstd=20 link=shared threading=multi ${APPLE_B2_FLAGS} -j${AV_BUILD_DEPENDENCIES_PARALLEL} INSTALL_COMMAND cd && - ./b2 variant=${DEPS_CMAKE_BUILD_TYPE_LOWERCASE} cxxstd=20 link=shared threading=multi install + ./b2 variant=${DEPS_CMAKE_BUILD_TYPE_LOWERCASE} cxxstd=20 link=shared threading=multi ${APPLE_B2_FLAGS} install DEPENDS ${ZLIB_TARGET} ) @@ -652,16 +754,51 @@ if(AV_BUILD_FFMPEG) if(AV_BUILD_VPX) set(VPX_TARGET libvpx) + # This 
is currently required until libVPX properly supports macOS 26 + # Tahoe. + # The configure.sh script only detects Darwin versions up to 24.X.X, + # but macOS 26 Tahoe is version 25.X.X, causing the configure script + # to fall back to generic-gnu, which assumes Linux and therefore pulls + # in the wrong linker flags. + # If we detect macOS 26 or higher, we explicitly set the toolchain to + # be (x86_64/arm64)-darwin24-gcc. For this, use CMAKE_SYSTEM_VERSION. + set(VPX_TOOLCHAIN_FLAG) + if(APPLE) + if(CMAKE_SYSTEM_VERSION VERSION_GREATER_EQUAL 25) # Tahoe and later + if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(VPX_TOOLCHAIN_FLAG --target=x86_64-darwin24-gcc) + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(VPX_TOOLCHAIN_FLAG --target=arm64-darwin24-gcc) + endif() + else() # Lower + # Extract major Darwin Version + string(REGEX MATCH "^[0-9]+" DARWIN_VERSION_MAJOR "${CMAKE_SYSTEM_VERSION}") + if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(VPX_TOOLCHAIN_FLAG --target=x86_64-darwin${DARWIN_VERSION_MAJOR}-gcc) + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(VPX_TOOLCHAIN_FLAG --target=arm64-darwin${DARWIN_VERSION_MAJOR}-gcc) + endif() + endif() + endif() + + if(APPLE AND ALICEVISION_USE_RPATH) + set(VPX_APPLE_LDFLAGS ${CMAKE_COMMAND} -E env LDFLAGS=-Wl,-install_name,@rpath/libvpx.dylib) + else() + set(VPX_APPLE_LDFLAGS) + endif() + ExternalProject_add(${VPX_TARGET} GIT_REPOSITORY https://chromium.googlesource.com/webm/libvpx.git - GIT_TAG v1.13.0 + GIT_TAG v1.15.2 GIT_PROGRESS OFF PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 BUILD_ALWAYS 0 UPDATE_COMMAND "" INSTALL_DIR ${CMAKE_INSTALL_PREFIX} - CONFIGURE_COMMAND /configure --prefix= + CONFIGURE_COMMAND + ${VPX_APPLE_LDFLAGS} + /configure --prefix= ${VPX_TOOLCHAIN_FLAG} --enable-shared --disable-static --disable-examples BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} ) @@ -669,9 +806,28 @@ if(AV_BUILD_FFMPEG) set(FFMPEG_TARGET ffmpeg) + if(APPLE AND ALICEVISION_USE_RPATH) + 
set(FFMPEG_APPLE_LDFLAGS --install-name-dir=@rpath) + else() + set(FFMPEG_APPLE_LDFLAGS) + endif() + + if(APPLE) + if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(APPLE_FFMPEG_ARCH_FLAGS --arch=x86_64 --enable-cross-compile --sysroot=${APPLE_SYSROOT}) + elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(APPLE_FFMPEG_ARCH_FLAGS --arch=aarch64 --enable-cross-compile --sysroot=${APPLE_SYSROOT}) + endif() + set(FFMPEG_CFLAGS -I/include\ ${APPLE_ARCH_FLAGS}\ -Wl,-headerpad_max_install_names) # ffmpeg does not include padding in the Mach-O header + set(FFMPEG_LDFLAGS -L/lib\ ${APPLE_ARCH_FLAGS}\ -headerpad_max_install_names) # ffmpeg does not include padding in the Mach-O header + else() + set(FFMPEG_CFLAGS -I/include) + set(FFMPEG_LDFLAGS -L/lib) + endif() + ExternalProject_add(${FFMPEG_TARGET} - URL http://ffmpeg.org/releases/ffmpeg-5.1.2.tar.bz2 - URL_HASH MD5=53ce2a391fe1db4b5ce5c43b9ea9a814 + URL https://www.ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz + URL_HASH MD5=26f2bd7d20c6c616f31d7130c88d7250 DOWNLOAD_DIR ${BUILD_DIR}/download/ffmpeg PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 @@ -681,13 +837,15 @@ if(AV_BUILD_FFMPEG) INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND /configure --prefix= - --extra-cflags="-I/include" - --extra-ldflags="-L/lib" + --extra-cflags=${FFMPEG_CFLAGS} + --extra-ldflags=${FFMPEG_LDFLAGS} --enable-shared --disable-static --disable-gpl --enable-nonfree --enable-libvpx + ${APPLE_FFMPEG_ARCH_FLAGS} + ${FFMPEG_APPLE_LDFLAGS} BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} DEPENDS ${VPX_TARGET} ) @@ -731,7 +889,7 @@ if(AV_BUILD_FLANN) BINARY_DIR ${BUILD_DIR}/${FLANN_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND - ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/lib64/pkgconfig/ + ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH="${CMAKE_INSTALL_PREFIX}/lib64/pkgconfig:${CMAKE_INSTALL_PREFIX}/lib/pkgconfig" ${CMAKE_COMMAND} ${CMAKE_CORE_BUILD_FLAGS} -DBUILD_C_BINDINGS:BOOL=OFF @@ -782,8 +940,8 @@ 
if(AV_BUILD_PCL) set(PCL_TARGET pcl) ExternalProject_Add(${PCL_TARGET} - URL https://github.com/PointCloudLibrary/pcl/archive/refs/tags/pcl-1.13.0.tar.gz - URL_HASH MD5=987a5f6e440407a2bcae10c1022568b0 + URL https://github.com/PointCloudLibrary/pcl/archive/refs/tags/pcl-1.15.1.tar.gz + URL_HASH MD5=e29ad2147fbe2109233e2b3a0254dbab DOWNLOAD_DIR ${BUILD_DIR}/download/${PCL_TARGET} PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 @@ -809,7 +967,7 @@ if(AV_BUILD_PCL) ${ZLIB_CMAKE_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} - DEPENDS ${FLANN_TARGET} ${LZ4_TARGET} ${EIGEN_TARGET} ${BOOST_TARGET} ${PNG_TARGET} ${CUDA_TARGET} ${ZLIB_TARGET} + DEPENDS ${FLANN_TARGET} ${LZ4_TARGET} ${EIGEN_TARGET} ${BOOST_TARGET} ${PNG_TARGET} ${CUDA_TARGET} ${ZLIB_TARGET} ${OPENMP_TARGET} ) set(PCL_CMAKE_FLAGS -DPCL_DIR:PATH=${CMAKE_INSTALL_PREFIX}/share/pcl-1.12/) @@ -818,9 +976,16 @@ endif() if(AV_BUILD_USD) set(USD_TARGET pxr) + if(APPLE) + set(PYTHON_EXECUTABLE python3) + set(APPLE_ARCH_TARGET_FLAG --build-target ${CMAKE_OSX_ARCHITECTURES}) + else() + set(PYTHON_EXECUTABLE python) + endif() + ExternalProject_Add(${USD_TARGET} GIT_REPOSITORY https://github.com/PixarAnimationStudios/USD.git - GIT_TAG v23.05 + GIT_TAG v25.08 PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 BUILD_ALWAYS 0 @@ -830,7 +995,7 @@ if(AV_BUILD_USD) SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/usd BINARY_DIR ${BUILD_DIR}/usd_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} - BUILD_COMMAND python ${CMAKE_CURRENT_BINARY_DIR}/usd/build_scripts/build_usd.py + BUILD_COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/usd/build_scripts/build_usd.py --build-shared --no-examples --no-tools @@ -845,6 +1010,7 @@ if(AV_BUILD_USD) --no-tests --no-docs --no-python + ${APPLE_ARCH_TARGET_FLAG} ) @@ -1304,11 +1470,12 @@ if(AV_BUILD_CERES) ${CMAKE_COMMAND} ${CMAKE_CORE_BUILD_FLAGS} ${SUITESPARSE_CMAKE_FLAGS} - -DSUITESPARSE:BOOL=ON + -DSUITESPARSE:BOOL=$,OFF,ON> -DLAPACK:BOOL=ON ${EIGEN_CMAKE_FLAGS} 
-DMINIGLOG=ON -DBUILD_EXAMPLES:BOOL=OFF + -DBUILD_TESTING:BOOL=OFF -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} @@ -1334,6 +1501,7 @@ if(AV_BUILD_LEMON) BINARY_DIR ${BUILD_DIR}/${LEMON_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} # FIXME: Use CMAKE_CORE_BUILD_FLAGS, as soon as LEMON supports C++20. -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} ) @@ -1341,6 +1509,30 @@ if(AV_BUILD_LEMON) set(LEMON_CMAKE_FLAGS -DLEMON_DIR:PATH=${CMAKE_INSTALL_PREFIX}/share/lemon/cmake) endif() +if(AV_BUILD_PCRE2) + set(PCRE2_TARGET PCRE2) + + ExternalProject_Add(${PCRE2_TARGET} + URL https://github.com/PCRE2Project/pcre2/archive/refs/tags/pcre2-10.46.tar.gz + URL_HASH MD5=d23a93c740f6e53833835493835a769b + DOWNLOAD_DIR ${BUILD_DIR}/download/${PCRE2_TARGET} + PREFIX ${BUILD_DIR} + BUILD_IN_SOURCE 0 + BUILD_ALWAYS 0 + UPDATE_COMMAND "" + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/${PCRE2_TARGET} + BINARY_DIR ${BUILD_DIR}/${PCRE2_TARGET}_build + INSTALL_DIR ${CMAKE_INSTALL_PREFIX} + CONFIGURE_COMMAND ${CMAKE_COMMAND} ${CMAKE_CORE_BUILD_FLAGS} + -DCMAKE_INSTALL_PREFIX:PATH= + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + ) + + set(PCRE2_CMAKE_FLAGS + -DPCRE2_DIR=${CMAKE_INSTALL_PREFIX}/lib/cmake/pcre2 + ) +endif() + if(AV_BUILD_SWIG) set(SWIG_TARGET SWIG) @@ -1356,14 +1548,47 @@ if(AV_BUILD_SWIG) BINARY_DIR ${BUILD_DIR}/${SWIG_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} + ${PCRE2_CMAKE_FLAGS} -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + DEPENDS ${PCRE2_TARGET} ) - set(SWIG_CMAKE_FLAGS - -DSWIG_DIR=${CMAKE_INSTALL_PREFIX}/share/swig/4.3.0 - -DSWIG_EXECUTABLE=${CMAKE_INSTALL_PREFIX}/bin-deps + if(APPLE) + set(SWIG_CMAKE_FLAGS + -DSWIG_DIR=${CMAKE_INSTALL_PREFIX}/share/swig/4.3.0 + 
-DSWIG_EXECUTABLE=${CMAKE_INSTALL_PREFIX}/bin/swig + ) + else() + set(SWIG_CMAKE_FLAGS + -DSWIG_DIR=${CMAKE_INSTALL_PREFIX}/share/swig/4.3.0 + -DSWIG_EXECUTABLE=${CMAKE_INSTALL_PREFIX}/bin-deps + ) + endif() +endif() + +if(AV_BUILD_XERCESC) + set(XERCESC_TARGET XercesC) + + ExternalProject_add(${XERCESC_TARGET} + URL https://dlcdn.apache.org//xerces/c/3/sources/xerces-c-3.3.0.tar.gz + URL_HASH MD5=1b7778f47d5eab1644f59c87ed06ac19 + DOWNLOAD_DIR ${BUILD_DIR}/download/${XERCESC_TARGET} + PREFIX ${BUILD_DIR} + BUILD_IN_SOURCE 0 + BUILD_ALWAYS 0 + UPDATE_COMMAND "" + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/${XERCESC_TARGET} + BINARY_DIR ${BUILD_DIR}/${XERCESC_TARGET}_build + INSTALL_DIR ${CMAKE_INSTALL_PREFIX} + CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} + -DCMAKE_INSTALL_PREFIX:PATH= + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} ) + + set(XERCESC_CMAKE_FLAGS -DXercesC_DIR:PATH=${CMAKE_INSTALL_PREFIX}/lib/XercesC) endif() if(AV_BUILD_E57FORMAT) @@ -1372,7 +1597,7 @@ if(AV_BUILD_E57FORMAT) ExternalProject_add(${E57FORMAT_TARGET} GIT_REPOSITORY https://github.com/asmaloney/libE57Format.git - GIT_TAG v3.1.1 + GIT_TAG v3.2.0 DOWNLOAD_DIR ${BUILD_DIR}/download/${E57FORMAT_TARGET} PREFIX ${BUILD_DIR} BUILD_IN_SOURCE 0 @@ -1382,10 +1607,14 @@ if(AV_BUILD_E57FORMAT) BINARY_DIR ${BUILD_DIR}/${E57FORMAT_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} -DE57_BUILD_TEST:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=ON + ${XERCESC_CMAKE_FLAGS} + -DE57_INSTALL_CMAKEDIR=/share/E57Format -DCMAKE_INSTALL_PREFIX:PATH= BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} + DEPENDS ${XERCESC_TARGET} ) set(E57FORMAT_CMAKE_FLAGS -DE57FORMAT_DIR:PATH=${CMAKE_INSTALL_PREFIX}/share/E57Format) @@ -1407,6 +1636,7 @@ if(AV_BUILD_OPENMESH) BINARY_DIR ${BUILD_DIR}/${OPENMESH_TARGET}_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND ${CMAKE_COMMAND} + ${CMAKE_CORE_BUILD_FLAGS} 
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH= -DBUILD_APPS=OFF @@ -1418,6 +1648,7 @@ if(AV_BUILD_OPENMESH) endif() set(AV_DEPS + ${OPENMP_TARGET} ${ZLIB_TARGET} ${ASSIMP_TARGET} ${GEOGRAM_TARGET} @@ -1455,6 +1686,120 @@ set(AV_DEPS ) if(AV_BUILD_ALICEVISION) + + # Build the required flags to pass through + set(AV_BUILD_FLAGS + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DALICEVISION_USE_RPATH=${ALICEVISION_USE_RPATH} + -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} + -DBUILD_APPLE_FRAMEWORKS=${BUILD_APPLE_FRAMEWORKS} + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + -DALICEVISION_ROOT=${ALICEVISION_ROOT} + -DALICEVISION_BUNDLE_PREFIX=${ALICEVISION_BUNDLE_PREFIX} + ) + set(AV_COMPONENT_FLAGS + -DAV_BUILD_ALICEVISION=${AV_BUILD_ALICEVISION} + -DALICEVISION_BUILD_TESTS=${ALICEVISION_BUILD_TESTS} + -DALICEVISION_INSTALL_MESHROOM_PLUGIN=${ALICEVISION_INSTALL_MESHROOM_PLUGIN} + ) + set(AV_TOPLEVEL_FLAGS + -DAV_EIGEN_MEMORY_ALIGNMENT=${AV_EIGEN_MEMORY_ALIGNMENT} + -DALICEVISION_USE_CCTAG=${AV_BUILD_CCTAG} + -DALICEVISION_USE_APRILTAG=${AV_BUILD_APRILTAG} + -DALICEVISION_USE_OPENCV=${AV_BUILD_OPENCV} + -DALICEVISION_USE_POPSIFT=${AV_BUILD_POPSIFT} + -DALICEVISION_USE_CUDA=${AV_USE_CUDA} + -DALICEVISION_BUILD_DOC=OFF # Default only: kept in this list so a user-supplied value appended later comes after it and wins + ) + set(AV_MISC_FLAGS + -DMINIGLOG=ON + ) + + # FIXME: Ideally we should move all options to the top-level CMakeLists.txt + # file. However, this approach provides backwards-compatibility and sets + # the flags as always, but enables the user to override them manually. + # As the last option specified takes precedence on the CLI, we only pass + # these flags if they are defined (= the user specified them on the CLI). 
+ # The following flags are only added if specified for now + if(DEFINED ALICEVISION_REMOVE_ABSOLUTE) + list(APPEND AV_BUILD_FLAGS -DALICEVISION_REMOVE_ABSOLUTE=${ALICEVISION_REMOVE_ABSOLUTE}) + endif() + if(DEFINED ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE) + list(APPEND AV_MISC_FLAGS -DALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE=${ALICEVISION_REQUIRE_CERES_WITH_SUITESPARSE}) + endif() + if(DEFINED ALICEVISION_USE_MESHSDFILTER) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_MESHSDFILTER=${ALICEVISION_USE_MESHSDFILTER}) + endif() + if(DEFINED ALICEVISION_USE_OCVSIFT) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_OCVSIFT=${ALICEVISION_USE_OCVSIFT}) + endif() + if(DEFINED ALICEVISION_USE_OPENCV_CONTRIB) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_OPENCV_CONTRIB=${ALICEVISION_USE_OPENCV_CONTRIB}) + endif() + if(DEFINED ALICEVISION_USE_OPENCV) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_OPENCV=${ALICEVISION_USE_OPENCV}) + endif() + if(DEFINED ALICEVISION_USE_ONNX_GPU) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_ONNX_GPU=${ALICEVISION_USE_ONNX_GPU}) + endif() + if(DEFINED ALICEVISION_USE_ONNX) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_ONNX=${ALICEVISION_USE_ONNX}) + endif() + if(DEFINED ALICEVISION_USE_UNCERTAINTYTE) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_UNCERTAINTYTE=${ALICEVISION_USE_UNCERTAINTYTE}) + endif() + if(DEFINED ALICEVISION_USE_ALEMBIC) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_ALEMBIC=${ALICEVISION_USE_ALEMBIC}) + endif() + if(DEFINED ALICEVISION_USE_POPSIFT) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_POPSIFT=${ALICEVISION_USE_POPSIFT}) + endif() + if(DEFINED ALICEVISION_USE_APRILTAG) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_APRILTAG=${ALICEVISION_USE_APRILTAG}) + endif() + if(DEFINED ALICEVISION_USE_CCTAG) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_CCTAG=${ALICEVISION_USE_CCTAG}) + endif() + if(DEFINED ALICEVISION_USE_OPENMP) + list(APPEND AV_TOPLEVEL_FLAGS 
-DALICEVISION_USE_OPENMP=${ALICEVISION_USE_OPENMP}) + endif() + if(DEFINED ALICEVISION_USE_CUDA) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_USE_CUDA=${ALICEVISION_USE_CUDA}) + endif() + if(DEFINED ALICEVISION_BUILD_LIDAR) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_LIDAR=${ALICEVISION_BUILD_LIDAR}) + endif() + if(DEFINED ALICEVISION_BUILD_SWIG_BINDING) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_SWIG_BINDING=${ALICEVISION_BUILD_SWIG_BINDING}) + endif() + if(DEFINED ALICEVISION_BUILD_DOC) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_DOC=${ALICEVISION_BUILD_DOC}) + endif() + if(DEFINED ALICEVISION_BUILD_COVERAGE) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_COVERAGE=${ALICEVISION_BUILD_COVERAGE}) + endif() + if(DEFINED ALICEVISION_BUILD_SOFTWARE) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_SOFTWARE=${ALICEVISION_BUILD_SOFTWARE}) + endif() + if(DEFINED ALICEVISION_BUILD_PANORAMA) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_PANORAMA=${ALICEVISION_BUILD_PANORAMA}) + endif() + if(DEFINED ALICEVISION_BUILD_PHOTOMETRICSTEREO) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_PHOTOMETRICSTEREO=${ALICEVISION_BUILD_PHOTOMETRICSTEREO}) + endif() + if(DEFINED ALICEVISION_BUILD_SEGMENTATION) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_SEGMENTATION=${ALICEVISION_BUILD_SEGMENTATION}) + endif() + if(DEFINED ALICEVISION_BUILD_HDR) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_HDR=${ALICEVISION_BUILD_HDR}) + endif() + if(DEFINED ALICEVISION_BUILD_MVS) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_MVS=${ALICEVISION_BUILD_MVS}) + endif() + if(DEFINED ALICEVISION_BUILD_SFM) + list(APPEND AV_TOPLEVEL_FLAGS -DALICEVISION_BUILD_SFM=${ALICEVISION_BUILD_SFM}) + endif() + # Build Alicevision super build mode ExternalProject_Add(aliceVision PREFIX ${CMAKE_CURRENT_SOURCE_DIR} @@ -1464,18 +1809,20 @@ if(AV_BUILD_ALICEVISION) BINARY_DIR ${BUILD_DIR}/aliceVision_build INSTALL_DIR ${CMAKE_INSTALL_PREFIX} CONFIGURE_COMMAND 
${CMAKE_COMMAND} - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_SHARED_LIBS:BOOL=ON -DTARGET_ARCHITECTURE=core - -DALICEVISION_ROOT=${ALICEVISION_ROOT} - -DALICEVISION_USE_ALEMBIC=ON - -DMINIGLOG=ON - -DALICEVISION_USE_CCTAG=${AV_BUILD_CCTAG} - -DALICEVISION_USE_APRILTAG=${AV_BUILD_APRILTAG} - -DALICEVISION_USE_OPENCV=${AV_BUILD_OPENCV} - -DALICEVISION_USE_POPSIFT=${AV_BUILD_POPSIFT} - -DALICEVISION_USE_CUDA=${AV_USE_CUDA} - -DALICEVISION_BUILD_SWIG_BINDING=${AV_USE_SWIG} - -DALICEVISION_BUILD_DOC=OFF + # Build Flags + ${AV_BUILD_FLAGS} + + # Misc Flags + ${AV_MISC_FLAGS} + + # Top-level flags + ${AV_TOPLEVEL_FLAGS} + + # Component Flags + ${AV_COMPONENT_FLAGS} + + # Dependency flags ${ZLIB_CMAKE_FLAGS} ${ASSIMP_CMAKE_FLAGS} ${EIGEN_CMAKE_FLAGS} @@ -1492,7 +1839,9 @@ if(AV_BUILD_ALICEVISION) ${CCTAG_CMAKE_FLAGS} ${APRILTAG_CMAKE_FLAGS} ${EXPAT_CMAKE_FLAGS} - ${COINUTILS_CMAKE_FLAGS} ${OSI_CMAKE_FLAGS} ${CLP_CMAKE_FLAGS} + ${COINUTILS_CMAKE_FLAGS} + ${OSI_CMAKE_FLAGS} + ${CLP_CMAKE_FLAGS} ${LZ4_CMAKE_FLAGS} ${FLANN_CMAKE_FLAGS} ${NANOFLANN_CMAKE_FLAGS} @@ -1502,7 +1851,17 @@ if(AV_BUILD_ALICEVISION) ${E57FORMAT_CMAKE_FLAGS} ${OPENMESH_CMAKE_FLAGS} - -DCMAKE_INSTALL_PREFIX:PATH= + # Install Prefix and Source Folder + -DCMAKE_INSTALL_PREFIX:PATH= + + BUILD_COMMAND $(MAKE) -j${AV_BUILD_DEPENDENCIES_PARALLEL} DEPENDS ${AV_DEPS} ) + + # Pipe through the darwin-bundle target on Apple targets + if(APPLE) + add_custom_target(darwin-bundle + ${CMAKE_MAKE_PROGRAM} -C ${BUILD_DIR}/aliceVision_build darwin-bundle + ) + endif() endif() diff --git a/src/cmake/FrameworkInfo.plist.in b/src/cmake/FrameworkInfo.plist.in new file mode 100644 index 0000000000..bf24650841 --- /dev/null +++ b/src/cmake/FrameworkInfo.plist.in @@ -0,0 +1,28 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleExecutable + ${MACOSX_FRAMEWORK_NAME} + CFBundleIdentifier + ${MACOSX_FRAMEWORK_IDENTIFIER} + NSHumanReadableCopyright + Copyright © 2018 - Present AliceVision Contributors + 
CFBundleInfoDictionaryVersion + ${MACOSX_FRAMEWORK_BUNDLE_VERSION} + CFBundleName + ${MACOSX_FRAMEWORK_BUNDLE_NAME} + CFBundlePackageType + FMWK + CFBundleShortVersionString + ${MACOSX_FRAMEWORK_SHORT_VERSION_STRING} + CFBundleSupportedPlatforms + + MacOSX + + CFBundleVersion + ${MACOSX_FRAMEWORK_BUNDLE_VERSION} + + diff --git a/src/cmake/Helpers.cmake b/src/cmake/Helpers.cmake index ea07f9d293..c1873758d6 100644 --- a/src/cmake/Helpers.cmake +++ b/src/cmake/Helpers.cmake @@ -5,7 +5,7 @@ function(alicevision_add_library library_name) set(options USE_CUDA) set(singleValues "") - set(multipleValues SOURCES PUBLIC_LINKS PRIVATE_LINKS PUBLIC_INCLUDE_DIRS PRIVATE_INCLUDE_DIRS PUBLIC_DEFINITIONS PRIVATE_DEFINITIONS) + set(multipleValues SOURCES PUBLIC_LINKS PRIVATE_LINKS PUBLIC_INCLUDE_DIRS PRIVATE_INCLUDE_DIRS PUBLIC_DEFINITIONS PRIVATE_DEFINITIONS RESOURCES) cmake_parse_arguments(LIBRARY "${options}" "${singleValues}" "${multipleValues}" ${ARGN}) @@ -111,6 +111,26 @@ function(alicevision_add_library library_name) target_compile_options(${library_name} PUBLIC "/Zc:__cplusplus") endif() + # If building Apple Frameworks, set metadata + if(APPLE AND BUILD_APPLE_FRAMEWORKS) + target_sources(${library_name} PUBLIC ${LIBRARY_RESOURCES}) + set_target_properties(${library_name} PROPERTIES + INSTALL_NAME_DIR "@rpath" + FRAMEWORK TRUE + FRAMEWORK_VERSION A + MACOSX_FRAMEWORK_NAME "${library_name}" + MACOSX_FRAMEWORK_IDENTIFIER org.aliceVision.${library_name} + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "org.aliceVision.${library_name}" + MACOSX_FRAMEWORK_BUNDLE_VERSION "${ALICEVISION_VERSION_MAJOR}.${ALICEVISION_VERSION_MINOR}.${ALICEVISION_VERSION_REVISION}" + MACOSX_FRAMEWORK_SHORT_VERSION_STRING "${ALICEVISION_VERSION_MAJOR}.${ALICEVISION_VERSION_MINOR}" + RESOURCE "${LIBRARY_RESOURCES}" + MACOSX_FRAMEWORK_INFO_PLIST "${ALICEVISION_ROOT}/../src/cmake/FrameworkInfo.plist.in" + ) + endif() + + # Add to global target list + set(ALICEVISION_GLOBAL_TARGET_LIST 
"${ALICEVISION_GLOBAL_TARGET_LIST};${library_name}" CACHE INTERNAL "Global list of all AliceVision targets enabled") + install(TARGETS ${library_name} EXPORT aliceVision-targets ARCHIVE @@ -119,6 +139,10 @@ function(alicevision_add_library library_name) DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + FRAMEWORK + DESTINATION ${CMAKE_INSTALL_LIBDIR} + RESOURCE + DESTINATION ${CMAKE_INSTALL_DATADIR}/aliceVision ) endfunction() @@ -231,9 +255,12 @@ function(alicevision_add_software software_name) set_target_properties(${software_name}_exe PROPERTIES SOVERSION ${ALICEVISION_SOFTWARE_VERSION_MAJOR} - VERSION "${ALICEVISION_SOFTWARE_VERSION_MAJOR}.${ALICEVISION_SOFTWARE_VERSION_MINOR}" + VERSION "${ALICEVISION_SOFTWARE_VERSION_MAJOR}_${ALICEVISION_SOFTWARE_VERSION_MINOR}" ) + # Add to global target list + set(ALICEVISION_GLOBAL_TARGET_LIST "${ALICEVISION_GLOBAL_TARGET_LIST};${software_name}_exe" CACHE INTERNAL "Global list of all AliceVision targets enabled") + install(TARGETS ${software_name}_exe RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} @@ -338,6 +365,15 @@ function(alicevision_swig_add_library module_name) TARGET ${module_name} PROPERTY COMPILE_OPTIONS -std=c++20 ) + if(APPLE) + # The ld on macOS does not allow undefined symbols for shared objects, + # so this must be explicitly passed to the linker. + # See: https://github.com/swig/swig/issues/2469 + set_property( + TARGET ${module_name} + PROPERTY LINK_OPTIONS -undefined dynamic_lookup + ) + endif() target_link_libraries(${module_name} PUBLIC ${SWIG_MODULE_PUBLIC_LINKS} diff --git a/src/cmake/OFA/AddCXXCompilerFlag.cmake b/src/cmake/OFA/AddCXXCompilerFlag.cmake new file mode 100644 index 0000000000..00e433535c --- /dev/null +++ b/src/cmake/OFA/AddCXXCompilerFlag.cmake @@ -0,0 +1,204 @@ +# Add a given compiler flag to flag variables. 
+# +# Usage: +# AddCXXCompilerFlag(<flag> +# [CODE <code>] +# [EXTRA_FLAGS <extra_flags>] +# [FLAGS <flags>] +# [HEADERS <headers>] +# [RESULT <result>] +# [TESTS <tests>]) +# +# Input argument: +# <flag> flag to be added after successful completion of all tests +# +# Optional input arguments: +# CODE variable holding the test code; this overrides the +# automatic generation of the test code +# EXTRA_FLAGS variable holding the list of extra compiler flags that +# are used without checks +# FLAGS variable holding the list of flags to which <flag> is +# added after successful completion of all tests +# HEADERS variable holding the list of header files prepended to +# the C++ test code's main function +# TESTS variable holding the list of tests to be included in +# the C++ test code's main function body +# +# Output argument: +# RESULT variable holding the result of all tests + +#============================================================================= +# This code is largely inspired by +# +# AddCompilerFlag.cmake +# Copyright 2010-2015 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the names of contributing organizations nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# and +# +# CheckCXXCompilerFlag.cmake +# Copyright 2006-2009 Kitware, Inc. 
+# Copyright 2006 Alexander Neundorf +# Copyright 2011-2013 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * The names of Kitware, Inc., the Insight Consortium, or the names of +# any consortium members, or of any contributors, may not be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# =============================================================================
+
+include(CheckIncludeFileCXX)
+include(OFA/CheckCXXCompilerFlag)
+
+# AddCXXCompilerFlag(<flag>
+#                    [CODE <source>...]
+#                    [EXTRA_FLAGS <flags>]
+#                    [FLAGS <list-variable>]
+#                    [HEADERS <header>...]
+#                    [RESULT <variable>]
+#                    [TESTS <statement>...])
+#
+# Checks whether the C++ compiler accepts <flag>:
+#
+#  1. every header listed after HEADERS must be includable when compiling
+#     with "<flag><extra flags>";
+#  2. a test program must compile with CMAKE_REQUIRED_FLAGS set to
+#     "<flag><extra flags>". The program consists of one #include line per
+#     header followed by either the CODE text, a main() wrapping the TESTS
+#     statements (one per line), or an empty main().
+#
+# If RESULT is given, <variable> is set to TRUE or FALSE accordingly.
+# If FLAGS is given and both checks pass, <flag> is appended to
+# <list-variable>.
+#
+# Notes:
+#  - EXTRA_FLAGS is concatenated directly after <flag> without a separator,
+#    so it has to start with a space if it holds a separate option.
+#  - CODE, HEADERS and TESTS accumulate all values that follow the keyword;
+#    EXTRA_FLAGS, FLAGS and RESULT keep only the last value.
+#  - This is a macro: ${ARGN} is split on ';', and all helper variables
+#    (_state, _code, _headers, ...) are set in the caller's scope.
+macro(AddCXXCompilerFlag _flag)
+  set(_state 0)
+  unset(_code)
+  unset(_extra_flags)
+  unset(_flags)
+  unset(_headers)
+  unset(_result)
+  unset(_tests)
+
+  foreach(_arg ${ARGN})
+    if("x${_arg}" STREQUAL "xCODE")
+      set(_state 1)
+    elseif("x${_arg}" STREQUAL "xEXTRA_FLAGS")
+      set(_state 2)
+    elseif("x${_arg}" STREQUAL "xFLAGS")
+      set(_state 3)
+    elseif("x${_arg}" STREQUAL "xHEADERS")
+      set(_state 4)
+    elseif("x${_arg}" STREQUAL "xRESULT")
+      set(_state 5)
+    elseif("x${_arg}" STREQUAL "xTESTS")
+      set(_state 6)
+
+    elseif(_state EQUAL 1)
+      # ${ARGN} was split on ';', so source code arrives in fragments.
+      # Appending re-joins them with ';' when _code is expanded in quotes.
+      list(APPEND _code "${_arg}")
+    elseif(_state EQUAL 2)
+      set(_extra_flags ${_arg})
+    elseif(_state EQUAL 3)
+      set(_flags ${_arg})
+    elseif(_state EQUAL 4)
+      # Keep every header; they are iterated as a list below.
+      list(APPEND _headers "${_arg}")
+    elseif(_state EQUAL 5)
+      set(_result ${_arg})
+    elseif(_state EQUAL 6)
+      # Keep every test statement; they are iterated as a list below.
+      list(APPEND _tests "${_arg}")
+    else()
+      message(FATAL_ERROR "[OptimizeForArchitecture] The argument ${_arg} is not supported by AddCXXCompilerFlag")
+    endif()
+  endforeach()
+
+  set(_check_include_file_cxx TRUE)
+  set(_check_cxx_source_compiles TRUE)
+
+  # Check availability of header file(s)
+  foreach(_header ${_headers})
+    set(_resultVar "HAVE_${_header}")
+    string(REGEX REPLACE "[-.+/:= ]" "_" _resultVar "${_resultVar}")
+    check_include_file_cxx(${_header} ${_resultVar} "${_flag}${_extra_flags}")
+
+    if(NOT ${_resultVar})
+      set(_check_include_file_cxx FALSE)
+    endif()
+  endforeach()
+
+  # Check if compiler supports flag and can compile code
+  set(_cxx_code)
+  foreach(_header ${_headers})
+    set(_cxx_code "${_cxx_code}\n#include<${_header}>")
+  endforeach()
+
+  if(_code)
+    set(_cxx_code "${_cxx_code}\n${_code}")
+  elseif(_tests)
+    set(_cxx_code "${_cxx_code}\nint main() {")
+    foreach(_test ${_tests})
+      set(_cxx_code "${_cxx_code}\n${_test}")
+    endforeach()
+    set(_cxx_code "${_cxx_code}\nreturn 0; }")
+  else()
+    set(_cxx_code "${_cxx_code}\nint main() { return 0; }")
+  endif()
+
+  # Temporarily replace CMAKE_REQUIRED_FLAGS; the previous value is restored
+  # right after the compile check.
+  set(_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+  set(CMAKE_REQUIRED_FLAGS "${_flag}${_extra_flags}")
+  set(_resultVar "HAVE_${_flag}")
+  string(REGEX REPLACE "[-.+/:= ]" "_" _resultVar "${_resultVar}")
+  check_cxx_source_compiles("${_cxx_code}" ${_resultVar}
+    # Some compilers do not fail with a bad flag
+    FAIL_REGEX "error: bad value (.*) for .* switch"       # GNU
+    FAIL_REGEX "argument unused during compilation"        # clang
+    FAIL_REGEX "warning: the flag .* has been deprecated"  # clang
+    FAIL_REGEX "is valid for .* but not for C\\\\+\\\\+"   # GNU
+    FAIL_REGEX "unrecognized .*option"                     # GNU
+    FAIL_REGEX "ignored for target"                        # GNU
+    FAIL_REGEX "ignoring unknown option"                   # MSVC
+    FAIL_REGEX "warning D9002"                             # MSVC
+    FAIL_REGEX "[Uu]nknown option"                         # HP
+    FAIL_REGEX "[Ww]arning: [Oo]ption"                     # SunPro
+    FAIL_REGEX "[Ww]arning: illegal use of -xarch option"  # SunPro
+    FAIL_REGEX "command option .* is not recognized"       # XL
+    FAIL_REGEX "WARNING: unknown flag:"                    # Open64
+    FAIL_REGEX "command line error"                        # ICC
+    FAIL_REGEX "command line warning"                      # ICC
+    FAIL_REGEX "#10236:"                                   # ICC: File not found
+    FAIL_REGEX " #10159: "                                 # ICC
+    FAIL_REGEX " #10353: "                                 # ICC: option '-mfma' ignored, suggest using '-march=core-avx2'
+    FAIL_REGEX " #10006: "                                 # ICC: ignoring unknown option '-mavx512fp16'
+    )
+  set(CMAKE_REQUIRED_FLAGS "${_CMAKE_REQUIRED_FLAGS}")
+
+  if(NOT ${_resultVar})
+    set(_check_cxx_source_compiles FALSE)
+  endif()
+
+  if (DEFINED _result)
+    if (${_check_include_file_cxx} AND ${_check_cxx_source_compiles})
+      set(${_result} TRUE)
+    else()
+      set(${_result} FALSE)
+    endif()
+  endif()
+
+  if(DEFINED _flags AND ${_check_include_file_cxx} AND ${_check_cxx_source_compiles})
+    list(APPEND ${_flags} "${_flag}")
+  endif()
+endmacro()
diff --git a/src/cmake/OFA/AutodetectArm.cmake b/src/cmake/OFA/AutodetectArm.cmake
new file mode 100644
index 0000000000..786798a5c8
--- /dev/null
+++ b/src/cmake/OFA/AutodetectArm.cmake
@@ -0,0 +1,427 @@
+#=============================================================================
+# Autodetection of ARM / ARM64 CPUs
+#
+# This is a two-step process:
+#
+#  1. Get the CPUID from the system by reading /proc/cpuinfo (on
+#     Linux), the system registry (on Windows), or executing an
+#     OS-specific command (macOS, BSD, SunOS, ...)
+#
+#  2. Determine the specific CPU from the CPUID
+#=============================================================================
+
+# OFA_AutodetectArm()
+#
+# Sets TARGET_ARCHITECTURE when the implementer/part pair read from the host
+# is found in the tables below; otherwise TARGET_ARCHITECTURE is left
+# untouched. Because this is a macro, the helper variables _cpu_implementer,
+# _cpu_architecture, _cpu_variant, _cpu_part, _cpu_revision (and _cpu_flags on
+# Linux) are set in the caller's scope as well.
+#
+# If the implementer is not listed at all, a warning is printed and return()
+# is executed. Inside a macro, return() acts on the scope of the caller.
+macro(OFA_AutodetectArm)
+  set(_cpu_implementer)
+  set(_cpu_architecture)
+  set(_cpu_variant)
+  set(_cpu_part)
+  set(_cpu_revision)
+
+  # Get CPUID from system
+  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+
+    # Linux
+    # string(REGEX REPLACE) returns its input unchanged when the pattern does
+    # not match, so a field missing from /proc/cpuinfo leaves the whole file
+    # content in the variable, which matches none of the IDs below.
+    file(READ "/proc/cpuinfo" _cpuinfo)
+    string(REGEX REPLACE ".*CPU implementer[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_implementer "${_cpuinfo}")
+    string(REGEX REPLACE ".*CPU architecture[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_architecture "${_cpuinfo}")
+    string(REGEX REPLACE ".*CPU variant[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_variant "${_cpuinfo}")
+    string(REGEX REPLACE ".*CPU part[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_part "${_cpuinfo}")
+    string(REGEX REPLACE ".*CPU revision[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_revision "${_cpuinfo}")
+    string(REGEX REPLACE ".*Features[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
+
+  elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+
+    # macOS
+    # hw.cputype is queried twice: the first value is stored as the
+    # implementer, the second as the architecture.
+    execute_process(COMMAND /usr/sbin/sysctl -n hw.cputype hw.cputype hw.cpusubtype hw.cpufamily hw.cpusubfamily
+      OUTPUT_VARIABLE _sysctl_output_string RESULT_VARIABLE _error)
+    if(_error)
+      message(WARNING "[OptimizeForArchitecture] Auto-detection of optimization flags failed and will use the generic CPU settings.")
+    else()
+      # One value per line; quoted so that an empty output cannot drop the
+      # input argument of string(REPLACE).
+      string(REPLACE "\n" ";" _sysctl_output "${_sysctl_output_string}")
+      list(GET _sysctl_output 0 _cpu_implementer)
+      list(GET _sysctl_output 1 _cpu_architecture)
+      list(GET _sysctl_output 2 _cpu_variant)
+      list(GET _sysctl_output 3 _cpu_part)
+      list(GET _sysctl_output 4 _cpu_revision)
+    endif()
+
+  else()
+
+    # Try to retrieve CPUID directly
+    # NOTE(review): this path assumes the helper source lives in
+    # <top-level source dir>/CMake/OFA -- confirm that it matches the
+    # directory this module is installed in.
+    try_run(_exit _ok
+      ${CMAKE_CURRENT_BINARY_DIR}
+      ${CMAKE_SOURCE_DIR}/CMake/OFA/cpuinfo_arm.c
+      RUN_OUTPUT_VARIABLE _cpuinfo)
+
+    # _exit is referenced by name so that an unset or non-numeric run result
+    # (e.g. when compilation failed) evaluates to false instead of producing a
+    # malformed condition.
+    if(_ok AND _exit EQUAL 0)
+      string(REGEX REPLACE ".*implementer[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_implementer "${_cpuinfo}")
+      string(REGEX REPLACE ".*architecture[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_architecture "${_cpuinfo}")
+      string(REGEX REPLACE ".*variant[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_variant "${_cpuinfo}")
+      string(REGEX REPLACE ".*part[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_part "${_cpuinfo}")
+      string(REGEX REPLACE ".*revision[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_revision "${_cpuinfo}")
+
+    else()
+
+      message(FATAL_ERROR "[OptimizeForArchitecture] OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
+    endif()
+  endif()
+
+  # Determine CPU from CPUID
+  # Taken from https://github.com/karelzak/util-linux/blob/master/sys-utils/lscpu-arm.c
+  # and https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
+
+  # ARM
+  if(_cpu_implementer STREQUAL "0x41")
+    if(_cpu_part STREQUAL "0x810")
+      set(TARGET_ARCHITECTURE "arm810")
+    elseif(_cpu_part STREQUAL "0x920")
+      set(TARGET_ARCHITECTURE "arm920t")
+    elseif(_cpu_part STREQUAL "0x922")
+      set(TARGET_ARCHITECTURE "arm922t")
+    elseif(_cpu_part STREQUAL "0x926")
+      set(TARGET_ARCHITECTURE "arm926ej-s")
+    elseif(_cpu_part STREQUAL "0x940")
+      set(TARGET_ARCHITECTURE "arm940t")
+    elseif(_cpu_part STREQUAL "0x946")
+      set(TARGET_ARCHITECTURE "arm946e-s")
+    elseif(_cpu_part STREQUAL "0x966")
+      set(TARGET_ARCHITECTURE "arm966e-s")
+    elseif(_cpu_part STREQUAL "0xa20")
+      set(TARGET_ARCHITECTURE "arm1020e")
+    elseif(_cpu_part STREQUAL "0xa22")
+      set(TARGET_ARCHITECTURE "arm1022e")
+    elseif(_cpu_part STREQUAL "0xa26")
+      set(TARGET_ARCHITECTURE "arm1026ej-s")
+    elseif(_cpu_part STREQUAL "0xb02")
+      set(TARGET_ARCHITECTURE "mpcore")
+    elseif(_cpu_part STREQUAL "0xb36")
+      set(TARGET_ARCHITECTURE "arm1136jf-s")
+    elseif(_cpu_part STREQUAL "0xb56")
+      set(TARGET_ARCHITECTURE "arm1156t2f-s")
+    elseif(_cpu_part STREQUAL "0xb76")
+      set(TARGET_ARCHITECTURE "arm1176jzf-s")
+    elseif(_cpu_part STREQUAL "0xc05")
+      set(TARGET_ARCHITECTURE "cortex-a5")
+    elseif(_cpu_part STREQUAL "0xc07")
+      set(TARGET_ARCHITECTURE "cortex-a7")
+    elseif(_cpu_part STREQUAL "0xc08")
+      set(TARGET_ARCHITECTURE "cortex-a8")
+    elseif(_cpu_part STREQUAL "0xc09")
+      set(TARGET_ARCHITECTURE "cortex-a9")
+    elseif(_cpu_part STREQUAL "0xc0d")
+      set(TARGET_ARCHITECTURE "cortex-a12")
+    elseif(_cpu_part STREQUAL "0xc0f")
+      set(TARGET_ARCHITECTURE "cortex-a15")
+    elseif(_cpu_part STREQUAL "0xc0e")
+      set(TARGET_ARCHITECTURE "cortex-a17")
+    elseif(_cpu_part STREQUAL "0xc14")
+      set(TARGET_ARCHITECTURE "cortex-r4f")
+    elseif(_cpu_part STREQUAL "0xc15")
+      set(TARGET_ARCHITECTURE "cortex-r5")
+    elseif(_cpu_part STREQUAL "0xc17")
+      set(TARGET_ARCHITECTURE "cortex-r7")
+    elseif(_cpu_part STREQUAL "0xc18")
+      set(TARGET_ARCHITECTURE "cortex-r8")
+    elseif(_cpu_part STREQUAL "0xc20")
+      set(TARGET_ARCHITECTURE "cortex-m0")
+    elseif(_cpu_part STREQUAL "0xc21")
+      set(TARGET_ARCHITECTURE "cortex-m1")
+    elseif(_cpu_part STREQUAL "0xc23")
+      set(TARGET_ARCHITECTURE "cortex-m3")
+    elseif(_cpu_part STREQUAL "0xc24")
+      set(TARGET_ARCHITECTURE "cortex-m4")
+    elseif(_cpu_part STREQUAL "0xc27")
+      set(TARGET_ARCHITECTURE "cortex-m7")
+    elseif(_cpu_part STREQUAL "0xc60")
+      set(TARGET_ARCHITECTURE "cortex-m0plus")
+    elseif(_cpu_part STREQUAL "0xd01")
+      set(TARGET_ARCHITECTURE "cortex-a32")
+    elseif(_cpu_part STREQUAL "0xd02")
+      set(TARGET_ARCHITECTURE "cortex-a34")
+    elseif(_cpu_part STREQUAL "0xd03")
+      set(TARGET_ARCHITECTURE "cortex-a53")
+    elseif(_cpu_part STREQUAL "0xd04")
+      set(TARGET_ARCHITECTURE "cortex-a35")
+    elseif(_cpu_part STREQUAL "0xd05")
+      set(TARGET_ARCHITECTURE "cortex-a55")
+    elseif(_cpu_part STREQUAL "0xd07")
+      set(TARGET_ARCHITECTURE "cortex-a57")
+    elseif(_cpu_part STREQUAL "0xd08")
+      set(TARGET_ARCHITECTURE "cortex-a72")
+    elseif(_cpu_part STREQUAL "0xd09")
+      set(TARGET_ARCHITECTURE "cortex-a73")
+    elseif(_cpu_part STREQUAL "0xd0a")
+      set(TARGET_ARCHITECTURE "cortex-a75")
+    elseif(_cpu_part STREQUAL "0xd0b")
+      set(TARGET_ARCHITECTURE "cortex-a76")
+    elseif(_cpu_part STREQUAL "0xd0c")
+      set(TARGET_ARCHITECTURE "neoverse-n1")
+    elseif(_cpu_part STREQUAL "0xd0d")
+      set(TARGET_ARCHITECTURE "cortex-a77")
+    elseif(_cpu_part STREQUAL "0xd0e")
+      set(TARGET_ARCHITECTURE "cortex-a76ae")
+    elseif(_cpu_part STREQUAL "0xd13")
+      set(TARGET_ARCHITECTURE "cortex-r52")
+    elseif(_cpu_part STREQUAL "0xd20")
+      set(TARGET_ARCHITECTURE "cortex-m23")
+    elseif(_cpu_part STREQUAL "0xd21")
+      set(TARGET_ARCHITECTURE "cortex-m33")
+    elseif(_cpu_part STREQUAL "0xd40")
+      set(TARGET_ARCHITECTURE "neoverse-v1")
+    elseif(_cpu_part STREQUAL "0xd41")
+      set(TARGET_ARCHITECTURE "cortex-a78")
+    elseif(_cpu_part STREQUAL "0xd42")
+      set(TARGET_ARCHITECTURE "cortex-a78ae")
+    elseif(_cpu_part STREQUAL "0xd44")
+      set(TARGET_ARCHITECTURE "cortex-x1")
+    elseif(_cpu_part STREQUAL "0xd46")
+      set(TARGET_ARCHITECTURE "cortex-a510")
+    elseif(_cpu_part STREQUAL "0xd47")
+      set(TARGET_ARCHITECTURE "cortex-a710")
+    elseif(_cpu_part STREQUAL "0xd48")
+      set(TARGET_ARCHITECTURE "cortex-x2")
+    elseif(_cpu_part STREQUAL "0xd49")
+      set(TARGET_ARCHITECTURE "neoverse-n2")
+    elseif(_cpu_part STREQUAL "0xd4a")
+      set(TARGET_ARCHITECTURE "neoverse-e1")
+    elseif(_cpu_part STREQUAL "0xd4b")
+      set(TARGET_ARCHITECTURE "cortex-a78c")
+    endif()
+
+  # Broadcom
+  elseif(_cpu_implementer STREQUAL "0x42")
+    if(_cpu_part STREQUAL "0x0f")
+      set(TARGET_ARCHITECTURE "brahma-b15")
+    elseif(_cpu_part STREQUAL "0x100")
+      set(TARGET_ARCHITECTURE "brahma-b53")
+    elseif(_cpu_part STREQUAL "0x516")
+      set(TARGET_ARCHITECTURE "thunderx2")
+    endif()
+
+  # Cavium
+  elseif(_cpu_implementer STREQUAL "0x43")
+    if(_cpu_part STREQUAL "0x0a0")
+      set(TARGET_ARCHITECTURE "thunderx")
+    elseif(_cpu_part STREQUAL "0x0a1")
+      set(TARGET_ARCHITECTURE "thunderxt88")
+    elseif(_cpu_part STREQUAL "0x0a2")
+      set(TARGET_ARCHITECTURE "thunderxt81")
+    elseif(_cpu_part STREQUAL "0x0a3")
+      set(TARGET_ARCHITECTURE "thunderxt83")
+    elseif(_cpu_part STREQUAL "0x0af")
+      set(TARGET_ARCHITECTURE "thunderx2t99")
+    endif()
+
+  # DEC
+  elseif(_cpu_implementer STREQUAL "0x44")
+    if(_cpu_part STREQUAL "0xa10")
+      set(TARGET_ARCHITECTURE "strongarm110")
+    elseif(_cpu_part STREQUAL "0xa11")
+      set(TARGET_ARCHITECTURE "strongarm1100")
+    endif()
+
+  # FUJITSU
+  elseif(_cpu_implementer STREQUAL "0x46")
+    if(_cpu_part STREQUAL "0x001")
+      set(TARGET_ARCHITECTURE "a64fx")
+    endif()
+
+  # HiSilicon
+  elseif(_cpu_implementer STREQUAL "0x48")
+    if(_cpu_part STREQUAL "0xd01")
+      set(TARGET_ARCHITECTURE "tsv110")
+    endif()
+
+  # Infineon
+  elseif(_cpu_implementer STREQUAL "0x49")
+
+  # Motorola/Freescale
+  elseif(_cpu_implementer STREQUAL "0x4d")
+
+  # Nvidia
+  elseif(_cpu_implementer STREQUAL "0x4e")
+    if(_cpu_part STREQUAL "0x000")
+      set(TARGET_ARCHITECTURE "denver")
+    elseif(_cpu_part STREQUAL "0x003")
+      set(TARGET_ARCHITECTURE "denver2")
+    elseif(_cpu_part STREQUAL "0x004")
+      set(TARGET_ARCHITECTURE "carmel")
+    endif()
+
+  # APM
+  elseif(_cpu_implementer STREQUAL "0x50")
+    if(_cpu_part STREQUAL "0x000")
+      set(TARGET_ARCHITECTURE "xgene1")
+    endif()
+
+  # Qualcomm
+  elseif(_cpu_implementer STREQUAL "0x51")
+    if(_cpu_part STREQUAL "0x00f")
+      set(TARGET_ARCHITECTURE "scorpion")
+    elseif(_cpu_part STREQUAL "0x02d")
+      set(TARGET_ARCHITECTURE "scorpion")
+    elseif(_cpu_part STREQUAL "0x04d")
+      set(TARGET_ARCHITECTURE "krait")
+    elseif(_cpu_part STREQUAL "0x06f")
+      set(TARGET_ARCHITECTURE "krait")
+    elseif(_cpu_part STREQUAL "0x201")
+      set(TARGET_ARCHITECTURE "kryo")
+    elseif(_cpu_part STREQUAL "0x205")
+      set(TARGET_ARCHITECTURE "kryo")
+    elseif(_cpu_part STREQUAL "0x211")
+      set(TARGET_ARCHITECTURE "kryo")
+    elseif(_cpu_part STREQUAL "0x800")
+      set(TARGET_ARCHITECTURE "falkor")
+    elseif(_cpu_part STREQUAL "0x801")
+      set(TARGET_ARCHITECTURE "kryo2")
+    elseif(_cpu_part STREQUAL "0xc00")
+      set(TARGET_ARCHITECTURE "falkor")
+    elseif(_cpu_part STREQUAL "0xc01")
+      set(TARGET_ARCHITECTURE "saphira")
+    endif()
+
+  # Samsung
+  elseif(_cpu_implementer STREQUAL "0x53")
+    if(_cpu_part STREQUAL "0x001")
+      set(TARGET_ARCHITECTURE "exynos-m1")
+    endif()
+
+  # Marvell
+  elseif(_cpu_implementer STREQUAL "0x56")
+    if(_cpu_part STREQUAL "0x131")
+      set(TARGET_ARCHITECTURE "marvell-f")
+    elseif(_cpu_part STREQUAL "0x581")
+      set(TARGET_ARCHITECTURE "marvell-pj4")
+    elseif(_cpu_part STREQUAL "0x584")
+      set(TARGET_ARCHITECTURE "marvell-pj4")
+    endif()
+
+  # Apple
+  elseif(_cpu_implementer STREQUAL "0x61")
+    if(_cpu_part STREQUAL "0x022")
+      set(TARGET_ARCHITECTURE "icestorm")
+    elseif(_cpu_part STREQUAL "0x023")
+      set(TARGET_ARCHITECTURE "firestorm")
+    endif()
+
+  # Faraday
+  elseif(_cpu_implementer STREQUAL "0x66")
+    if(_cpu_part STREQUAL "0x526")
+      set(TARGET_ARCHITECTURE "fa526")
+    elseif(_cpu_part STREQUAL "0x626")
+      set(TARGET_ARCHITECTURE "fa626")
+    endif()
+
+  # Intel
+  elseif(_cpu_implementer STREQUAL "0x69")
+    if(_cpu_part STREQUAL "0x200")
+      set(TARGET_ARCHITECTURE "i80200")
+    elseif(_cpu_part STREQUAL "0x210")
+      set(TARGET_ARCHITECTURE "pxa250a")
+    elseif(_cpu_part STREQUAL "0x212")
+      set(TARGET_ARCHITECTURE "pxa210a")
+    elseif(_cpu_part STREQUAL "0x242")
+      set(TARGET_ARCHITECTURE "i80321-400")
+    elseif(_cpu_part STREQUAL "0x243")
+      set(TARGET_ARCHITECTURE "i80321-600")
+    elseif(_cpu_part STREQUAL "0x290")
+      set(TARGET_ARCHITECTURE "pxa250b")
+    elseif(_cpu_part STREQUAL "0x292")
+      set(TARGET_ARCHITECTURE "pxa210b")
+    elseif(_cpu_part STREQUAL "0x2c2")
+      set(TARGET_ARCHITECTURE "i80321-400-b0")
+    elseif(_cpu_part STREQUAL "0x2c3")
+      set(TARGET_ARCHITECTURE "i80321-600-b0")
+    elseif(_cpu_part STREQUAL "0x2d0")
+      set(TARGET_ARCHITECTURE "pxa250c")
+    elseif(_cpu_part STREQUAL "0x2d2")
+      set(TARGET_ARCHITECTURE "pxa210c")
+    elseif(_cpu_part STREQUAL "0x411")
+      set(TARGET_ARCHITECTURE "pxa27x")
+    elseif(_cpu_part STREQUAL "0x41c")
+      set(TARGET_ARCHITECTURE "ipx425-533")
+    elseif(_cpu_part STREQUAL "0x41d")
+      set(TARGET_ARCHITECTURE "ipx425-400")
+    elseif(_cpu_part STREQUAL "0x41f")
+      set(TARGET_ARCHITECTURE "ipx425-266")
+    elseif(_cpu_part STREQUAL "0x682")
+      set(TARGET_ARCHITECTURE "pxa32x")
+    elseif(_cpu_part STREQUAL "0x683")
+      set(TARGET_ARCHITECTURE "pxa930")
+    elseif(_cpu_part STREQUAL "0x688")
+      set(TARGET_ARCHITECTURE "pxa30x")
+    elseif(_cpu_part STREQUAL "0x689")
+      set(TARGET_ARCHITECTURE "pxa31x")
+    elseif(_cpu_part STREQUAL "0xb11")
+      set(TARGET_ARCHITECTURE "sa1110")
+    elseif(_cpu_part STREQUAL "0xc12")
+      set(TARGET_ARCHITECTURE "ipx1200")
+    endif()
+
+  # Phytium
+  elseif(_cpu_implementer STREQUAL "0x70")
+    if(_cpu_part STREQUAL "0x662")
+      set(TARGET_ARCHITECTURE "ftc662")
+    elseif(_cpu_part STREQUAL "0x663")
+      set(TARGET_ARCHITECTURE "ftc663")
+    endif()
+
+  # Ampere
+  elseif(_cpu_implementer STREQUAL "0xc0")
+
+  # Taken from /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/machine.h
+  # Each ID is compared in both its hexadecimal and its decimal spelling.
+  elseif(_cpu_implementer STREQUAL "16777228" OR _cpu_implementer STREQUAL "0x100000C") # Apple ARM64
+    if(_cpu_part STREQUAL "0x1e2d6381" OR _cpu_part STREQUAL "506291073") # Swift (A6)
+      set(TARGET_ARCHITECTURE "apple-a6")
+    elseif(_cpu_part STREQUAL "0x37a09642" OR _cpu_part STREQUAL "933271106") # Cyclone (A7)
+      set(TARGET_ARCHITECTURE "apple-a7")
+    elseif(_cpu_part STREQUAL "0x2c91a47e" OR _cpu_part STREQUAL "747742334") # Typhoon (A8)
+      set(TARGET_ARCHITECTURE "apple-a8")
+    elseif(_cpu_part STREQUAL "0x92fb37c8" OR _cpu_part STREQUAL "2465937352") # Twister (A9)
+      set(TARGET_ARCHITECTURE "apple-a9")
+    elseif(_cpu_part STREQUAL "0x67ceee93" OR _cpu_part STREQUAL "1741614739") # Hurricane (A10)
+      set(TARGET_ARCHITECTURE "apple-a10")
+    elseif(_cpu_part STREQUAL "0xe81e7ef6" OR _cpu_part STREQUAL "3894312694") # Monsoon Mistral (A11)
+      set(TARGET_ARCHITECTURE "apple-a11")
+    elseif(_cpu_part STREQUAL "0x07d34b9f" OR _cpu_part STREQUAL "131287967") # Vortex Tempest (A12)
+      set(TARGET_ARCHITECTURE "apple-a12")
+    elseif(_cpu_part STREQUAL "0x462504d2" OR _cpu_part STREQUAL "1176831186") # Lightning Thunder (A13)
+      set(TARGET_ARCHITECTURE "apple-a13")
+    elseif(_cpu_part STREQUAL "0x1b588bb3" OR _cpu_part STREQUAL "458787763") # Firestorm Icestorm (A14 / M1 / M1 Pro / M1 Max / M1 Ultra)
+      set(TARGET_ARCHITECTURE "apple-m1")
+    elseif(_cpu_part STREQUAL "0xda33d83d" OR _cpu_part STREQUAL "3660830781") # Blizzard Avalanche (A15 / M2 / M2 Pro / M2 Max)
+      set(TARGET_ARCHITECTURE "apple-m2")
+    elseif(_cpu_part STREQUAL "0x8765edea" OR _cpu_part STREQUAL "2271604202") # Everest Sawtooth (A16)
+      set(TARGET_ARCHITECTURE "apple-a16")
+    elseif(_cpu_part STREQUAL "0x2876f5b5" OR _cpu_part STREQUAL "678884789") # Coll (A17)
+      set(TARGET_ARCHITECTURE "apple-a17")
+    elseif(_cpu_part STREQUAL "0x204526d0" OR _cpu_part STREQUAL "541402832") # Tupai (A18)
+      set(TARGET_ARCHITECTURE "apple-a18")
+    elseif(_cpu_part STREQUAL "0x75d4acb9" OR _cpu_part STREQUAL "1976872121") # Tahiti (A18 Pro)
+      set(TARGET_ARCHITECTURE "apple-a18")
+    elseif(_cpu_part STREQUAL "0xfa33415e" OR _cpu_part STREQUAL "4197663070") # Ibiza (M3)
+      set(TARGET_ARCHITECTURE "apple-m3")
+    elseif(_cpu_part STREQUAL "0x72015832" OR _cpu_part STREQUAL "1912690738") # Palma (M3 Pro)
+      set(TARGET_ARCHITECTURE "apple-m3")
+    elseif(_cpu_part STREQUAL "0x5f4dea93" OR _cpu_part STREQUAL "1598941843") # Lobos (M3 Max)
+      set(TARGET_ARCHITECTURE "apple-m3")
+    elseif(_cpu_part STREQUAL "0x6f5129ac" OR _cpu_part STREQUAL "1867590060") # Donan (M4)
+      set(TARGET_ARCHITECTURE "apple-m4")
+    elseif(_cpu_part STREQUAL "0x17d5b93a" OR _cpu_part STREQUAL "399882554") # Brava (M4 Pro)
+      set(TARGET_ARCHITECTURE "apple-m4")
+    endif()
+
+  else()
+    message(WARNING "[OptimizeForArchitecture] Auto-detection of optimization flags failed and will use the generic CPU settings.")
+    # In a macro, return() leaves the scope of the caller, not just this macro.
+    return()
+  endif()
+
+  if(OFA_VERBOSE)
+    message(STATUS "[OptimizeForArchitecture] CPU implementer: ${_cpu_implementer}")
+    message(STATUS "[OptimizeForArchitecture] CPU architecture: ${_cpu_architecture}")
+    message(STATUS "[OptimizeForArchitecture] CPU variant: ${_cpu_variant}")
+    message(STATUS "[OptimizeForArchitecture] CPU part: ${_cpu_part}")
+    message(STATUS "[OptimizeForArchitecture] CPU revision: ${_cpu_revision}")
+  endif()
+endmacro()
diff --git a/src/cmake/OFA/AutodetectPpc.cmake b/src/cmake/OFA/AutodetectPpc.cmake
new file mode 100644
index 0000000000..70c3a70685
--- /dev/null
+++ b/src/cmake/OFA/AutodetectPpc.cmake
@@ -0,0 +1,57 @@
+#=============================================================================
+# Autodetection of PPC / PPC64 CPUs
+#
+# This is a two-step process:
+#
+#  1. Get the CPUID from the system by reading /proc/cpuinfo (on
+#     Linux), the system registry (on Windows), or executing an
+#     OS-specific command (macOS, BSD, SunOS, ...)
+#
+#  2. Determine the specific CPU from the CPUID
+#=============================================================================
+
+# OFA_AutodetectPpc()
+#
+# On Linux, reads the "cpu" field of /proc/cpuinfo into _cpu and sets
+# TARGET_ARCHITECTURE for the POWER3 ... POWER10 names listed below; any other
+# value only produces a warning. On every other system a fatal error is raised.
+# Because this is a macro, _cpu and _cpuinfo are set in the caller's scope.
+macro(OFA_AutodetectPpc)
+  set(_cpu)
+
+  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+
+    # Linux
+    # The capture class includes '+' so that "POWER5+" is not cut down to
+    # "POWER5".
+    file(READ "/proc/cpuinfo" _cpuinfo)
+    string(REGEX REPLACE ".*cpu[ \t]*:[ \t]+([a-zA-Z0-9_+-]+).*" "\\1" _cpu "${_cpuinfo}")
+    if(_cpu STREQUAL "POWER3")
+      set(TARGET_ARCHITECTURE "power3")
+    elseif(_cpu STREQUAL "POWER4")
+      set(TARGET_ARCHITECTURE "power4")
+    elseif(_cpu STREQUAL "POWER5")
+      set(TARGET_ARCHITECTURE "power5")
+    elseif(_cpu STREQUAL "POWER5+")
+      set(TARGET_ARCHITECTURE "power5+")
+    elseif(_cpu STREQUAL "POWER6")
+      set(TARGET_ARCHITECTURE "power6")
+    elseif(_cpu STREQUAL "POWER6X")
+      set(TARGET_ARCHITECTURE "power6x")
+    elseif(_cpu STREQUAL "POWER7")
+      set(TARGET_ARCHITECTURE "power7")
+    elseif(_cpu STREQUAL "POWER8" OR _cpu STREQUAL "POWER8NVL")
+      set(TARGET_ARCHITECTURE "power8")
+    elseif(_cpu STREQUAL "POWER9" OR _cpu STREQUAL "POWER9NVL")
+      set(TARGET_ARCHITECTURE "power9")
+    elseif(_cpu STREQUAL "POWER10" OR _cpu STREQUAL "POWER10NVL")
+      set(TARGET_ARCHITECTURE "power10")
+    else()
+      message(WARNING "[OptimizeForArchitecture] Auto-detection of optimization flags failed and will use the generic CPU settings.")
+    endif()
+
+    # TODO: AIX, FreeBSD, ...
+
+  else()
+
+    # FATAL_ERROR stops processing, so nothing needs to follow it.
+    message(FATAL_ERROR "[OptimizeForArchitecture] OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
+
+  endif()
+
+  if(OFA_VERBOSE)
+    message(STATUS "[OptimizeForArchitecture] CPU: ${_cpu}")
+  endif()
+endmacro()
diff --git a/src/cmake/OFA/AutodetectX86.cmake b/src/cmake/OFA/AutodetectX86.cmake
new file mode 100644
index 0000000000..337207bc32
--- /dev/null
+++ b/src/cmake/OFA/AutodetectX86.cmake
@@ -0,0 +1,461 @@
+#=============================================================================
+# Autodetection of X86 / X86_64 CPUs
+#
+# This is a two-step process:
+#
+#  1. Get the CPUID from the system by reading /proc/cpuinfo (on
+#     Linux), the system registry (on Windows), or executing an
+#     OS-specific command (macOS, BSD, SunOS, ...)
+#
+#  2.
Determine the specific CPU from the CPUID +#============================================================================= + +macro(OFA_AutodetectX86) + set(_vendor_id) + set(_cpu_family) + set(_cpu_model) + set(_cpu_stepping) + + # Get CPUID from system + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + + # Linux + file(READ "/proc/cpuinfo" _cpuinfo) + string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}") + string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}") + string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}") + string(REGEX REPLACE ".*stepping[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_stepping "${_cpuinfo}") + string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}") + + elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + + # macOS + execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.vendor machdep.cpu.family machdep.cpu.model machdep.cpu.stepping machdep.cpu.features + OUTPUT_VARIABLE _sysctl_output_string RESULT_VARIABLE _error) + if(NOT _error) + string(REPLACE "\n" ";" _sysctl_output ${_sysctl_output_string}) + list(GET _sysctl_output 0 _vendor_id) + list(GET _sysctl_output 1 _cpu_family) + list(GET _sysctl_output 2 _cpu_model) + list(GET _sysctl_output 3 _cpu_stepping) + list(GET _sysctl_output 4 _cpu_flags) + string(TOLOWER "${_cpu_flags}" _cpu_flags) + string(REPLACE "." "_" _cpu_flags "${_cpu_flags}") + else() + # Apple Silicon (ARM64) running in Rosetta 2 mode + # + # The regular detection mechanism for macOS-x64_86 does not work + # because the emulated CPU does not provide the required + # information via the sysctl command. We therefore generate fake + # vendor, model, and stepping information based on the + # macOS-specific CPU codes. 
+ execute_process(COMMAND /usr/sbin/sysctl -n hw.cputype machdep.cpu.family hw.cpufamily machdep.cpu.features + OUTPUT_VARIABLE _sysctl_output_string RESULT_VARIABLE _error) + if(NOT _error) + string(REPLACE "\n" ";" _sysctl_output ${_sysctl_output_string}) + list(GET _sysctl_output 0 _cpu_implementer) + list(GET _sysctl_output 1 _cpu_family) + list(GET _sysctl_output 2 _cpu_model) + list(GET _sysctl_output 3 _cpu_flags) + string(TOLOWER "${_cpu_flags}" _cpu_flags) + string(REPLACE "." "_" _cpu_flags "${_cpu_flags}") + + # Fake vendor + if(_cpu_implementer STREQUAL "0x7" OR _cpu_implementer STREQUAL "7") + set(_vendor_id "GenuineIntel") + else() + set(_vendor_id "Unknown") + endif() + + # Fake stepping + set(_cpu_stepping "Unknown") + + # Fake model + # Taken from /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/machine.h + if( _cpu_model STREQUAL "0x78ea4fbc" OR _cpu_model STREQUAL "2028621756") # Penryn + set(_cpu_model "23") + elseif(_cpu_model STREQUAL "0x6b5a4cd2" OR _cpu_model STREQUAL "1801080018") # Nehalem + set(_cpu_model "26") + elseif(_cpu_model STREQUAL "0x573b5eec" OR _cpu_model STREQUAL "1463508716") # Westmere + set(_cpu_model "37") + elseif(_cpu_model STREQUAL "0x5490b78c" OR _cpu_model STREQUAL "1418770316") # Sandybridge + set(_cpu_model "42") + elseif(_cpu_model STREQUAL "0x1f65e835" OR _cpu_model STREQUAL "526772277") # Ivybridge + set(_cpu_model "58") + elseif(_cpu_model STREQUAL "0x10b282dc" OR _cpu_model STREQUAL "280134364") # Haswell + set(_cpu_model "60") + elseif(_cpu_model STREQUAL "0x582ed09c" OR _cpu_model STREQUAL "1479463068") # Broadwell + set(_cpu_model "61") + elseif(_cpu_model STREQUAL "0x37fc219f" OR _cpu_model STREQUAL "939270559") # Skylake + set(_cpu_model "78") + elseif(_cpu_model STREQUAL "0x0f817246" OR _cpu_model STREQUAL "260141638") # Kabylake + set(_cpu_model "142") + elseif(_cpu_model STREQUAL "0x38435547" OR _cpu_model STREQUAL "943936839") # 
Icelake + set(_cpu_model "125") + elseif(_cpu_model STREQUAL "0x1cf8a03e" OR _cpu_model STREQUAL "486055998") # Cometlake + set(_cpu_model "142") + else() + set(_cpu_model "Unknown") + endif() + endif() + endif() + if(_error) + message(FATAL_ERROR "[OptimizeForArchitecture] OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") + endif() + + elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + + # Windows + get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE) + get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE) + mark_as_advanced(_vendor_id _cpu_id) + string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}") + string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}") + string(REGEX REPLACE ".* Stepping ([0-9]+) .*" "\\1" _cpu_mstepping "${_cpu_id}") + + else() + + # Try to retrieve CPUID directly + try_run(_exit _ok + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/CMake/OFA/cpuinfo_x86.cxx + RUN_OUTPUT_VARIABLE _cpuinfo) + + if(_ok AND ${_exit} EQUAL 0) + string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}") + string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}") + string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}") + string(REGEX REPLACE ".*stepping[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_stepping "${_cpuinfo}") + string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}") + + else() + + message(FATAL_ERROR "[OptimizeForArchitecture] OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") + endif() + endif() + + # Determine CPU from CPUID + if(_vendor_id STREQUAL "GenuineIntel") + if(_cpu_family EQUAL 19) + + # 
MIC architecture + if(_cpu_model EQUAL 1) + set(TARGET_ARCHITECTURE "diamondrapids") + elseif(_cpu_model EQUAL 0) + set(TARGET_ARCHITECTURE "novalake") + + else() + message(WARNING + " [OptimizeForArchitecture] Your CPU is not known.\n" + " \tAuto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.\n" + " \tPlease send an email to gismo@inria.fr with the following content so that we can update the OFA script:\n" + " \tVendor id: ${_vendor_id}\n" + " \tCPU family: ${_cpu_family}\n" + " \tCPU mode: ${_cpu_model}\n" + " \tCPU stepping: ${_cpu_stepping}\n" + " \tCPU flags: ${_cpu_flags}") + set(TARGET_ARCHITECTURE "merom") + endif() + + elseif(_cpu_family EQUAL 6) + # taken from the Intel ORM + # http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html + # CPUID Signature Values Of Recent Intel Microarchitectures + # 4E 5E | Skylake microarchitecture + # 3D 47 56 | Broadwell microarchitecture + # 3C 45 46 3F | Haswell microarchitecture + # 3A 3E | Ivy Bridge microarchitecture + # 2A 2D | Sandy Bridge microarchitecture + # 25 2C 2F | Intel microarchitecture Westmere + # 1A 1E 1F 2E | Intel microarchitecture Nehalem + # 17 1D | Enhanced Intel Core microarchitecture + # 0F | Intel Core microarchitecture + # + # Intel SDM Vol. 3C 35-1 / December 2016: + # 57 | Xeon Phi 3200, 5200, 7200 [Knights Landing] + # 85 | Future Xeon Phi + # 8E 9E | 7th gen. Core [Kaby Lake] + # 55 | Future Xeon [Skylake w/ AVX512] + # 4E 5E | 6th gen. Core / E3 v5 [Skylake w/o AVX512] + # 56 | Xeon D-1500 [Broadwell] + # 4F | Xeon E5 v4, E7 v4, i7-69xx [Broadwell] + # 47 | 5th gen. Core / Xeon E3 v4 [Broadwell] + # 3D | M-5xxx / 5th gen. [Broadwell] + # 3F | Xeon E5 v3, E7 v3, i7-59xx [Haswell-E] + # 3C 45 46 | 4th gen. Core, Xeon E3 v3 [Haswell] + # 3E | Xeon E5 v2, E7 v2, i7-49xx [Ivy Bridge-E] + # 3A | 3rd gen. Core, Xeon E3 v2 [Ivy Bridge] + # 2D | Xeon E5, i7-39xx [Sandy Bridge] + # 2F | Xeon E7 + # 2A | Xeon E3, 2nd gen. 
Core [Sandy Bridge] + # 2E | Xeon 7500, 6500 series + # 25 2C | Xeon 3600, 5600 series, Core i7, i5 and i3 + # + # Values from the Intel SDE: + # 5C | Goldmont + # 5A | Silvermont + # 57 | Knights Landing + # 66 | Cannonlake + # 55 | Skylake Server + # 4E | Skylake Client + # 3C | Broadwell (likely a bug in the SDE) + # 3C | Haswell + # + # Latest updates taken from https://en.wikichip.org/wiki/intel/cpuid + + # MIC architecture + if(_cpu_model EQUAL 133) + set(TARGET_ARCHITECTURE "knm") # Knights Mill + + elseif(_cpu_model EQUAL 87) + set(TARGET_ARCHITECTURE "knl") # Knights Landing + + # Small cores + elseif(_cpu_model EQUAL 138 OR _cpu_model EQUAL 150 OR _cpu_model EQUAL 156) + set(TARGET_ARCHITECTURE "tremont") + + elseif(_cpu_model EQUAL 122) + set(TARGET_ARCHITECTURE "goldmont-plus") + + elseif(_cpu_model EQUAL 92 OR _cpu_model EQUAL 95) + set(TARGET_ARCHITECTURE "goldmont") + + elseif(_cpu_model EQUAL 55 OR _cpu_model EQUAL 74 OR _cpu_model EQUAL 76 OR _cpu_model EQUAL 77 OR _cpu_model EQUAL 90 OR _cpu_model EQUAL 93) + set(TARGET_ARCHITECTURE "silvermont") + + elseif(_cpu_model EQUAL 28 OR _cpu_model EQUAL 38 OR _cpu_model EQUAL 39 OR _cpu_model EQUAL 53 OR _cpu_model EQUAL 54) + set(TARGET_ARCHITECTURE "bonnell") + + # Big cores (server) + elseif(_cpu_model EQUAL 221) + set(TARGET_ARCHITECTURE "clearwaterforest") + + elseif(_cpu_model EQUAL 175) + set(TARGET_ARCHITECTURE "sierraforest") + + elseif(_cpu_model EQUAL 173 OR _cpu_model EQUAL 174) + set(TARGET_ARCHITECTURE "graniterapids") + + elseif(_cpu_model EQUAL 207) + set(TARGET_ARCHITECTURE "emeraldrapids") + + elseif(_cpu_model EQUAL 143) + set(TARGET_ARCHITECTURE "sapphirerapids") + + elseif(_cpu_model EQUAL 106 OR _cpu_model EQUAL 108) + set(TARGET_ARCHITECTURE "icelake-avx512") + + elseif(_cpu_model EQUAL 85) + if(_cpu_stepping LESS 5) + set(TARGET_ARCHITECTURE "skylake-avx512") + elseif(_cpu_stepping LESS 8) + set(TARGET_ARCHITECTURE "cascadelake") + else() + set(TARGET_ARCHITECTURE "cooperlake") + 
endif() + + elseif(_cpu_model EQUAL 79 OR _cpu_model EQUAL 86) + set(TARGET_ARCHITECTURE "broadwell") + + elseif(_cpu_model EQUAL 63) + set(TARGET_ARCHITECTURE "haswell") + + elseif(_cpu_model EQUAL 62) + set(TARGET_ARCHITECTURE "ivybridge") + + elseif(_cpu_model EQUAL 45) + set(TARGET_ARCHITECTURE "sandybridge") + + elseif(_cpu_model EQUAL 44 OR _cpu_model EQUAL 47) + set(TARGET_ARCHITECTURE "westmere") + + elseif(_cpu_model EQUAL 26 OR _cpu_model EQUAL 30 OR _cpu_model EQUAL 46) + set(TARGET_ARCHITECTURE "nehalem") + + elseif(_cpu_model EQUAL 23 OR _cpu_model EQUAL 29) + set(TARGET_ARCHITECTURE "penryn") + + # Big cores (client) + elseif(_cpu_model EQUAL 204) + set(TARGET_ARCHITECTURE "pantherlake") + + elseif(_cpu_model EQUAL 188 OR _cpu_model EQUAL 189) + set(TARGET_ARCHITECTURE "lunarlake") + + elseif(_cpu_model EQUAL 181 OR _cpu_model EQUAL 197 OR _cpu_model EQUAL 198) + set(TARGET_ARCHITECTURE "arrowlake") + + elseif(_cpu_model EQUAL 170 OR _cpu_model EQUAL 171 OR _cpu_model EQUAL 172) + set(TARGET_ARCHITECTURE "meteorlake") + + elseif(_cpu_model EQUAL 183 OR _cpu_model EQUAL 186 OR _cpu_model EQUAL 190 OR _cpu_model EQUAL 191) + set(TARGET_ARCHITECTURE "raptorlake") # Raptor Lake refresh = Bartlett Lake + + elseif(_cpu_model EQUAL 151 OR _cpu_model EQUAL 154) + set(TARGET_ARCHITECTURE "alderlake") + + elseif(_cpu_model EQUAL 167) + set(TARGET_ARCHITECTURE "rocketlake") + + elseif(_cpu_model EQUAL 165 OR _cpu_model EQUAL 166) + set(TARGET_ARCHITECTURE "cometlake") + + elseif(_cpu_model EQUAL 140 OR _cpu_model EQUAL 141) + set(TARGET_ARCHITECTURE "tigerlake") + + elseif(_cpu_model EQUAL 125 OR _cpu_model EQUAL 126) + set(TARGET_ARCHITECTURE "icelake") + + elseif(_cpu_model EQUAL 102) + set(TARGET_ARCHITECTURE "cannonlake") + + elseif(_cpu_model EQUAL 142 OR _cpu_model EQUAL 158) + set(TARGET_ARCHITECTURE "kabylake") + + elseif(_cpu_model EQUAL 78 OR _cpu_model EQUAL 94) + set(TARGET_ARCHITECTURE "skylake") + + elseif(_cpu_model EQUAL 61 OR _cpu_model EQUAL 
71) + set(TARGET_ARCHITECTURE "broadwell") + + elseif(_cpu_model EQUAL 60 OR _cpu_model EQUAL 69 OR _cpu_model EQUAL 70) + set(TARGET_ARCHITECTURE "haswell") + + elseif(_cpu_model EQUAL 58) + set(TARGET_ARCHITECTURE "ivybridge") + + elseif(_cpu_model EQUAL 42) + set(TARGET_ARCHITECTURE "sandybridge") + + elseif(_cpu_model EQUAL 37) + set(TARGET_ARCHITECTURE "westmere") + + elseif(_cpu_model EQUAL 30 OR _cpu_model EQUAL 31) + set(TARGET_ARCHITECTURE "nehalem") + + elseif(_cpu_model EQUAL 23 OR _cpu_model EQUAL 29) + set(TARGET_ARCHITECTURE "penryn") + + elseif(_cpu_model EQUAL 15 OR _cpu_model EQUAL 22) + set(TARGET_ARCHITECTURE "merom") + + elseif(_cpu_model EQUAL 28) + set(TARGET_ARCHITECTURE "atom") + + elseif(_cpu_model EQUAL 14) + set(TARGET_ARCHITECTURE "core") + + elseif(_cpu_model LESS 14) + message(WARNING + " [OptimizeForArchitecture] Your CPU is not known.\n" + " \tAuto-detection of optimization flags failed and will use the generic CPU settings with SSE2.\n" + " \tPlease send an email to gismo@inria.fr with the following content so that we can update the OFA script:\n" + " \tVendor id: ${_vendor_id}\n" + " \tCPU family: ${_cpu_family}\n" + " \tCPU mode: ${_cpu_model}\n" + " \tCPU stepping: ${_cpu_stepping}\n" + " \tCPU flags: ${_cpu_flags}") + set(TARGET_ARCHITECTURE "generic") + else() + message(WARNING + " [OptimizeForArchitecture] Your CPU is not known.\n" + " \tAuto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.\n" + " \tPlease send an email to gismo@inria.fr with the following content so that we can update the OFA script:\n" + " \tVendor id: ${_vendor_id}\n" + " \tCPU family: ${_cpu_family}\n" + " \tCPU mode: ${_cpu_model}\n" + " \tCPU stepping: ${_cpu_stepping}\n" + " \tCPU flags: ${_cpu_flags}") + set(TARGET_ARCHITECTURE "merom") + endif() + + elseif(_cpu_family EQUAL 7) # Itanium (not supported) + message(WARNING "[OptimizeForArchitecture] Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not 
supported by OptimizeForArchitecture.cmake.") + + elseif(_cpu_family EQUAL 15) # NetBurst + list(APPEND _available_vector_units_list "sse" "sse2") + if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead + list(APPEND _available_vector_units_list "sse" "sse2" "sse3") + endif() + + endif() + + elseif(_vendor_id STREQUAL "AuthenticAMD") + # taken from the list of AMD CPU microarchitectures + # https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures + # CPUID Signature Values Of Recent AMD Microarchitectures + # 05 05h | K6 + # 06 06h | K7 + # 15 0Fh | K8 / Hammer + # 16 10h | K10 + # 17 11h | K8 & K10 "hybrid" + # 18 12h | K10 (Llano) / K12 (ARM based AMD cpu) + # 20 14h | Bobcat + # 21 15h | Bulldozer / Piledriver / Steamroller / Excavator + # 22 16h | Jaguar / Puma + # 23 17h | Zen / Zen+ / Zen 2 + # 24 18h | Hygon Dhyana + # 25 19h | Zen 3 / Zen 3+ / Zen 4 + # 26 1Ah | Zen 5 + + if(_cpu_family EQUAL 25) # 19h + set(TARGET_ARCHITECTURE "zen3") # Some newer models will be Zen 4 + + elseif(_cpu_family EQUAL 24) # 18h + set(TARGET_ARCHITECTURE "zen") + + elseif(_cpu_family EQUAL 23) # 17h + if(_cpu_model LESS 49) + set(TARGET_ARCHITECTURE "zen") + else() + set(TARGET_ARCHITECTURE "zen2") + endif() + + elseif(_cpu_family EQUAL 22) # 16h + set(TARGET_ARCHITECTURE "amd16h") + + elseif(_cpu_family EQUAL 21) # 15h + if(_cpu_model LESS 16) + set(TARGET_ARCHITECTURE "bulldozer") + elseif(_cpu_model LESS 32) + set(TARGET_ARCHITECTURE "piledriver") + elseif(_cpu_model LESS 64) + set(TARGET_ARCHITECTURE "steamroller") + else() + set(TARGET_ARCHITECTURE "excavator") + endif() + + elseif(_cpu_family EQUAL 20) # 14h + set(TARGET_ARCHITECTURE "amd14h") + + elseif(_cpu_family EQUAL 18) # 12h (K10 / K12) + + elseif(_cpu_family EQUAL 17) # 12h (K8 & K10 hybrid) + + elseif(_cpu_family EQUAL 16) # 10h (K10) + set(TARGET_ARCHITECTURE "barcelona") + + elseif(_cpu_family EQUAL 15) # 0Fh (K8 / Hammer) + if(_cpu_model LESS 39) + set(TARGET_ARCHITECTURE "k8") + 
else() + set(TARGET_ARCHITECTURE "k8-sse3") + endif() + + elseif(_cpu_family EQUAL 6) # 06h (K7) + elseif(_cpu_family EQUAL 5) # 05h (K6) + + endif() + + else() + message(WARNING "[OptimizeForArchitecture] Auto-detection of optimization flags failed and will use the generic CPU settings.") + return() + endif() + + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Vendor id: ${_vendor_id}") + message(STATUS "[OptimizeForArchitecture] CPU family: ${_cpu_family}") + message(STATUS "[OptimizeForArchitecture] CPU mode: ${_cpu_model}") + message(STATUS "[OptimizeForArchitecture] CPU stepping: ${_cpu_stepping}") + endif() +endmacro(OFA_AutodetectX86) diff --git a/src/cmake/OFA/CheckARM.txt b/src/cmake/OFA/CheckARM.txt new file mode 100644 index 0000000000..63540835bc --- /dev/null +++ b/src/cmake/OFA/CheckARM.txt @@ -0,0 +1,176 @@ +# List of arm/arm64 checks + +# FORMAT: +# [,];;;;[] +# +# lines starting with # are comments +# lines starting with push_enable: start a block of tests enabled for the given compilers only +# lines starting with pop_enable: ends a block of tests enabled for the given compilers only +# lines starting with push_disable: start a block of tests disabled for the given compilers +# lines starting with pop_disable: ends a block of tests disabled for the given compilers + +# DESCRIPTION: +# For each line of this file, HandleArmOptions generates the code snipped +# +# #include +# #include +# ... +# int main { +# name(parameter0, parameter1, ...); +# return 0; +# } +# +# and compiles it with, e.g. +# +# gcc -m -m +# +# if the extension should be enabled and +# +# gcc -m-no -m-no +# +# if the extension should be disabled. In the above example, the +# compiler name 'gcc' and the flag prefixes '-m' and '-mno-' will be +# set properly by HandleX86Options. 
+# +# EXTENSION ALIAS: +# By default, it is assumed that the name of the extension, e.g., +# avx512f coincides with the name of the compiler flag to be used to +# enable/disable it, e.g., -mno-avx512f. Some compilers like Oracle's +# SunPro have non-canonical naming conventions, +# cf. https://docs.oracle.com/cd/E77782_01/html/E77792/gqexw.html. +# +# In this case, the optional parameter <extension alias> can be used +# to specify the name of the extension as reported by the system, +# whereas the compiler-specific extension flag(s) are given in +# <extension flag> and [<list of extra extension flags>], respectively. +# +# ENABLING/DISABLING OF CHECKS: +# Checks can be explicitly disabled for particular compilers by placing +# them inside a push_disable/pop_disable block, e.g. +# +# push_disable:SunPro,IntelLLVM +# <checks> +# pop_disable:SunPro,IntelLLVM +# +# Similarly, checks can be explicitly enabled for particular compilers +# by placing them inside a push_enable/pop_enable block, e.g. +# +# push_enable:SunPro +# <checks> +# pop_enable:SunPro + +# ARM (aarch32) 32-bit + +# armv4 : no options +# armv4t : no options + +# armv5t : no options +# armv5te : no options +# armv5tej : no options + +# armv6 : fp vfpv2 +# armv6j : fp vfpv2 +# armv6k : fp vfpv2 +# armv6z : fp vfpv2 +# armv6kz : fp vfpv2 +# armv6zk : fp vfpv2 +# armv6t2 : fp vfpv2 +# armv6-m : no options +# armv6s-m : no options +fp;arm_neon.h;vcvt_f16_f32;float32x4_t() +vfpv2;cstdlib;exit;0 + +# armv7 : fp vfpv3-d16 +vfpv3-d16;cstdlib;exit;0;vfpv3_d16 + +# armv7-a : mp sec fp vfpv3 vfpv3-d16-fp16 vfpv3-fp16 vfpv4-d16 vfpv4 simd +# neon-fp16 neon-vfpv4 nosimd vfpv3-d16 neon neon-vfpv3 +# armv7ve : vfpv3-d16 vfpv3 vfpv3-d16-fp16 vfpv3-fp16 fp vfpv4 neon neon-fp16 +# simd nosimd vfpv4-d16 neon-vfpv3 neon-vfpv4 +mp;cstdlib;exit;0 +neon;cstdlib;exit;0 +neon-fp16;cstdlib;exit;0;neon_fp16 +neon-vfpv3;cstdlib;exit;0;neon_vfpv3 +neon-vfpv4;cstdlib;exit;0;neon_vfpv4 +sec;cstdlib;exit;0 +simd;cstdlib;exit;0 +vfpv3;cstdlib;exit;0 +vfpv3-d16-fp16;cstdlib;exit;0;vfpv3_d16_fp16 
+vfpv3-fp16;cstdlib;exit;0;vfpv3_fp16
+vfpv4;cstdlib;exit;0
+vfpv4-d16;cstdlib;exit;0;vfpv4_d16
+
+# armv7-r : fp.sp fp vfpv3xd-fp16 vfpv3-d16-fp16 idiv noidiv vfpv3xd vfpv3-d16
+fp.sp;cstdlib;exit;0;fp_sp
+fp.dp;cstdlib;exit;0;fp_dp
+idiv;cstdlib;exit;0
+vfpv3dx;cstdlib;exit;0
+vfpv3dx-fp16;cstdlib;exit;0;vfpv3dx_fp16
+
+# armv7-m : no options
+# armv7e-m : fp fpv5 fp.dp vfpv4-sp-d16 fpv5-d16
+fpv5;cstdlib;exit;0
+fpv5_d16;cstdlib;exit;0
+vfpv4-sp-d16;cstdlib;exit;0;vfpv4_sp_d16
+
+# armv8-a : crc simd crypto nocrypto sb predres
+crc;arm_acle.h;__crc32b;(uint32_t)0,(uint8_t)0
+crypto;arm_neon.h;vaesdq_u8;uint8x16_t(), uint8x16_t()
+sb;cstdlib;exit;0
+predres;cstdlib;exit;0
+
+# armv8-r : crc fp.sp simd crypto nocrypto
+# armv8.1-a : simd crypto nocrypto sb predres
+# armv8.2-a : simd fp16 fp16fml crypto nocrypto dotprod sb predres i8mm bf16
+bf16,sve;arm_sve.h;svbfdot;svfloat32_t(),svbfloat16_t(),svbfloat16_t()
+dotprod;arm_neon.h;vdotq_s32;int32x4_t(),int8x16_t(),int8x16_t()
+fp16;arm_neon.h;vabdq_f16;float16x8_t(),float16x8_t()
+fp16fml;arm_neon.h;vfmlalq_high_f16;float32x4_t(),float16x8_t(),float16x8_t()
+i8mm,sve;arm_sve.h;svmmla;svint32_t(),svint8_t(),svint8_t()
+simd;arm_neon.h;vaddq_u32;uint32x4_t(),uint32x4_t()
+
+# armv8.3-a : simd fp16 fp16fml crypto nocrypto dotprod sb predres i8mm bf16
+# armv8.4-a : simd fp16 crypto nocrypto sb predres i8mm bf16
+# armv8.5-a : simd fp16 crypto nocrypto i8mm bf16
+# armv8.6-a : simd fp16 crypto nocrypto i8mm bf16
+
+# ARM64 (aarch64) 64-bit
+
+# armv8.x-a : fp simd crypto crc lse fp16 rcpc rdma dotprod aes sha2 sha3 sm4 fp16fml sve profile rng memtag sb ssbs predres sve2 sve2-sm4 sve2-aes sve2-sha3 sve2-bitperm tme i8mm f32mm f64mm bf16 flagm pauth asimd crc32
+crc32;arm_acle.h;__crc32b;(uint32_t)0,(uint8_t)0
+simd;cstdlib;exit;0;asimd
+aes,crypto;arm_neon.h;vaesdq_u8;uint8x16_t(), uint8x16_t()
+dsp,sve;arm_sve.h;svqadd_z;svbool_t(),svint8_t(),svint8_t()
+f32mm,sve;arm_sve.h;svmmla;svfloat32_t(),svfloat32_t(),svfloat32_t()
+f64mm,sve;arm_sve.h;svmmla;svfloat64_t(),svfloat64_t(),svfloat64_t() +flagm;cstdlib;exit;0 +lse;cstdlib;exit;0 +memtag;cstdlib;exit;0 +mve;cstdlib;exit;0 +mve_fp;cstdlib;exit;0 +pauth;cstdlib;exit;0 +profile;cstdlib;exit;0 +ras;cstdlib;exit;0 +rcpc;cstdlib;exit;0 +rdm;cstdlib;exit;0 +rdma;cstdlib;exit;0 +rng;cstdlib;exit;0 +sec;cstdlib;exit;0 +sha2,crypto;arm_neon.h;vsha256hq_u32;uint32x4_t(),uint32x4_t(),uint32x4_t() +sha3;arm_neon.h;vsha512hq_u64;uint64x2_t(),uint64x2_t(),uint64x2_t() +sm4;arm_neon.h;vsm4eq_u32;uint32x4_t(), uint32x4_t() +ssbs;cstdlib;exit;0 +tme;cstdlib;exit;0 +zcm;cstdlib;exit;0 +zcz;cstdlib;exit;0 + +# SVE +sve;arm_sve.h;svwhilelt_b64;0,1 + +# SVE2 +sve2;arm_sve.h;svaba;svint8_t(),svint8_t(),svint8_t() +sve2-aes;arm_sve.h;svaesd;svuint8_t(),svuint8_t() +sve2-bitperm;arm_sve.h;svbdep;svuint8_t(),svuint8_t() +sve2-sha3;arm_sve.h;svrax1;svint64_t(),svint64_t() +sve2-sm4;arm_sve.h;svsm4e;svuint32_t(),svuint32_t() + diff --git a/src/cmake/OFA/CheckCXXCompilerFlag.cmake b/src/cmake/OFA/CheckCXXCompilerFlag.cmake new file mode 100644 index 0000000000..eb16e22a3f --- /dev/null +++ b/src/cmake/OFA/CheckCXXCompilerFlag.cmake @@ -0,0 +1,72 @@ +# - Check whether the CXX compiler supports a given flag. +# CHECK_CXX_COMPILER_FLAG(<flag> <var>) +# <flag> - the compiler flag +# <var> - variable to store the result +# This internally calls the check_cxx_source_compiles macro. See help +# for CheckCXXSourceCompiles for a listing of variables that can +# modify the build. + +#============================================================================= +# Copyright 2006-2009 Kitware, Inc. +# Copyright 2006 Alexander Neundorf +# Copyright 2011-2013 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * The names of Kitware, Inc., the Insight Consortium, or the names of +# any consortium members, or of any contributors, may not be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#=============================================================================
+
+include(CheckCXXSourceCompiles)
+
+# Compiles a test program with _FLAG passed via CMAKE_REQUIRED_DEFINITIONS and stores the outcome in the variable named by _RESULT; an optional third argument replaces the default test source.
+macro(CHECK_CXX_COMPILER_FLAG _FLAG _RESULT)
+  set(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}") # remembered so it can be restored below
+  set(TEST_SOURCE "int main() { return 0;}") # default test program
+  if(${ARGC} GREATER 2)
+    set(TEST_SOURCE "${ARGV2}") # caller-supplied test program
+  endif()
+  set(CMAKE_REQUIRED_DEFINITIONS "${_FLAG}")
+  check_cxx_source_compiles("${TEST_SOURCE}" ${_RESULT}
+    # Diagnostics that count as failure, since some compilers only warn about a bad flag
+    FAIL_REGEX "error: bad value (.*) for .* switch" # GNU
+    FAIL_REGEX "argument unused during compilation" # clang
+    FAIL_REGEX "is valid for .* but not for C\\\\+\\\\+" # GNU
+    FAIL_REGEX "unrecognized .*option" # GNU
+    FAIL_REGEX "ignored for target" # GNU
+    FAIL_REGEX "ignoring unknown option" # MSVC
+    FAIL_REGEX "warning D9002" # MSVC
+    FAIL_REGEX "[Uu]nknown option" # HP
+    FAIL_REGEX "[Ww]arning: [Oo]ption" # SunPro
+    FAIL_REGEX "command option .* is not recognized" # XL
+    FAIL_REGEX "WARNING: unknown flag:" # Open64
+    FAIL_REGEX "command line error" # ICC
+    FAIL_REGEX "command line warning" # ICC
+    FAIL_REGEX "#10236:" # ICC: File not found
+    FAIL_REGEX " #10159: " # ICC
+    FAIL_REGEX " #10353: " # ICC: option '-mfma' ignored, suggest using '-march=core-avx2'
+    )
+  set(CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
+endmacro()
diff --git a/src/cmake/OFA/CheckX86.txt b/src/cmake/OFA/CheckX86.txt
new file mode 100644
index 0000000000..66830b0a4f
--- /dev/null
+++ b/src/cmake/OFA/CheckX86.txt
@@ -0,0 +1,218 @@
+# List of x86/x86_64 checks
+
+# FORMAT:
+# <extension flag>[,<list of extra extension flags>];<list of headers>;<name>;<list of parameters>;[<extension alias>]
+#
+# lines starting with # are comments
+# lines starting with push_enable: start a block of tests enabled for the given compilers only
+# lines starting with pop_enable: ends a block of tests enabled for the given compilers only
+# lines starting with push_disable: start a block of tests disabled for the given compilers
+# lines starting with pop_disable: ends a block
of tests disabled for the given compilers + +# DESCRIPTION: +# For each line of this file, HandleX86Options generates the code snippet +# +# #include <header0> +# #include <header1> +# ... +# int main() { +# name(parameter0, parameter1, ...); +# return 0; +# } +# +# and compiles it with, e.g. +# +# gcc -m<extension flag> -m<extra extension flag> +# +# if the extension should be enabled and +# +# gcc -mno-<extension flag> -mno-<extra extension flag> +# +# if the extension should be disabled. In the above example, the +# compiler name 'gcc' and the flag prefixes '-m' and '-mno-' will be +# set properly by HandleX86Options. +# +# EXTENSION ALIAS: +# By default, it is assumed that the name of the extension, e.g., +# avx512f coincides with the name of the compiler flag to be used to +# enable/disable it, e.g., -mno-avx512f. Some compilers like Oracle's +# SunPro have non-canonical naming conventions, +# cf. https://docs.oracle.com/cd/E77782_01/html/E77792/gqexw.html. +# +# In this case, the optional parameter <extension alias> can be used +# to specify the name of the extension as reported by the system, +# whereas the compiler-specific extension flag(s) are given in +# <extension flag> and [<list of extra extension flags>], respectively. +# +# ENABLING/DISABLING OF CHECKS: +# Checks can be explicitly disabled for particular compilers by placing +# them inside a push_disable/pop_disable block, e.g. +# +# push_disable:MSVC,SunPro +# <checks> +# pop_disable:MSVC,SunPro +# +# Similarly, checks can be explicitly enabled for particular compilers +# by placing them inside a push_enable/pop_enable block, e.g. 
+# +# push_enable:SunPro +# +# pop_enable:SunPro + +# MSVC and Oracle's SunPro compiler fail these checks +push_disable:MSVC,SunPro + +# MMX +mmx;mmintrin.h;_mm_add_pi16;__m64(),__m64() + +# SSE/SSE2/SSE3/SSE4.1/SSE4.2/SSE4A/AVX/AVX2/FMA +avx;immintrin.h;_mm256_add_pd;_mm256_setzero_pd(),_mm256_setzero_pd() +avx2;immintrin.h;_mm256_hadd_epi16;_mm256_setzero_si256(),_mm256_setzero_si256() +fma;immintrin.h;_mm_fmadd_pd;_mm_setzero_pd(),_mm_setzero_pd(),_mm_setzero_pd() +sse2;emmintrin.h;_mm_add_epi16;_mm_setzero_si128(),_mm_setzero_si128() +sse3;pmmintrin.h;_mm_addsub_pd;_mm_setzero_pd(),_mm_setzero_pd() +sse4.1;smmintrin.h;_mm_max_epi32;_mm_setzero_si128(),_mm_setzero_si128() +sse4.2;nmmintrin.h;_mm_cmpgt_epi64;_mm_setzero_si128(),_mm_setzero_si128() +sse4a;ammintrin.h;_mm_extract_si64;_mm_setzero_si128(),_mm_setzero_si128() +sse;xmmintrin.h;_mm_add_ps;_mm_setzero_ps(),_mm_setzero_ps() +ssse3;tmmintrin.h;_mm_hadd_epi16;_mm_setzero_si128(),_mm_setzero_si128() + +# AVX-VNNI +avxvnni;immintrin.h;_mm_dpbusd_avx_epi32;_mm_setzero_si128(),_mm_setzero_si128(),_mm_setzero_si128() + +# AVX-512 +avx5124fmaps;immintrin.h;_mm_4fmadd_ss;_mm_setzero_ps(),_mm_setzero_ps(),_mm_setzero_ps(),_mm_setzero_ps(),_mm_setzero_ps(),new __m128[1] +avx5124vnniw;immintrin.h;_mm512_4dpwssd_epi32;_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512(),new __m128i[1] +avx512bf16,avx512vl;immintrin.h;_mm_cvtne2ps_pbh;_mm_setzero_ps(),_mm_setzero_ps() +avx512bitalg,avx512vl;immintrin.h;_mm_popcnt_epi16;_mm_setzero_si128() +avx512bw;immintrin.h;_mm512_abs_epi16;_mm512_setzero_si512() +avx512cd;immintrin.h;_mm512_broadcastmb_epi64;__mmask8() +avx512dq;immintrin.h;_mm512_and_pd;_mm512_setzero_pd(),_mm512_setzero_pd() +avx512er;immintrin.h;_mm512_exp2a23_pd;_mm512_setzero_pd() +avx512f;immintrin.h;_mm512_abs_epi32;_mm512_setzero_si512() +avx512fp16,avx512vl;immintrin.h;_mm_add_ph;_mm_setzero_ph(),_mm_setzero_ph() 
+avx512ifma;immintrin.h;_mm512_maskz_madd52hi_epu64;__mmask8(),_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512() +avx512pf;immintrin.h;_mm512_prefetch_i32scatter_pd;NULL,_mm256_setzero_si256(),(int)1,_MM_HINT_T0 +avx512vbmi2,avx512vl;immintrin.h;_mm_mask_compress_epi16;_mm_setzero_si128(),__mmask8(),_mm_setzero_si128() +avx512vbmi;immintrin.h;_mm512_permutex2var_epi8;_mm512_setzero_si512(),_mm512_setzero_si512(),_mm512_setzero_si512() +avx512vl,avx512f;immintrin.h;_mm_abs_epi64;_mm_setzero_si128() +avx512vnni,avx512vl;immintrin.h;_mm_dpbusd_epi32;_mm_setzero_si128(),_mm_setzero_si128(),_mm_setzero_si128() +avx512vp2intersect,avx512vl;immintrin.h;_mm_2intersect_epi32;_mm_setzero_si128(),_mm_setzero_si128(),new __mmask8[1],new __mmask8[1] +avx512vpopcntdq,avx512vl;immintrin.h;_mm_popcnt_epi64;_mm_setzero_si128() + +# AMX +amx-bf16;immintrin.h;_tile_dpbf16ps;0,1,2 +amx-int8;immintrin.h;_tile_dpbssd;0,1,2 +amx-tile;immintrin.h;_tile_zero;0 + +# Other +adx;immintrin.h;_addcarryx_u32;(unsigned char)0,(unsigned int)1,(unsigned int)1,new unsigned int[1] +aes;wmmintrin.h;_mm_aesdec_si128;_mm_setzero_si128(),_mm_setzero_si128() +bmi2;immintrin.h;_bzhi_u32;(unsigned int)1,(unsigned int)1 +enqcmd;immintrin.h;_enqcmd;(void*)NULL,(void const*)NULL +f16c;immintrin.h;_mm_cvtph_ps;_mm_setzero_si128() +fsgsbase;immintrin.h;_readfsbase_u32; +fxsr;immintrin.h;_fxrstor;(void*)NULL +gfni,avx512vl;immintrin.h;_mm_gf2p8mul_epi8;_mm_setzero_si128(),_mm_setzero_si128() +hreset;immintrin.h;_hreset;1 +invpcid;immintrin.h;_invpcid;(unsigned int)1,(void*)NULL +keylocker;immintrin.h;_mm_aesdec128kl_u8;new __m128i[1],_mm_setzero_si128(),(const void*)NULL +keylocker_wide;immintrin.h;_mm_aesdecwide128kl_u8;new __m128i[1],(const __m128i*)new __m128i[1], (const void*)NULL +lzcnt;immintrin.h;_lzcnt_u32;(unsigned int)1 +monitor;pmmintrin.h;_mm_monitor;(void const*)NULL,(unsigned)1,(unsigned)1 +movbe;immintrin.h;_loadbe_i16;(void const*)NULL 
+movdir64b;immintrin.h;_movdir64b;(void*)NULL,(const void*)NULL +movdiri;immintrin.h;_directstoreu_u32;(void*)NULL,(unsigned int)1 +mpx;immintrin.h;_bnd_chk_ptr_lbounds;(const void*)NULL +pclmul;wmmintrin.h;_mm_clmulepi64_si128;_mm_setzero_si128(),_mm_setzero_si128(),(const int)0;pclmul +pconfig;immintrin.h;_pconfig_u32;(const int)1,new size_t[1] +pku;cstdlib;exit;0 +popcnt;immintrin.h;_mm_popcnt_u32;(unsigned int)1 +prfchw;immintrin.h;_m_prefetchw;(void*)NULL +prefetchwt1;xmmintrin.h;_mm_prefetch;(char const*)NULL,(int)1 +ptwrite;immintrin.h;_ptwrite32;(unsigned int)0 +rdpid;immintrin.h;_rdpid_u32; +rdrnd;immintrin.h;_rdrand16_step;(unsigned short*)new unsigned short[1] +rdseed;immintrin.h;_rdseed16_step;(unsigned short*)new unsigned short[1] +rdtscp;immintrin.h;__rdtscp;(unsigned int*)NULL +rtm;immintrin.h;_xend; +serialize;immintrin.h;_serialize; +sha;immintrin.h;_mm_sha1msg1_epu32;_mm_setzero_si128(),_mm_setzero_si128() +tsc;immintrin.h;_rdtsc; +tsxldtrk;immintrin.h;_xresldtrk; +uintr;immintrin.h;_clui; +vaes,avx512vl;immintrin.h;_mm256_aesdec_epi128;_mm256_setzero_si256(),_mm256_setzero_si256() +vpclmulqdq,avx512vl;immintrin.h;_mm256_clmulepi64_epi128;_mm256_setzero_si256(),_mm256_setzero_si256(),(const int)1 +waitpkg;immintrin.h;_umonitor;(void*)NULL +wbnoinvd;immintrin.h;_wbnoinvd; +xsavec,xsave;immintrin.h;_xsavec;(void*)NULL,(unsigned long long)0 +xsaveopt,xsave;immintrin.h;_xsaveopt;(void*)NULL,(unsigned long long)0 +xsaves;immintrin.h;_xgetbv;(unsigned int)1 +xss,xsave;immintrin.h;_xrstors;(const void*)NULL,(unsigned long long)0 + +# GNU GCC fails the following tests ... +push_disable:GNU +abm;x86intrin.h;_bextri_u32;(unsigned int)0,(unsigned int)0 +bmi;immintrin.h;_andn_u32;(unsigned int)1,(unsigned int)1 +cldemote;immintrin.h;_mm_cldemote;(void const*)NULL +clflushopt;immintrin.h;_mm_clflushopt;(void const*)NULL +clwb;immintrin.h;_mm_clwb;(void const*)NULL +pop_disable:GNU + +# ... 
and needs a slightly modified implementation +push_enable:GNU +abm;x86intrin.h;__bextri_u32;(unsigned int)0,(unsigned int)0 +bmi;immintrin.h;__andn_u32;(unsigned int)1,(unsigned int)1 +cldemote;immintrin.h;_cldemote;(void*)NULL +clflushopt;immintrin.h;_mm_clflushopt;(void*)NULL +clwb;immintrin.h;_mm_clwb;(void*)NULL +pop_enable:GNU + +pop_disable:MSVC,SunPro + + +# Special checks for the MSVC compiler +push_enable:MSVC + +# SSE/SSE2/SSE3/SSE4.1/SSE4.2/SSE4A/AVX/AVX2/FMA +SSE;xmmintrin.h;_mm_add_ps;_mm_setzero_ps(),_mm_setzero_ps();sse +SSE2;emmintrin.h;_mm_add_epi16;_mm_setzero_si128(),_mm_setzero_si128();sse2 +AVX;immintrin.h;_mm256_add_pd;_mm256_setzero_pd(),_mm256_setzero_pd();avx +AVX2;immintrin.h;_mm256_hadd_epi16;_mm256_setzero_si256(),_mm256_setzero_si256();avx2 + +# AVX-512 +AVX512;immintrin.h;_mm512_abs_epi32;_mm512_setzero_si512();avx512f + +pop_enable:MSVC + + +# Special checks for Oracle's SunPro compiler +# https://docs.oracle.com/cd/E77782_01/html/E77792/gqexw.html +push_enable:SunPro + +# SSE/SSE2/SSE3/SSE4.1/SSE4.2/SSE4A/AVX/AVX2/FMA +avx;immintrin.h;_mm256_add_pd;_mm256_setzero_pd(),_mm256_setzero_pd() +avx2;immintrin.h;_mm256_hadd_epi16;_mm256_setzero_si256(),_mm256_setzero_si256() +sse2;emmintrin.h;_mm_add_epi16;_mm_setzero_si128(),_mm_setzero_si128() +sse3;pmmintrin.h;_mm_addsub_pd;_mm_setzero_pd(),_mm_setzero_pd() +sse4_1;smmintrin.h;_mm_max_epi32;_mm_setzero_si128(),_mm_setzero_si128();sse4.1 +sse4_2;nmmintrin.h;_mm_cmpgt_epi64;_mm_setzero_si128(),_mm_setzero_si128();sse4.2 +sse;xmmintrin.h;_mm_add_ps;_mm_setzero_ps(),_mm_setzero_ps() +ssse3;tmmintrin.h;_mm_hadd_epi16;_mm_setzero_si128(),_mm_setzero_si128() + +# AVX-512 +avx512;immintrin.h;_mm512_abs_epi32;_mm512_setzero_si512();avx512f +avx512;xmmintrin.h;_mm_prefetch;(char const*)NULL,(int)1;prefetchwt1 + +# Other +avx_i;emmintrin.h;_mm_cvtph_ps;_mm_setzero_si128();f16c +aes;wmmintrin.h;_mm_aesdec_si128;_mm_setzero_si128(),_mm_setzero_si128();aes 
+aes;wmmintrin.h;_mm_clmulepi64_si128;_mm_setzero_si128(),_mm_setzero_si128(),(const int)0;pclmul
+avx2;immintrin.h;_lzcnt_u32;(unsigned int)1;lzcnt
+sse4_2;immintrin.h;_mm_popcnt_u32;(unsigned int)1;popcnt
+avx_i;immintrin.h;_andn_u32;(unsigned int)1,(unsigned int)1;bmi
+avx_i;immintrin.h;_bzhi_u32;(unsigned int)1,(unsigned int)1;bmi2
+avx_i;immintrin.h;_readfsbase_u32;;fsgsbase
+avx_i;immintrin.h;_rdrand16_step;(unsigned short*)new unsigned short[1];rdrnd
+pop_enable:SunPro
+
diff --git a/src/cmake/OFA/CommonMacros.cmake b/src/cmake/OFA/CommonMacros.cmake
new file mode 100644
index 0000000000..89e911f6cf
--- /dev/null
+++ b/src/cmake/OFA/CommonMacros.cmake
@@ -0,0 +1,15 @@
+include(OFA/AddCXXCompilerFlag)
+
+# _ofa_find(<list-var> <value> <result-var>)
+#
+# Sets <result-var> to TRUE if <value> is an element of the list stored in the
+# variable named <list-var>, and to FALSE otherwise. This is a macro, so the
+# result is written directly into the caller's scope. The IN_LIST test
+# (policy CMP0057, CMake >= 3.3) needs no scratch variable that could leak.
+macro(_ofa_find _list _value _ret)
+  if("${_value}" IN_LIST ${_list})
+    set(${_ret} TRUE)
+  else()
+    set(${_ret} FALSE)
+  endif()
+endmacro()
diff --git a/src/cmake/OFA/HandleArmOptions.cmake b/src/cmake/OFA/HandleArmOptions.cmake
new file mode 100644
index 0000000000..b9f464f6b0
--- /dev/null
+++ b/src/cmake/OFA/HandleArmOptions.cmake
@@ -0,0 +1,1162 @@
+#=============================================================================
+# Handling of ARM / ARM64 options
+#
+# This is a three-step process:
+#
+# 1. Generate a list of available compiler flags for the specific CPU
+#
+# 2. Enable/disable feature flags based on available CPU features,
+#    user-defined USE_<extension> variables and the capabilities of the
+#    host system's compiler and linker
+#
+# 3.
Set compiler-specific flags (e.g., -m/-mno-) +#============================================================================= + +include(OFA/CommonMacros) + +macro(OFA_HandleArmOptions) + + # Special treatment for "native" flag + if(TARGET_ARCHITECTURE STREQUAL "native") + if(MSVC) + # MSVC (on Windows) + message(FATAL_ERROR "[OptimizeForArchitecture] MSVC does not support \"native\" flag.") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC" + OR CMAKE_CXX_COMPILER_ID MATCHES "PGI") + # NVidia HPC / PGI (on Linux/Windows) + AddCompilerFlag("-tp=native" CXX_FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + # Cray (on Linux) + message(FATAL_ERROR, "[OptimizeForArchitecture] Cray compiler does not support \"native\" flag.") + else() + # Others: GNU, Clang and variants + AddCXXCompilerFlag("-mcpu=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + endif() + + if(NOT _ok) + message(FATAL_ERROR "[OptimizeForArchitecture] An error occured while setting the \"native\" flag.") + endif() + + elseif(NOT TARGET_ARCHITECTURE STREQUAL "none") + + # Step 1: Generate a list of compiler flags for the specific CPU + set(_march_flag_list) + set(_mtune_flag_list) + set(_available_extension_list) + + # ARM + if(TARGET_ARCHITECTURE STREQUAL "strongarm") + list(APPEND _mtune_flag_list "strongarm") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "arm8") + list(APPEND _mtune_flag_list "arm8") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "arm810") + list(APPEND _mtune_flag_list "arm810") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "fa526") + list(APPEND _mtune_flag_list "fa526") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "fa626") + list(APPEND _mtune_flag_list "fa626") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "arm7tdmi") + list(APPEND _mtune_flag_list "arm7tdmi") + list(APPEND 
_march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm7tdmi-s") + list(APPEND _mtune_flag_list "arm7tdmi-s") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm710t") + list(APPEND _mtune_flag_list "arm710t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm720t") + list(APPEND _mtune_flag_list "arm720t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm740t") + list(APPEND _mtune_flag_list "arm740t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm9") + list(APPEND _mtune_flag_list "arm9") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm9tdmi") + list(APPEND _mtune_flag_list "arm9tdmi") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm920") + list(APPEND _mtune_flag_list "arm920") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm920t") + list(APPEND _mtune_flag_list "arm920t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm922t") + list(APPEND _mtune_flag_list "arm922t") + list(APPEND _march_flag_list "armv4t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm940t") + list(APPEND _mtune_flag_list "arm940t") + list(APPEND _march_flag_list "armv4t") + + elseif(TARGET_ARCHITECTURE STREQUAL "arm1020t") + list(APPEND _mtune_flag_list "arm1020t") + list(APPEND _march_flag_list "armv5t") + elseif(TARGET_ARCHITECTURE STREQUAL "arm10tdmi") + list(APPEND _mtune_flag_list "arm10tdmi") + list(APPEND _march_flag_list "armv5t") + + elseif(TARGET_ARCHITECTURE STREQUAL "arm9e") + list(APPEND _mtune_flag_list "arm9e") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm946e-s") + list(APPEND _mtune_flag_list "arm946e-s") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + 
elseif(TARGET_ARCHITECTURE STREQUAL "arm966e-s") + list(APPEND _mtune_flag_list "arm966e-s") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm968e-s") + list(APPEND _mtune_flag_list "arm968e-s") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm10e") + list(APPEND _mtune_flag_list "arm10e") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1020e") + list(APPEND _mtune_flag_list "arm1020e") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1022e") + list(APPEND _mtune_flag_list "arm1022e") + list(APPEND _march_flag_list "armv5te") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "xscale") + list(APPEND _mtune_flag_list "xscale") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "iwmmxt") + list(APPEND _mtune_flag_list "iwmmxt") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "iwmmxt2") + list(APPEND _mtune_flag_list "iwmmxt2") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "fa606te") + list(APPEND _mtune_flag_list "fa606te") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "fa626te") + list(APPEND _mtune_flag_list "fa626te") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "fmp626") + list(APPEND _mtune_flag_list "fmp626") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "fa726te") + list(APPEND _mtune_flag_list "fa726te") + list(APPEND _march_flag_list "armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "arm926ej-s") + list(APPEND _mtune_flag_list "arm926ej-s") + list(APPEND _march_flag_list "armv5tej") + list(APPEND 
_available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1026ej-s") + list(APPEND _mtune_flag_list "arm1026ej-s") + list(APPEND _march_flag_list "armv5tej") + list(APPEND _available_extension_list "fp") + + elseif(TARGET_ARCHITECTURE STREQUAL "mpcore") + list(APPEND _mtune_flag_list "mpcore") + list(APPEND _march_flag_list "armv6k") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1136j-s") + list(APPEND _mtune_flag_list "arm1136j-s") + list(APPEND _march_flag_list "armv6j") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1136jf-s") + list(APPEND _mtune_flag_list "arm1136jf-s") + list(APPEND _march_flag_list "armv6j") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1156t2-s") + list(APPEND _mtune_flag_list "arm1156t2-s") + list(APPEND _march_flag_list "armv6t2") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1156t2f-s") + list(APPEND _mtune_flag_list "arm1156t2f-s") + list(APPEND _march_flag_list "armv6t2") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1176jz-s") + list(APPEND _mtune_flag_list "arm1176jz-s") + list(APPEND _march_flag_list "armv6kz") + elseif(TARGET_ARCHITECTURE STREQUAL "arm1176jzf-s") + list(APPEND _mtune_flag_list "arm1176jzf-s") + list(APPEND _march_flag_list "armv6kz") + list(APPEND _available_extension_list "fp") + + elseif(TARGET_ARCHITECTURE STREQUAL "generic-armv7-a") + list(APPEND _mtune_flag_list "generic-armv7-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "mp" "sec" "vfpv3-d16" "vfpv3" "vfpv3-d16-fp16" "vfpv3-fp16" "vfpv4-d16" "vfpv4" "simd" "neon-fp16" "neon-vfpv4")
+ + # Cortex-A cores. Each branch appends the -mcpu/-mtune candidate name, + # the -march candidates from newest to oldest, and the extensions + # that are assumed to be available on that core. + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a5") + list(APPEND _mtune_flag_list "cortex-a5") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "mp" "sec" "neon-fp16") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a7") + list(APPEND _mtune_flag_list "cortex-a7") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a8") + list(APPEND _mtune_flag_list "cortex-a8") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "sec" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a9") + list(APPEND _mtune_flag_list "cortex-a9") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "mp" "sec" "neon-fp16") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a12") + list(APPEND _mtune_flag_list "cortex-a12") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a15") + list(APPEND _mtune_flag_list "cortex-a15") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a15.cortex-a7") + list(APPEND _mtune_flag_list "cortex-a15.cortex-a7") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a17") + list(APPEND _mtune_flag_list "cortex-a17") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a17.cortex-a7") + list(APPEND _mtune_flag_list "cortex-a17.cortex-a7") + list(APPEND _march_flag_list "armv7ve") + list(APPEND _available_extension_list "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a32") + list(APPEND _mtune_flag_list "cortex-a32") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd")
+ elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a34") + list(APPEND _mtune_flag_list "cortex-a34") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a35") + list(APPEND _mtune_flag_list "cortex-a35") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a53") + list(APPEND _mtune_flag_list "cortex-a53") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a55") + list(APPEND _mtune_flag_list "cortex-a55") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a57") + list(APPEND _mtune_flag_list "cortex-a57") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd") + # big.LITTLE pairs: the misspelled "cortext" variants are still accepted + # as TARGET_ARCHITECTURE values for backward compatibility, but the + # compiler is always probed with the correctly spelled "cortex" name. + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a57.cortex-a53" OR TARGET_ARCHITECTURE STREQUAL "cortex-a57.cortext-a53") + list(APPEND _mtune_flag_list "cortex-a57.cortex-a53") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a72") + list(APPEND _mtune_flag_list "cortex-a72") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a72.cortex-a53" OR TARGET_ARCHITECTURE STREQUAL "cortex-a72.cortext-a53") + list(APPEND _mtune_flag_list "cortex-a72.cortex-a53") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd")
+ elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a73") + list(APPEND _mtune_flag_list "cortex-a73") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a73.cortex-a35" OR TARGET_ARCHITECTURE STREQUAL "cortex-a73.cortext-a35") + list(APPEND _mtune_flag_list "cortex-a73.cortex-a35") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a73.cortex-a53" OR TARGET_ARCHITECTURE STREQUAL "cortex-a73.cortext-a53") + list(APPEND _mtune_flag_list "cortex-a73.cortex-a53") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "simd") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a75") + list(APPEND _mtune_flag_list "cortex-a75") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a75.cortex-a55" OR TARGET_ARCHITECTURE STREQUAL "cortex-a75.cortext-a55") + list(APPEND _mtune_flag_list "cortex-a75.cortex-a55") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a76") + list(APPEND _mtune_flag_list "cortex-a76") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a76.cortex-a55" OR TARGET_ARCHITECTURE STREQUAL "cortex-a76.cortext-a55") + list(APPEND _mtune_flag_list "cortex-a76.cortex-a55") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a")
+ list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a76ae") + list(APPEND _mtune_flag_list "cortex-a76ae") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a77") + list(APPEND _mtune_flag_list "cortex-a77") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a78") + list(APPEND _mtune_flag_list "cortex-a78") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a78ae") + list(APPEND _mtune_flag_list "cortex-a78ae") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a78c") + list(APPEND _mtune_flag_list "cortex-a78c") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + # The tested name must match the -mtune name appended in the branch; + # "cortex-a710" has its own branch right after this one. + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a510") + list(APPEND _mtune_flag_list "cortex-a510") + list(APPEND _march_flag_list "armv9-a") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND 
_march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-a710") + list(APPEND _mtune_flag_list "cortex-a710") + list(APPEND _march_flag_list "armv9-a") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m0") + list(APPEND _mtune_flag_list "cortex-m0") + list(APPEND _march_flag_list "armv6s-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m0plus") + list(APPEND _mtune_flag_list "cortex-m0plus") + list(APPEND _march_flag_list "armv6s-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m1") + list(APPEND _mtune_flag_list "cortex-m1") + list(APPEND _march_flag_list "armv6s-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m3") + list(APPEND _mtune_flag_list "cortex-m3") + list(APPEND _march_flag_list "armv7-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m4") + list(APPEND _mtune_flag_list "cortex-m4") + list(APPEND _march_flag_list "armv7e-m") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m7") + list(APPEND _mtune_flag_list "cortex-m7") + list(APPEND _march_flag_list "armv7e-m") + list(APPEND _available_extension_list "fp.dp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m23") + list(APPEND _mtune_flag_list "cortex-m23") + list(APPEND _march_flag_list "armv8-m.base") + list(APPEND 
_march_flag_list "armv7-m") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m33") + list(APPEND _mtune_flag_list "cortex-m33") + list(APPEND _march_flag_list "armv8-m.main") + list(APPEND _march_flag_list "armv7-m") + list(APPEND _available_extension_list "dsp" "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m35p") + list(APPEND _mtune_flag_list "cortex-m35p") + list(APPEND _march_flag_list "armv8-m.main") + list(APPEND _march_flag_list "armv7-m") + list(APPEND _available_extension_list "dsp" "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-m55") + list(APPEND _mtune_flag_list "cortex-m55") + list(APPEND _march_flag_list "armv8.1-m.main") + list(APPEND _march_flag_list "armv8-m") + list(APPEND _march_flag_list "armv7-m") + list(APPEND _available_extension_list "mve.fp" "fp.dp") + + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r4") + list(APPEND _mtune_flag_list "cortex-r4") + list(APPEND _march_flag_list "armv7-r") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r4f") + list(APPEND _mtune_flag_list "cortex-r4f") + list(APPEND _march_flag_list "armv7-r") + list(APPEND _available_extension_list "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r5") + list(APPEND _mtune_flag_list "cortex-r5") + list(APPEND _march_flag_list "armv7-r") + list(APPEND _available_extension_list "idiv" "fp") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r7") + list(APPEND _mtune_flag_list "cortex-r7") + list(APPEND _march_flag_list "armv7-r") + list(APPEND _available_extension_list "idiv" "vfpv3-d16-fp16") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r8") + list(APPEND _mtune_flag_list "cortex-r8") + list(APPEND _march_flag_list "armv7-r") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-r52") + list(APPEND _mtune_flag_list "cortex-r52") + list(APPEND _march_flag_list "armv8-r") + list(APPEND _march_flag_list "armv7-r") + list(APPEND _available_extension_list "crc" "simd" "idiv" "vfpv3-d16-fp16") + + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-x1") + list(APPEND _mtune_flag_list 
"cortex-x1") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "cortex-x2") + # Register the core name as well, so that it is offered to the + # compiler as a -mcpu candidate like every other core in this chain. + list(APPEND _mtune_flag_list "cortex-x2") + list(APPEND _march_flag_list "armv9-a") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + + elseif(TARGET_ARCHITECTURE STREQUAL "neoverse-e1") + list(APPEND _mtune_flag_list "neoverse-e1") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "neoverse-n1") + list(APPEND _mtune_flag_list "neoverse-n1") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp16" "dotprod") + elseif(TARGET_ARCHITECTURE STREQUAL "neoverse-n2") + list(APPEND _mtune_flag_list "neoverse-n2") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + elseif(TARGET_ARCHITECTURE STREQUAL "neoverse-v1") + list(APPEND _mtune_flag_list "neoverse-v1") + list(APPEND 
_march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "bf16" "fp16" "i8mm") + + # Broadcom + elseif(TARGET_ARCHITECTURE STREQUAL "brahma-b15") + list(APPEND _mtune_flag_list "brahma-b15") + elseif(TARGET_ARCHITECTURE STREQUAL "brahma-b53") + list(APPEND _mtune_flag_list "brahma-b53") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderx2") + list(APPEND _mtune_flag_list "thunderx2") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crypto") + + # Cavium + elseif(TARGET_ARCHITECTURE STREQUAL "thunderx") + list(APPEND _mtune_flag_list "thunderx") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderxt88") + list(APPEND _mtune_flag_list "thunderxt88") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderxt81") + list(APPEND _mtune_flag_list "thunderxt81") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderxt83") + list(APPEND _mtune_flag_list "thunderxt83") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto") + elseif(TARGET_ARCHITECTURE STREQUAL "thunderx2t99") + list(APPEND _mtune_flag_list "thunderx2t99") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND 
_available_extension_list "fp" "asimd" "evtstrm" "aes" "pmull" "sha1" "sha2" "crc32" "atomics" "cpuid" "asimdrdm") + + # DEC + elseif(TARGET_ARCHITECTURE STREQUAL "strongarm110") + list(APPEND _mtune_flag_list "strongarm110") + list(APPEND _march_flag_list "armv4") + elseif(TARGET_ARCHITECTURE STREQUAL "strongarm1100") + list(APPEND _mtune_flag_list "strongarm1100") + list(APPEND _march_flag_list "armv4") + + # FUJITSU + elseif(TARGET_ARCHITECTURE STREQUAL "a64fx") + list(APPEND _mtune_flag_list "a64fx") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "fp" "asimd" "evtstrm" "sha1" "sha2" "crc32" "atomics" "fphp" "asimdhp" "cpuid" "asimdrdm" "fcma" "dcpop" "sve") + + # HiSilicon + elseif(TARGET_ARCHITECTURE STREQUAL "tsv110") + list(APPEND _mtune_flag_list "tsv110") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "aes" "crypto" "fp16" "sha2") + + # Nvidia + elseif(TARGET_ARCHITECTURE STREQUAL "denver") + list(APPEND _mtune_flag_list "denver") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd" "vfpv3" "vfpv4") + elseif(TARGET_ARCHITECTURE STREQUAL "denver2") + list(APPEND _mtune_flag_list "denver2") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd" "vfpv3" "vfpv4") + elseif(TARGET_ARCHITECTURE STREQUAL "carmel") + list(APPEND _mtune_flag_list "denver") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND 
_available_extension_list "crc" "crypto" "simd" "vfpv3" "vfpv4") + + # APM + elseif(TARGET_ARCHITECTURE STREQUAL "xgene1") + list(APPEND _mtune_flag_list "xgene1") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + + # Qualcomm + elseif(TARGET_ARCHITECTURE STREQUAL "scorpion") + list(APPEND _mtune_flag_list "scorpion") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "krait") + list(APPEND _mtune_flag_list "krait") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "kryo") + list(APPEND _mtune_flag_list "kryo") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "kryo2") + list(APPEND _mtune_flag_list "kryo2") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "falkor") + list(APPEND _mtune_flag_list "falkor") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "saphira") + list(APPEND _mtune_flag_list "saphira") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crc" "crypto" "simd" "vfpv3" "vfpv4") + + # Samsung + elseif(TARGET_ARCHITECTURE STREQUAL "exynos-m1") + list(APPEND _mtune_flag_list "exynos-m1") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "crypto" "simd") + + # Marvell + elseif(TARGET_ARCHITECTURE STREQUAL "marvell-f") + list(APPEND _mtune_flag_list "marvell-f") + list(APPEND _march_flag_list 
"armv5te") + elseif(TARGET_ARCHITECTURE STREQUAL "marvell-pj4") + list(APPEND _mtune_flag_list "marvell-pj4") + list(APPEND _march_flag_list "armv7-a") + list(APPEND _available_extension_list "mp" "sec" "fp") + + # Intel + elseif(TARGET_ARCHITECTURE STREQUAL "i80200") + list(APPEND _mtune_flag_list "i80200") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa250a") + list(APPEND _mtune_flag_list "pxa250a") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa210a") + list(APPEND _mtune_flag_list "pxa210a") + elseif(TARGET_ARCHITECTURE STREQUAL "i80321-400") + list(APPEND _mtune_flag_list "i80321-400") + elseif(TARGET_ARCHITECTURE STREQUAL "i80321-600") + list(APPEND _mtune_flag_list "i80321-600") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa250b") + list(APPEND _mtune_flag_list "pxa250b") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa210b") + list(APPEND _mtune_flag_list "pxa210b") + elseif(TARGET_ARCHITECTURE STREQUAL "i80321-400-b0") + list(APPEND _mtune_flag_list "i80321-400-b0") + elseif(TARGET_ARCHITECTURE STREQUAL "i80321-600-b0") + list(APPEND _mtune_flag_list "i80321-600-b0") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa250c") + list(APPEND _mtune_flag_list "pxa250c") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa210c") + list(APPEND _mtune_flag_list "pxa210c") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa27x") + list(APPEND _mtune_flag_list "pxa27x") + elseif(TARGET_ARCHITECTURE STREQUAL "ipx425-533") + list(APPEND _mtune_flag_list "ipx425-533") + elseif(TARGET_ARCHITECTURE STREQUAL "ipx425-400") + list(APPEND _mtune_flag_list "ipx425-400") + elseif(TARGET_ARCHITECTURE STREQUAL "ipx425-266") + list(APPEND _mtune_flag_list "ipx425-266") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa32x") + list(APPEND _mtune_flag_list "pxa32x") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa930") + list(APPEND _mtune_flag_list "pxa930") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa30x") + list(APPEND _mtune_flag_list "pxa30x") + elseif(TARGET_ARCHITECTURE STREQUAL "pxa31x") + list(APPEND _mtune_flag_list "pxa31x") + 
elseif(TARGET_ARCHITECTURE STREQUAL "sa1110") + list(APPEND _mtune_flag_list "sa1110") + elseif(TARGET_ARCHITECTURE STREQUAL "ipx1200") + list(APPEND _mtune_flag_list "ipx1200") + + # Apple + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a6") + list(APPEND _mtune_flag_list "apple-a6") + list(APPEND _march_flag_list "armv7-a") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a7") + list(APPEND _mtune_flag_list "apple-a7") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crypto" "fp" "simd" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a8") + list(APPEND _mtune_flag_list "apple-a8") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crypto" "fp" "simd" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a9") + list(APPEND _mtune_flag_list "apple-a9") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crypto" "fp" "simd" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a10") + list(APPEND _mtune_flag_list "apple-a10") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "simd" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a11") + list(APPEND _mtune_flag_list "apple-a11") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "lse" "simd" "ras" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a12") + list(APPEND _mtune_flag_list "apple-a12") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" 
"zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a13") + list(APPEND _mtune_flag_list "apple-a13") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "dotprod" "fp" "fp16" "fp16fml" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "sha3" "sm4" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a14") + list(APPEND _mtune_flag_list "apple-a14") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a15") + list(APPEND _mtune_flag_list "apple-a15") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a16") + list(APPEND _mtune_flag_list "apple-a16") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-a17") + list(APPEND 
_mtune_flag_list "apple-a17") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "crc" "crypto" "fp" "fp16" "lse" "simd" "ras" "rcpc" "rdm" "sha2" "zcm" "zcz") + # The apple-m* branches list several -mcpu candidates; they are tried in + # the order given and the first one the compiler accepts is used. + elseif(TARGET_ARCHITECTURE STREQUAL "apple-m1") + list(APPEND _mtune_flag_list "apple-m1" "vortex") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "altnzcv" "ccdp" "complxnum" "crc" "crypto" "dotprod" "fp" "fp-armv8" "fp16" "fp16fml" "fptoint" "fullfp16" "jsconv" "lse" "neon" "pauth" "perfmon" "predres" "ras" "rcpc" "rdm" "sb" "sha2" "sha3" "simd" "specrestrict" "ssbs" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-m2") + list(APPEND _mtune_flag_list "apple-m2" "apple-m1" "vortex") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "altnzcv" "bf16" "ccdp" "complxnum" "crc" "crypto" "dotprod" "fp" "fp-armv8" "fp16" "fp16fml" "fptoint" "fullfp16" "i8mm" "jsconv" "lse" "neon" "pauth" "perfmon" "predres" "ras" "rcpc" "rdm" "sb" "sha2" "sha3" "simd" "specrestrict" "ssbs" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-m3") + list(APPEND _mtune_flag_list "apple-m3" "apple-m2" "apple-m1" "vortex") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + 
list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "altnzcv" "bf16" "ccdp" "complxnum" "crc" "crypto" "dotprod" "fp" "fp-armv8" "fp16" "fp16fml" "fpac" "fptoint" "fullfp16" "i8mm" "jsconv" "lse" "neon" "pauth" "perfmon" "predres" "ras" "rcpc" "rdm" "sb" "sha2" "sha3" "simd" "specrestrict" "ssbs" "zcm" "zcz") + elseif(TARGET_ARCHITECTURE STREQUAL "apple-m4") + list(APPEND _mtune_flag_list "apple-m4" "apple-m3" "apple-m2" "apple-m1" "vortex") + list(APPEND _march_flag_list "armv8.7-a") + list(APPEND _march_flag_list "armv8.6-a") + list(APPEND _march_flag_list "armv8.5-a") + list(APPEND _march_flag_list "armv8.4-a") + list(APPEND _march_flag_list "armv8.3-a") + list(APPEND _march_flag_list "armv8.2-a") + list(APPEND _march_flag_list "armv8.1-a") + list(APPEND _march_flag_list "armv8-a") + list(APPEND _available_extension_list "aes" "altnzcv" "bf16" "ccdp" "complxnum" "crc" "crypto" "dotprod" "fp" "fp-armv8" "fp16" "fp16fml" "fpac" "fptoint" "fullfp16" "i8mm" "jsconv" "lse" "neon" "pauth" "perfmon" "predres" "ras" "rcpc" "rdm" "sb" "sha2" "sha3" "simd" "sme" "sme-f64f64" "sme-i16i64" "sme2" "specrestrict" "ssbs" "wfxt" "zcm" "zcz") + + # Others + elseif(TARGET_ARCHITECTURE STREQUAL "generic") + list(APPEND _march_flag_list "generic") + elseif(TARGET_ARCHITECTURE STREQUAL "none") + # add this clause to remove it from the else clause + + else() + message(FATAL_ERROR "[OptimizeForArchitecture] Unknown target architecture: \"${TARGET_ARCHITECTURE}\". 
Please set TARGET_ARCHITECTURE to a supported value.") + endif() + + # Clean list of available extensions + list(SORT _available_extension_list) + list(REMOVE_DUPLICATES _available_extension_list) + + if(OFA_VERBOSE) + if(_march_flag_list) + string(REPLACE ";" ", " _str "${_march_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] CPU architectures (-march): " ${_str}) + endif() + if(_mtune_flag_list) + string(REPLACE ";" ", " _str "${_mtune_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] CPU microarchitectures (-mtune): " ${_str}) + endif() + if(_available_extension_list) + list(LENGTH _available_extension_list _len) + string(REPLACE ";" ", " _str "${_available_extension_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} available): ${_str}") + endif() + endif() + + # Following the recommendation from + # https://community.arm.com/developer/tools-software/ + # tools/b/tools-software-ides-blog/posts/ + # compiler-flags-across-architectures-march-mtune-and-mcpu we + # first try to use the -mcpu flag and set it a value from the + # list of -mtune flags. If that fails, e.g., if the compiler + # does not yet support the specified target, we try to set the + # -march and -mtune flags as fallback option. 
+ + # Set compiler-specific option names + set(_mcpu_flag "-mcpu=") + set(_march_flag "-march=") + # NOTE(review): unlike the two flags above, "-mtune" has no trailing "="; + # its consumer is not visible in this part of the file - confirm that the + # "=" is added where the flag is used. + set(_mtune_flag "-mtune") + + # Try each -mtune candidate as a -mcpu value; stop at the first accepted one + set(CAN_USE_MCPU FALSE) + foreach(_flag ${_mtune_flag_list}) + AddCXXCompilerFlag("${_mcpu_flag}${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + if(_ok) + set(CAN_USE_MCPU TRUE) + break() + endif() + endforeach() + + # Fallback: set -march and -mtune flags + set(_check_extension_list) + set(_check_extension_flag_list) + set(_disable_extension_flag_list) + set(_enable_extension_flag_list) + set(_ignore_extension_flag_list) + + # Remember the first -march candidate accepted by the compiler in _march + foreach(_flag ${_march_flag_list}) + AddCXXCompilerFlag("${_march_flag}${_flag}" RESULT _ok) + if(_ok) + set(_march ${_flag}) + break() + endif() + endforeach() + + # Step 2: Enable/disable feature flags based on available CPU + # features, user-defined USE_ variables and + # the capabilities of the host system's compiler and linker + file(READ ${ALICEVISION_ROOT}/../src/cmake/OFA/CheckARM.txt _checks) + string(REGEX REPLACE "[:;]" "|" _checks "${_checks}") + string(REPLACE "\n" ";" _checks "${_checks}") + + # _skip_check is used as a stack; index 0 is the innermost push_* block + set(_skip_check FALSE) + + # Iterate over the list of checks line by line + foreach (_check ${_checks}) + string(REPLACE "|" ";" _check "${_check}") + + # Parse for special lines + if ("${_check}" MATCHES "^#" ) # Skip comment + continue() + + elseif ("${_check}" MATCHES "^push_enable" ) # Start enable block + list(GET _check 1 _push_enable_list) + string(REPLACE "," ";" _push_enable_list "${_push_enable_list}") + _ofa_find(_push_enable_list "${CMAKE_CXX_COMPILER_ID}" _found) + if(_found) + list(INSERT _skip_check 0 FALSE) + else() + list(INSERT _skip_check 0 TRUE) + endif() + continue() + + elseif ("${_check}" MATCHES "^pop_enable" ) # End enable block + list(REMOVE_AT _skip_check 0) + continue() + + elseif ("${_check}" MATCHES "^push_disable" ) # Start disable block + list(GET _check 1 _push_disable_list) + string(REPLACE "," ";" _push_disable_list "${_push_disable_list}") + _ofa_find(_push_disable_list 
"${CMAKE_CXX_COMPILER_ID}" _found) + if(_found) + list(INSERT _skip_check 0 TRUE) + else() + # Compiler was not found in the list, so we keep its previous status + list(GET _skip_check 0 _skip) + list(INSERT _skip_check 0 ${_skip}) + endif() + continue() + + elseif ("${_check}" MATCHES "^pop_disable" ) # End disable block + list(REMOVE_AT _skip_check 0) + continue() + endif() + + # Skip test? + list(GET _skip_check 0 _skip) + if(_skip) + continue() + endif() + + # Extract extra CPU extensions, header files, function name, and parameters + list(GET _check 0 _check_extension_flags) + list(GET _check 1 _check_headers) + list(GET _check 2 _check_function) + list(GET _check 3 _check_params) + + # Convert list of extensions into compiler flags + string(REPLACE "," ";" _check_extension_flags "${_check_extension_flags}") + list(GET _check_extension_flags 0 _extension_flag) + list(APPEND _check_extension_flag_list "${_extension_flag}") + string(REPLACE ";" "+" _check_extra_flags "+${_check_extension_flags}") + + # Extract optional extension alias + list(LENGTH _check _len) + if(${_len} EQUAL 5) + list(GET _check 4 _extension) + else() + set(_extension "${_extension_flag}") + endif() + + list(APPEND _check_extension_list "${_extension}") + + # Define USE_<_extension_flag> variable; every character in [-.+/:= ] + # is turned into "_" (same rule as for the HAVE_ variables below), + # which requires REGEX REPLACE - a plain REPLACE would search for the + # bracket expression as a literal string and never match + set(_useVar "USE_${_extension_flag}") + string(TOUPPER "${_useVar}" _useVar) + string(REGEX REPLACE "[-.+/:= ]" "_" _useVar "${_useVar}") + + # If not specified externally, set the value of the + # USE_<_extension_flag> variable to TRUE if it is found in the list + # of available extensions and FALSE otherwise + if(NOT DEFINED ${_useVar}) + _ofa_find(_available_extension_list "${_extension}" _found) + set(${_useVar} ${_found}) + endif() + + if(${_useVar}) + # Check if the compiler supports the -march=<_march>+<_extension_flag> + # flag and can compile the provided test code with it + set(_code "\nint main() { ${_check_function}(${_check_params})\; return 0\; }") + 
AddCXXCompilerFlag("${_march_flag}${_march}+${_extension_flag}" + EXTRA_FLAGS ${_check_extra_flags} + HEADERS ${_check_headers} + CODE "${_code}" + RESULT _ok) + if(NOT ${_ok}) + # Test failed + set(${_useVar} FALSE CACHE BOOL "Use ${_extension} extension.") + else() + # Test succeeded + set(${_useVar} TRUE CACHE BOOL "Use ${_extension} extension.") + endif() + else() + # Disable extension without running tests + set(${_useVar} FALSE CACHE BOOL "Use ${_extension} extension.") + endif() + mark_as_advanced(${_useVar}) + endforeach() + + # Generate lists of enabled/disabled flags + list(REMOVE_DUPLICATES _check_extension_flag_list) + foreach(_extension_flag ${_check_extension_flag_list}) + _ofa_find(_available_extension_list "${_extension_flag}" _found) + # Rebuild the USE_ variable name with the same sanitization as in the + # check loop above so that both loops refer to the same variable + set(_useVar "USE_${_extension_flag}") + string(TOUPPER "${_useVar}" _useVar) + string(REGEX REPLACE "[-.+/:= ]" "_" _useVar "${_useVar}") + + if(${_useVar}) + # Add <_extension_flag> to list of enabled extensions (if supported) + set(_haveVar "HAVE_${_march_flag}${_march}+${_extension_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _haveVar "${_haveVar}") + if(NOT ${_haveVar}) + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Ignoring flag ${_march_flag}${_march}+${_extension_flag} because checks failed") + endif() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + continue() + endif() + list(APPEND _enable_extension_flag_list "${_extension_flag}") + else() + # Add <_extension_flag> to list of disabled extensions (if supported) + AddCXXCompilerFlag("${_march_flag}${_march}+no${_extension_flag}") + set(_haveVar "HAVE_${_march_flag}${_march}+no${_extension_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _haveVar "${_haveVar}") + if(NOT ${_haveVar}) + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Ignoring flag ${_march_flag}${_march}+no${_extension_flag} because checks failed") + endif() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + continue() + endif() + list(APPEND 
_disable_extension_flag_list "${_extension_flag}") + endif() + endforeach() + + if(OFA_VERBOSE) + # Print checked extension flags + if(_check_extension_flag_list) + list(LENGTH _check_extension_flag_list _len) + list(SORT _check_extension_flag_list) + string(REPLACE ";" ", " _str "${_check_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} checked): ${_str}") + endif() + # Print enabled extension flags + if(_enable_extension_flag_list) + list(LENGTH _enable_extension_flag_list _len) + list(SORT _enable_extension_flag_list) + string(REPLACE ";" ", " _str "${_enable_extension_flag_list}") + string(TOUPPER ${_str} _str) + if(CAN_USE_MCPU) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} available): ${_str}") + else() + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} enabled): ${_str}") + endif() + endif() + # Print disabled extension flags + if(_disable_extension_flag_list) + list(LENGTH _disable_extension_flag_list _len) + list(SORT _disable_extension_flag_list) + string(REPLACE ";" ", " _str "${_disable_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} disabled): ${_str}") + endif() + # Print ignored extension flags + if(_ignore_extension_flag_list) + list(LENGTH _ignore_extension_flag_list _len) + list(SORT _ignore_extension_flag_list) + string(REPLACE ";" ", " _str "${_ignore_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} ignored): ${_str}") + endif() + # Print unhandled extension flags + set(_unhandled_extension_list) + foreach(_extension ${_available_extension_list}) + _ofa_find(_check_extension_list "${_extension}" _found) + if(NOT _found) + list(APPEND _unhandled_extension_list ${_extension}) + endif() + endforeach() + if(_unhandled_extension_list) + list(LENGTH _unhandled_extension_list _len) + list(SORT 
_unhandled_extension_list) + string(REPLACE ";" ", " _str "${_unhandled_extension_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} unhandled): ${_str}") + endif() + endif() + + # Step 3: Set compiler-specific flags (e.g., -m/-mno-) + if(NOT CAN_USE_MCPU) + + if(MSVC AND MSVC_VERSION GREATER 1900) + _ofa_find(_enable_extension_flag_list "vfpv4" _found) + if(_found) + AddCompilerFlag("/arch:VFPv4" CXX_FLAGS ARCHITECTURE_CXX_FLAGS CXX_RESULT _found) + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "simd" _found) + if(_found) + AddCompilerFlag("/arch:ARMv7VE" CXX_FLAGS ARCHITECTURE_CXX_FLAGS CXX_RESULT _found) + endif() + endif() + foreach(_flag ${_enable_extension_flag_list}) + string(TOUPPER "${_flag}" _flag) + string(REPLACE "[-.+/:= ]" "_" _flag "__${_flag}__") + add_definitions("-D${_flag}") + endforeach(_flag) + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray") + + # TODO: Add Cray flags + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Fujitsu") + + # TODO: Add Fujitsu flags + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC") + + # TODO: Add NVHPC flags + + else() + # Others: GNU, Clang and variants + foreach(_march ${_march_flag_list}) + AddCXXCompilerFlag("-march=${_march}" RESULT _ok) + if(_ok) + set(_march_plus_extensions "${_march}") + foreach(_flag ${_enable_extension_flag_list}) + AddCXXCompilerFlag("-march=${_march_plus_extensions}+${_flag}" RESULT _ok) + if(_ok) + set(_march_plus_extensions "${_march_plus_extensions}+${_flag}") + endif(_ok) + endforeach() + foreach(_flag ${_disable_extension_flag_list}) + AddCXXCompilerFlag("-march=${_march_plus_extensions}+no${_flag}" RESULT _ok) + if(_ok) + set(_march_plus_extensions "${_march_plus_extensions}+no${_flag}") + endif(_ok) + endforeach() + AddCXXCompilerFlag("-march=${_march_plus_extensions}" FLAGS ARCHITECTURE_CXX_FLAGS) + break() + endif() + endforeach() + + # Set -mtune flag + foreach(_mtune ${_mtune_flag_list}) + 
AddCXXCompilerFlag("-mtune=${_mtune}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + if(_ok) + break() + endif() + endforeach() + endif() + + endif() # CAN_USE_MCPU + + endif() + + # Compile code with profiling instrumentation + if(TARGET_PROFILER STREQUAL "gprof") + AddCXXCompilerFlag("-pg" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + + # Remove duplicate flags + list(REMOVE_DUPLICATES ARCHITECTURE_CXX_FLAGS) + + if(OFA_VERBOSE) + string(REPLACE ";" ", " _str "${ARCHITECTURE_CXX_FLAGS}") + message(STATUS "[OptimizeForArchitecture] ARCHITECTURE_CXX_FLAGS: " ${_str}) + endif() + +endmacro(OFA_HandleArmOptions) diff --git a/src/cmake/OFA/HandlePpcOptions.cmake b/src/cmake/OFA/HandlePpcOptions.cmake new file mode 100644 index 0000000000..4def043583 --- /dev/null +++ b/src/cmake/OFA/HandlePpcOptions.cmake @@ -0,0 +1,170 @@ +#============================================================================= +# Handling of PPC / PPC64 options +# +# This is a three-step process: +# +# 1. Generate a list of available compiler flags for the specific CPU +# +# 2. Enable/disable feature flags based on available CPU features, +# used-defined USE_ variables and the capabilities of the +# host system's compiler and linker +# +# 3. 
Set compiler-specific flags (e.g., -m/-mno-)
+#=============================================================================
+
+include(OFA/CommonMacros)
+
+macro(OFA_HandlePpcOptions)
+  # Reads TARGET_ARCHITECTURE, TARGET_PROFILER and OFA_VERBOSE; every flag probe passes ARCHITECTURE_CXX_FLAGS as its FLAGS list.
+  # Special treatment for "native" flag
+  if(TARGET_ARCHITECTURE STREQUAL "native")
+    if(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC" OR
+        CMAKE_CXX_COMPILER_ID MATCHES "PGI")
+      # NVidia HPC / PGI
+      AddCXXCompilerFlag("-tp=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "XL")
+      # IBM XL (on Linux/AIX)
+      AddCXXCompilerFlag("-qarch=auto" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+    else()
+      # Others: GNU, Clang and variants (their PowerPC back ends take -mcpu, there is no -march)
+      AddCXXCompilerFlag("-mcpu=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+    endif()
+
+    if(NOT _ok)
+      message(FATAL_ERROR "[OptimizeForArchitecture] An error occurred while setting the \"native\" flag.")
+    endif()
+
+  elseif(NOT TARGET_ARCHITECTURE STREQUAL "none")
+
+    # Step 1: Generate a list of compiler flags for the specific CPU
+    set(_march_flag_list)
+
+    # Define macros for PowerPC64 (each one appends its own name(s), then chains to an older CPU as fallback)
+    macro(_power3)
+      list(APPEND _march_flag_list "power3")
+    endmacro()
+    macro(_power4)
+      list(APPEND _march_flag_list "power4")
+      _power3()
+    endmacro()
+    macro(_power5)
+      list(APPEND _march_flag_list "power5")
+      _power4()
+    endmacro()
+    macro(_power5plus)
+      list(APPEND _march_flag_list "power5+")
+      _power5()
+    endmacro()
+    macro(_power6)
+      list(APPEND _march_flag_list "power6")
+      _power5()
+    endmacro()
+    macro(_power6x)
+      list(APPEND _march_flag_list "power6x")
+      _power6()
+    endmacro()
+    macro(_power7)
+      list(APPEND _march_flag_list "power7")
+      _power6()
+    endmacro()
+    macro(_power8)
+      list(APPEND _march_flag_list "pwr8")
+      list(APPEND _march_flag_list "power8")
+      _power7()
+    endmacro()
+    macro(_power9)
+      list(APPEND _march_flag_list "pwr9")
+      list(APPEND _march_flag_list "power9")
+      _power8()
+    endmacro()
+    macro(_power10)
+      list(APPEND _march_flag_list "pwr10")
+      list(APPEND _march_flag_list "power10")
+      _power9()
+    endmacro()
+
+    # PowerPC64
+    if(TARGET_ARCHITECTURE STREQUAL "power3")
+      _power3()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power4")
+      _power4()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power5")
+      _power5()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power5+")
+      _power5plus()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power6")
+      _power6()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power6x")
+      _power6x()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power7")
+      _power7()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power8")
+      _power8()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power9")
+      _power9()
+    elseif(TARGET_ARCHITECTURE STREQUAL "power10")
+      _power10()
+
+    # Others
+    elseif(TARGET_ARCHITECTURE STREQUAL "generic")
+      list(APPEND _march_flag_list "generic")
+    elseif(TARGET_ARCHITECTURE STREQUAL "none")
+      # "none" is listed explicitly so that it never reaches the FATAL_ERROR branch below
+
+    else()
+      message(FATAL_ERROR "[OptimizeForArchitecture] Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
+    endif()
+
+    # Step 2: We do not enable/disable feature flags for PPC/PPC64 CPUs
+
+    # Step 3: Set compiler-specific flags (e.g., -m/-mno-)
+    if(CMAKE_CXX_COMPILER_ID MATCHES "XL")
+
+      # Set -qarch flag (-mcpu is probed as well; only the -qarch result decides whether the search stops)
+      foreach(_flag ${_march_flag_list})
+        AddCXXCompilerFlag("-mcpu=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good)
+        AddCXXCompilerFlag("-qarch=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good)
+        if(_good)
+          break()
+        endif()
+      endforeach()
+
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC"
+        OR CMAKE_CXX_COMPILER_ID MATCHES "PGI")
+
+      # Set -tp flag (first accepted entry of the list wins)
+      foreach(_flag ${_march_flag_list})
+        AddCXXCompilerFlag("-tp=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good)
+        if(_good)
+          break()
+        endif()
+      endforeach()
+
+    else()
+      # Others: GNU, Clang and variants
+
+      # Set -mcpu flag (the PowerPC back ends of GCC/Clang have no -march option; first accepted entry wins)
+      foreach(_flag ${_march_flag_list})
+        AddCXXCompilerFlag("-mcpu=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good)
+        if(_good)
+          break()
+        endif()
+      endforeach()
+
+    endif()
+  endif()
+
+  # Compile code with profiling instrumentation
+  if(TARGET_PROFILER STREQUAL "gprof")
+    AddCXXCompilerFlag("-pg" FLAGS ARCHITECTURE_CXX_FLAGS)
+  endif()
+
+  # Remove duplicate flags
+  list(REMOVE_DUPLICATES ARCHITECTURE_CXX_FLAGS)
+
+  if(OFA_VERBOSE)
+    string(REPLACE ";" ", " _str "${ARCHITECTURE_CXX_FLAGS}")
+    message(STATUS "[OptimizeForArchitecture] ARCHITECTURE_CXX_FLAGS: " ${_str})
+  endif()
+
+endmacro(OFA_HandlePpcOptions)
diff --git a/src/cmake/OFA/HandleX86Options.cmake b/src/cmake/OFA/HandleX86Options.cmake
new file mode 100644
index 0000000000..d8f523a9d2
--- /dev/null
+++ b/src/cmake/OFA/HandleX86Options.cmake
@@ -0,0 +1,899 @@
+#=============================================================================
+# Handling of X86 / X86_64 options
+#
+# This is a three-step process:
+#
+# 1. Generate a list of available compiler flags for the specific CPU
+#
+# 2. Enable/disable feature flags based on available CPU features,
+#    user-defined USE_ variables and the capabilities of the
+#    host system's compiler and linker
+#
+# 3. 
Set compiler-specific flags (e.g., -m/-mno-)
+#=============================================================================
+
+include(OFA/CommonMacros)
+
+#Requires CMake 3.2
+
+macro(OFA_HandleX86Options)
+
+  # Special treatment for "native" flag: pick the host-detection switch of the active compiler
+  if(TARGET_ARCHITECTURE STREQUAL "native")
+    if(MSVC)
+      # MSVC (on Windows)
+      message(FATAL_ERROR "[OptimizeForArchitecture] MSVC does not support \"native\" flag.")
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel"
+        OR CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
+      if(WIN32)
+        # Intel (on Windows)
+        AddCXXCompilerFlag("/QxHOST" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+      else()
+        # Intel (on Linux)
+        AddCXXCompilerFlag("-xHOST" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+      endif()
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "NVHPC"
+        OR CMAKE_CXX_COMPILER_ID MATCHES "PGI")
+      # NVidia HPC / PGI (on Linux/Windows)
+      AddCXXCompilerFlag("-tp=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "SunPro")
+      # Sun/Oracle Studio (on Linux/Sun OS)
+      AddCXXCompilerFlag("-native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "Cray")
+      # Cray (on Linux): abort; FATAL_ERROR must be a bare keyword (no comma) to be treated as the message mode
+      message(FATAL_ERROR "[OptimizeForArchitecture] Cray compiler does not support \"native\" flag.")
+    else()
+      # Others: GNU, Clang and variants
+      AddCXXCompilerFlag("-march=native" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok)
+    endif()
+
+    if(NOT _ok)
+      message(FATAL_ERROR "[OptimizeForArchitecture] An error occurred while setting the \"native\" flag.")
+    endif()
+
+  elseif(NOT TARGET_ARCHITECTURE STREQUAL "none")
+
+    # Step 1: Generate a list of compiler flags for the specific CPU
+    set(_march_flag_list)
+    set(_available_extension_list)
+
+    # Define macros for Intel
+    macro(_nehalem)
+      list(APPEND _march_flag_list "nehalem")
+      list(APPEND _march_flag_list "corei7")
+      list(APPEND _march_flag_list "core2")
+      list(APPEND _available_extension_list "cx16" "fxsr" "sahf" "mmx" "popcnt" "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
+    
endmacro() + macro(_westmere) + list(APPEND _march_flag_list "westmere") + _nehalem() + list(APPEND _available_extension_list "aes" "pclmul") + endmacro() + macro(_sandybridge) + list(APPEND _march_flag_list "sandybridge") + list(APPEND _march_flag_list "corei7-avx") + _westmere() + list(APPEND _available_extension_list "avx" "xsave") + endmacro() + macro(_ivybridge) + list(APPEND _march_flag_list "ivybridge") + list(APPEND _march_flag_list "core-avx-i") + _sandybridge() + list(APPEND _available_extension_list "f16c" "fsgsbase" "rdrnd" ) + endmacro() + macro(_haswell) + list(APPEND _march_flag_list "haswell") + list(APPEND _march_flag_list "core-avx2") + _ivybridge() + list(APPEND _available_extension_list "avx2" "bmi" "bmi2" "fma" "hle" "lzcnt" "movbe") + endmacro() + macro(_broadwell) + list(APPEND _march_flag_list "broadwell") + _haswell() + list(APPEND _available_extension_list "adx" "prfchw" "rdseed") + endmacro() + macro(_skylake) + list(APPEND _march_flag_list "skylake") + _broadwell() + list(APPEND _available_extension_list "clflushopt" "sgx" "xsavec" "xsaves") + endmacro() + macro(_skylake_avx512) + list(APPEND _march_flag_list "skylake-avx512") + _skylake() + list(APPEND _available_extension_list "avx512bw" "avx512cd" "avx512dq" "avx512f" "avx512vl" "clwb" "pku") + endmacro() + macro(_cascadelake) + list(APPEND _march_flag_list "cascadelake") + _skylake_avx512() + list(APPEND _available_extension_list "avx512vnni") + endmacro() + macro(_cooperlake) + list(APPEND _march_flag_list "cooperlake") + _cascadelake() + list(APPEND _available_extension_list "avx512bf16") + endmacro() + macro(_cannonlake) + list(APPEND _march_flag_list "cannonlake") + _skylake() + list(APPEND _available_extension_list "avx512bw" "avx512cd" "avx512dq" "avx512f" "avx512ifma" "avx512vbmi" "avx512vl" "evex512" "pku" "sha") + endmacro() + macro(_icelake) + list(APPEND _march_flag_list "icelake-client") + _cannonlake() + list(APPEND _available_extension_list "avx512bitalg" "avx512vbmi2" 
"avx512vnni" "avx512vpopcntdq" "gfni" "rdpid" "vaes" "vpclmulqdq") + endmacro() + macro(_icelake_avx512) + list(APPEND _march_flag_list "icelake-server") + _icelake() + list(APPEND _available_extension_list "clwb" "pconfig" "wbnoinvd") + endmacro() + macro(_tigerlake) + list(APPEND _march_flag_list "tigerlake") + _icelake() + list(APPEND _available_extension_list "avx512vp2intersect" "clwb" "kl" "movdir64b" "movdiri" "widekl") + endmacro() + macro(_cometlake) + list(APPEND _march_flag_list "cometlake") + _tigerlake() + endmacro() + macro(_alderlake) + list(APPEND _march_flag_list "alderlake") + _skylake() + list(APPEND _available_extension_list "avxvnni" "cldemote" "clwb" "gfni" "hreset" "kl" "movdir64b" "movdiri" "pconfig" "pku" "ptwrite" "rdpid" "serialize" "sha" "vaes" "vpclmulqdq" "waitpkg" "widekl") + endmacro() + macro(_rocketlake) + list(APPEND _march_flag_list "rocketlake") + _skylake_avx512() + list(APPEND _available_extension_list "avx512bitalg" "avx512ifma" "avx512vbmi" "avx512vbmi2" "avx512vnni" "avx512vpopcntdq" "gfni" "rdpid" "sgx" "sha" "vaes" "vpclmulqdq") + endmacro() + macro(_raptorlake) + list(APPEND _march_flag_list "raptorlake") + _skylake() + list(APPEND _available_extension_list "avxvnni" "cldemote" "clwb" "gfni" "hreset" "kl" "movdir64b" "movdiri" "pconfig" "pku" "ptwrite" "rdpid" "serialize" "sha" "vaes" "vpclmulqdq" "waitpkg" "widekl") + endmacro() + macro(_meteorlake) + list(APPEND _march_flag_list "meteorlake") + _raptorlake() + endmacro() + macro(_arrowlake) + list(APPEND _march_flag_list "arrowlake") + _meteorlake() + list(APPEND _available_extension_list "avxifma" "avxneconvert" "avxvnniint8" "cmpccxadd" "uintr") + endmacro() + macro(_lunarlake) + list(APPEND _march_flag_list "lunarlake") + _arrowlake() + list(APPEND _available_extension_list "avxvnniint16" "sha512" "sm3" "sm4") + endmacro() + macro(_pantherlake) + list(APPEND _march_flag_list "pantherlake") + _lunarlake() + list(APPEND _available_extension_list "prefetchi") + 
endmacro() + macro(_sapphirerapids) + list(APPEND _march_flag_list "sapphirerapids") + _skylake_avx512() + list(APPEND _available_extension_list "amx-bf16" "amx-int8" "amx-tile" "avx512bf16" "avx512bitalg" "avx512fp16" "avx512ifma" "avx512vbmi" "avx512vbmi2" "avx512vnni" "avx512vpopcntdq" "avxvnni" "cldemote" "enqcmd" "gfni" "movdir64b" "movdiri" "pconfig" "ptwrite" "rdpid" "serialize" "sha" "tsxldtrk" "uintr" "vaes" "vpclmulqdq" "waitpkg" "wbnoinvd") + endmacro() + macro(_emeraldrapids) + list(APPEND _march_flag_list "emeraldrapids") + _sapphirerapids() + endmacro() + macro(_graniterapids) + list(APPEND _march_flag_list "graniterapids") + _emeraldrapids() + list(APPEND _available_extension_list "amx-fp16" "prefetchi") + endmacro() + macro(_sierraforest) + list(APPEND _march_flag_list "sierraforest") + _skylake() + list(APPEND _available_extension_list "avxifma" "avxneconvert" "avxvnni" "avxvnniint8" "cldemote" "clwb" "cmpccxadd" "enqcmd" "gfni" "hreset" "kl" "movdir64b" "movdiri" "pconfig" "pku" "ptwrite" "rdpid" "serialize" "sha" "uintr" "vaes" "vpclmulqdq" "waitpkg" "widekl") + endmacro() + macro(_clearwaterforest) + list(APPEND _march_flag_list "clearwaterforest") + _sierraforest() + list(APPEND _available_extension_list "avxvnniint16" "prefetchi" "sha512" "sm3" "sm4" "usermsr") + endmacro() + macro(_knightslanding) + list(APPEND _march_flag_list "knl") + _broadwell() + list(APPEND _available_extension_list "avx512f" "avx512pf" "avx512er" "avx512cd") + endmacro() + macro(_knightsmill) + list(APPEND _march_flag_list "knm") + _broadwell() + list(APPEND _available_extension_list "avx512f" "avx512pf" "avx512er" "avx512cd" "avx5124fmaps" "avx5124vnni" "avx512vpopcntdq") + endmacro() + macro(_silvermont) + list(APPEND _march_flag_list "silvermont") + _westmere() + list(APPEND _available_extension_list "rdrnd") + endmacro() + macro(_goldmont) + list(APPEND _march_flag_list "goldmont") + _silvermont() + list(APPEND _available_extension_list "rdseed") + endmacro() + 
macro(_goldmont_plus) + list(APPEND _march_flag_list "goldmont-plus") + _goldmont() + list(APPEND _available_extension_list "rdpid") + endmacro() + macro(_tremont) + list(APPEND _march_flag_list "tremont") + _goldmont_plus() + endmacro() + + # Define macros for AMD + macro(_k8) + list(APPEND _march_flag_list "k8") + list(APPEND _available_extension_list "mmx" "3dnow" "sse" "sse2") + endmacro() + macro(_k8_sse3) + list(APPEND _march_flag_list "k8-sse3") + _k8() + list(APPEND _available_extension_list "sse3") + endmacro() + macro(_barcelona) # amd10h + list(APPEND _march_flag_list "barcelona") + _k8_sse3() + list(APPEND _available_extension_list "sse4a" "abm") + endmacro() + macro(_amd14h) + list(APPEND _march_flag_list "btver1") + _barcelona() + list(APPEND _available_extension_list "cx16" "ssse3") + endmacro() + macro(_bulldozer) # amd15h + list(APPEND _march_flag_list "bdver1") + _amd14h() + list(APPEND _available_extension_list "sse4.1" "sse4.2" "avx" "xop" "fma4" "lwp" "aes" "pclmul") + endmacro() + macro(_piledriver) + list(APPEND _march_flag_list "bdver2") + _bulldozer() + list(APPEND _available_extension_list "fma" "f16c" "bmi" "tbm") + endmacro() + macro(_steamroller) + list(APPEND _march_flag_list "bdver3") + _piledriver() + list(APPEND _available_extension_list "fsgsbase") + endmacro() + macro(_excavator) + list(APPEND _march_flag_list "bdver4") + _steamroller() + list(APPEND _available_extension_list "bmi2" "avx2" "movbe") + endmacro() + macro(_amd16h) + list(APPEND _march_flag_list "btver2") + _amd14h() + list(APPEND _available_extension_list "movbe" "sse4.1" "sse4.2" "avx" "f16c" "bmi" "pclmul" "aes") + endmacro() + macro(_zen) + list(APPEND _march_flag_list "znver1") + _amd16h() + list(APPEND _available_extension_list "bmi2" "fma" "fsgsbase" "avx2" "adcx" "rdseed" "mwaitx" "sha" "clzero" "xsavec" "xsaves" "clflushopt" "popcnt") + endmacro() + macro(_zen2) + list(APPEND _march_flag_list "znver2") + _zen() + list(APPEND _available_extension_list "clwb" 
"rdpid" "wbnoinvd") + endmacro() + macro(_zen3) + list(APPEND _march_flag_list "znver3") + _zen2() + list(APPEND _available_extension_list "pku" "vpclmulqdq" "vaes") + endmacro() + macro(_zen4) + list(APPEND _march_flag_list "znver4") + _zen3() + list(APPEND _available_extension_list "avx512f" "avx512cd" "avx512vl" "avx512dq" "avx512bw" "avx512ifma" "avx512vbmi" "avx512vpopcntdq" "avx512bitalg" "avx512vbmi2" "avx512vnni" "avx512bf16") + endmacro() + + # Intel + if(TARGET_ARCHITECTURE STREQUAL "core" OR TARGET_ARCHITECTURE STREQUAL "core2") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "mmx" "sse" "sse2" "sse3") + elseif(TARGET_ARCHITECTURE STREQUAL "merom") + list(APPEND _march_flag_list "merom") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "mmx" "sse" "sse2" "sse3" "ssse3") + elseif(TARGET_ARCHITECTURE STREQUAL "penryn") + list(APPEND _march_flag_list "penryn") + list(APPEND _march_flag_list "core2") + list(APPEND _available_extension_list "mmx" "sse" "sse2" "sse3" "ssse3") + message(STATUS "[OptimizeForArchitecture] Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.") + if(_cpu_flags MATCHES "sse4_1") + message(STATUS "[OptimizeForArchitecture] SSE4.1: enabled (auto-detected from this computer's CPU flags)") + list(APPEND _available_extension_list "sse4.1") + else() + message(STATUS "[OptimizeForArchitecture] SSE4.1: disabled (auto-detected from this computer's CPU flags)") + endif() + elseif(TARGET_ARCHITECTURE STREQUAL "knm") + _knightsmill() + elseif(TARGET_ARCHITECTURE STREQUAL "knl") + _knightslanding() + elseif(TARGET_ARCHITECTURE STREQUAL "pantherlake") + _pantherlake() + elseif(TARGET_ARCHITECTURE STREQUAL "lunarlake") + _lunarlake() + elseif(TARGET_ARCHITECTURE STREQUAL "arrowlake") + _arrowlake() + elseif(TARGET_ARCHITECTURE STREQUAL "meteorlake") + _meteorlake() + elseif(TARGET_ARCHITECTURE STREQUAL "raptorlake") + _raptorlake() + 
elseif(TARGET_ARCHITECTURE STREQUAL "rocketlake") + _rocketlake() + elseif(TARGET_ARCHITECTURE STREQUAL "clearwaterforest") + _clearwaterforest() + elseif(TARGET_ARCHITECTURE STREQUAL "sierraforest") + _sierraforest() + elseif(TARGET_ARCHITECTURE STREQUAL "graniterapids") + _graniterapids() + elseif(TARGET_ARCHITECTURE STREQUAL "emeraldrapids") + _emeraldrapids() + elseif(TARGET_ARCHITECTURE STREQUAL "sapphirerapids") + _sapphirerapids() + elseif(TARGET_ARCHITECTURE STREQUAL "alderlake") + _alderlake() + elseif(TARGET_ARCHITECTURE STREQUAL "cometlake") + _cometlake() + elseif(TARGET_ARCHITECTURE STREQUAL "tigerlake") + _tigerlake() + elseif(TARGET_ARCHITECTURE STREQUAL "icelake") + _icelake() + elseif(TARGET_ARCHITECTURE STREQUAL "icelake-xeon" OR TARGET_ARCHITECTURE STREQUAL "icelake-avx512") + _icelake_avx512() + elseif(TARGET_ARCHITECTURE STREQUAL "cannonlake") + _cannonlake() + elseif(TARGET_ARCHITECTURE STREQUAL "cooperlake") + _cooperlake() + elseif(TARGET_ARCHITECTURE STREQUAL "cascadelake") + _cascadelake() + elseif(TARGET_ARCHITECTURE STREQUAL "kabylake") + _skylake() + elseif(TARGET_ARCHITECTURE STREQUAL "skylake-xeon" OR TARGET_ARCHITECTURE STREQUAL "skylake-avx512") + _skylake_avx512() + elseif(TARGET_ARCHITECTURE STREQUAL "skylake") + _skylake() + elseif(TARGET_ARCHITECTURE STREQUAL "broadwell") + _broadwell() + elseif(TARGET_ARCHITECTURE STREQUAL "haswell") + _haswell() + elseif(TARGET_ARCHITECTURE STREQUAL "ivybridge") + _ivybridge() + elseif(TARGET_ARCHITECTURE STREQUAL "sandybridge") + _sandybridge() + elseif(TARGET_ARCHITECTURE STREQUAL "westmere") + _westmere() + elseif(TARGET_ARCHITECTURE STREQUAL "nehalem") + _nehalem() + elseif(TARGET_ARCHITECTURE STREQUAL "tremont") + _tremont() + elseif(TARGET_ARCHITECTURE STREQUAL "goldmont-plus") + _goldmont_plus() + elseif(TARGET_ARCHITECTURE STREQUAL "goldmont") + _goldmont() + elseif(TARGET_ARCHITECTURE STREQUAL "silvermont") + _silvermont() + elseif(TARGET_ARCHITECTURE STREQUAL "bonnell") + list(APPEND 
_march_flag_list "bonnell")
+      list(APPEND _march_flag_list "atom")
+      list(APPEND _march_flag_list "core2")
+      list(APPEND _available_extension_list "sse" "sse2" "sse3" "ssse3")
+    elseif(TARGET_ARCHITECTURE STREQUAL "atom")
+      list(APPEND _march_flag_list "atom")
+      list(APPEND _march_flag_list "core2")
+      list(APPEND _available_extension_list "sse" "sse2" "sse3" "ssse3")
+
+    # AMD (every branch calls the matching underscore-prefixed helper macro defined above)
+    elseif(TARGET_ARCHITECTURE STREQUAL "k8")
+      _k8()
+    elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3")
+      _k8_sse3()
+    elseif(TARGET_ARCHITECTURE STREQUAL "barcelona" OR
+        TARGET_ARCHITECTURE STREQUAL "istanbul" OR
+        TARGET_ARCHITECTURE STREQUAL "magny-cours")
+      _barcelona()
+    elseif(TARGET_ARCHITECTURE STREQUAL "amd14h")
+      _amd14h()
+    elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer" OR
+        TARGET_ARCHITECTURE STREQUAL "interlagos")
+      _bulldozer()
+    elseif(TARGET_ARCHITECTURE STREQUAL "piledriver")
+      _piledriver()
+    elseif(TARGET_ARCHITECTURE STREQUAL "steamroller")
+      _steamroller()
+    elseif(TARGET_ARCHITECTURE STREQUAL "excavator")
+      _excavator()
+    elseif(TARGET_ARCHITECTURE STREQUAL "amd16h")
+      _amd16h()
+    elseif(TARGET_ARCHITECTURE STREQUAL "zen")
+      _zen()
+    elseif(TARGET_ARCHITECTURE STREQUAL "zen2")
+      _zen2()
+    elseif(TARGET_ARCHITECTURE STREQUAL "zen3")
+      _zen3()
+    elseif(TARGET_ARCHITECTURE STREQUAL "zen4")
+      _zen4()
+
+    # Others
+    elseif(TARGET_ARCHITECTURE STREQUAL "generic")
+      list(APPEND _march_flag_list "generic")
+      list(APPEND _available_extension_list "sse")
+      list(APPEND _available_extension_list "sse2")
+    elseif(TARGET_ARCHITECTURE STREQUAL "none")
+      # "none" is listed explicitly so that it never reaches the FATAL_ERROR branch below
+
+    else()
+      message(FATAL_ERROR "[OptimizeForArchitecture] Unknown target architecture: \"${TARGET_ARCHITECTURE}\". 
Please set TARGET_ARCHITECTURE to a supported value.")
+    endif()
+
+    # Clean list of available extensions
+    list(SORT _available_extension_list)
+    list(REMOVE_DUPLICATES _available_extension_list)
+
+    if(OFA_VERBOSE)
+      if(_march_flag_list)
+        string(REPLACE ";" ", " _str "${_march_flag_list}")
+        string(TOUPPER "${_str}" _str)
+        message(STATUS "[OptimizeForArchitecture] CPU architectures: " ${_str})
+      endif()
+      if(_available_extension_list)
+        list(LENGTH _available_extension_list _len)
+        string(REPLACE ";" ", " _str "${_available_extension_list}")
+        string(TOUPPER "${_str}" _str)
+        message(STATUS "[OptimizeForArchitecture] Extensions (${_len} available): ${_str}")
+      endif()
+    endif()
+
+    set(_check_extension_list)
+    set(_check_extension_flag_list)
+    set(_disable_extension_flag_list)
+    set(_enable_extension_flag_list)
+    set(_ignore_extension_flag_list)
+
+    # Set compiler-specific option names; _disable_flag stays undefined when the compiler has no "disable" spelling
+    if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+      set(_enable_flag "/arch:")
+      unset(_disable_flag)
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "SunPro")
+      set(_enable_flag "-xarch=")
+      unset(_disable_flag)
+    else()
+      set(_enable_flag "-m")
+      set(_disable_flag "-mno-")
+    endif()
+
+    # Step 2: Enable/disable feature flags based on available CPU
+    #         features, user-defined USE_<extension> variables and
+    #         the capabilities of the host system's compiler and linker
+    file(READ "${ALICEVISION_ROOT}/../src/cmake/OFA/CheckX86.txt" _checks)
+    string(REGEX REPLACE "[:;]" "|" _checks "${_checks}")
+    string(REPLACE "\n" ";" _checks "${_checks}")
+
+    set(_skip_check FALSE)
+
+    # Iterate over the list of checks line by line
+    foreach (_check ${_checks})
+      string(REPLACE "|" ";" _check "${_check}")
+
+      # Parse for special lines
+      if ("${_check}" MATCHES "^#" ) # Skip comment
+        continue()
+
+      elseif ("${_check}" MATCHES "^push_enable" ) # Start enable block
+        list(GET _check 1 _push_enable_list)
+        string(REPLACE "," ";" _push_enable_list "${_push_enable_list}")
+        _ofa_find(_push_enable_list "${CMAKE_CXX_COMPILER_ID}" _found)
+        
if(_found) + list(INSERT _skip_check 0 FALSE) + else() + list(INSERT _skip_check 0 TRUE) + endif() + continue() + + elseif ("${_check}" MATCHES "^pop_enable" ) # End enable block + list(REMOVE_AT _skip_check 0) + continue() + + elseif ("${_check}" MATCHES "^push_disable" ) # Start disable block + list(GET _check 1 _push_disable_list) + string(REPLACE "," ";" _push_disable_list "${_push_disable_list}") + _ofa_find(_push_disable_list "${CMAKE_CXX_COMPILER_ID}" _found) + if(_found) + list(INSERT _skip_check 0 TRUE) + else() + # Compiler was not found in the list, so we keep its previous status + list(GET _skip_check 0 _skip) + list(INSERT _skip_check 0 ${_skip}) + endif() + continue() + + elseif ("${_check}" MATCHES "^pop_disable" ) # End disable block + list(REMOVE_AT _skip_check 0) + continue() + endif() + + # Skip test? + list(GET _skip_check 0 _skip) + if(_skip) + continue() + endif() + + # Extract extra CPU extensions, header files, function name, and parameters + list(GET _check 0 _check_extension_flags) + list(GET _check 1 _check_headers) + list(GET _check 2 _check_function) + list(GET _check 3 _check_params) + + # Convert list of extensions into compiler flags + string(REPLACE "," ";" _check_extension_flags "${_check_extension_flags}") + list(GET _check_extension_flags 0 _extension_flag) + list(APPEND _check_extension_flag_list "${_extension_flag}") + string(REPLACE ";" " ${_enable_flag}" _check_extra_flags " ${_enable_flag}${_check_extension_flags}") + + # Extract optional extension alias + list(LENGTH _check _len) + if(${_len} EQUAL 5) + list(GET _check 4 _extension) + else() + set(_extension "${_extension_flag}") + endif() + + list(APPEND _check_extension_list "${_extension}") + + # Define USE_<_extension_flag> variable + set(_useVar "USE_${_extension_flag}") + string(TOUPPER "${_useVar}" _useVar) + string(REPLACE "[-.+/:= ]" "_" _useVar "${_useVar}") + + # If not specified externally, set the value of the + # USE_<_extension_flag> variable to TRUE if it is 
found in the list + # of available extensions and FALSE otherwise + if(NOT DEFINED ${_useVar}) + _ofa_find(_available_extension_list "${_extension}" _found) + set(${_useVar} ${_found}) + endif() + + if(${_useVar}) + # Check if the compiler supports the -m<_extension_flag> + # flag and can compile the provided test code with it + set(_code "\nint main() { ${_check_function}(${_check_params})\; return 0\; }") + AddCXXCompilerFlag("${_enable_flag}${_extension_flag}" + EXTRA_FLAGS ${_check_extra_flags} + HEADERS ${_check_headers} + CODE "${_code}" + RESULT _ok) + if(NOT ${_ok}) + # Test failed + set(${_useVar} FALSE CACHE BOOL "Use ${_extension} extension.") + else() + # Test succeeded + set(${_useVar} TRUE CACHE BOOL "Use ${_extension} extension.") + endif() + else() + # Disable extension without running tests + set(${_useVar} FALSE CACHE BOOL "Use ${_extension} extension.") + endif() + mark_as_advanced(${_useVar}) + endforeach() + + # Generate lists of enabled/disabled flags + list(REMOVE_DUPLICATES _check_extension_flag_list) + foreach(_extension_flag ${_check_extension_flag_list}) + _ofa_find(_available_extension_list "${_extension_flag}" _found) + set(_useVar "USE_${_extension_flag}") + string(TOUPPER "${_useVar}" _useVar) + string(REPLACE "[-.+/:= ]" "_" _useVar "${_useVar}") + + if(${_useVar}) + # Add <_extension_flag> to list of enabled extensions (if supported) + set(_haveVar "HAVE_${_enable_flag}${_extension_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _haveVar "${_haveVar}") + if(NOT ${_haveVar}) + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Ignoring flag ${_enable_flag}${_extension_flag} because checks failed") + endif() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + continue() + endif() + list(APPEND _enable_extension_flag_list "${_extension_flag}") + elseif(DEFINED _disable_flag) + # Add <_extension_flag> to list of disabled extensions (if supported) + AddCXXCompilerFlag("${_disable_flag}${_extension_flag}") + 
set(_haveVar "HAVE_${_disable_flag}${_extension_flag}") + string(REGEX REPLACE "[-.+/:= ]" "_" _haveVar "${_haveVar}") + if(NOT ${_haveVar}) + if(OFA_VERBOSE) + message(STATUS "[OptimizeForArchitecture] Ignoring flag ${_disable_flag}${_extension_flag} because checks failed") + endif() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + continue() + endif() + list(APPEND _disable_extension_flag_list "${_extension_flag}") + else() + list(APPEND _ignore_extension_flag_list "${_extension_flag}") + endif() + endforeach() + + if(OFA_VERBOSE) + # Print checked extension flags + if(_check_extension_flag_list) + list(LENGTH _check_extension_flag_list _len) + list(SORT _check_extension_flag_list) + string(REPLACE ";" ", " _str "${_check_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} checked): ${_str}") + endif() + # Print enabled extension flags + if(_enable_extension_flag_list) + list(LENGTH _enable_extension_flag_list _len) + list(SORT _enable_extension_flag_list) + string(REPLACE ";" ", " _str "${_enable_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} enabled): ${_str}") + endif() + # Print disabled extension flags + if(_disable_extension_flag_list) + list(LENGTH _disable_extension_flag_list _len) + list(SORT _disable_extension_flag_list) + string(REPLACE ";" ", " _str "${_disable_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} disabled): ${_str}") + endif() + # Print ignored extension flags + if(_ignore_extension_flag_list) + list(LENGTH _ignore_extension_flag_list _len) + list(SORT _ignore_extension_flag_list) + string(REPLACE ";" ", " _str "${_ignore_extension_flag_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} ignored): ${_str}") + endif() + # Print unhandled extension flags + 
set(_unhandled_extension_list) + foreach(_extension ${_available_extension_list}) + _ofa_find(_check_extension_list "${_extension}" _found) + if(NOT _found) + list(APPEND _unhandled_extension_list ${_extension}) + endif() + endforeach() + if(_unhandled_extension_list) + list(LENGTH _unhandled_extension_list _len) + list(SORT _unhandled_extension_list) + string(REPLACE ";" ", " _str "${_unhandled_extension_list}") + string(TOUPPER ${_str} _str) + message(STATUS "[OptimizeForArchitecture] Extensions (${_len} unhandled): ${_str}") + endif() + endif() + + # Step 3: Set compiler-specific flags (e.g., -m/-mno-) + if(MSVC AND MSVC_VERSION GREATER 1700) + _ofa_find(_enable_extension_flag_list "avx512f" _found) + if(_found) + AddCXXCompilerFlag("/arch:AVX512" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _found) + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "avx2" _found) + if(_found) + AddCXXCompilerFlag("/arch:AVX2" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _found) + endif() + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "avx" _found) + if(_found) + AddCXXCompilerFlag("/arch:AVX" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _found) + endif() + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "sse2" _found) + if(_found) + AddCXXCompilerFlag("/arch:SSE2" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + endif() + if(NOT _found) + _ofa_find(_enable_extension_flag_list "sse" _found) + if(_found) + AddCXXCompilerFlag("/arch:SSE" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + endif() + foreach(_extension ${_enable_extension_flag_list}) + string(TOUPPER "${_extension}" _extension) + string(REPLACE "[-.+/:= ]" "_" _extension "__${_extension}__") + add_definitions("-D${_extension}") + endforeach(_extension) + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel" + OR CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") + + if(WIN32) + # Intel (on Windows) + set(OFA_map_knl "-QxKNL;-QxMIC-AVX512") + set(OFA_map_knm "-QxKNM;-QxMIC-AVX512") + set(OFA_map_clearwaterforest 
"-QxCLEARWATERFOREST;-QxCORE-AVX512") + set(OFA_map_sierraforest "-QxSIERRAFOREST;-QxCORE-AVX512") + set(OFA_map_diamondrapids "-QxDIAMONDRAPIDS;-QxCORE-AVX512") + set(OFA_map_graniterapids "-QxGRANITERAPICS;-QxCORE-AVX512") + set(OFA_map_emeraldrapids "-QxEMERALDRAPIDS;-QxCORE-AVX512") + set(OFA_map_sapphirerapids "-QxSAPPHIRERAPIDS;-QxCORE-AVX512") + set(OFA_map_pantherlake "-QxPANTHERLAKE;-QxCORE-AVX512") + set(OFA_map_lunarlake "-QxLUNARLAKE;-QxCORE-AVX512") + set(OFA_map_arrowlake "-QxARROWLAKE;-QxCORE-AVX512") + set(OFA_map_meteorlake "-QxMETEORLAKE;-QxCORE-AVX512") + set(OFA_map_raptorlake "-QxRAPTORLAKE;-QxCORE-AVX512") + set(OFA_map_alderlake "-QxALDERLAKE;-QxCORE-AVX512") + set(OFA_map_rocketlake "-QxROCKETLAKE;-QxCORE-AVX512") + set(OFA_map_tigerlake "-QxTIGERLAKE;-QxCORE-AVX512") + set(OFA_map_icelake-server "-QxICELAKE-SERVER;-QxCORE-AVX512") + set(OFA_map_icelake-avx512 "-QxICELAKE-SERVER;-QxCORE-AVX512") + set(OFA_map_icelake-client "-QxICELAKE-CLIENT;-QxCORE-AVX512") + set(OFA_map_icelake "-QxICELAKE-CLIENT;-QxCORE-AVX512") + set(OFA_map_cannonlake "-QxCANNONLAKE;-QxCORE-AVX512") + set(OFA_map_cooperlake "-QxCOOPERLAKE;-QxCORE-AVX512") + set(OFA_map_cascadelake "-QxCASCADELAKE;-QxCORE-AVX512") + set(OFA_map_skylake-avx512 "-QxSKYLAKE-AVX512;-QxCORE-AVX512") + set(OFA_map_coffeelake "-QxCOFFEELAKE;-QxCORE-AVX2") + set(OFA_map_kabylake "-QxKABYLAKE;-QxCORE-AVX2") + set(OFA_map_amberlake "-QxAMBERLAKE;-QxCORE-AVX2") + set(OFA_map_skylake "-QxSKYLAKE;-QxCORE-AVX2") + set(OFA_map_broadwell "-QxBROADWELL;-QxCORE-AVX2") + set(OFA_map_haswell "-QxHASWELL;-QxCORE-AVX2") + set(OFA_map_ivybridge "-QxIVYBRIDGE;-QxCORE-AVX-I") + set(OFA_map_sandybridge "-QxSANDYBRIDGE;-QxAVX") + set(OFA_map_westmere "-QxSSE4.2") + set(OFA_map_nehalem "-QxSSE4.2") + set(OFA_map_penryn "-QxSSSE3") + set(OFA_map_merom "-QxSSSE3") + set(OFA_map_core2 "-QxSSE3") + set(OFA_map_tremont "-QxTREMONT") + set(OFA_map_goldmont-plus "-QxGOLDMONT-PLUS") + set(OFA_map_goldmont "-QxGOLDMONT") + 
set(OFA_map_silvermont "-QxSILVERMONT") + set(_ok FALSE) + else() + # Intel (in Linux) + set(OFA_map_knl "-xKNL;-xMIC-AVX512") + set(OFA_map_knm "-xKNM;-xMIC-AVX512") + set(OFA_map_clearwaterforest "-xCLEARWATERFOREST;-xCORE-AVX512") + set(OFA_map_sierraforest "-xSIERRAFOREST;-xCORE-AVX512") + set(OFA_map_diamondrapids "-xDIAMONDRAPIDS;-xCORE-AVX512") + set(OFA_map_graniterapids "-xGRANITERAPIDS;-xCORE-AVX512") + set(OFA_map_emeraldrapids "-xEMERALDRAPIDS;-xCORE-AVX512") + set(OFA_map_sapphirerapids "-xSAPPHIRERAPIDS;-xCORE-AVX512") + set(OFA_map_pantherlake "-xPANTHERLAKE;-xCORE-AVX512") + set(OFA_map_lunarlake "-xLUNARLAKE;-xCORE-AVX512") + set(OFA_map_arrowlake "-xARROWLAKE;-xCORE-AVX512") + set(OFA_map_meteorlake "-xMETEORLAKE;-xCORE-AVX512") + set(OFA_map_raptorlake "-xRAPTORLAKE;-xCORE-AVX512") + set(OFA_map_alderlake "-xALDERLAKE;-xCORE-AVX512") + set(OFA_map_rocketlake "-xROCKETLAKE;-xCORE-AVX512") + set(OFA_map_tigerlake "-xTIGERLAKE;-xCORE-AVX512") + set(OFA_map_icelake-server "-xICELAKE-SERVER;-xCORE-AVX512") + set(OFA_map_icelake-avx512 "-xICELAKE-SERVER;-xCORE-AVX512") + set(OFA_map_icelake-client "-xICELAKE-CLIENT;-xCORE-AVX512") + set(OFA_map_icelake "-xICELAKE-CLIENT;-xCORE-AVX512") + set(OFA_map_cannonlake "-xCANNONLAKE;-xCORE-AVX512") + set(OFA_map_cooperlake "-xCOOPERLAKE;-xCORE-AVX512") + set(OFA_map_cascadelake "-xCASCADELAKE;-xCORE-AVX512") + set(OFA_map_skylake-avx512 "-xSKYLAKE-AVX512;-xCORE-AVX512") + set(OFA_map_coffeelake "-xCOFFEELAKE;-xCORE-AVX2") + set(OFA_map_kabylake "-xKABYLAKE;-xCORE-AVX2") + set(OFA_map_amberlake "-xAMBERLAKE;-xCORE-AVX2") + set(OFA_map_skylake "-xSKYLAKE;-xCORE-AVX2") + set(OFA_map_broadwell "-xBROADWELL;-xCORE-AVX2") + set(OFA_map_haswell "-xHASWELL;-xCORE-AVX2") + set(OFA_map_ivybridge "-xIVYBRIDGE;-xCORE-AVX-I") + set(OFA_map_sandybridge "-xSANDYBRIDGE;-xAVX") + set(OFA_map_westmere "-xSSE4.2") + set(OFA_map_nehalem "-xSSE4.2") + set(OFA_map_penryn "-xSSSE3") + set(OFA_map_merom "-xSSSE3") + set(OFA_map_core2 
"-xSSE3") + set(OFA_map_tremont "-xTREMONT") + set(OFA_map_goldmont-plus "-xGOLDMONT-PLUS") + set(OFA_map_goldmont "-xGOLDMONT") + set(OFA_map_silvermont "-xSILVERMONT") + set(_ok FALSE) + endif() + + foreach(_arch ${_march_flag_list}) + if(DEFINED OFA_map_${_arch}) + foreach(_flag ${OFA_map_${_arch}}) + AddCXXCompilerFlag(${_flag} FLAGS ARCHITECTURE_CXX_FLAGS RESULT _ok) + if(_ok) + break() + endif() + endforeach() + if(_ok) + break() + endif() + endif() + endforeach() + if(NOT _ok) + # This is the Intel compiler, so SSE2 is a very reasonable baseline. + message(STATUS "[OptimizeForArchitecture] Did not recognize the requested architecture flag ${_arch}, falling back to SSE2") + if(WIN32) + AddCXXCompilerFlag("-QxSSE2" FLAGS ARCHITECTURE_CXX_FLAGS) + else() + AddCXXCompilerFlag("-xSSE2" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + endif() + + # Set -m<_extension> flag for enabled features + foreach(_extension ${_enable_extension_flag_list}) + AddCXXCompilerFlag("${_enable_flag}${_extension}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_extension) + + # Set -mno-<_extension> flag for disabled features + if(DEFINED _disable_flag) + foreach(_extension ${_disable_extension_flag_list}) + AddCXXCompilerFlag("${_disable_flag}${_extension}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_extension) + endif() + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "SunPro") + + # Set -xtarget flag + foreach(_flag ${_march_flag_list}) + AddCXXCompilerFlag("-xtarget=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good) + if(_good) + break() + endif(_good) + endforeach(_flag) + + # Set -xarch= flag for enabled features + foreach(_flag ${_enable_extension_flag_list}) + AddCXXCompilerFlag("-xarch=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_flag) + + # TODO PGI/Cray ... 
+ + else() + # Others: GNU, Clang and variants + + # Set -march flag + foreach(_flag ${_march_flag_list}) + AddCXXCompilerFlag("-march=${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS RESULT _good) + if(_good) + break() + endif(_good) + endforeach(_flag) + + # Set -m flag for enabled features + foreach(_flag ${_enable_extension_flag_list}) + AddCXXCompilerFlag("-m${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_flag) + + # Set -mno-feature flag for disabled features + foreach(_flag ${_disable_extension_flag_list}) + AddCXXCompilerFlag("-mno-${_flag}" FLAGS ARCHITECTURE_CXX_FLAGS) + endforeach(_flag) + endif() + endif() + + # Compile code with profiling instrumentation + if(TARGET_PROFILER STREQUAL "gprof") + AddCXXCompilerFlag("-pg" FLAGS ARCHITECTURE_CXX_FLAGS) + elseif(TARGET_PROFILER STREQUAL "vtune") + if (CMAKE_CXX_COMPILER_ID MATCHES "Intel") + # Need to check if this also works on Windows + AddCXXCompilerFlag("-g" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-debug inline-debug-info" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-D TBB_USE_THREADING_TOOLS" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-parallel-source-info=2" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-gline-tables-only" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-fdebug-info-for-profiling" FLAGS ARCHITECTURE_CXX_FLAGS) + AddCXXCompilerFlag("-Xsprofile" FLAGS ARCHITECTURE_CXX_FLAGS) + endif() + endif() + + # Remove duplicate flags + list(REMOVE_DUPLICATES ARCHITECTURE_CXX_FLAGS) + + if(OFA_VERBOSE) + string(REPLACE ";" ", " _str "${ARCHITECTURE_CXX_FLAGS}") + message(STATUS "[OptimizeForArchitecture] ARCHITECTURE_CXX_FLAGS: " ${_str}) + endif() + +endmacro(OFA_HandleX86Options) diff --git a/src/cmake/OFA/License.txt b/src/cmake/OFA/License.txt new file mode 100644 index 0000000000..a612ad9813 --- /dev/null +++ b/src/cmake/OFA/License.txt @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. 
Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. 
"Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. 
Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. 
Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. 
Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. 
However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. 
* +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. 
+Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. 
If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/src/cmake/OFA/OptimizeForArchitecture.cmake b/src/cmake/OFA/OptimizeForArchitecture.cmake new file mode 100644 index 0000000000..81fa9935bb --- /dev/null +++ b/src/cmake/OFA/OptimizeForArchitecture.cmake @@ -0,0 +1,159 @@ +# Determine the host CPU feature set and determine the best set of compiler +# flags to enable all supported SIMD relevant features. Alternatively, the +# target CPU can be explicitly selected (for generating more generic binaries +# or for targeting a different system). +# Compilers provide e.g. the -march=native flag to achieve a similar result. +# This fails to address the need for building for a different microarchitecture +# than the current host. +# The script tries to deduce all settings from the model and family numbers of +# the CPU instead of reading the CPUID flags from e.g. /proc/cpuinfo. This makes +# the detection more independent from the CPUID code in the kernel (e.g. avx2 is +# not listed on older kernels). 
+# +# Usage: +# OptimizeForArchitecture() +# +# Optional inputs: +# TARGET_ARCHITECTURE=<target> specifies the target architecture (default=auto) +# TARGET_PROFILER=<profiler> specifies the target profiler (default=none) +# OFA_VERBOSE=<bool> prints verbose output (default=off) +# +# If any of the <feature>_broken flags are defined and set to true, +# the OptimizeForArchitecture macro will consequently disable the +# relevant features via compiler flags. +# +# Output: +# ARCHITECTURE_CXX_FLAGS compiler flags optimized for the target architecture +# +# Internal variables: +# USE_<extension> boolean variable holding the status of <extension> +# HAVE_<flag> boolean variable holding the compiler's capability + +#============================================================================= +# Copyright 2010-2016 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the names of contributing organizations nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#============================================================================= + +#============================================================================= +# Extension of the original version by Matthias Moller +# +# Changelog: +# - Update of CPUIDs for latest Intel and AMD processors +# - Added support for PPC64 (Clang, GCC, IBM XLC) +# - Added Support for ARM (Clang, GCC, ARM Clang, Cray, Fujitsu) +# - Restructuring and splitting into multiple files +#============================================================================= + +#============================================================================= +# Autodetection of CPU +#============================================================================= + +macro(OFA_AutodetectHostArchitecture) + + # If building a macOS universal binary, optimizing via compiler flags is unsupported. + if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64;x86_64" OR "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "x86_64;arm64") + message(STATUS "[OptimizeForArchitecture] CMAKE_OSX_ARCHITECTURES indicates the project will be compiled into a universal binary. Optimization via compiler flags is unsupported and will not be performed. 
This should usually only be done if a redistributable bundle is required.\n\t--> If building only for the current host machine, consider specifying CMAKE_OSX_ARCHITECTURES via the command line to enable optimizations.") + return() + endif() + + set(TARGET_ARCHITECTURE "none") + set(ARCHITECTURE_CXX_FLAGS CACHE STRING "CPU architecture compiler flags") + + if(APPLE AND NOT "${CMAKE_OSX_ARCHITECTURES}" MATCHES ${CMAKE_SYSTEM_PROCESSOR}) + message(STATUS "[OptimizeForArchitecture] CMAKE_OSX_ARCHITECTURES indicates the project will be cross-compiled from ${CMAKE_HOST_SYSTEM_PROCESSOR} to ${CMAKE_OSX_ARCHITECTURES}.") + if("${CMAKE_OSX_ARCHITECTURES}" MATCHES "x86_64") + set(TARGET_ARCHITECTURE "skylake" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Setting the value to \"native\" bypasses all checks and uses \"-march=native\" or the compiler equivalent flag. Other supported values are: \"none\", \"generic\", \"core\", \"core2\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandybridge\", \"ivybridge\", \"haswell\", \"broadwell\", \"skylake\", \"skylake-xeon\", \"kabylake\", \"cannonlake\", \"cascadelake\", \"cooperlake\", \"icelake\", \"icelake-xeon\", \"tigerlake\", \"alderlake\", \"sapphirerapids\", \"rocketlake\", \"raptorlake\", \"bonnell\", \"silvermont\", \"goldmont\", \"goldmont-plus\", \"tremont\", \"knl\" (Knights Landing), \"knm\" (Knights Mill), \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \"steamroller\", \"excavator\", \"amd14h\", \"amd16h\", \"zen\", \"zen2\", \"zen3\", \"zen4\"." FORCE) + elseif("${CMAKE_OSX_ARCHITECTURES}" MATCHES "arm64") + set(TARGET_ARCHITECTURE "apple-m1" CACHE STRING "CPU architecture to optimize for. 
Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Setting the value to \"native\" bypasses all checks and uses \"-march=native\" or the compiler equivalent flag. Other supported values are: \"none\", \"generic\", \"a64fx\", \"apple-a6\", \"apple-a7\", \"apple-a8\", \"apple-a9\", \"apple-a10\", \"apple-a11\", \"apple-a12\", \"apple-a13\", \"apple-a14\", \"apple-a15\", \"apple-a16\", \"apple-m1\", \"apple-m2\", \"apple-m3\", \"apple-m4\", \"arm1020e\", \"arm1020t\", \"arm1022e\", \"arm1026ej-s\", \"arm10e\", \"arm10tdmi\", \"arm1136j-s\", \"arm1136jf-s\", \"arm1156t2-s\", \"arm1156t2f-s\", \"arm1176jz-s\", \"arm1176jzf-s\", \"arm710t\", \"arm720t\", \"arm740t\", \"arm7tdmi-s\", \"arm7tdmi\", \"arm810\", \"arm8\", \"arm920\", \"arm920t\", \"arm922t\", \"arm926ej-s\", \"arm940t\", \"arm946e-s\", \"arm966e-s\", \"arm968e-s\", \"arm9\", \"arm9e\", \"arm9tdmi\", \"brahma-b15\", \"brahma-b53\", \"carmel\", \"cortex-a7\", \"cortex-a8\", \"cortex-a9\", \"cortex-a12\", \"cortex-a15.cortex-a7\", \"cortex-a15\", \"cortex-a17.cortex-a7\", \"cortex-a17\", \"cortex-a32\", \"cortex-a34\", \"cortex-a35\", \"cortex-a53\", \"cortex-a55\", \"cortex-a57.cortext-a53\", \"cortex-a57\", \"cortex-a5\", \"cortex-a72.cortext-a53\", \"cortex-a72\", \"cortex-a73.cortext-a35\", \"cortex-a73.cortext-a53\", \"cortex-a73\", \"cortex-a75.cortext-a55\", \"cortex-a75\", \"cortex-a76.cortext-a55\", \"cortex-a76\", \"cortex-a76ae\", \"cortex-a77\", \"cortex-a78\", \"cortex-a78ae\", \"cortex-a76c\", \"cortex-a510\", \"cortex-a710\", \"cortex-m0\", \"cortex-m0plus\", \"cortex-m1\", \"cortex-m23\", \"cortex-m33\", \"cortex-m35p\", \"cortex-m3\", \"cortex-m4\", \"cortex-m55\", \"cortex-m7\", \"cortex-r4\", \"cortex-r4f\", \"cortex-r52\", \"cortex-r5\", \"cortex-r7\", \"cortex-r8\", \"cortex-x1\", \"cortex-x2\", \"denver2\", \"denver\", 
\"exynos-m1\", \"fa526\", \"fa606te\", \"fa626\", \"fa626te\", \"fa726te\", \"falkor\", \"fmp626\", \"generic-armv7-a\", \"i80200\", \"i80321-400-b0\", \"i80321-400\", \"i80321-600-b0\", \"i80321-600\", \"ipx1200\", \"ipx425-266\", \"ipx425-400\", \"ipx425-533\", \"iwmmxt2\", \"iwmmxt\", \"krait\", \"kryo2\", \"kryo\", \"marvell-f\", \"marvell-pj4\", \"mpcore\", \"neoverse-e1\", \"neoverse-n1\", \"neoverse-n2\", \"neoverse-v1\", \"pxa210a\", \"pxa210b\", \"pxa210c\", \"pxa250a\", \"pxa250b\", \"pxa250c\", \"pxa27x\", \"pxa30x\", \"pxa31x\", \"pxa32x\", \"pxa930\", \"sa1110\", \"saphira\", \"scorpion\", \"strongarm1100\", \"strongarm110\", \"strongarm\", \"thunderx2\", \"thunderx2t99\", \"thunderx\", \"thunderxt81\", \"thunderxt83\", \"thunderxt88\", \"tsv110\", \"xgene1\", \"xscale\"." FORCE) + message(STATUS "[OptimizeForArchitecture] Set baseline architecture for cross-compiling on Darwin to ${TARGET_ARCHITECTURE}. You can overwrite this value by setting TARGET_ARCHITECTURE on the CLI. 
To disable optimization set TARGET_ARCHITECTURE=none (note that this will significantly slow the down the resulting binaries and should only be done if targeting a redistributable build).") + endif() + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "i686.*|i386.*|x86.*|amd64.*|x86_64.*|AMD64.*") + include(OFA/AutodetectX86) + OFA_AutodetectX86() + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(arm.*|ARM.*|aarch64.*|AARCH64.*)") + include(OFA/AutodetectArm) + OFA_AutodetectArm() + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(powerpc|ppc)64.*") + include(OFA/AutodetectPpc) + OFA_AutodetectPpc() + else() + message(WARNING "[OptimizeForArchitecture] The CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture") + endif() +endmacro(OFA_AutodetectHostArchitecture) + +#============================================================================= +# Handling of CPU options +#============================================================================= + +macro(OptimizeForArchitecture) + if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "i686.*|i386.*|x86.*|amd64.*|x86_64.*|AMD64.*") + set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Setting the value to \"native\" bypasses all checks and uses \"-march=native\" or the compiler equivalent flag. 
Other supported values are: \"none\", \"generic\", \"core\", \"core2\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandybridge\", \"ivybridge\", \"haswell\", \"broadwell\", \"skylake\", \"skylake-xeon\", \"kabylake\", \"cannonlake\", \"cascadelake\", \"cooperlake\", \"icelake\", \"icelake-xeon\", \"tigerlake\", \"alderlake\", \"sapphirerapids\", \"rocketlake\", \"raptorlake\", \"bonnell\", \"silvermont\", \"goldmont\", \"goldmont-plus\", \"tremont\", \"knl\" (Knights Landing), \"knm\" (Knights Mill), \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \"steamroller\", \"excavator\", \"amd14h\", \"amd16h\", \"zen\", \"zen2\", \"zen3\", \"zen4\"." ) + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(arm.*|ARM.*|aarch64.*|AARCH64.*)") + set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Setting the value to \"native\" bypasses all checks and uses \"-march=native\" or the compiler equivalent flag. 
Other supported values are: \"none\", \"generic\", \"a64fx\", \"apple-a6\", \"apple-a7\", \"apple-a8\", \"apple-a9\", \"apple-a10\", \"apple-a11\", \"apple-a12\", \"apple-a13\", \"apple-a14\", \"apple-a15\", \"apple-a16\", \"apple-m1\", \"apple-m2\", \"apple-m3\", \"apple-m4\", \"arm1020e\", \"arm1020t\", \"arm1022e\", \"arm1026ej-s\", \"arm10e\", \"arm10tdmi\", \"arm1136j-s\", \"arm1136jf-s\", \"arm1156t2-s\", \"arm1156t2f-s\", \"arm1176jz-s\", \"arm1176jzf-s\", \"arm710t\", \"arm720t\", \"arm740t\", \"arm7tdmi-s\", \"arm7tdmi\", \"arm810\", \"arm8\", \"arm920\", \"arm920t\", \"arm922t\", \"arm926ej-s\", \"arm940t\", \"arm946e-s\", \"arm966e-s\", \"arm968e-s\", \"arm9\", \"arm9e\", \"arm9tdmi\", \"brahma-b15\", \"brahma-b53\", \"carmel\", \"cortex-a7\", \"cortex-a8\", \"cortex-a9\", \"cortex-a12\", \"cortex-a15.cortex-a7\", \"cortex-a15\", \"cortex-a17.cortex-a7\", \"cortex-a17\", \"cortex-a32\", \"cortex-a34\", \"cortex-a35\", \"cortex-a53\", \"cortex-a55\", \"cortex-a57.cortext-a53\", \"cortex-a57\", \"cortex-a5\", \"cortex-a72.cortext-a53\", \"cortex-a72\", \"cortex-a73.cortext-a35\", \"cortex-a73.cortext-a53\", \"cortex-a73\", \"cortex-a75.cortext-a55\", \"cortex-a75\", \"cortex-a76.cortext-a55\", \"cortex-a76\", \"cortex-a76ae\", \"cortex-a77\", \"cortex-a78\", \"cortex-a78ae\", \"cortex-a76c\", \"cortex-a510\", \"cortex-a710\", \"cortex-m0\", \"cortex-m0plus\", \"cortex-m1\", \"cortex-m23\", \"cortex-m33\", \"cortex-m35p\", \"cortex-m3\", \"cortex-m4\", \"cortex-m55\", \"cortex-m7\", \"cortex-r4\", \"cortex-r4f\", \"cortex-r52\", \"cortex-r5\", \"cortex-r7\", \"cortex-r8\", \"cortex-x1\", \"cortex-x2\", \"denver2\", \"denver\", \"exynos-m1\", \"fa526\", \"fa606te\", \"fa626\", \"fa626te\", \"fa726te\", \"falkor\", \"fmp626\", \"generic-armv7-a\", \"i80200\", \"i80321-400-b0\", \"i80321-400\", \"i80321-600-b0\", \"i80321-600\", \"ipx1200\", \"ipx425-266\", \"ipx425-400\", \"ipx425-533\", \"iwmmxt2\", \"iwmmxt\", \"krait\", \"kryo2\", \"kryo\", \"marvell-f\", 
\"marvell-pj4\", \"mpcore\", \"neoverse-e1\", \"neoverse-n1\", \"neoverse-n2\", \"neoverse-v1\", \"pxa210a\", \"pxa210b\", \"pxa210c\", \"pxa250a\", \"pxa250b\", \"pxa250c\", \"pxa27x\", \"pxa30x\", \"pxa31x\", \"pxa32x\", \"pxa930\", \"sa1110\", \"saphira\", \"scorpion\", \"strongarm1100\", \"strongarm110\", \"strongarm\", \"thunderx2\", \"thunderx2t99\", \"thunderx\", \"thunderxt81\", \"thunderxt83\", \"thunderxt88\", \"tsv110\", \"xgene1\", \"xscale\".") + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(powerpc|ppc)64.*") + set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. Other supported values are: \"none\", \"generic\", \"power8\", \"power9\", \"power10\".") + else() + message(WARNING "[OptimizeForArchitecture] The CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture") + endif() + + if(NOT OFA_VERBOSE) + set(CMAKE_REQUIRED_QUIET true) + endif() + + if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}") + string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE) + message(STATUS "[OptimizeForArchitecture] Target architecture changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"") + + if(TARGET_ARCHITECTURE STREQUAL "auto") + OFA_AutodetectHostArchitecture() + message(STATUS "[OptimizeForArchitecture] Detected Host CPU: ${TARGET_ARCHITECTURE}") + endif() + + set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE) + mark_as_advanced(_last_target_arch) + + message(STATUS "[OptimizeForArchitecture] Checking Host CPU features. 
This can take some time ...") + if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "i686.*|i386.*|x86.*|amd64.*|x86_64.*|AMD64.*" OR "${CMAKE_OSX_ARCHITECTURES}" MATCHES "x86_64") + include(OFA/HandleX86Options) + OFA_HandleX86Options() + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(arm.*|ARM.*|aarch64.*|AARCH64.*)" OR "${CMAKE_OSX_ARCHITECTURES}" MATCHES "arm64") + include(OFA/HandleArmOptions) + OFA_HandleArmOptions() + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(powerpc|ppc)64.*") + include(OFA/HandlePpcOptions) + OFA_HandlePpcOptions() + endif() + + if(ARCHITECTURE_CXX_FLAGS) + if(APPLE) + message(STATUS "[OptimizeForArchitecture] (!) ${CMAKE_OSX_ARCHITECTURES} architecture optimization flags applied: ${ARCHITECTURE_CXX_FLAGS}.\nIn case of runtime errors re-compile with TARGET_ARCHITECTURE=none or TARGET_ARCHITECTURE=generic.") + else() + message(STATUS "[OptimizeForArchitecture] (!) ${CMAKE_SYSTEM_PROCESSOR} architecture optimization flags applied: ${ARCHITECTURE_CXX_FLAGS}.\nIn case of runtime errors re-compile with TARGET_ARCHITECTURE=none or TARGET_ARCHITECTURE=generic.") + endif() + endif() + endif() + +endmacro(OptimizeForArchitecture) diff --git a/src/cmake/OFA/SupportedArchitectures.md b/src/cmake/OFA/SupportedArchitectures.md new file mode 100644 index 0000000000..dad7c87003 --- /dev/null +++ b/src/cmake/OFA/SupportedArchitectures.md @@ -0,0 +1,138 @@ +# Supported Architectures + +This document lists all supported target architectures that can be specified by setting `TARGET_ARCHITECTURE` on the CMake CLI. Only use this explicitly if you know what you are doing! 
+ +## x86/x86_64 + +| Vendor | Codename / CPU Microarchitecture | Family | Name | CMake Flag | +|:--|:--|:--|:--|:--| +| Intel | Core | x86 / x86_64 | core | `TARGET_ARCHITECTURE=core` | +| Intel | Core 2 | x86 / x86_64 | core2 | `TARGET_ARCHITECTURE=core2` | +| Intel | Merom (65nm Core2) | x86 / x86_64 | merom | `TARGET_ARCHITECTURE=merom` | +| Intel | Penryn (45nm Core2) | x86 / x86_64 | penryn | `TARGET_ARCHITECTURE=penryn` | +| Intel | Nehalem | x86 / x86_64 | nehalem | `TARGET_ARCHITECTURE=nehalem` | +| Intel | Westmere | x86 / x86_64 | westmere | `TARGET_ARCHITECTURE=westmere` | +| Intel | Sandy Bridge | x86 / x86_64 | sandybridge | `TARGET_ARCHITECTURE=sandybridge` | +| Intel | Ivy Bridge | x86 / x86_64 | ivybridge | `TARGET_ARCHITECTURE=ivybridge` | +| Intel | Haswell | x86 / x86_64 | haswell | `TARGET_ARCHITECTURE=haswell` | +| Intel | Broadwell | x86 / x86_64 | broadwell | `TARGET_ARCHITECTURE=broadwell` | +| Intel | Skylake | x86 / x86_64 | skylake | `TARGET_ARCHITECTURE=skylake` | +| Intel | Skylake-X (Xeon) | x86 / x86_64 | skylake-xeon | `TARGET_ARCHITECTURE=skylake-xeon` | +| Intel | Kaby Lake | x86 / x86_64 | kabylake | `TARGET_ARCHITECTURE=kabylake` | +| Intel | Cannon Lake | x86 / x86_64 | cannonlake | `TARGET_ARCHITECTURE=cannonlake` | +| Intel | Cascade Lake | x86 / x86_64 | cascadelake | `TARGET_ARCHITECTURE=cascadelake` | +| Intel | Cooper Lake | x86 / x86_64 | cooperlake | `TARGET_ARCHITECTURE=cooperlake` | +| Intel | Ice Lake | x86 / x86_64 | icelake | `TARGET_ARCHITECTURE=icelake` | +| Intel | Ice Lake Xeon | x86 / x86_64 | icelake-xeon | `TARGET_ARCHITECTURE=icelake-xeon` | +| Intel | Tiger Lake | x86 / x86_64 | tigerlake | `TARGET_ARCHITECTURE=tigerlake` | +| Intel | Alder Lake | x86 / x86_64 | alderlake | `TARGET_ARCHITECTURE=alderlake` | +| Intel | Sapphire Rapids | x86 / x86_64 | sapphirerapids | `TARGET_ARCHITECTURE=sapphirerapids` | +| Intel | Rocket Lake | x86 / x86_64 | rocketlake | `TARGET_ARCHITECTURE=rocketlake` | +| Intel | 
Raptor Lake | x86 / x86_64 | raptorlake | `TARGET_ARCHITECTURE=raptorlake` | +| Intel | Bonnell | x86 / x86_64 | bonnell | `TARGET_ARCHITECTURE=bonnell` | +| Intel | Silvermont | x86 / x86_64 | silvermont | `TARGET_ARCHITECTURE=silvermont` | +| Intel | Goldmont | x86 / x86_64 | goldmont | `TARGET_ARCHITECTURE=goldmont` | +| Intel | Goldmont Plus | x86 / x86_64 | goldmont-plus | `TARGET_ARCHITECTURE=goldmont-plus` | +| Intel | Tremont | x86 / x86_64 | tremont | `TARGET_ARCHITECTURE=tremont` | +| Intel | Knights Landing | x86 / x86_64 | knl | `TARGET_ARCHITECTURE=knl` | +| Intel | Knights Mill | x86 / x86_64 | knm | `TARGET_ARCHITECTURE=knm` | +| Intel | Atom (generic) | x86 / x86_64 | atom | `TARGET_ARCHITECTURE=atom` | +| AMD | K8 | x86 / x86_64 | k8 | `TARGET_ARCHITECTURE=k8` | +| AMD | K8 SSE3 | x86 / x86_64 | k8-sse3 | `TARGET_ARCHITECTURE=k8-sse3` | +| AMD | Barcelona | x86 / x86_64 | barcelona | `TARGET_ARCHITECTURE=barcelona` | +| AMD | Istanbul | x86 / x86_64 | istanbul | `TARGET_ARCHITECTURE=istanbul` | +| AMD | Magny-Cours | x86 / x86_64 | magny-cours | `TARGET_ARCHITECTURE=magny-cours` | +| AMD | Bulldozer | x86 / x86_64 | bulldozer | `TARGET_ARCHITECTURE=bulldozer` | +| AMD | Interlagos | x86 / x86_64 | interlagos | `TARGET_ARCHITECTURE=interlagos` | +| AMD | Piledriver | x86 / x86_64 | piledriver | `TARGET_ARCHITECTURE=piledriver` | +| AMD | Steamroller | x86 / x86_64 | steamroller | `TARGET_ARCHITECTURE=steamroller` | +| AMD | Excavator | x86 / x86_64 | excavator | `TARGET_ARCHITECTURE=excavator` | +| AMD | Family 14h | x86 / x86_64 | amd14h | `TARGET_ARCHITECTURE=amd14h` | +| AMD | Family 16h | x86 / x86_64 | amd16h | `TARGET_ARCHITECTURE=amd16h` | +| AMD | Zen | x86 / x86_64 | zen | `TARGET_ARCHITECTURE=zen` | +| AMD | Zen 2 | x86 / x86_64 | zen2 | `TARGET_ARCHITECTURE=zen2` | +| AMD | Zen 3 | x86 / x86_64 | zen3 | `TARGET_ARCHITECTURE=zen3` | +| AMD | Zen 4 | x86 / x86_64 | zen4 | `TARGET_ARCHITECTURE=zen4` | +| Generic | Generic | x86 / x86_64 | 
generic | `TARGET_ARCHITECTURE=generic` | +| Generic | None (no optimization) | x86 / x86_64 | none | `TARGET_ARCHITECTURE=none` | +| Generic | Auto-detect host CPU | x86 / x86_64 | auto | `TARGET_ARCHITECTURE=auto` | +| Generic | Compiler “native” | x86 / x86_64 | native | `TARGET_ARCHITECTURE=native` | + +## ARM/ARM64 + +| Vendor | Codename / CPU Microarchitecture | Family | Name | CMake Flag | +|:--|:--|:--|:--|:--| +| Fujitsu | A64FX | arm64 | a64fx | `TARGET_ARCHITECTURE=a64fx` | +| Apple | A6 | arm64 | apple-a6 | `TARGET_ARCHITECTURE=apple-a6` | +| Apple | A7 | arm64 | apple-a7 | `TARGET_ARCHITECTURE=apple-a7` | +| Apple | A8 | arm64 | apple-a8 | `TARGET_ARCHITECTURE=apple-a8` | +| Apple | A9 | arm64 | apple-a9 | `TARGET_ARCHITECTURE=apple-a9` | +| Apple | A10 | arm64 | apple-a10 | `TARGET_ARCHITECTURE=apple-a10` | +| Apple | A11 | arm64 | apple-a11 | `TARGET_ARCHITECTURE=apple-a11` | +| Apple | A12 | arm64 | apple-a12 | `TARGET_ARCHITECTURE=apple-a12` | +| Apple | A13 | arm64 | apple-a13 | `TARGET_ARCHITECTURE=apple-a13` | +| Apple | A14 | arm64 | apple-a14 | `TARGET_ARCHITECTURE=apple-a14` | +| Apple | A15 | arm64 | apple-a15 | `TARGET_ARCHITECTURE=apple-a15` | +| Apple | A16 | arm64 | apple-a16 | `TARGET_ARCHITECTURE=apple-a16` | +| Apple | M1 | arm64 | apple-m1 | `TARGET_ARCHITECTURE=apple-m1` | +| Apple | M2 | arm64 | apple-m2 | `TARGET_ARCHITECTURE=apple-m2` | +| Apple | M3 | arm64 | apple-m3 | `TARGET_ARCHITECTURE=apple-m3` | +| Apple | M4 | arm64 | apple-m4 | `TARGET_ARCHITECTURE=apple-m4` | +| ARM | Cortex-A5 | arm / arm64 | cortex-a5 | `TARGET_ARCHITECTURE=cortex-a5` | +| ARM | Cortex-A7 | arm / arm64 | cortex-a7 | `TARGET_ARCHITECTURE=cortex-a7` | +| ARM | Cortex-A8 | arm / arm64 | cortex-a8 | `TARGET_ARCHITECTURE=cortex-a8` | +| ARM | Cortex-A9 | arm / arm64 | cortex-a9 | `TARGET_ARCHITECTURE=cortex-a9` | +| ARM | Cortex-A15 | arm / arm64 | cortex-a15 | `TARGET_ARCHITECTURE=cortex-a15` | +| ARM | Cortex-A17 | arm / arm64 | cortex-a17 | 
`TARGET_ARCHITECTURE=cortex-a17` | +| ARM | Cortex-A32 | arm / arm64 | cortex-a32 | `TARGET_ARCHITECTURE=cortex-a32` | +| ARM | Cortex-A35 | arm / arm64 | cortex-a35 | `TARGET_ARCHITECTURE=cortex-a35` | +| ARM | Cortex-A53 | arm / arm64 | cortex-a53 | `TARGET_ARCHITECTURE=cortex-a53` | +| ARM | Cortex-A55 | arm / arm64 | cortex-a55 | `TARGET_ARCHITECTURE=cortex-a55` | +| ARM | Cortex-A57 | arm / arm64 | cortex-a57 | `TARGET_ARCHITECTURE=cortex-a57` | +| ARM | Cortex-A72 | arm / arm64 | cortex-a72 | `TARGET_ARCHITECTURE=cortex-a72` | +| ARM | Cortex-A73 | arm / arm64 | cortex-a73 | `TARGET_ARCHITECTURE=cortex-a73` | +| ARM | Cortex-A75 | arm / arm64 | cortex-a75 | `TARGET_ARCHITECTURE=cortex-a75` | +| ARM | Cortex-A76 | arm / arm64 | cortex-a76 | `TARGET_ARCHITECTURE=cortex-a76` | +| ARM | Cortex-A76AE | arm / arm64 | cortex-a76ae | `TARGET_ARCHITECTURE=cortex-a76ae` | +| ARM | Cortex-A77 | arm / arm64 | cortex-a77 | `TARGET_ARCHITECTURE=cortex-a77` | +| ARM | Cortex-A78 | arm / arm64 | cortex-a78 | `TARGET_ARCHITECTURE=cortex-a78` | +| ARM | Cortex-A78AE | arm / arm64 | cortex-a78ae | `TARGET_ARCHITECTURE=cortex-a78ae` | +| ARM | Cortex-A510 | arm / arm64 | cortex-a510 | `TARGET_ARCHITECTURE=cortex-a510` | +| ARM | Cortex-A710 | arm / arm64 | cortex-a710 | `TARGET_ARCHITECTURE=cortex-a710` | +| ARM | Cortex-X1 | arm / arm64 | cortex-x1 | `TARGET_ARCHITECTURE=cortex-x1` | +| ARM | Cortex-X2 | arm / arm64 | cortex-x2 | `TARGET_ARCHITECTURE=cortex-x2` | +| ARM | Neoverse E1 | arm64 | neoverse-e1 | `TARGET_ARCHITECTURE=neoverse-e1` | +| ARM | Neoverse N1 | arm64 | neoverse-n1 | `TARGET_ARCHITECTURE=neoverse-n1` | +| ARM | Neoverse N2 | arm64 | neoverse-n2 | `TARGET_ARCHITECTURE=neoverse-n2` | +| ARM | Neoverse V1 | arm64 | neoverse-v1 | `TARGET_ARCHITECTURE=neoverse-v1` | +| Qualcomm | Krait | arm / arm64 | krait | `TARGET_ARCHITECTURE=krait` | +| Qualcomm | Kryo | arm64 | kryo | `TARGET_ARCHITECTURE=kryo` | +| Qualcomm | Kryo 2 | arm64 | kryo2 | 
`TARGET_ARCHITECTURE=kryo2` | +| Cavium | ThunderX | arm64 | thunderx | `TARGET_ARCHITECTURE=thunderx` | +| Cavium | ThunderX2 | arm64 | thunderx2 | `TARGET_ARCHITECTURE=thunderx2` | +| Cavium | ThunderX2T99 | arm64 | thunderx2t99 | `TARGET_ARCHITECTURE=thunderx2t99` | +| Cavium | ThunderXT81 | arm64 | thunderxt81 | `TARGET_ARCHITECTURE=thunderxt81` | +| Cavium | ThunderXT83 | arm64 | thunderxt83 | `TARGET_ARCHITECTURE=thunderxt83` | +| Cavium | ThunderXT88 | arm64 | thunderxt88 | `TARGET_ARCHITECTURE=thunderxt88` | +| Marvell | PJ4 | arm / arm64 | marvell-pj4 | `TARGET_ARCHITECTURE=marvell-pj4` | +| Marvell | F | arm / arm64 | marvell-f | `TARGET_ARCHITECTURE=marvell-f` | +| Marvell | XScale | arm / arm64 | xscale | `TARGET_ARCHITECTURE=xscale` | +| Broadcom | Brahma B15 | arm / arm64 | brahma-b15 | `TARGET_ARCHITECTURE=brahma-b15` | +| Broadcom | Brahma B53 | arm / arm64 | brahma-b53 | `TARGET_ARCHITECTURE=brahma-b53` | +| Applied Micro | X-Gene 1 | arm64 | xgene1 | `TARGET_ARCHITECTURE=xgene1` | +| Generic | Generic | arm / arm64 | generic | `TARGET_ARCHITECTURE=generic` | +| Generic | None (no optimization) | arm / arm64 | none | `TARGET_ARCHITECTURE=none` | +| Generic | Auto-detect host CPU | arm / arm64 | auto | `TARGET_ARCHITECTURE=auto` | +| Generic | Compiler “native” | arm / arm64 | native | `TARGET_ARCHITECTURE=native` | + +## PPC + +| Vendor | Codename / CPU Microarchitecture | Family | Name | CMake Flag | +|:--|:--|:--|:--|:--| +| IBM | POWER8 | PPC | power8 | `TARGET_ARCHITECTURE=power8` | +| IBM | POWER9 | PPC | power9 | `TARGET_ARCHITECTURE=power9` | +| IBM | POWER10 | PPC | power10 | `TARGET_ARCHITECTURE=power10` | +| Generic | Generic | PPC | generic | `TARGET_ARCHITECTURE=generic` | +| Generic | None (no optimization) | PPC | none | `TARGET_ARCHITECTURE=none` | +| Generic | Auto-detect host CPU | PPC | auto | `TARGET_ARCHITECTURE=auto` | +| Generic | Compiler “native” | PPC | native | `TARGET_ARCHITECTURE=native` | diff --git 
a/src/cmake/OFA/cpuinfo_arm.c b/src/cmake/OFA/cpuinfo_arm.c new file mode 100644 index 0000000000..9988fe60c8 --- /dev/null +++ b/src/cmake/OFA/cpuinfo_arm.c @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + +sigjmp_buf go_here; + +void sigill_handler(int signum) +{ + (void)signum; + siglongjmp(go_here, 1); +} + +int main(void) +{ + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = sigill_handler; + if (sigaction(SIGILL, &sa, NULL) < 0) + { + perror("sigaction"); + exit(2); + } + + do + { + if (sigsetjmp(go_here, 1)) + { + exit(-1); + } + else + { + unsigned long ret; + asm("mrs %0, MIDR_EL1" : "=r"(ret)); + + printf("%s 0x%02lX\n", "[OptimizeForArchitecture] CPU implementer :", (ret >> 24) & 0xFF); + printf("%s 0x%01lX\n", "[OptimizeForArchitecture] CPU architecture:", (ret >> 16) & 0xF); + printf("%s 0x%01lX\n", "[OptimizeForArchitecture] CPU variant :", (ret >> 20) & 0xF); + printf("%s 0x%03lX\n", "[OptimizeForArchitecture] CPU part :", (ret >> 4) & 0xFFF); + printf("%s %ld\n", "[OptimizeForArchitecture] CPU revision :", ret & 0xF); + } + } while (0); + + return 0; +} diff --git a/src/cmake/OFA/cpuinfo_x86.cxx b/src/cmake/OFA/cpuinfo_x86.cxx new file mode 100644 index 0000000000..190a2cc11e --- /dev/null +++ b/src/cmake/OFA/cpuinfo_x86.cxx @@ -0,0 +1,732 @@ +#include +#include +#include + +#define print_features(reg, features, n) \ + for (int i = 0; i < n; ++i) \ + printf("%s", (reg >> i & 0x1) && !features[i].empty() ? 
(features[i] + " ").c_str() : ""); + +// Get the vendor ID +void getVendorID() +{ + int a[3]; + for (int i = 0; i < 3; ++i) + a[i] = 0; + + // EAX=0x00000000: Vendor ID + __asm__("mov $0x00000000, %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%ebx, %0\n\t" : "=r"(a[0])); + __asm__("mov %%edx, %0\n\t" : "=r"(a[1])); + __asm__("mov %%ecx, %0\n\t" : "=r"(a[2])); + + char vendorID[13]; + vendorID[12] = 0; + memcpy(&vendorID[0], &a[0], 4); + memcpy(&vendorID[4], &a[1], 4); + memcpy(&vendorID[8], &a[2], 4); + + printf("[OptimizeForArchitecture] vendor_id : %s\n", vendorID); +} + +// Get processor information +void getProcInfo() +{ + int eax = 0; + + // EAX=0x00000001: Processor Info + __asm__("mov $0x00000001 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // gives model and family + + int stepping = eax >> 0 & 0xF; + int model = eax >> 4 & 0xF; + int family = eax >> 8 & 0xF; + if (family == 6 || family == 15) + model += (eax >> 16 & 0xF) << 4; + + printf("[OptimizeForArchitecture] cpu family : %d\n", family); + printf("[OptimizeForArchitecture] model : %d\n", model); + printf("[OptimizeForArchitecture] stepping : %d\n", stepping); +} + +// Get processor features +void getFeatures() +{ + int eax_max, ecx_max, eax, ebx, ecx, edx; + + // Note: If the comment begins with a quoted string, that string is + // used in /proc/cpuinfo instead of the macro name. If the string is + // "", this feature bit is not displayed in /proc/cpuinfo at all. 
+ + // CPU flags + printf("flags : "); + + // EAX=0x00000000: largest value that EAX can be set to before calling CPUID + __asm__("mov $0x00000000, %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax_max)); + + if (eax_max >= 0x00000001) + { + // EAX=0x00000001: Processor Info and Feature Bits + __asm__("mov $0x00000001 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // feature flags + + // Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 + { + std::string features[] = { + "fpu", /* Onboard FPU */ + "vme", /* Virtual Mode Extensions */ + "de", /* Debugging Extensions */ + "pse", /* Page Size Extensions */ + "tsc", /* Time Stamp Counter */ + "msr", /* Model-Specific Registers */ + "pae", /* Physical Address Extensions */ + "mce", /* Machine Check Exception */ + "cx8", /* CMPXCHG8 instruction */ + "apic", /* Onboard APIC */ + "", /* Reserved */ + "sep", /* SYSENTER/SYSEXIT */ + "mtrr", /* Memory Type Range Registers */ + "pge", /* Page Global Enable */ + "mca", /* Machine Check Architecture */ + "cmov", /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ + "pat", /* Page Attribute Table */ + "pse36", /* 36-bit PSEs */ + "pn", /* Processor serial number */ + "clflush", /* CLFLUSH instruction */ + "", /* Reserved */ + "dts", /* "dts" Debug Store */ + "acpi", /* ACPI via MSR */ + "mmx", /* Multimedia Extensions */ + "fxsr", /* FXSAVE/FXRSTOR, CR4.OSFXSR */ + "sse", /* "sse" */ + "sse2", /* "sse2" */ + "ss", /* "ss" CPU self snoop */ + "ht", /* Hyper-Threading */ + "tm", /* "tm" Automatic clock control */ + "ia64", /* IA-64 processor */ + "pbe" /* Pending Break Enable */ + }; + print_features(edx, features, 32); + } + + // Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 + { + std::string features[] = { + "sse3", /* "pni" SSE-3 */ + "pclmulqdq", /* PCLMULQDQ instruction */ + "dtes64", /* 64-bit Debug Store */ + 
"monitor", /* "monitor" MONITOR/MWAIT support */ + "ds_cpl", /* "ds_cpl" CPL-qualified (filtered) Debug Store */ + "vmx", /* Hardware virtualization */ + "smx", /* Safer Mode eXtensions */ + "est", /* Enhanced SpeedStep */ + "tm2", /* Thermal Monitor 2 */ + "ssse3", /* Supplemental SSE-3 */ + "cid", /* Context ID */ + "sdbg", /* Silicon Debug */ + "fma", /* Fused multiply-add */ + "cx16", /* CMPXCHG16B instruction */ + "xtpr", /* Send Task Priority Messages */ + "pdcm", /* Perf/Debug Capabilities MSR */ + "", /* Reserved */ + "pcid", /* Process Context Identifiers */ + "dca", /* Direct Cache Access */ + "sse4_1", /* "sse4_1" SSE-4.1 */ + "sse4_2", /* "sse4_2" SSE-4.2 */ + "x2apic", /* X2APIC */ + "movbe", /* MOVBE instruction */ + "popcnt", /* POPCNT instruction */ + "tsc_deadline_timer", /* TSC deadline timer */ + "aes", /* AES instructions */ + "xsave", /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ + "", /* "" XSAVE instruction enabled in the OS */ + "avx", /* Advanced Vector Extensions */ + "f16c", /* 16-bit FP conversions */ + "rdrand", /* RDRAND instruction */ + "hypervisor" /* Running on a hypervisor */ + }; + print_features(ecx, features, 32); + } + } // EAX=0x00000001 + + // if (eax_max >=0x00000006) { + // // EAX=0x00000006: Extended Features + // __asm__("mov $0x00000006 , %eax\n\t"); + // __asm__("cpuid\n\t"); + // __asm__("mov %%eax, %0\n\t":"=r" (eax)); //extended feature flags + // __asm__("mov %%ebx, %0\n\t":"=r" (ebx)); //extended feature flags + // __asm__("mov %%ecx, %0\n\t":"=r" (ecx)); //extended feature flags + // __asm__("mov %%edx, %0\n\t":"=r" (edx)); //extended feature flags + + // // Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 + + // { + // std::string features[] = { "cxmmx", /* Cyrix MMX extensions */ + // "k6_mtrr", /* AMD K6 nonstandard MTRRs */ + // "cyrix_arr", /* Cyrix ARRs (= MTRRs) */ + // "centaur_mcr", /* Centaur MCRs (= MTRRs) */ + // "k8", /* "" Opteron, Athlon64 */ + // "", /* "" Athlon */ + // "", /* "" 
P3 */ + // "", /* "" P4 */ + // "constant_tsc", /* TSC ticks at a constant rate */ + // "up", /* SMP kernel running on UP */ + // "art", /* Always running timer (ART) */ + // "arch_perfmon", /* Intel Architectural PerfMon */ + // "pebs", /* Precise-Event Based Sampling */ + // "bts", /* Branch Trace Store */ + // "", /* "" syscall in IA32 userspace */ + // "", /* "" sysenter in IA32 userspace */ + // "rep_good", /* REP microcode works well */ + // "", /* Reserved */ + // "", /* "" LFENCE synchronizes RDTSC */ + // "acc_power", /* AMD Accumulated Power Mechanism */ + // "nopl", /* The NOPL (0F 1F) instructions */ + // "", /* "" Always-present feature */ + // "xtopology", /* CPU topology enum extensions */ + // "tsc_reliable", /* TSC is known to be reliable */ + // "nonstop_tsc", /* TSC does not stop in C states */ + // "cpuid", /* CPU has CPUID instruction itself */ + // "extd_apicid", /* Extended APICID (8 bits) */ + // "amd_dcm", /* AMD multi-node processor */ + // "aperfmperf", /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ + // "rapl", /* AMD/Hygon RAPL interface */ + // "nonstop_tsc_s3", /* TSC doesn't stop in S3 state */ + // "tsc_known_freq" /* TSC has known frequency */ + // }; + // print_features(ecx, features, 32); + // } + // } // EAX=0x00000006 + + if (eax_max >= 0x00000007) + { + // EAX=0x00000007, ECX=0x00000000: Extended Features + __asm__("mov $0x00000007 , %eax\n\t"); + __asm__("mov $0x00000000 , %ecx\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(ecx_max)); // gives maximum ECX value + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 + { + std::string features[] = { + "fsgsbase", /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ + "tsc_adjust", /* TSC adjustment 
MSR 0x3B */ + "sgx", /* Software Guard Extensions */ + "bmi1", /* 1st group bit manipulation extensions */ + "hle", /* Hardware Lock Elision */ + "avx2", /* AVX2 instructions */ + "", /* "" FPU data pointer updated only on x87 exceptions */ + "smep", /* Supervisor Mode Execution Protection */ + "bmi2", /* 2nd group bit manipulation extensions */ + "erms", /* Enhanced REP MOVSB/STOSB instructions */ + "invpcid", /* Invalidate Processor Context ID */ + "rtm", /* Restricted Transactional Memory */ + "cqm", /* Cache QoS Monitoring */ + "", /* "" Zero out FPU CS and FPU DS */ + "mpx", /* Memory Protection Extension */ + "rdt_a", /* Resource Director Technology Allocation */ + "avx512f", /* AVX-512 Foundation */ + "avx512dq", /* AVX-512 DQ (Double/Quad granular) Instructions */ + "rdseed", /* RDSEED instruction */ + "adx", /* ADCX and ADOX instructions */ + "smap", /* Supervisor Mode Access Prevention */ + "avx512ifma", /* AVX-512 Integer Fused Multiply-Add instructions */ + "pcommit", "clflushopt", /* CLFLUSHOPT instruction */ + "clwb", /* CLWB instruction */ + "intel_pt", /* Intel Processor Trace */ + "avx512pf", /* AVX-512 Prefetch */ + "avx512er", /* AVX-512 Exponential and Reciprocal */ + "avx512cd", /* AVX-512 Conflict Detection */ + "sha_ni", /* SHA1/SHA256 Instruction Extensions */ + "avx512bw", /* AVX-512 BW (Byte/Word granular) Instructions */ + "avx512vl" /* AVX-512 VL (128/256 Vector Length) Extensions */ + }; + print_features(ebx, features, 32); + } + + // Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 + { + std::string features[] = {"prefetchwt1", + "avx512vbmi", /* AVX512 Vector Bit Manipulation instructions*/ + "umip", /* User Mode Instruction Protection */ + "pku", /* Protection Keys for Userspace */ + "ospke", /* OS Protection Keys Enable */ + "waitpkg", /* UMONITOR/UMWAIT/TPAUSE Instructions */ + "avx512vbmi2", /* Additional AVX512 Vector Bit Manipulation Instructions */ + "cetss", + "gfni", /* Galois Field New Instructions */ + 
"vaes", /* Vector AES */ + "vpclmulqdq", /* Carry-Less Multiplication Double Quadword */ + "avx512vnni", /* Vector Neural Network Instructions */ + "avx512bitalg", /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ + "tme", /* Intel Total Memory Encryption */ + "avx512vpopcntdq", /* POPCNT for vectors of DW/QW */ + "", /* Reserved */ + "la57", /* 5-level page tables */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "rdpid", /* RDPID instruction */ + "keylocker", + "bus_lock_detect", /* Bus Lock detect */ + "cldemote", /* CLDEMOTE instruction */ + "", /* Reserved */ + "movdiri", /* MOVDIRI instruction */ + "movdir64b", /* MOVDIR64B instruction */ + "enqcmd", /* ENQCMD and ENQCMDS instructions */ + "sgx_lc", /* Software Guard Extensions Launch Control */ + "pks"}; + print_features(ecx, features, 32); + } + + // Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 + { + std::string features[] = { + "", /* Reserved */ + "", /* Reserved */ + "avx5124vnniw", /* AVX-512 Neural Network Instructions */ + "avx5124fmaps", /* AVX-512 Multiply Accumulation Single precision */ + "fsrm", /* Fast Short Rep Mov */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "avx512vp2intersect", /* AVX-512 Intersect for D/Q */ + "srbds", /* "" SRBDS mitigation MSR available */ + "md_clear", /* VERW clears CPU buffers */ + "", /* "" RTM transaction always aborts */ + "", /* Reserved */ + "", /* "" TSX_FORCE_ABORT */ + "serialize", /* SERIALIZE instruction */ + "", /* "" This part has CPUs of more than one type */ + "tsxldtrk", /* TSX Suspend Load Address Tracking */ + "", /* Reserved */ + "pconfig", /* Intel PCONFIG */ + "arch_lbr", /* Intel ARCH LBR */ + "cet_ibt", + "", /* Reserved */ + "amx-bf16", /* AMX BFLOAT16 Support */ + "avx512fp16", /* AVX512 FP16 */ + "amx-tile", /* AMX tile Support */ + "amx-int8", /* AMX int8 Support */ + "ibrs ibpb", /* "" Speculation Control (IBRS + IBPB) */ + 
"stibp", /* "" Single Thread Indirect Branch Predictors */ + "flush_l1d", /* Flush L1D cache */ + "arch_capabilities", /* IA32_ARCH_CAPABILITIES MSR (Intel) */ + "", /* "" IA32_CORE_CAPABILITIES MSR */ + "ssbd" /* "" Speculative Store Bypass Disable */ + }; + print_features(edx, features, 32); + } + + if (ecx_max >= 0x00000001) + { + // EAX=0x00000007, ECX=0x00000001: Extended Features + __asm__("mov $0x00000007 , %eax\n\t"); + __asm__("mov $0x00000001 , %ecx\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + + // Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 + { + std::string features[] = { + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "avx_vnni", /* AVX VNNI instructions */ + "avx512bf16", /* AVX512 BFLOAT16 instructions */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(eax, features, 32); + } + } // ECX=0x00000001 + } // EAX=0x00000007 + + if (eax_max >= 0x0000000d) + { + // EAX=0x0000000d, ECX=0x00000001: Extended Features + __asm__("mov $0x0000000d , %eax\n\t"); + __asm__("mov $0x00000001 , %ecx\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + + // Intel-defined CPU features, CPUID level 0x0000000d:1 (EAX), word 10 + { + std::string features[] = { + "xsaveopt", /* XSAVEOPT instruction */ + "xsavec", /* XSAVEC instruction */ + "xgetbv1", /* XGETBV with ECX = 1 instruction */ + "xsaves", 
/* XSAVES/XRSTORS instructions */ + "xfd", /* "" eXtended Feature Disabling */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(eax, features, 32); + } + } // EAX=0x0000000d + + // EAX=0x80000000: largest value that EAX can be set to before calling CPUID + __asm__("mov $0x80000000, %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax_max)); + + if (eax_max >= 0x80000001) + { + // EAX=80000001: Processor Info and Feature Bits + __asm__("mov $0x80000001 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // feature flags + + // AMD-defined CPU features, CPUID level 0x80000001 (EDX), word 1 + // Don't duplicate feature flags which are redundant with Intel! 
+ { + std::string features[] = { + "", /* Onboard FPU */ + "", /* Virtual Mode Extensions */ + "", /* Debugging Extensions */ + "", /* Page Size Extensions */ + "", /* Time Stamp Counter */ + "", /* Model-Specific Registers */ + "", /* Physical Address Extensions */ + "", /* Machine Check Exception */ + "", /* CMPXCHG8 instruction */ + "", /* Onboard APIC */ + "", /* Reserved */ + "syscall", /* SYSCALL/SYSRET */ + "", /* Memory Type Range Registers */ + "", /* Page Global Enable */ + "", /* Machine Check Architecture */ + "", /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ + "", /* Page Attribute Table */ + "", /* 36-bit PSEs */ + "", /* Reserved */ + "mp", /* MP Capable */ + "nx", /* Execute Disable */ + "", /* Reserved */ + "mmxext", /* AMD MMX extensions */ + "", /* Multimedia Extensions */ + "", /* FXSAVE/FXRSTOR, CR4.OSFXSR */ + "fxsr_opt", /* FXSAVE/FXRSTOR optimizations */ + "pdpe1gb", /* "pdpe1gb" GB pages */ + "rdtscp", /* RDTSCP */ + "", /* Reserved */ + "lm", /* Long Mode (x86-64, 64-bit support) */ + "3dnowext", /* AMD 3DNow extensions */ + "3dnow" /* 3DNow */ + }; + print_features(edx, features, 32); + } + + // AMD-defined CPU features, CPUID level 0x80000001 (ECX), word 6 + { + std::string features[] = { + "lahf_lm", /* LAHF/SAHF in long mode */ + "cmp_legacy", /* If yes HyperThreading not valid */ + "svm", /* Secure Virtual Machine */ + "extapic", /* Extended APIC space */ + "cr8_legacy", /* CR8 in 32-bit mode */ + "abm", /* Advanced bit manipulation */ + "sse4a", /* SSE-4A */ + "misalignsse", /* Misaligned SSE mode */ + "3dnowprefetch", /* 3DNow prefetch instructions */ + "osvw", /* OS Visible Workaround */ + "ibs", /* Instruction Based Sampling */ + "xop", /* extended AVX instructions */ + "skinit", /* SKINIT/STGI instructions */ + "wdt", /* Watchdog timer */ + "", /* Reserved */ + "lwp", /* Light Weight Profiling */ + "fma4", /* 4 operands MAC instructions */ + "tce", /* Translation Cache Extension */ + "", /* Reserved */ + "nodeid_msr", /* 
NodeId MSR */ + "", /* Reserved */ + "tbm", /* Trailing Bit Manipulations */ + "topoext", /* Topology extensions CPUID leafs */ + "perfctr_core", /* Core performance counter extensions */ + "perfctr_nb", /* NB performance counter extensions */ + "", /* Reserved */ + "bpext", /* Data breakpoint extension */ + "ptsc", /* Performance time-stamp counter */ + "perfctr_l2", /* Last Level Cache performance counter extensions */ + "mwaitx", /* MWAIT extension (MONITORX/MWAITX instructions) */ + "", /* Reserved */ + "" /* Reserved */ + + }; + print_features(ecx, features, 32); + } + } // EAX=0x80000001 + + if (eax_max >= 0x80000007) + { + // EAX=0x80000007: Extended Features + __asm__("mov $0x80000007 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 + { + std::string features[] = { + "overflow_recov", /* MCA overflow recovery support */ + "succor", /* Uncorrectable error containment and recovery */ + "", /* Reserved */ + "smca", /* Scalable MCA */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(ebx, features, 32); + } + } // EAX=0x80000007 + + if (eax_max >= 0x80000008) + { + // 
EAX=0x80000008: Extended Features + __asm__("mov $0x80000008 , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 18 + { + std::string features[] = { + "clzero", /* CLZERO instruction */ + "irperf", /* Instructions Retired Count */ + "xsaveerptr", /* Always save/restore FP error pointers */ + "", /* Reserved */ + "rdpru", /* Read processor register at user level */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "wbnoinvd", /* WBNOINVD instruction */ + "", /* Reserved */ + "", /* Reserved */ + "", /* "" Indirect Branch Prediction Barrier */ + "", /* Reserved */ + "", /* "" Indirect Branch Restricted Speculation */ + "", /* "" Single Thread Indirect Branch Predictors */ + "", /* Reserved */ + "", /* "" Single Thread Indirect Branch Predictors always-on preferred */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "amd_ppin", /* Protected Processor Inventory Number */ + "", /* "" Speculative Store Bypass Disable */ + "virt_ssbd", /* Virtualized Speculative Store Bypass Disable */ + "", /* "" Speculative Store Bypass is fixed in hardware. 
*/ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(ebx, features, 32); + } + } // EAX=0x80000008 + + if (eax_max >= 0x8000000a) + { + // EAX=0x8000000a: Extended Features + __asm__("mov $0x8000000a , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature flags + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // AMD-defined CPU features, CPUID level 0x8000000a (EDX), word 15 + { + std::string features[] = { + "npt", /* Nested Page Table support */ + "lbrv", /* LBR Virtualization support */ + "svm_lock", /* "svm_lock" SVM locking MSR */ + "nrip_save", /* "nrip_save" SVM next_rip save */ + "tsc_scale", /* "tsc_scale" TSC scaling support */ + "vmcb_clean", /* "vmcb_clean" VMCB clean bits support */ + "flushbyasid", /* flush-by-ASID support */ + "decodeassists", /* Decode Assists support */ + "", /* Reserved */ + "", /* Reserved */ + "pausefilter", /* filtered pause intercept */ + "", /* Reserved */ + "pfthreshold", /* pause filter threshold */ + "avic", /* Virtual Interrupt Controller */ + "", /* Reserved */ + "v_vmsave_vmload", /* Virtual VMSAVE VMLOAD */ + "vgif", /* Virtual GIF */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "v_spec_ctrl", /* Virtual SPEC_CTRL */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* "" SVME addr check */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(edx, features, 32); + } + } // EAX=0x8000000a + + if (eax_max >= 0x8000001f) + { + // EAX=0x8000001f: Extended Features + __asm__("mov $0x8000001f , %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t" : "=r"(eax)); // extended feature 
flags + __asm__("mov %%ebx, %0\n\t" : "=r"(ebx)); // extended feature flags + __asm__("mov %%ecx, %0\n\t" : "=r"(ecx)); // extended feature flags + __asm__("mov %%edx, %0\n\t" : "=r"(edx)); // extended feature flags + + // AMD-defined CPU features, CPUID level 0x8000001f (EAX), word 19 + { + std::string features[] = { + "sme", /* AMD Secure Memory Encryption */ + "sev", /* AMD Secure Encrypted Virtualization */ + "", /* "" VM Page Flush MSR is supported */ + "sev_es", /* AMD Secure Encrypted Virtualization - Encrypted State */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* "" AMD hardware-enforced cache coherency */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "", /* Reserved */ + "" /* Reserved */ + }; + print_features(eax, features, 32); + } + } // EAX=0x8000001f + + printf("\n"); +} + +int main() +{ + getVendorID(); + getProcInfo(); + getFeatures(); + return 0; +} diff --git a/src/cmake/OptimizeForArchitecture.cmake b/src/cmake/OptimizeForArchitecture.cmake deleted file mode 100644 index 89a2e9f2be..0000000000 --- a/src/cmake/OptimizeForArchitecture.cmake +++ /dev/null @@ -1,612 +0,0 @@ -# Determine the host CPU feature set and determine the best set of compiler -# flags to enable all supported SIMD relevant features. Alternatively, the -# target CPU can be explicitly selected (for generating more generic binaries -# or for targeting a different system). -# Compilers provide e.g. the -march=native flag to achieve a similar result. 
-# This fails to address the need for building for a different microarchitecture -# than the current host. -# The script tries to deduce all settings from the model and family numbers of -# the CPU instead of reading the CPUID flags from e.g. /proc/cpuinfo. This makes -# the detection more independent from the CPUID code in the kernel (e.g. avx2 is -# not listed on older kernels). -# -# Usage: -# OptimizeForArchitecture() -# If either of Vc_SSE_INTRINSICS_BROKEN, Vc_AVX_INTRINSICS_BROKEN, -# Vc_AVX2_INTRINSICS_BROKEN is defined and set, the OptimizeForArchitecture -# macro will consequently disable the relevant features via compiler flags. -# See https://github.com/VcDevel/Vc/blob/master/cmake/OptimizeForArchitecture.cmake - -#============================================================================= -# Copyright 2010-2016 Matthias Kretz -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the names of contributing organizations nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#============================================================================= - -get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH) -include("${_currentDir}/AddCompilerFlag.cmake") -include(CheckIncludeFileCXX) - -macro(_my_find _list _value _ret) - list(FIND ${_list} "${_value}" _found) - if(_found EQUAL -1) - set(${_ret} FALSE) - else(_found EQUAL -1) - set(${_ret} TRUE) - endif(_found EQUAL -1) -endmacro(_my_find) - -macro(OFA_AutodetectX86) - set(_vendor_id) - set(_cpu_family) - set(_cpu_model) - if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - file(READ "/proc/cpuinfo" _cpuinfo) - string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}") - string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}") - string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}") - string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}") - elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - exec_program("/usr/sbin/sysctl -n machdep.cpu.vendor machdep.cpu.model machdep.cpu.family machdep.cpu.features" OUTPUT_VARIABLE _sysctl_output_string) - string(REPLACE "\n" ";" _sysctl_output ${_sysctl_output_string}) - list(GET _sysctl_output 0 _vendor_id) - list(GET _sysctl_output 1 _cpu_model) - list(GET _sysctl_output 2 _cpu_family) - list(GET _sysctl_output 3 _cpu_flags) - - string(TOLOWER "${_cpu_flags}" _cpu_flags) - 
string(REPLACE "." "_" _cpu_flags "${_cpu_flags}") - elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") - get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE) - get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE) - mark_as_advanced(_vendor_id _cpu_id) - string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}") - string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}") - endif(CMAKE_SYSTEM_NAME STREQUAL "Linux") - if(_vendor_id STREQUAL "GenuineIntel") - if(_cpu_family EQUAL 6) - # taken from the Intel ORM - # http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html - # CPUID Signature Values of Of Recent Intel Microarchitectures - # 4E 5E | Skylake microarchitecture - # 3D 47 56 | Broadwell microarchitecture - # 3C 45 46 3F | Haswell microarchitecture - # 3A 3E | Ivy Bridge microarchitecture - # 2A 2D | Sandy Bridge microarchitecture - # 25 2C 2F | Intel microarchitecture Westmere - # 1A 1E 1F 2E | Intel microarchitecture Nehalem - # 17 1D | Enhanced Intel Core microarchitecture - # 0F | Intel Core microarchitecture - # - # Intel SDM Vol. 3C 35-1 / December 2016: - # 57 | Xeon Phi 3200, 5200, 7200 [Knights Landing] - # 85 | Future Xeon Phi - # 8E 9E | 7th gen. Core [Kaby Lake] - # 55 | Future Xeon [Skylake w/ AVX512] - # 4E 5E | 6th gen. Core / E3 v5 [Skylake w/o AVX512] - # 56 | Xeon D-1500 [Broadwell] - # 4F | Xeon E5 v4, E7 v4, i7-69xx [Broadwell] - # 47 | 5th gen. Core / Xeon E3 v4 [Broadwell] - # 3D | M-5xxx / 5th gen. [Broadwell] - # 3F | Xeon E5 v3, E7 v3, i7-59xx [Haswell-E] - # 3C 45 46 | 4th gen. Core, Xeon E3 v3 [Haswell] - # 3E | Xeon E5 v2, E7 v2, i7-49xx [Ivy Bridge-E] - # 3A | 3rd gen. Core, Xeon E3 v2 [Ivy Bridge] - # 2D | Xeon E5, i7-39xx [Sandy Bridge] - # 2F | Xeon E7 - # 2A | Xeon E3, 2nd gen. 
Core [Sandy Bridge] - # 2E | Xeon 7500, 6500 series - # 25 2C | Xeon 3600, 5600 series, Core i7, i5 and i3 - # - # Values from the Intel SDE: - # 5C | Goldmont - # 5A | Silvermont - # 57 | Knights Landing - # 66 | Cannonlake - # 55 | Skylake Server - # 4E | Skylake Client - # 3C | Broadwell (likely a bug in the SDE) - # 3C | Haswell - if(_cpu_model EQUAL 87) # 57 - set(TARGET_ARCHITECTURE "knl") # Knights Landing - elseif(_cpu_model EQUAL 92) - set(TARGET_ARCHITECTURE "goldmont") - elseif(_cpu_model EQUAL 90 OR _cpu_model EQUAL 76) - set(TARGET_ARCHITECTURE "silvermont") - elseif(_cpu_model EQUAL 102) - set(TARGET_ARCHITECTURE "cannonlake") - elseif(_cpu_model EQUAL 142 OR _cpu_model EQUAL 158) # 8E, 9E - set(TARGET_ARCHITECTURE "kaby-lake") - elseif(_cpu_model EQUAL 85) # 55 - set(TARGET_ARCHITECTURE "skylake-avx512") - elseif(_cpu_model EQUAL 78 OR _cpu_model EQUAL 94 OR _cpu_model EQUAL 165) # 4E, 5E - set(TARGET_ARCHITECTURE "skylake") - elseif(_cpu_model EQUAL 61 OR _cpu_model EQUAL 71 OR _cpu_model EQUAL 79 OR _cpu_model EQUAL 86) # 3D, 47, 4F, 56 - set(TARGET_ARCHITECTURE "broadwell") - elseif(_cpu_model EQUAL 60 OR _cpu_model EQUAL 69 OR _cpu_model EQUAL 70 OR _cpu_model EQUAL 63) - set(TARGET_ARCHITECTURE "haswell") - elseif(_cpu_model EQUAL 58 OR _cpu_model EQUAL 62) - set(TARGET_ARCHITECTURE "ivy-bridge") - elseif(_cpu_model EQUAL 42 OR _cpu_model EQUAL 45) - set(TARGET_ARCHITECTURE "sandy-bridge") - elseif(_cpu_model EQUAL 37 OR _cpu_model EQUAL 44 OR _cpu_model EQUAL 47) - set(TARGET_ARCHITECTURE "westmere") - elseif(_cpu_model EQUAL 26 OR _cpu_model EQUAL 30 OR _cpu_model EQUAL 31 OR _cpu_model EQUAL 46) - set(TARGET_ARCHITECTURE "nehalem") - elseif(_cpu_model EQUAL 23 OR _cpu_model EQUAL 29) - set(TARGET_ARCHITECTURE "penryn") - elseif(_cpu_model EQUAL 15) - set(TARGET_ARCHITECTURE "merom") - elseif(_cpu_model EQUAL 28) - set(TARGET_ARCHITECTURE "atom") - elseif(_cpu_model EQUAL 14) - set(TARGET_ARCHITECTURE "core") - elseif(_cpu_model LESS 14) - 
message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.") - set(TARGET_ARCHITECTURE "generic") - else() - message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.") - set(TARGET_ARCHITECTURE "merom") - endif() - elseif(_cpu_family EQUAL 7) # Itanium (not supported) - message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.") - elseif(_cpu_family EQUAL 15) # NetBurst - list(APPEND _available_vector_units_list "sse" "sse2") - if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead - list(APPEND _available_vector_units_list "sse" "sse2" "sse3") - endif(_cpu_model GREATER 2) - endif(_cpu_family EQUAL 6) - elseif(_vendor_id STREQUAL "AuthenticAMD") - if(_cpu_family EQUAL 23) - set(TARGET_ARCHITECTURE "zen") - elseif(_cpu_family EQUAL 22) # 16h - set(TARGET_ARCHITECTURE "AMD 16h") - elseif(_cpu_family EQUAL 21) # 15h - if(_cpu_model LESS 2) - set(TARGET_ARCHITECTURE "bulldozer") - else() - set(TARGET_ARCHITECTURE "piledriver") - endif() - elseif(_cpu_family EQUAL 20) # 14h - set(TARGET_ARCHITECTURE "AMD 14h") - elseif(_cpu_family EQUAL 18) # 12h - elseif(_cpu_family EQUAL 16) # 10h - set(TARGET_ARCHITECTURE "barcelona") - elseif(_cpu_family EQUAL 15) - set(TARGET_ARCHITECTURE "k8") - if(_cpu_model GREATER 64) # I don't know the right number to put here. 
This is just a guess from the hardware I have access to - set(TARGET_ARCHITECTURE "k8-sse3") - endif(_cpu_model GREATER 64) - endif() - endif(_vendor_id STREQUAL "GenuineIntel") -endmacro() - -macro(OFA_AutodetectArm) - message(WARNING "Architecture auto-detection for CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture.cmake") -endmacro() - -macro(OFA_AutodetectHostArchitecture) - set(TARGET_ARCHITECTURE "generic") - set(Vc_ARCHITECTURE_FLAGS) - if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(x86|AMD64)") - OFA_AutodetectX86() - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)") - OFA_AutodetectArm() - else() - message(FATAL_ERROR "OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") - endif() -endmacro() - -macro(OFA_HandleX86Options) - set(_march_flag_list) - set(_available_vector_units_list) - macro(_nehalem) - list(APPEND _march_flag_list "nehalem") - list(APPEND _march_flag_list "corei7") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2") - endmacro() - macro(_westmere) - list(APPEND _march_flag_list "westmere") - _nehalem() - endmacro() - macro(_sandybridge) - list(APPEND _march_flag_list "sandybridge") - list(APPEND _march_flag_list "corei7-avx") - _westmere() - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx") - endmacro() - macro(_ivybridge) - list(APPEND _march_flag_list "ivybridge") - list(APPEND _march_flag_list "core-avx-i") - _sandybridge() - list(APPEND _available_vector_units_list "rdrnd" "f16c") - endmacro() - macro(_haswell) - list(APPEND _march_flag_list "haswell") - list(APPEND _march_flag_list "core-avx2") - _ivybridge() - list(APPEND _available_vector_units_list "avx2" "fma" "bmi" "bmi2") - endmacro() - macro(_broadwell) - list(APPEND _march_flag_list "broadwell") - _haswell() - endmacro() - macro(_skylake) - 
list(APPEND _march_flag_list "skylake") - _broadwell() - endmacro() - macro(_skylake_avx512) - list(APPEND _march_flag_list "skylake-avx512") - _skylake() - list(APPEND _available_vector_units_list "avx512f" "avx512cd" "avx512dq" "avx512bw" "avx512vl") - endmacro() - macro(_cannonlake) - list(APPEND _march_flag_list "cannonlake") - _skylake_avx512() - list(APPEND _available_vector_units_list "avx512ifma" "avx512vbmi") - endmacro() - macro(_knightslanding) - list(APPEND _march_flag_list "knl") - _broadwell() - list(APPEND _available_vector_units_list "avx512f" "avx512pf" "avx512er" "avx512cd") - endmacro() - macro(_silvermont) - list(APPEND _march_flag_list "silvermont") - _westmere() - list(APPEND _available_vector_units_list "rdrnd") - endmacro() - macro(_goldmont) - list(APPEND _march_flag_list "goldmont") - _silvermont() - endmacro() - - if(TARGET_ARCHITECTURE STREQUAL "core") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3") - elseif(TARGET_ARCHITECTURE STREQUAL "merom") - list(APPEND _march_flag_list "merom") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") - elseif(TARGET_ARCHITECTURE STREQUAL "penryn") - list(APPEND _march_flag_list "penryn") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") - message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.") - if(_cpu_flags MATCHES "sse4_1") - message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)") - list(APPEND _available_vector_units_list "sse4.1") - else() - message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)") - endif() - elseif(TARGET_ARCHITECTURE STREQUAL "knl") - _knightslanding() - elseif(TARGET_ARCHITECTURE STREQUAL "cannonlake") - _cannonlake() - elseif(TARGET_ARCHITECTURE STREQUAL "kaby-lake") - _skylake() - elseif(TARGET_ARCHITECTURE 
STREQUAL "skylake-xeon" OR TARGET_ARCHITECTURE STREQUAL "skylake-avx512") - _skylake_avx512() - elseif(TARGET_ARCHITECTURE STREQUAL "skylake") - _skylake() - elseif(TARGET_ARCHITECTURE STREQUAL "broadwell") - _broadwell() - elseif(TARGET_ARCHITECTURE STREQUAL "haswell") - _haswell() - elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge") - _ivybridge() - elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge") - _sandybridge() - elseif(TARGET_ARCHITECTURE STREQUAL "westmere") - _westmere() - elseif(TARGET_ARCHITECTURE STREQUAL "nehalem") - _nehalem() - elseif(TARGET_ARCHITECTURE STREQUAL "goldmont") - _goldmont() - elseif(TARGET_ARCHITECTURE STREQUAL "silvermont") - _silvermont() - elseif(TARGET_ARCHITECTURE STREQUAL "atom") - list(APPEND _march_flag_list "atom") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") - elseif(TARGET_ARCHITECTURE STREQUAL "k8") - list(APPEND _march_flag_list "k8") - list(APPEND _available_vector_units_list "sse" "sse2") - elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3") - list(APPEND _march_flag_list "k8-sse3") - list(APPEND _march_flag_list "k8") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3") - elseif(TARGET_ARCHITECTURE STREQUAL "AMD 16h") - list(APPEND _march_flag_list "btver2") - list(APPEND _march_flag_list "btver1") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "f16c") - elseif(TARGET_ARCHITECTURE STREQUAL "AMD 14h") - list(APPEND _march_flag_list "btver1") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "zen") - list(APPEND _march_flag_list "znver1") - _skylake() - list(APPEND _available_vector_units_list "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "piledriver") - list(APPEND _march_flag_list "bdver2") - list(APPEND _march_flag_list "bdver1") - list(APPEND _march_flag_list "bulldozer") - list(APPEND _march_flag_list "barcelona") - 
list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4" "fma" "f16c") - elseif(TARGET_ARCHITECTURE STREQUAL "interlagos") - list(APPEND _march_flag_list "bdver1") - list(APPEND _march_flag_list "bulldozer") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4") - elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer") - list(APPEND _march_flag_list "bdver1") - list(APPEND _march_flag_list "bulldozer") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4") - elseif(TARGET_ARCHITECTURE STREQUAL "barcelona") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "istanbul") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "magny-cours") - list(APPEND _march_flag_list "barcelona") - list(APPEND _march_flag_list "core2") - list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") - elseif(TARGET_ARCHITECTURE STREQUAL "generic") - list(APPEND _march_flag_list "generic") - elseif(TARGET_ARCHITECTURE STREQUAL "none") - # add this clause to remove it from the else clause - else(TARGET_ARCHITECTURE STREQUAL "core") - message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". 
Please set TARGET_ARCHITECTURE to a supported value.") - endif(TARGET_ARCHITECTURE STREQUAL "core") - - if(NOT TARGET_ARCHITECTURE STREQUAL "none") - set(_disable_vector_unit_list) - set(_enable_vector_unit_list) - if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN) - message(STATUS "AVX disabled because of old/broken toolchain") - set(_avx_broken true) - set(_avx2_broken true) - set(_fma4_broken true) - set(_xop_broken true) - else() - set(_avx_broken false) - if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN) - message(STATUS "FMA4 disabled because of old/broken toolchain") - set(_fma4_broken true) - else() - set(_fma4_broken false) - endif() - if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN) - message(STATUS "XOP disabled because of old/broken toolchain") - set(_xop_broken true) - else() - set(_xop_broken false) - endif() - if(DEFINED Vc_AVX2_INTRINSICS_BROKEN AND Vc_AVX2_INTRINSICS_BROKEN) - message(STATUS "AVX2 disabled because of old/broken toolchain") - set(_avx2_broken true) - else() - set(_avx2_broken false) - endif() - endif() - - macro(_enable_or_disable _name _flag _documentation _broken) - if(_broken) - set(_found false) - else() - _my_find(_available_vector_units_list "${_flag}" _found) - endif() - set(USE_${_name} ${_found} CACHE BOOL "${documentation}" ${_force}) - mark_as_advanced(USE_${_name}) - if(USE_${_name}) - list(APPEND _enable_vector_unit_list "${_flag}") - else() - list(APPEND _disable_vector_unit_list "${_flag}") - endif() - endmacro() - _enable_or_disable(SSE2 "sse2" "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." false) - _enable_or_disable(SSE3 "sse3" "Use SSE3. If SSE3 instructions are not enabled they will be emulated." false) - _enable_or_disable(SSSE3 "ssse3" "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." false) - _enable_or_disable(SSE4_1 "sse4.1" "Use SSE4.1. 
If SSE4.1 instructions are not enabled they will be emulated." false) - _enable_or_disable(SSE4_2 "sse4.2" "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." false) - _enable_or_disable(SSE4a "sse4a" "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." false) - _enable_or_disable(AVX "avx" "Use AVX. This will all floating-point vector sizes relative to SSE." _avx_broken) - _enable_or_disable(FMA "fma" "Use FMA." _avx_broken) - _enable_or_disable(BMI2 "bmi2" "Use BMI2." _avx_broken) - _enable_or_disable(AVX2 "avx2" "Use AVX2. This will double all of the vector sizes relative to SSE." _avx2_broken) - _enable_or_disable(XOP "xop" "Use XOP." _xop_broken) - _enable_or_disable(FMA4 "fma4" "Use FMA4." _fma4_broken) - _enable_or_disable(AVX512F "avx512f" "Use AVX512F. This will double all floating-point vector sizes relative to AVX2." false) - _enable_or_disable(AVX512VL "avx512vl" "Use AVX512VL. This enables 128- and 256-bit vector length instructions with EVEX coding (improved write-masking & more vector registers)." _avx2_broken) - _enable_or_disable(AVX512PF "avx512pf" "Use AVX512PF. This enables prefetch instructions for gathers and scatters." false) - _enable_or_disable(AVX512ER "avx512er" "Use AVX512ER. This enables exponential and reciprocal instructions." false) - _enable_or_disable(AVX512CD "avx512cd" "Use AVX512CD." false) - _enable_or_disable(AVX512DQ "avx512dq" "Use AVX512DQ." false) - _enable_or_disable(AVX512BW "avx512bw" "Use AVX512BW." false) - _enable_or_disable(AVX512IFMA "avx512ifma" "Use AVX512IFMA." false) - _enable_or_disable(AVX512VBMI "avx512vbmi" "Use AVX512VBMI." 
false) - - if(MSVC) - # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX) - # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010) - _my_find(_enable_vector_unit_list "avx2" _found) - if(_found) - AddCompilerFlag("/arch:AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _found) - endif() - if(NOT _found) - _my_find(_enable_vector_unit_list "avx" _found) - if(_found) - AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _found) - endif() - endif() - if(NOT _found) - _my_find(_enable_vector_unit_list "sse2" _found) - if(_found) - AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - endif() - endif() - foreach(_flag ${_enable_vector_unit_list}) - string(TOUPPER "${_flag}" _flag) - string(REPLACE "." "_" _flag "__${_flag}__") - add_definitions("-D${_flag}") - endforeach(_flag) - elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux) - set(OFA_map_knl "-xMIC-AVX512") - set(OFA_map_cannonlake "-xCORE-AVX512") - set(OFA_map_skylake-avx512 "-xCORE-AVX512") - set(OFA_map_skylake "-xCORE-AVX2") - set(OFA_map_broadwell "-xCORE-AVX2") - set(OFA_map_haswell "-xCORE-AVX2") - set(OFA_map_ivybridge "-xCORE-AVX-I") - set(OFA_map_sandybridge "-xAVX") - set(OFA_map_westmere "-xSSE4.2") - set(OFA_map_nehalem "-xSSE4.2") - set(OFA_map_penryn "-xSSSE3") - set(OFA_map_merom "-xSSSE3") - set(OFA_map_core2 "-xSSE3") - set(_ok FALSE) - foreach(arch ${_march_flag_list}) - if(DEFINED OFA_map_${arch}) - AddCompilerFlag(${OFA_map_${arch}} CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _ok) - if(_ok) - break() - endif() - endif() - endforeach() - if(NOT _ok) - # This is the Intel compiler, so SSE2 is a very reasonable baseline. 
- message(STATUS "Did not recognize the requested architecture flag, falling back to SSE2") - AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - endif() - else() # not MSVC and not ICC => GCC, Clang, Open64 - foreach(_flag ${_march_flag_list}) - AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - if(_good) - break() - endif(_good) - endforeach(_flag) - foreach(_flag ${_enable_vector_unit_list}) - AddCompilerFlag("-m${_flag}" CXX_RESULT _result) - if(_result) - set(_header FALSE) - if(_flag STREQUAL "sse3") - set(_header "pmmintrin.h") - elseif(_flag STREQUAL "ssse3") - set(_header "tmmintrin.h") - elseif(_flag STREQUAL "sse4.1") - set(_header "smmintrin.h") - elseif(_flag STREQUAL "sse4.2") - set(_header "smmintrin.h") - elseif(_flag STREQUAL "sse4a") - set(_header "ammintrin.h") - elseif(_flag STREQUAL "avx") - set(_header "immintrin.h") - elseif(_flag STREQUAL "avx2") - set(_header "immintrin.h") - elseif(_flag STREQUAL "fma4") - set(_header "x86intrin.h") - elseif(_flag STREQUAL "xop") - set(_header "x86intrin.h") - endif() - set(_resultVar "HAVE_${_header}") - string(REPLACE "." "_" _resultVar "${_resultVar}") - if(_header) - CHECK_INCLUDE_FILE_CXX("${_header}" ${_resultVar} "-m${_flag}") - if(NOT ${_resultVar}) - set(_useVar "USE_${_flag}") - string(TOUPPER "${_useVar}" _useVar) - string(REPLACE "." 
"_" _useVar "${_useVar}") - message(STATUS "disabling ${_useVar} because ${_header} is missing") - set(${_useVar} FALSE) - list(APPEND _disable_vector_unit_list "${_flag}") - endif() - endif() - if(NOT _header OR ${_resultVar}) - list(APPEND Vc_ARCHITECTURE_FLAGS "-m${_flag}") - endif() - endif() - endforeach(_flag) - foreach(_flag ${_disable_vector_unit_list}) - AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - endforeach(_flag) - endif() - endif() -endmacro() - -macro(OFA_HandleArmOptions) - option(USE_NEON "Enable use of NEON instructions" ON) - if(USE_NEON) - AddCompilerFlag(-mfloat-abi=softfp CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - AddCompilerFlag(-mfpu=neon CXX_FLAGS Vc_ARCHITECTURE_FLAGS) - endif() -endmacro() - -macro(OptimizeForArchitecture) - if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(x86|AMD64)") - set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. \ -Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. \ -Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. \ -Other supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \ -\"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \ -\"haswell\", \"broadwell\", \"skylake\", \"skylake-xeon\", \"kaby-lake\", \"cannonlake\", \"silvermont\", \ -\"goldmont\", \"knl\" (Knights Landing), \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \ -\"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \ -\"AMD 14h\", \"AMD 16h\", \"zen\".") - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)") - set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. \ -Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. \ -Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. 
\ -Other supported values are: \"none\", \"generic\", TODO...") - else() - message(WARNING "The CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture.cmake") - endif() - set(_force) - if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}") - message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"") - set(_force FORCE) - endif() - set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE) - mark_as_advanced(_last_target_arch) - string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE) - - if(TARGET_ARCHITECTURE STREQUAL "auto") - OFA_AutodetectHostArchitecture() - message(STATUS "Detected Host CPU: ${TARGET_ARCHITECTURE}") - endif() - - if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(x86|AMD64)") - OFA_HandleX86Options() - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)") - OFA_HandleArmOptions() - endif() -endmacro(OptimizeForArchitecture) - diff --git a/src/cmake/darwin_bundle.py b/src/cmake/darwin_bundle.py new file mode 100644 index 0000000000..98791878b6 --- /dev/null +++ b/src/cmake/darwin_bundle.py @@ -0,0 +1,560 @@ +# +# Darwin Bundle +# +# Creates a standalone AliceVision bundle for use in Meshroom for Apple targets +# +# Usage: python3 darwin_bundle.py [-o ] ... 
#

import argparse
import shutil
import subprocess
import sys

from collections import deque
from functools import partial
from multiprocessing import Pool, cpu_count, Manager
from multiprocessing.managers import DictProxy
from pathlib import Path
from re import sub
from typing import Optional

# The rpaths every bundled Mach-O file is given during the fixup step.
TARGET_RPATHS: list[str] = [
    "@executable_path",
    "@executable_path/../lib",
    "@loader_path",
    "@loader_path/../lib",
]

# Dependencies installed under these prefixes are treated as part of the
# operating system and are never copied into the bundle.
SYSTEM_LIBRARY_PREFIXES: tuple[str, ...] = ("/usr/lib/", "/System/")


# Runs a command-line tool and returns everything it wrote to stdout as text.
# `stderr` is handed to subprocess.run unchanged (None inherits our stderr).
def capture_stdout(cmd: list[str], stderr: Optional[int] = None) -> str:
    return subprocess.run(
        cmd, text=True, stdout=subprocess.PIPE, stderr=stderr
    ).stdout


# Runs a command-line tool with all output discarded and returns its exit code.
def run_silently(cmd: list[str]) -> int:
    return subprocess.run(
        cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    ).returncode


# Returns the file that holds the Mach-O image for `macho`.
# For "Foo.framework" this is "Foo.framework/Foo"; any other path is returned as is.
def macho_binary_path(macho: Path) -> Path:
    if "framework" in macho.suffix:
        return macho.joinpath(macho.stem)
    return macho


# Extracts the dependency names from the output of `otool -L`.
# Only indented lines name libraries; the unindented lines are the
# "<file>:" / "<file> (architecture X):" headers. `install_id` is the library's
# own LC_ID_DYLIB name (None for executables and bundles): otool lists it among
# the dependencies, so it is filtered out by value instead of by position.
def parse_otool_deps(output: str, install_id: Optional[str] = None) -> list[Path]:
    deps: list[Path] = []
    seen: set[str] = set()
    for line in output.splitlines():
        if not line[:1].isspace():
            continue
        # Remove the "(compatibility version X, current version Y)" suffix
        name = sub(r"\(compatibility version [^)]+\)", "", line.strip()).strip()
        if not name or name == install_id or name in seen:
            continue
        seen.add(name)  # universal binaries repeat the list once per architecture
        deps.append(Path(name))
    return deps


# Extracts the value of `field` from every `command` load command found in the
# output of `otool -l`, e.g. ("LC_RPATH", "path") or ("LC_ID_DYLIB", "name").
# The values are returned as the exact strings stored in the binary.
def parse_load_command_values(output: str, command: str, field: str) -> list[str]:
    values: list[str] = []
    lines = iter(output.splitlines())
    for line in lines:
        if line.split() != ["cmd", command]:
            continue
        _ = next(lines, None)  # This is "cmdsize XX"
        raw = next(lines, "").strip().removeprefix(field + " ")
        value = sub(r"\s*\(offset \d+\)$", "", raw).strip()
        if value:
            values.append(value)
    return values


# Returns a tuple of
# (1) The name of the Mach-O
# (2) The required dependencies
# (3) The available rpaths
def get_deps_and_rpaths(macho: Path) -> tuple[Path, list[Path], list[Path]]:
    # If Framework, we need to check the inner Mach-O
    binary = macho_binary_path(macho)

    loadCommands = capture_stdout(["otool", "-l", str(binary)])
    installIds = parse_load_command_values(loadCommands, "LC_ID_DYLIB", "name")
    deps = parse_otool_deps(
        capture_stdout(["otool", "-L", str(binary)]),
        installIds[0] if installIds else None,
    )
    rpaths = [
        Path(rpath)
        for rpath in parse_load_command_values(loadCommands, "LC_RPATH", "path")
    ]

    # Return the path we were given, so we still have .framework (not .framework/Mach-O)
    return (macho, deps, rpaths)


# Extracts the architectures of a Mach-O file
def get_archs(path: Path) -> set[str]:
    output = capture_stdout(
        ["lipo", "-archs", str(macho_binary_path(path))], stderr=subprocess.DEVNULL
    )
    return set(output.split())


# True if either side is universal (more than one arch)
# or if both sides share at least one arch
def archs_compatible(lhArchs: set[str], rhArchs: set[str]) -> bool:
    if len(lhArchs) > 1 or len(rhArchs) > 1:
        return True
    return not lhArchs.isdisjoint(rhArchs)


# Checks if lh and rh share the same architecture
def check_arch_match(lh: Path, rh: Path) -> bool:
    return archs_compatible(get_archs(lh), get_archs(rh))


# Checks whether a dependency belongs to the operating system
def is_system_library(dep: Path) -> bool:
    return str(dep).startswith(SYSTEM_LIBRARY_PREFIXES)


# Turns an rpath embedded in `macho` into a real directory.
# @executable_path and @loader_path both stand for the directory that contains
# the file itself (see dyld(1)); other relative rpaths are taken relative to it
# as well. Returns None for an @executable_path rpath found in a file that is
# not an executable, because the executable is unknown in that case.
def expand_rpath(rpath: Path, macho: Path, is_executable: bool) -> Optional[Path]:
    parts = rpath.parts
    if parts and parts[0] == "@executable_path":
        if not is_executable:
            return None
        return macho.parent.joinpath(*parts[1:]).resolve()
    if parts and parts[0] == "@loader_path":
        return macho.parent.joinpath(*parts[1:]).resolve()
    if not rpath.is_absolute():
        return macho.parent.joinpath(rpath).resolve()
    return rpath


# Looks for `relative` below every lookup path and returns the existing
# candidates whose architectures fit the dependant. The lookup paths are
# visited in sorted order so that the first candidate is always the same one.
def find_matching_candidates(
    lookupPaths: set[Path], relative: Path, dependant: Path, dependantArchs: set[str]
) -> list[Path]:
    candidates: list[Path] = []
    for lookupPath in sorted(lookupPaths):
        candidate = lookupPath.joinpath(relative)
        if not candidate.exists():
            continue
        if archs_compatible(dependantArchs, get_archs(candidate)):
            candidates.append(candidate)
        else:
            print(
                f"[ WARN ] Resolved dependency exists, but the architectures do not match: Dependant: {dependant}, resolved dependency: {candidate}."
            )
    return candidates


# Returns a tuple of
# (1) Whether op was successful
# (2) Resolved paths
# (3) Tuple of reasons and unresolved paths (must be empty on success)
# (4) Tuple of conflicting paths per dependency
# (5) The resolved rpaths to pass through to subdependencies
def try_and_match_deps(
    input: tuple[Path, list[Path], list[Path]],
    globalCache,
    additionalLookupPaths: Optional[set[Path]] = None,
) -> tuple[
    bool,
    list[Path],
    list[tuple[list[str], Path]],
    list[tuple[list[Path], Path]],
    set[Path],
]:
    macho, deps, rpaths = input
    possibleReasonsForErr: list[str] = []

    # If the input file has no extension, we assume it is an executable and resolve any @executable_paths
    isExecutable = macho.suffix == ""

    # We properly create paths from @executable_path (if possible), @loader_path and relative paths
    uniqueLookupPaths: set[Path] = set()
    for rpath in rpaths:
        expanded = expand_rpath(rpath, macho, isExecutable)
        if expanded is None:
            possibleReasonsForErr.append(
                f"Could not resolve {rpath}! Input file is not an executable: {macho}."
            )
        else:
            uniqueLookupPaths.add(expanded)
    # Add additional lookup paths from parents
    if additionalLookupPaths:
        uniqueLookupPaths.update(additionalLookupPaths)

    # Queried once: every call of get_archs spawns a lipo process
    machoArchs = get_archs(macho) if deps else set()

    # Try to resolve the required libraries with the available rpaths
    resolvedPaths: list[Path] = []
    unresolvedPaths: list[tuple[list[str], Path]] = []
    conflictingPaths: list[tuple[list[Path], Path]] = []
    for dep in deps:
        # Skip what another input already resolved for a compatible architecture
        if any(
            dep.stem == cachedPath.stem and machoArchs.issubset(cachedArchs)
            for cachedPath, cachedArchs in globalCache.items()
        ):
            continue
        # Filter system libraries
        if is_system_library(dep):
            continue

        isRpathDep = bool(dep.parts) and dep.parts[0] == "@rpath"
        if not isRpathDep and dep.is_absolute():
            if not dep.exists():
                unresolvedPaths.append(
                    (["Absolute path of dependency does not exist!"], dep)
                )
            elif archs_compatible(machoArchs, get_archs(dep)):
                resolvedPaths.append(dep)
            else:
                print(
                    f"[ WARN ] Resolved dependency exists, but the architectures do not match: Dependant: {macho}, resolved dependency: {dep}."
                )
            continue

        # @rpath-prefixed and plain relative names are both searched in the lookup paths
        relative = Path(*dep.parts[1:]) if isRpathDep else dep
        candidates = find_matching_candidates(
            uniqueLookupPaths, relative, macho, machoArchs
        )
        if not candidates:
            if isRpathDep:
                reasons: list[str] = [
                    f"No exctracted rpaths were able to resolve the dependency! Required by: {macho}."
                ]
                if not isExecutable:
                    reasons += possibleReasonsForErr
            else:
                reasons = [
                    "The relative path of the dependency did not resolve to an existing dependency!"
                ]
            unresolvedPaths.append((reasons, dep))
            continue
        if len(candidates) > 1:
            conflictingPaths.append((candidates, dep))
        resolvedPaths.append(candidates[0])

    for resolvedPath in resolvedPaths:
        globalCache[resolvedPath] = get_archs(resolvedPath)
    return (
        len(unresolvedPaths) == 0,
        resolvedPaths,
        unresolvedPaths,
        conflictingPaths,
        uniqueLookupPaths,
    )


# Resolves the dependencies of one input and, breadth first, the dependencies
# of everything that was resolved on the way.
# Returns a tuple of
# (1) Whether everything could be resolved
# (2) All resolved paths
# (3) Tuple of reasons and unresolved paths
# (4) Tuple of conflicting paths per dependency
def traverse_deps_and_resolve(
    input: tuple[Path, list[Path], list[Path]], globalCache
) -> tuple[
    bool, list[Path], list[tuple[list[str], Path]], list[tuple[list[Path], Path]]
]:
    # Initial state
    successTop, resolvedTop, unresolvedTop, conflictingTop, lookupPathsCombined = (
        try_and_match_deps(input, globalCache)
    )

    # Use sets for quick membership tests
    resolvedSet = set(resolvedTop)
    processed: set[Path] = set()  # things we have already pulled deps for
    queue: deque[Path] = deque(resolvedTop)  # things we still need to process

    while queue:
        subDep = queue.popleft()
        if subDep in processed:
            continue
        processed.add(subDep)

        ok, resolved, unresolved, conflicting, uniqueLookupPaths = try_and_match_deps(
            get_deps_and_rpaths(subDep), globalCache, lookupPathsCombined
        )

        # Update overall success flag
        if not ok:
            successTop = False

        # Update lookup paths
        lookupPathsCombined.update(uniqueLookupPaths)

        # Add newly resolved dependencies
        for newDep in resolved:
            if newDep not in resolvedSet:
                resolvedSet.add(newDep)
                resolvedTop.append(newDep)
                queue.append(newDep)

        # Update unresolved and conflicting lists
        unresolvedTop += unresolved
        conflictingTop += conflicting

    return (successTop, resolvedTop, unresolvedTop, conflictingTop)


# Copies a file, a directory or a symlink (together with its target) into
# dst_dir, unless something of that name is already there.
# Returns the destination path.
def copy_safe(src: Path, dst_dir: Path) -> Path:
    dst = dst_dir / src.name

    # is_symlink() also catches a dangling link, which exists() reports as missing
    if dst.exists() or dst.is_symlink():
        return dst

    if src.is_dir():
        _ = shutil.copytree(src, dst, symlinks=True)
    elif src.is_symlink():
        # Resolve the target of the symlink
        target = src.resolve()
        # Copy the target file first
        if not (dst_dir / target.name).exists():
            _ = shutil.copy2(target, dst_dir / target.name)
        # Recreate the symlink in the target dir
        dst.symlink_to(target.name)
    else:
        # Regular file
        _ = shutil.copy2(src, dst)

    return dst


# Replaces the rpaths of a copied Mach-O file (or Framework) with TARGET_RPATHS
# and re-signs it. Returns False if the file could not be signed.
def fixup_macho_with_predefined_rpaths(macho: Path) -> bool:
    # Special case Framework
    machoInner = macho_binary_path(macho)

    # Extract existing rpaths. They are kept as raw strings, because
    # install_name_tool only deletes an rpath that matches character by character.
    existingRpaths = parse_load_command_values(
        capture_stdout(["otool", "-l", str(machoInner)], stderr=subprocess.DEVNULL),
        "LC_RPATH",
        "path",
    )

    # Remove existing rpaths
    for rpath in existingRpaths:
        _ = run_silently(
            ["install_name_tool", "-delete_rpath", rpath, str(machoInner)]
        )

    # Add predefined set of target rpaths
    for rpath in TARGET_RPATHS:
        if run_silently(["install_name_tool", "-add_rpath", rpath, str(machoInner)]):
            print(f"[ WARN ] Could not add rpath {rpath} to {machoInner}.")

    # Re-sign the binary with ad-hoc signature
    # Otherwise it will be terminated by SIGABRT
    if run_silently(["codesign", "--force", "--deep", "--sign", "-", str(macho)]):
        print(f"[ ERROR ] Could not re-sign {macho}.")
        return False

    return True


# Checks with file(1) whether a path is a Mach-O image
def is_macho(path: Path) -> bool:
    return "Mach-O" in capture_stdout(["file", str(path)])


# Splits the input files into (libraries and Frameworks, other Mach-O binaries).
# Inputs that are not Mach-O images appear in neither list.
def classify_inputs(inputFiles: list[Path]) -> tuple[list[Path], list[Path]]:
    libs: list[Path] = []
    bins: list[Path] = []
    for file in inputFiles:
        if "dylib" in file.suffix or "framework" in file.suffix:
            if is_macho(macho_binary_path(file)):
                libs.append(file)
        elif is_macho(file):
            bins.append(file)
    return (libs, bins)


# Returns `path` itself or its closest parent named "*.framework", if any
def enclosing_framework(path: Path) -> Optional[Path]:
    for candidate in (path, *path.parents):
        if candidate.suffix == ".framework":
            return candidate
    return None


# Prints the unresolved and conflicting dependencies of all inputs.
# Returns False if at least one input could not be resolved.
def report_resolution(resolverResultPerInput) -> bool:
    isSuccessful = True
    for result in resolverResultPerInput:
        if not result[0]:
            print("[ ERROR ] An error occured during the resolving process:")
            for unresolved in result[2]:
                print(
                    f"[ ERROR ] \tDependency: {str(unresolved[1])}, failed with: {str(unresolved[0])}"
                )
            isSuccessful = False
        for conflictingDep in result[3]:
            print(
                f"[ WARN ] Multiple paths were found to resolve {str(conflictingDep[1])}:"
            )
            for conflictingPath in conflictingDep[0]:
                print(f"[ WARN ] \tFound suitable: {str(conflictingPath)}")
    return isSuccessful


# Parses the command line and builds the bundle
def entry():
    # Create the parser
    parser = argparse.ArgumentParser(
        description="Make a self-contained AliceVision bundle on Darwin"
    )

    # Optional output directory
    _ = parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=Path.cwd().joinpath("bundle"),
        help="Output directory",
    )

    # Positional arguments: arbitrary number of file paths
    _ = parser.add_argument(
        "input_files", type=Path, nargs="+", help="Input files to process"
    )

    # Parse args
    args = parser.parse_args()

    # Access them (an input that was named twice is only processed once)
    outputDir: Path = args.output
    inputFiles: list[Path] = list(dict.fromkeys(args.input_files))

    # Log Info about input
    print(f"[ INFO ] Placing bundle at: {outputDir}.")
    print("[ INFO ] Attempting for files:")
    for inputFile in inputFiles:
        print("[ INFO ] \t" + str(inputFile))
    print("\n[ INFO ] ### Patience... ###\n")

    # Get all libraries, Frameworks and binaries
    inputMachOLibs, inputMachOBins = classify_inputs(inputFiles)

    # Create concurrent interpreters; both are shut down when the block is left
    with Manager() as manager, Pool(cpu_count()) as concurrentPool:
        print(
            "[ INFO ] (1 / 5) Looking up required dependencies and embedded rpaths..."
        )

        # Extract required dependencies and rpaths
        depsAndRpathsPerInput: list[tuple[Path, list[Path], list[Path]]] = (
            concurrentPool.map(get_deps_and_rpaths, inputMachOLibs + inputMachOBins)
        )

        print("[ INFO ] (2 / 5) Attempting to resolve all dependencies...")

        # Resolve per input and subdependency
        globalCache: DictProxy[Path, set[str]] = manager.dict()
        resolveFunc = partial(traverse_deps_and_resolve, globalCache=globalCache)
        resolverResultPerInput = concurrentPool.map(resolveFunc, depsAndRpathsPerInput)

        # Check for Results
        if not report_resolution(resolverResultPerInput):
            print("[ ERROR ] Errors occured! Refusing to build bundle.")
            sys.exit(-1)

        print("[ INFO ] (3 / 5) Making bundle structure...")

        # Create the output directory
        shutil.rmtree(outputDir, ignore_errors=True)
        outputDir.mkdir(parents=True, exist_ok=True)
        (outputDir / "lib").mkdir(parents=True, exist_ok=True)
        (outputDir / "bin").mkdir(parents=True, exist_ok=True)

        # Copy input files
        # Determine if they are dylibs/Frameworks or executables
        inputBins: set[Path] = {file for file in inputFiles if file.suffix == ""}
        inputLibs: set[Path] = {file for file in inputFiles if file.suffix != ""}

        # Copy bins
        dstBin = partial(copy_safe, dst_dir=outputDir / "bin")
        destBins = set(concurrentPool.map(dstBin, inputBins))

        # Copy libs
        dstLib = partial(copy_safe, dst_dir=outputDir / "lib")
        destLibs = set(concurrentPool.map(dstLib, inputLibs))

        print("[ INFO ] (4 / 5) Copying required files...")

        # Create set for files to copy
        filesToCopy: set[Path] = set()
        for result in resolverResultPerInput:
            for resolvedPath in result[1]:
                # A file without suffix is the inner binary of a Framework:
                # we want to copy the actual .framework folder.
                framework = (
                    enclosing_framework(resolvedPath)
                    if resolvedPath.suffix == ""
                    else None
                )
                filesToCopy.add(framework if framework else resolvedPath)

        # Copy all into new bundle
        dstResolvedLibs = set(concurrentPool.map(dstLib, filesToCopy))

        # Create destination list. Symlinks are replaced by their target, so
        # that no file is modified by two workers at the same time.
        allDstFiles = sorted(
            {
                dstFile.resolve()
                for dstFile in dstResolvedLibs.union(destLibs).union(destBins)
            }
        )

        print("[ INFO ] (5 / 5) Fixing up copied files...")

        # Fixup all destination files
        successList: list[bool] = concurrentPool.map(
            fixup_macho_with_predefined_rpaths, allDstFiles
        )

    # Done
    if False in successList:
        print("[ ERROR ] Errors occured during fixup. Bundle will be unfunctional.")
        sys.exit(-1)
    else:
        print(
            f"\n[ INFO ] ### Successfully created self-contained bundle at {outputDir.resolve()}. ###"
        )


# Only launch when called directly
if __name__ == "__main__":
    entry()
b/src/nonFree/sift/CMakeLists.txt @@ -44,7 +44,10 @@ set(FEATS_H set_source_files_properties(${FEATS} ${FEATS_H} PROPERTIES LANGUAGE C) set_source_files_properties(${FEATS_H} PROPERTIES HEADER_FILE_ONLY TRUE) -set(SIMD_DEFINITIONS "-DVL_DISABLE_AVX") +set(SIMD_DEFINITIONS) +if (NOT ALICEVISION_HAVE_AVX) + list(APPEND SIMD_DEFINITIONS "-DVL_DISABLE_AVX") +endif() if (NOT ALICEVISION_HAVE_SSE) list(APPEND SIMD_DEFINITIONS "-DVL_DISABLE_SSE2") endif() @@ -52,6 +55,8 @@ endif() alicevision_add_library(vlsift SOURCES ${FEATS} ${FEATS_H} + PRIVATE_LINKS + ${ALICEVISION_OPENMP_C_TARGETS} PUBLIC_DEFINITIONS ${SIMD_DEFINITIONS} PRIVATE_DEFINITIONS diff --git a/src/software/utils/CMakeLists.txt b/src/software/utils/CMakeLists.txt index c6d9ed9161..2e9527b0ba 100644 --- a/src/software/utils/CMakeLists.txt +++ b/src/software/utils/CMakeLists.txt @@ -615,7 +615,6 @@ if (ALICEVISION_BUILD_MVS) aliceVision_cmdline aliceVision_mvsData aliceVision_mvsUtils - aliceVision_depthMap aliceVision_sfmData aliceVision_sfmDataIO Boost::program_options