From 3e608b5495b7b7746bc4f31e5092bdb71e1c5ec9 Mon Sep 17 00:00:00 2001
From: Mergen Nachin
Date: Mon, 8 Sep 2025 11:29:07 -0400
Subject: [PATCH] Fix aarch64/arm64 Linux wheel build failure due to missing libatomic linking

Problem

The tokenizers library fails to build on aarch64 Linux systems during CI with:

    /opt/rh/gcc-toolset-13/root/usr/libexec/gcc/aarch64-redhat-linux/13/ld: cannot find -latomic: No such file or directory

Root Cause

- On aarch64, certain atomic operations require explicit linking with
  libatomic
- The sentencepiece dependency detects libatomic
  (Found atomic: /usr/lib64/libatomic.so.1) but incorrectly adds just the
  string "atomic" to link flags instead of the actual library path
- This causes the linker to fail when building the Python extension

Solution

Added proper libatomic detection and linking for aarch64/arm64 systems in
two places:

1. For the tokenizers static library
2. For the pytorch_tokenizers_cpp Python extension module

The fix uses CMake's find_library to locate libatomic and explicitly links
it when building on aarch64/arm64 architectures.
---
 CMakeLists.txt | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5fec1ea..acae029 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -94,6 +94,15 @@ target_include_directories(
 )
 target_link_libraries(tokenizers PUBLIC sentencepiece-static re2::re2)
 
+# Link with libatomic on aarch64/arm64: some toolchains there require it for
+# certain atomic operations. Skipped on Apple, which normally has no libatomic.
+if(NOT APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
+  find_library(ATOMIC_LIB NAMES atomic libatomic.so libatomic.so.1)
+  if(ATOMIC_LIB)
+    target_link_libraries(tokenizers PUBLIC "${ATOMIC_LIB}")
+  endif()
+endif()
+
 if(SUPPORT_REGEX_LOOKAHEAD)
   set(PCRE2_STATIC_PIC ON)
   set(PCRE2_BUILD_PCRE2_8 ON)
@@ -160,6 +169,18 @@ if(TOKENIZERS_BUILD_PYTHON)
   # Link with the tokenizers library
   target_link_libraries(pytorch_tokenizers_cpp PRIVATE tokenizers)
 
+  # Link the extension with libatomic on aarch64/arm64 as well (the lookup is
+  # cached in ATOMIC_LIB). Skipped on Apple, which normally has no libatomic.
+  if(NOT APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
+    find_library(ATOMIC_LIB NAMES atomic libatomic.so libatomic.so.1)
+    if(ATOMIC_LIB)
+      target_link_libraries(pytorch_tokenizers_cpp PRIVATE "${ATOMIC_LIB}")
+      message(STATUS "Found libatomic: ${ATOMIC_LIB}")
+    else()
+      message(WARNING "libatomic not found on aarch64 - build may fail")
+    endif()
+  endif()
+
   # Set properties for the Python extension
   target_compile_definitions(pytorch_tokenizers_cpp
                              PRIVATE VERSION_INFO=${PROJECT_VERSION})