Skip to content

Commit 7b65d53

Browse files
authored
ML-KEM: Import AArch64 backend from mlkem-native (#2498)
The ML-KEM implementation in AWS-LC is imported from mlkem-native. mlkem-native comes in a "C-only" version, but also offers AArch64 and x86_64 backends for (a) arithmetic, and (b) FIPS-202. Currently, only the "C-only" version is imported into AWS-LC. This commit extends the mlkem-native->AWS-LC import to include the AArch64 arithmetic backend.
1 parent 0beb210 commit 7b65d53

22 files changed

+3890
-7
lines changed

crypto/fipsmodule/CMakeLists.txt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,32 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR
295295

296296
endif()
297297

298+
299+
# mlkem-native assembly files are only built for aarch64 on Unix platforms here;
# the x86_64 arithmetic backend of mlkem-native is not imported yet.
300+
if((ARCH STREQUAL "aarch64") AND UNIX)
301+
302+
# Set the source directory of the imported mlkem-native tree
303+
set(MLKEM_NATIVE_DIR "${AWSLC_SOURCE_DIR}/crypto/fipsmodule/ml_kem")
304+
305+
# Explicit list of the AArch64 arithmetic backend assembly files
set(MLKEM_NATIVE_AARCH64_ASM_SOURCES
306+
307+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/intt.S
308+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/ntt.S
309+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/poly_mulcache_compute_asm.S
310+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/poly_reduce_asm.S
311+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/poly_tobytes_asm.S
312+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/poly_tomont_asm.S
313+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S
314+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S
315+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S
316+
${MLKEM_NATIVE_DIR}/mlkem/native/aarch64/src/rej_uniform_asm.S
317+
)
318+
319+
# Append the files to the BCM assembly source list
list(APPEND BCM_ASM_SOURCES ${MLKEM_NATIVE_AARCH64_ASM_SOURCES})
320+
321+
endif()
322+
323+
298324
if(FIPS_DELOCATE)
299325
if(FIPS_SHARED)
300326
message(FATAL_ERROR "Can't set both delocate and shared mode for FIPS build")

crypto/fipsmodule/ml_kem/META.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: mlkem-native
22
source: pq-code-package/mlkem-native.git
3-
branch: v1.0.0
4-
commit: 048fc2a7a7b4ba0ad4c989c1ac82491aa94d5bfa
5-
imported-at: 2025-06-04T13:38:24+0100
3+
branch: main
4+
commit: a67a02ee3fa05713ae01572efce741caa63285e6
5+
imported-at: 2025-06-25T12:11:38+0100

crypto/fipsmodule/ml_kem/importer.sh

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,18 @@ popd
7272

7373
echo "Pull source code from remote repository..."
7474

75-
# Copy mlkem-native source tree -- C-only, no FIPS-202
75+
# Copy mlkem-native source tree -- C source
7676
mkdir $SRC
7777
cp $TMP/mlkem/src/* $SRC
7878

79+
# Copy AArch64 backend
80+
mkdir -p $SRC/native/aarch64/src
81+
# Backend API and specification assumed by mlkem-native frontend
82+
cp $TMP/mlkem/src/native/api.h $SRC/native
83+
# Copy AArch64 backend implementation
84+
cp $TMP/mlkem/src/native/aarch64/meta.h $SRC/native/aarch64
85+
cp $TMP/mlkem/src/native/aarch64/src/* $SRC/native/aarch64/src
86+
7987
# We use the custom `mlkem_native_config.h`, so can remove the default one
8088
rm $SRC/config.h
8189

@@ -84,11 +92,11 @@ cp $TMP/.clang-format $SRC
8492

8593
# Copy and statically simplify BCM file
8694
# The static simplification is not necessary, but improves readability
87-
# by removing directives related to native backends that are irrelevant
88-
# for the C-only import.
95+
# by removing directives related to the FIPS-202 backend and the x86_64
96+
# arithmetic backend that are not yet imported.
8997
unifdef -DMLK_CONFIG_FIPS202_CUSTOM_HEADER \
90-
-UMLK_CONFIG_USE_NATIVE_BACKEND_ARITH \
9198
-UMLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 \
99+
-UMLK_SYS_X86_64 \
92100
$TMP/mlkem/mlkem_native.c \
93101
> $SRC/mlkem_native_bcm.c
94102

@@ -107,9 +115,30 @@ if [[ "$(uname)" == "Darwin" ]]; then
107115
else
108116
SED_I=(-i)
109117
fi
118+
110119
echo "Fixup include paths"
111120
sed "${SED_I[@]}" 's/#include "src\/\([^"]*\)"/#include "\1"/' $SRC/mlkem_native_bcm.c
112121

122+
# Post-process every imported AArch64 assembly file in place:
#   1. join backslash-continued lines and resolve backend conditionals with unifdef,
#   2. swap the common.h include for _internal_s2n_bignum.h,
#   3. rewrite the MLK_ASM_* symbol macros to the S2N_BN_* macros with an mlkem_ prefix.
echo "Fixup AArch64 assembly backend to use s2n-bignum macros"
123+
for file in $SRC/native/aarch64/src/*.S; do
124+
echo "Processing $file"
125+
tmp_file=$(mktemp)
126+
127+
# Flatten multiline preprocessor directives, then process with unifdef
# (the sed program slurps the whole file, then replaces each backslash-newline
# pair with a space; unifdef then evaluates the three macros fixed by -D/-U).
128+
sed -e ':a' -e 'N' -e '$!ba' -e 's/\\\n/ /g' "$file" | \
129+
unifdef -DMLK_ARITH_BACKEND_AARCH64 -UMLK_CONFIG_MULTILEVEL_NO_SHARED -DMLK_CONFIG_MULTILEVEL_WITH_SHARED > "$tmp_file"
130+
# NOTE(review): this replaces the original with the mktemp-created file, so the
# result presumably carries the temp file's permissions, not the original's -- confirm.
mv "$tmp_file" "$file"
131+
132+
# Replace common.h include and assembly macros
133+
sed "${SED_I[@]}" 's/#include "\.\.\/\.\.\/\.\.\/common\.h"/#include "_internal_s2n_bignum.h"/' "$file"
134+
135+
# Extract <name> from the `.global MLK_ASM_NAMESPACE(<name>)` directive.
# NOTE(review): grep -o prints one line per match, so the substitutions below
# assume each file exports exactly one such symbol -- confirm.
func_name=$(grep -o '\.global MLK_ASM_NAMESPACE(\([^)]*\))' "$file" | sed 's/\.global MLK_ASM_NAMESPACE(\([^)]*\))/\1/')
136+
# Files without a `.global MLK_ASM_NAMESPACE(...)` directive are left untouched.
if [ -n "$func_name" ]; then
137+
# Replace the .global directive with the visibility and privacy directives.
# NOTE(review): `\n` in a sed replacement is not specified by POSIX; the Darwin
# handling of SED_I implies BSD sed is a target, where it may not emit a
# newline -- verify on macOS.
sed "${SED_I[@]}" "s/\.global MLK_ASM_NAMESPACE($func_name)/ S2N_BN_SYM_VISIBILITY_DIRECTIVE(mlkem_$func_name)\n S2N_BN_SYM_PRIVACY_DIRECTIVE(mlkem_$func_name)/" "$file"
138+
# Replace the function symbol macro with an S2N_BN_SYMBOL label (note the added colon).
sed "${SED_I[@]}" "s/MLK_ASM_FN_SYMBOL($func_name)/S2N_BN_SYMBOL(mlkem_$func_name):/" "$file"
139+
fi
140+
done
141+
113142
echo "Remove temporary artifacts ..."
114143
rm -rf $TMP
115144

crypto/fipsmodule/ml_kem/mlkem/mlkem_native_bcm.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
/*
77
* WARNING: This file is auto-generated from scripts/autogen
8+
* in the mlkem-native repository.
89
* Do not modify it directly.
910
*/
1011

@@ -68,6 +69,12 @@
6869
#include "verify.c"
6970

7071

72+
#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_ARITH)
73+
#if defined(MLK_SYS_AARCH64)
74+
#include "native/aarch64/src/aarch64_zetas.c"
75+
#include "native/aarch64/src/rej_uniform_table.c"
76+
#endif
77+
#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_ARITH */
7178

7279

7380
/* Macro #undef's
@@ -310,4 +317,47 @@
310317
#undef __loop__
311318

312319

320+
#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_ARITH)
321+
/* mlkem/src/native/api.h */
322+
#undef MLK_INVNTT_BOUND
323+
#undef MLK_NATIVE_API_H
324+
#undef MLK_NTT_BOUND
325+
/* mlkem/src/native/meta.h */
326+
#undef MLK_NATIVE_META_H
327+
#if defined(MLK_SYS_AARCH64)
328+
/*
329+
* Undefine macros from native code (Arith, AArch64)
330+
*/
331+
/* mlkem/src/native/aarch64/meta.h */
332+
#undef MLK_ARITH_BACKEND_AARCH64
333+
#undef MLK_NATIVE_AARCH64_META_H
334+
#undef MLK_USE_NATIVE_INTT
335+
#undef MLK_USE_NATIVE_NTT
336+
#undef MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED
337+
#undef MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE
338+
#undef MLK_USE_NATIVE_POLY_REDUCE
339+
#undef MLK_USE_NATIVE_POLY_TOBYTES
340+
#undef MLK_USE_NATIVE_POLY_TOMONT
341+
#undef MLK_USE_NATIVE_REJ_UNIFORM
342+
/* mlkem/src/native/aarch64/src/arith_native_aarch64.h */
343+
#undef MLK_NATIVE_AARCH64_SRC_ARITH_NATIVE_AARCH64_H
344+
#undef mlk_aarch64_invntt_zetas_layer12345
345+
#undef mlk_aarch64_invntt_zetas_layer67
346+
#undef mlk_aarch64_ntt_zetas_layer12345
347+
#undef mlk_aarch64_ntt_zetas_layer67
348+
#undef mlk_aarch64_zetas_mulcache_native
349+
#undef mlk_aarch64_zetas_mulcache_twisted_native
350+
#undef mlk_intt_asm
351+
#undef mlk_ntt_asm
352+
#undef mlk_poly_mulcache_compute_asm
353+
#undef mlk_poly_reduce_asm
354+
#undef mlk_poly_tobytes_asm
355+
#undef mlk_poly_tomont_asm
356+
#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k2
357+
#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k3
358+
#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k4
359+
#undef mlk_rej_uniform_asm
360+
#undef mlk_rej_uniform_table
361+
#endif /* MLK_SYS_AARCH64 */
362+
#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_ARITH */
313363
#endif /* !MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS */
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Copyright (c) The mlkem-native project authors
3+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4+
*/
5+
6+
/*
 * Metadata header of the AArch64 arithmetic backend: declares which
 * primitives the backend replaces and provides inline C wrappers that
 * forward each mlk_*_native call to the corresponding mlk_*_asm routine.
 */
#ifndef MLK_NATIVE_AARCH64_META_H
7+
#define MLK_NATIVE_AARCH64_META_H
8+
9+
/* Set of primitives that this backend replaces */
10+
#define MLK_USE_NATIVE_NTT
11+
#define MLK_USE_NATIVE_INTT
12+
#define MLK_USE_NATIVE_POLY_REDUCE
13+
#define MLK_USE_NATIVE_POLY_TOMONT
14+
#define MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE
15+
#define MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED
16+
#define MLK_USE_NATIVE_POLY_TOBYTES
17+
#define MLK_USE_NATIVE_REJ_UNIFORM
18+
19+
/* Identifier for this backend so that source and assembly files
20+
* in the build can be appropriately guarded. */
21+
#define MLK_ARITH_BACKEND_AARCH64
22+
23+
24+
/* Everything below is C-only: it is skipped when this header is processed
 * with __ASSEMBLER__ defined, leaving only the macros above visible. */
#if !defined(__ASSEMBLER__)
25+
#include "src/arith_native_aarch64.h"
26+
27+
/* Forwards to mlk_ntt_asm, passing data together with
 * mlk_aarch64_ntt_zetas_layer12345 and mlk_aarch64_ntt_zetas_layer67. */
static MLK_INLINE void mlk_ntt_native(int16_t data[MLKEM_N])
28+
{
29+
mlk_ntt_asm(data, mlk_aarch64_ntt_zetas_layer12345,
30+
mlk_aarch64_ntt_zetas_layer67);
31+
}
32+
33+
/* Forwards to mlk_intt_asm, passing data together with
 * mlk_aarch64_invntt_zetas_layer12345 and mlk_aarch64_invntt_zetas_layer67. */
static MLK_INLINE void mlk_intt_native(int16_t data[MLKEM_N])
34+
{
35+
mlk_intt_asm(data, mlk_aarch64_invntt_zetas_layer12345,
36+
mlk_aarch64_invntt_zetas_layer67);
37+
}
38+
39+
/* Forwards to mlk_poly_reduce_asm on the same buffer. */
static MLK_INLINE void mlk_poly_reduce_native(int16_t data[MLKEM_N])
40+
{
41+
mlk_poly_reduce_asm(data);
42+
}
43+
44+
/* Forwards to mlk_poly_tomont_asm on the same buffer. */
static MLK_INLINE void mlk_poly_tomont_native(int16_t data[MLKEM_N])
45+
{
46+
mlk_poly_tomont_asm(data);
47+
}
48+
49+
/* Forwards to mlk_poly_mulcache_compute_asm with x, y and the two
 * mlk_aarch64_zetas_mulcache_* symbols; x has MLKEM_N / 2 entries,
 * y has MLKEM_N entries and is const. */
static MLK_INLINE void mlk_poly_mulcache_compute_native(
50+
int16_t x[MLKEM_N / 2], const int16_t y[MLKEM_N])
51+
{
52+
mlk_poly_mulcache_compute_asm(x, y, mlk_aarch64_zetas_mulcache_native,
53+
mlk_aarch64_zetas_mulcache_twisted_native);
54+
}
55+
56+
/* The three wrappers below differ only in the vector length (2, 3, 4).
 * Each is compiled when MLK_CONFIG_MULTILEVEL_WITH_SHARED is defined or
 * MLKEM_K equals that length, and forwards its arguments unchanged to
 * the matching _asm_k2/_k3/_k4 routine. */
#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 2
57+
static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k2_native(
58+
int16_t r[MLKEM_N], const int16_t a[2 * MLKEM_N],
59+
const int16_t b[2 * MLKEM_N], const int16_t b_cache[2 * (MLKEM_N / 2)])
60+
{
61+
mlk_polyvec_basemul_acc_montgomery_cached_asm_k2(r, a, b, b_cache);
62+
}
63+
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 */
64+
65+
#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 3
66+
static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k3_native(
67+
int16_t r[MLKEM_N], const int16_t a[3 * MLKEM_N],
68+
const int16_t b[3 * MLKEM_N], const int16_t b_cache[3 * (MLKEM_N / 2)])
69+
{
70+
mlk_polyvec_basemul_acc_montgomery_cached_asm_k3(r, a, b, b_cache);
71+
}
72+
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 3 */
73+
74+
#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4
75+
static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k4_native(
76+
int16_t r[MLKEM_N], const int16_t a[4 * MLKEM_N],
77+
const int16_t b[4 * MLKEM_N], const int16_t b_cache[4 * (MLKEM_N / 2)])
78+
{
79+
mlk_polyvec_basemul_acc_montgomery_cached_asm_k4(r, a, b, b_cache);
80+
}
81+
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
82+
83+
/* Forwards to mlk_poly_tobytes_asm; r has MLKEM_POLYBYTES bytes,
 * a has MLKEM_N entries and is const. */
static MLK_INLINE void mlk_poly_tobytes_native(uint8_t r[MLKEM_POLYBYTES],
84+
const int16_t a[MLKEM_N])
85+
{
86+
mlk_poly_tobytes_asm(r, a);
87+
}
88+
89+
/* Returns -1 without touching r unless len == MLKEM_N and buflen is a
 * multiple of 24; otherwise forwards to mlk_rej_uniform_asm (with
 * mlk_rej_uniform_table) and returns its result cast to int.
 * NOTE(review): -1 presumably tells the caller to use the C fallback --
 * confirm against native/api.h. */
static MLK_INLINE int mlk_rej_uniform_native(int16_t *r, unsigned len,
90+
const uint8_t *buf,
91+
unsigned buflen)
92+
{
93+
if (len != MLKEM_N || buflen % 24 != 0)
94+
{
95+
return -1;
96+
}
97+
return (int)mlk_rej_uniform_asm(r, buf, buflen, mlk_rej_uniform_table);
98+
}
99+
#endif /* !__ASSEMBLER__ */
100+
101+
#endif /* !MLK_NATIVE_AARCH64_META_H */

0 commit comments

Comments
 (0)