From bd3f1a2931bd67da4f6fdf943379ab32690f6db1 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 1 Aug 2025 14:48:57 -0700 Subject: [PATCH 01/34] Remove dynamic dispatching, update names to match BoringSSL --- crypto/fipsmodule/bn/asm/x86_64-mont5.pl | 86 +++++++----------------- 1 file changed, 26 insertions(+), 60 deletions(-) diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index c108f3a13b..4508992e68 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -54,7 +54,7 @@ $addx = 1; for (@ARGV) { $addx = 0 if (/-DMY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX/); } -# int bn_mul_mont_gather5( +# int bn_mul_mont_gather5_nohw( $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, $bp="%rdx"; # const BN_ULONG *bp, @@ -76,31 +76,15 @@ $code=<<___; .text -.extern OPENSSL_ia32cap_P - -.globl bn_mul_mont_gather5 -.type bn_mul_mont_gather5,\@function,6 +.globl bn_mul_mont_gather5_nohw +.type bn_mul_mont_gather5_nohw,\@function,6 .align 64 -bn_mul_mont_gather5: +bn_mul_mont_gather5_nohw: .cfi_startproc _CET_ENDBR mov ${num}d,${num}d mov %rsp,%rax .cfi_def_cfa_register %rax - test \$7,${num}d - jnz .Lmul_enter -___ -$code.=<<___ if ($addx); -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq OPENSSL_ia32cap_P(%rip),%r11 - mov 8(%r11),%r11d -#endif -___ -$code.=<<___; - jmp .Lmul4x_enter - -.align 16 -.Lmul_enter: movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument push %rbx .cfi_push %rbx @@ -460,29 +444,21 @@ .Lmul_epilogue: ret .cfi_endproc -.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.size bn_mul_mont_gather5_nohw,.-bn_mul_mont_gather5_nohw ___ {{{ my @A=("%r10","%r11"); my @N=("%r13","%rdi"); $code.=<<___; +.global bn_mul4x_mont_gather5 .type bn_mul4x_mont_gather5,\@function,6 .align 32 bn_mul4x_mont_gather5: .cfi_startproc + _CET_ENDBR .byte 0x67 mov %rsp,%rax .cfi_def_cfa_register %rax -.Lmul4x_enter: -___ -$code.=<<___ if ($addx); -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - and \$0x80108,%r11d - cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 - je .Lmulx4x_enter -#endif -___ -$code.=<<___; push %rbx .cfi_push %rbx push %rbp @@ -1087,7 +1063,7 @@ }}} {{{ ###################################################################### -# void bn_power5( +# void bn_power5_nohw( my $rptr="%rdi"; # BN_ULONG *rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # const BN_ULONG *table, @@ -1102,25 +1078,14 @@ my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); $code.=<<___; -.globl bn_power5 -.type bn_power5,\@function,6 +.globl bn_power5_nohw +.type bn_power5_nohw,\@function,6 .align 32 -bn_power5: +bn_power5_nohw: .cfi_startproc _CET_ENDBR mov %rsp,%rax .cfi_def_cfa_register %rax -___ -$code.=<<___ if ($addx); -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq OPENSSL_ia32cap_P(%rip),%r11 - mov 8(%r11),%r11d - and \$0x80108,%r11d - cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 - je .Lpowerx5_enter -#endif -___ -$code.=<<___; push %rbx .cfi_push %rbx push %rbp @@ -1243,7 +1208,7 @@ .Lpower5_epilogue: ret .cfi_endproc -.size bn_power5,.-bn_power5 +.size bn_power5_nohw,.-bn_power5_nohw .globl bn_sqr8x_internal .hidden bn_sqr8x_internal @@ -2118,14 +2083,14 @@ my $bp="%rdx"; # restore original value $code.=<<___; -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX +.global bn_mulx4x_mont_gather5 .type bn_mulx4x_mont_gather5,\@function,6 .align 32 bn_mulx4x_mont_gather5: .cfi_startproc + _CET_ENDBR mov %rsp,%rax .cfi_def_cfa_register %rax -.Lmulx4x_enter: push %rbx .cfi_push %rbx push %rbp @@ -2594,7 +2559,7 @@ ___ } { ###################################################################### -# void bn_power5( +# void bn_powerx5( my $rptr="%rdi"; # BN_ULONG *rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # const BN_ULONG *table, @@ -2609,13 +2574,14 @@ my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); $code.=<<___; +.global bn_powerx5 .type bn_powerx5,\@function,6 .align 32 bn_powerx5: .cfi_startproc + _CET_ENDBR mov %rsp,%rax .cfi_def_cfa_register %rax -.Lpowerx5_enter: push %rbx .cfi_push %rbx push %rbp @@ -3717,17 +3683,17 @@ .section .pdata .align 4 - .rva .LSEH_begin_bn_mul_mont_gather5 - .rva .LSEH_end_bn_mul_mont_gather5 - .rva .LSEH_info_bn_mul_mont_gather5 + .rva .LSEH_begin_bn_mul_mont_gather5_nohw + .rva .LSEH_end_bn_mul_mont_gather5_nohw + .rva .LSEH_info_bn_mul_mont_gather5_nohw .rva .LSEH_begin_bn_mul4x_mont_gather5 .rva .LSEH_end_bn_mul4x_mont_gather5 .rva .LSEH_info_bn_mul4x_mont_gather5 - .rva .LSEH_begin_bn_power5 - .rva .LSEH_end_bn_power5 - .rva .LSEH_info_bn_power5 + .rva .LSEH_begin_bn_power5_nohw + .rva .LSEH_end_bn_power5_nohw + .rva .LSEH_info_bn_power5_nohw ___ $code.=<<___ if ($addx); #ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX @@ -3747,17 +3713,17 @@ .section .xdata .align 8 -.LSEH_info_bn_mul_mont_gather5: +.LSEH_info_bn_mul_mont_gather5_nohw: .byte 9,0,0,0 .rva mul_handler .rva .Lmul_body,.Lmul_body,.Lmul_epilogue # HandlerData[] .align 8 -.LSEH_info_bn_mul4x_mont_gather5: +.LSEH_info_bn_mul4x_mont_gather5_nohw: .byte 9,0,0,0 .rva mul_handler .rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] .align 8 -.LSEH_info_bn_power5: +.LSEH_info_bn_power5_nohw: .byte 9,0,0,0 .rva mul_handler .rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # HandlerData[] From 28c2368ff276eafe72c549d8fba6ac798d536da7 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:58:14 -0700 Subject: [PATCH 02/34] Move dispatching into C --- crypto/fipsmodule/bn/bn_test.cc | 28 +++++++++++--- crypto/fipsmodule/bn/exponentiation.c | 47 ++++++++++++++++++++++- crypto/fipsmodule/bn/internal.h | 55 ++++++++++++++++----------- 3 files changed, 101 insertions(+), 29 deletions(-) diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index ea6ba285f8..20fe592822 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc @@ -3029,17 +3029,35 @@ TEST_F(BNTest, BNMulMont5ABI) { } CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13); - CHECK_ABI(bn_mul_mont_gather5, r.data(), r.data(), table.data(), m->d, + if (bn_mulx4x_mont_gather5_capable(words)) { + CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), r.data(), table.data(), m->d, + mont->n0, words, 13); + CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), a.data(), table.data(), m->d, + mont->n0, words, 13); + } + if (bn_mul4x_mont_gather5_capable(words)) { + CHECK_ABI(bn_mul4x_mont_gather5, r.data(), r.data(), table.data(), m->d, + mont->n0, words, 13); + CHECK_ABI(bn_mul4x_mont_gather5, r.data(), a.data(), table.data(), m->d, + mont->n0, words, 13); + } + CHECK_ABI(bn_mul_mont_gather5_nohw, r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); - CHECK_ABI(bn_mul_mont_gather5, r.data(), a.data(), table.data(), m->d, + CHECK_ABI(bn_mul_mont_gather5_nohw, r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); - if (words % 8 == 0) { - CHECK_ABI(bn_power5, r.data(), r.data(), table.data(), m->d, mont->n0, + if (bn_powerx5_capable(words)) { + CHECK_ABI(bn_powerx5, r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); - CHECK_ABI(bn_power5, r.data(), a.data(), table.data(), m->d, mont->n0, + CHECK_ABI(bn_powerx5, r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); } + if (bn_power5_capable(words)) { + CHECK_ABI(bn_power5_nohw, r.data(), r.data(), table.data(), m->d, + mont->n0, words, 13); + CHECK_ABI(bn_power5_nohw, r.data(), a.data(), table.data(), m->d, + mont->n0, words, 13); + } } } #endif // OPENSSL_BN_ASM_MONT5 && SUPPORTS_ABI_TEST diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 8713715b03..ae4ba1011d 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -163,6 +163,51 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #endif } +#if defined(OPENSSL_BN_ASM_MONT5) + +// bn_mul_mont_gather5 multiples loads index |power| of |table|, multiplies it +// by |ap| modulo |np|, and stores the result in |rp|. The values are |num| +// words long and represented in Montgomery form. |n0| is a pointer to the +// corresponding field in |BN_MONT_CTX|. |table| must be aligned to at least +// 16 bytes. |power| must be less than 32 and is treated as secret. +// +// WARNING: This function implements Almost Montgomery Multiplication from +// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced. +// However, even if they are fully reduced, the output may not be. +static void bn_mul_mont_gather5( + BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power) { + if (bn_mulx4x_mont_gather5_capable(num)) { + bn_mulx4x_mont_gather5(rp, ap, table, np, n0, num, power); + } else if (bn_mul4x_mont_gather5_capable(num)) { + bn_mul4x_mont_gather5(rp, ap, table, np, n0, num, power); + } else { + bn_mul_mont_gather5_nohw(rp, ap, table, np, n0, num, power); + } +} + +// bn_power5 squares |ap| five times and multiplies it by the value stored at +// index |power| of |table|, modulo |np|. It stores the result in |rp|. The +// values are |num| words long and represented in Montgomery form. |n0| is a +// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible +// by 8. |power| must be less than 32 and is treated as secret. +// +// WARNING: This function implements Almost Montgomery Multiplication from +// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced. +// However, even if they are fully reduced, the output may not be. +static void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, + int power) +{ + assert(bn_power5_capable(num)); + if (bn_powerx5_capable(num)) { + bn_powerx5(rp, ap, table, np, n0, num, power); + } else { + bn_power5_nohw(rp, ap, table, np, n0, num, power); + } +} + +#endif // defined(OPENSSL_BN_ASM_MONT5) int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int i, bits, ret = 0; @@ -1122,7 +1167,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, // Scan the exponent one window at a time starting from the most // significant bits. - if (top & 7) { + if (!bn_power5_capable(top)) { while (bits >= 0) { for (wvalue = 0, i = 0; i < 5; i++, bits--) { wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index c5ae9364f2..1a2a477752 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -445,18 +445,24 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) #define OPENSSL_BN_ASM_MONT5 -// bn_mul_mont_gather5 multiples loads index |power| of |table|, multiplies it -// by |ap| modulo |np|, and stores the result in |rp|. The values are |num| -// words long and represented in Montgomery form. |n0| is a pointer to the -// corresponding field in |BN_MONT_CTX|. |table| must be aligned to at least -// 16 bytes. |power| must be less than 32 and is treated as secret. -// -// WARNING: This function implements Almost Montgomery Multiplication from -// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced. -// However, even if they are fully reduced, the output may not be. -void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, - const BN_ULONG *table, const BN_ULONG *np, - const BN_ULONG *n0, int num, int power); + OPENSSL_INLINE int bn_mul4x_mont_gather5_capable(int num) { + return (num & 7) == 0; + } + void bn_mul4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + + OPENSSL_INLINE int bn_mulx4x_mont_gather5_capable(int num) { + return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() && + CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); + } + void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + + void bn_mul_mont_gather5_nohw(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); // bn_scatter5 stores |inp| to index |power| of |table|. |inp| and each entry of // |table| are |num| words long. |power| must be less than 32 and is treated as @@ -470,17 +476,20 @@ void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table, // is treated as secret. |table| must be aligned to at least 16 bytes. void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); -// bn_power5 squares |ap| five times and multiplies it by the value stored at -// index |power| of |table|, modulo |np|. It stores the result in |rp|. The -// values are |num| words long and represented in Montgomery form. |n0| is a -// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible -// by 8. |power| must be less than 32 and is treated as secret. -// -// WARNING: This function implements Almost Montgomery Multiplication from -// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced. -// However, even if they are fully reduced, the output may not be. -void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, - const BN_ULONG *np, const BN_ULONG *n0, int num, int power); + void bn_power5_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power); + + OPENSSL_INLINE int bn_power5_capable(int num) { + return (num & 7) == 0; + } + + OPENSSL_INLINE int bn_powerx5_capable(int num) { + return bn_power5_capable(num) && CRYPTO_is_ADX_capable() && + CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); + } + void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power); + #endif // !OPENSSL_NO_ASM && OPENSSL_X86_64 uint64_t bn_mont_n0(const BIGNUM *n); From d6720b2f5db1e3b63b66a88ac14dec4e89848896 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:09:05 -0700 Subject: [PATCH 03/34] Fix inconsistent name --- crypto/fipsmodule/bn/asm/x86_64-mont5.pl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 4508992e68..7dd8d89db2 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -1061,7 +1061,7 @@ .size mul4x_internal,.-mul4x_internal ___ }}} - {{{ +{{{ ###################################################################### # void bn_power5_nohw( my $rptr="%rdi"; # BN_ULONG *rptr, @@ -2559,7 +2559,7 @@ ___ } { ###################################################################### -# void bn_powerx5( +# void bn_power5( my $rptr="%rdi"; # BN_ULONG *rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # const BN_ULONG *table, @@ -2574,10 +2574,10 @@ my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); $code.=<<___; -.global bn_powerx5 -.type bn_powerx5,\@function,6 +.global bn_power5 +.type bn_power5,\@function,6 .align 32 -bn_powerx5: +bn_power5: .cfi_startproc _CET_ENDBR mov %rsp,%rax @@ -2708,7 +2708,7 @@ .Lpowerx5_epilogue: ret .cfi_endproc -.size bn_powerx5,.-bn_powerx5 +.size bn_power5,.-bn_power5 .globl bn_sqrx8x_internal .hidden bn_sqrx8x_internal @@ -3701,9 +3701,9 @@ .rva .LSEH_end_bn_mulx4x_mont_gather5 .rva .LSEH_info_bn_mulx4x_mont_gather5 - .rva .LSEH_begin_bn_powerx5 - .rva .LSEH_end_bn_powerx5 - .rva .LSEH_info_bn_powerx5 + .rva .LSEH_begin_bn_power5 + .rva .LSEH_end_bn_power5 + .rva .LSEH_info_bn_power5 #endif ___ $code.=<<___; @@ -3736,7 +3736,7 @@ .rva mul_handler .rva .Lmulx4x_prologue,.Lmulx4x_body,.Lmulx4x_epilogue # HandlerData[] .align 8 -.LSEH_info_bn_powerx5: +.LSEH_info_bn_power5: .byte 9,0,0,0 .rva mul_handler .rva .Lpowerx5_prologue,.Lpowerx5_body,.Lpowerx5_epilogue # HandlerData[] From 63c621b6475c82c047a6bac5be025d3eca056e4b Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 1 Aug 2025 17:23:53 -0700 Subject: [PATCH 04/34] Accidentally removed guard --- crypto/fipsmodule/bn/asm/x86_64-mont5.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 7dd8d89db2..777e05cb4d 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -2083,6 +2083,7 @@ my $bp="%rdx"; # restore original value $code.=<<___; +#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX .global bn_mulx4x_mont_gather5 .type bn_mulx4x_mont_gather5,\@function,6 .align 32 From e42eb0a0d608e74e5222939cf1f493de6e4a19da Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Tue, 5 Aug 2025 05:01:01 -0700 Subject: [PATCH 05/34] Add generated-src files --- crypto/fipsmodule/bn/asm/x86_64-mont5.pl | 16 ++++++++-------- .../crypto/fipsmodule/x86_64-mont5.S | 6 +++--- .../mac-x86_64/crypto/fipsmodule/x86_64-mont5.S | 2 +- .../crypto/fipsmodule/x86_64-mont5.asm | 14 +++++++------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 777e05cb4d..6791eee341 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -2575,10 +2575,10 @@ my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); $code.=<<___; -.global bn_power5 -.type bn_power5,\@function,6 +.global bn_powerx5 +.type bn_powerx5,\@function,6 .align 32 -bn_power5: +bn_powerx5: .cfi_startproc _CET_ENDBR mov %rsp,%rax @@ -2709,7 +2709,7 @@ .Lpowerx5_epilogue: ret .cfi_endproc -.size bn_power5,.-bn_power5 +.size bn_powerx5,.-bn_powerx5 .globl bn_sqrx8x_internal .hidden bn_sqrx8x_internal @@ -3702,9 +3702,9 @@ .rva .LSEH_end_bn_mulx4x_mont_gather5 .rva .LSEH_info_bn_mulx4x_mont_gather5 - .rva .LSEH_begin_bn_power5 - .rva .LSEH_end_bn_power5 - .rva .LSEH_info_bn_power5 + .rva .LSEH_begin_bn_powerx5 + .rva .LSEH_end_bn_powerx5 + .rva .LSEH_info_bn_powerx5 #endif ___ $code.=<<___; @@ -3737,7 +3737,7 @@ .rva mul_handler .rva .Lmulx4x_prologue,.Lmulx4x_body,.Lmulx4x_epilogue # HandlerData[] .align 8 -.LSEH_info_bn_power5: +.LSEH_info_bn_powerx5: .byte 9,0,0,0 .rva mul_handler .rva .Lpowerx5_prologue,.Lpowerx5_body,.Lpowerx5_epilogue # HandlerData[] diff --git a/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S b/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S index 4450c3ef72..4ee483281d 100644 --- a/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S +++ b/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S @@ -2611,9 +2611,9 @@ mulx4x_internal: jmp .Lsqrx4x_sub_entry .cfi_endproc .size mulx4x_internal,.-mulx4x_internal -.type bn_powerx5,@function +.type bn_power5,@function .align 32 -bn_powerx5: +bn_power5: .cfi_startproc movq %rsp,%rax .cfi_def_cfa_register %rax @@ -2744,7 +2744,7 @@ bn_powerx5: .Lpowerx5_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size bn_powerx5,.-bn_powerx5 +.size bn_power5,.-bn_power5 .globl bn_sqrx8x_internal .hidden bn_sqrx8x_internal diff --git a/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S b/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S index cd7c7dadf1..326142552e 100644 --- a/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S +++ b/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S @@ -2612,7 +2612,7 @@ L$mulx4x_inner: .p2align 5 -bn_powerx5: +bn_power5: movq %rsp,%rax diff --git a/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm b/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm index 3a3316cb94..8cd23d6a62 100644 --- a/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm +++ b/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm @@ -2671,11 +2671,11 @@ $L$mulx4x_inner: ALIGN 32 -bn_powerx5: +bn_power5: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_bn_powerx5: +$L$SEH_begin_bn_power5: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -2816,7 +2816,7 @@ $L$powerx5_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_powerx5: +$L$SEH_end_bn_power5: global bn_sqrx8x_internal @@ -3826,9 +3826,9 @@ ALIGN 4 DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase - DD $L$SEH_begin_bn_powerx5 wrt ..imagebase - DD $L$SEH_end_bn_powerx5 wrt ..imagebase - DD $L$SEH_info_bn_powerx5 wrt ..imagebase + DD $L$SEH_begin_bn_power5 wrt ..imagebase + DD $L$SEH_end_bn_power5 wrt ..imagebase + DD $L$SEH_info_bn_power5 wrt ..imagebase %endif DD $L$SEH_begin_bn_gather5 wrt ..imagebase DD $L$SEH_end_bn_gather5 wrt ..imagebase @@ -3857,7 +3857,7 @@ $L$SEH_info_bn_mulx4x_mont_gather5: DD mul_handler wrt ..imagebase DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase ALIGN 8 -$L$SEH_info_bn_powerx5: +$L$SEH_info_bn_power5: DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase From d305b221a5d0b2ee9a94528108543ee74cb2b5b2 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Tue, 5 Aug 2025 05:14:23 -0700 Subject: [PATCH 06/34] Generate for other platforms --- .../crypto/fipsmodule/x86_64-mont5.S | 63 +++++---------- .../crypto/fipsmodule/x86_64-mont5.S | 54 +++++-------- .../crypto/fipsmodule/x86_64-mont5.asm | 81 +++++++------------ 3 files changed, 70 insertions(+), 128 deletions(-) diff --git a/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S b/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S index 4ee483281d..e42d7cffbb 100644 --- a/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S +++ b/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S @@ -6,29 +6,16 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) .text -.extern OPENSSL_ia32cap_P -.hidden OPENSSL_ia32cap_P - -.globl bn_mul_mont_gather5 -.hidden bn_mul_mont_gather5 -.type bn_mul_mont_gather5,@function +.globl bn_mul_mont_gather5_nohw +.hidden bn_mul_mont_gather5_nohw +.type bn_mul_mont_gather5_nohw,@function .align 64 -bn_mul_mont_gather5: +bn_mul_mont_gather5_nohw: .cfi_startproc _CET_ENDBR movl %r9d,%r9d movq %rsp,%rax .cfi_def_cfa_register %rax - testl $7,%r9d - jnz .Lmul_enter -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq OPENSSL_ia32cap_P(%rip),%r11 - movl 8(%r11),%r11d -#endif - jmp .Lmul4x_enter - -.align 16 -.Lmul_enter: movd 8(%rsp),%xmm5 pushq %rbx .cfi_offset %rbx,-16 @@ -454,20 +441,17 @@ _CET_ENDBR .Lmul_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.size bn_mul_mont_gather5_nohw,.-bn_mul_mont_gather5_nohw +.global bn_mul4x_mont_gather5 +.hidden bn_mul4x_mont_gather5 .type bn_mul4x_mont_gather5,@function .align 32 bn_mul4x_mont_gather5: .cfi_startproc +_CET_ENDBR .byte 0x67 movq %rsp,%rax .cfi_def_cfa_register %rax -.Lmul4x_enter: -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - andl $0x80108,%r11d - cmpl $0x80108,%r11d - je .Lmulx4x_enter -#endif pushq %rbx .cfi_offset %rbx,-16 pushq %rbp @@ -1092,22 +1076,15 @@ mul4x_internal: jmp .Lsqr4x_sub_entry .cfi_endproc .size mul4x_internal,.-mul4x_internal -.globl bn_power5 -.hidden bn_power5 -.type bn_power5,@function +.globl bn_power5_nohw +.hidden bn_power5_nohw +.type bn_power5_nohw,@function .align 32 -bn_power5: +bn_power5_nohw: .cfi_startproc _CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq OPENSSL_ia32cap_P(%rip),%r11 - movl 8(%r11),%r11d - andl $0x80108,%r11d - cmpl $0x80108,%r11d - je .Lpowerx5_enter -#endif pushq %rbx .cfi_offset %rbx,-16 pushq %rbp @@ -1230,7 +1207,7 @@ _CET_ENDBR .Lpower5_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size bn_power5,.-bn_power5 +.size bn_power5_nohw,.-bn_power5_nohw .globl bn_sqr8x_internal .hidden bn_sqr8x_internal @@ -2074,13 +2051,15 @@ __bn_post4x_internal: .cfi_endproc .size __bn_post4x_internal,.-__bn_post4x_internal #ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX +.global bn_mulx4x_mont_gather5 +.hidden bn_mulx4x_mont_gather5 .type bn_mulx4x_mont_gather5,@function .align 32 bn_mulx4x_mont_gather5: .cfi_startproc +_CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax -.Lmulx4x_enter: pushq %rbx .cfi_offset %rbx,-16 pushq %rbp @@ -2611,13 +2590,15 @@ mulx4x_internal: jmp .Lsqrx4x_sub_entry .cfi_endproc .size mulx4x_internal,.-mulx4x_internal -.type bn_power5,@function +.global bn_powerx5 +.hidden bn_powerx5 +.type bn_powerx5,@function .align 32 -bn_power5: +bn_powerx5: .cfi_startproc +_CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax -.Lpowerx5_enter: pushq %rbx .cfi_offset %rbx,-16 pushq %rbp @@ -2744,7 +2725,7 @@ bn_power5: .Lpowerx5_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size bn_power5,.-bn_power5 +.size bn_powerx5,.-bn_powerx5 .globl bn_sqrx8x_internal .hidden bn_sqrx8x_internal diff --git a/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S b/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S index 326142552e..31e468a4f4 100644 --- a/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S +++ b/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S @@ -6,28 +6,16 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) .text - - -.globl _bn_mul_mont_gather5 -.private_extern _bn_mul_mont_gather5 +.globl _bn_mul_mont_gather5_nohw +.private_extern _bn_mul_mont_gather5_nohw .p2align 6 -_bn_mul_mont_gather5: +_bn_mul_mont_gather5_nohw: _CET_ENDBR movl %r9d,%r9d movq %rsp,%rax - testl $7,%r9d - jnz L$mul_enter -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq _OPENSSL_ia32cap_P(%rip),%r11 - movl 8(%r11),%r11d -#endif - jmp L$mul4x_enter - -.p2align 4 -L$mul_enter: movd 8(%rsp),%xmm5 pushq %rbx @@ -454,19 +442,16 @@ L$mul_epilogue: .byte 0xf3,0xc3 +.global _bn_mul4x_mont_gather5 +.private_extern _bn_mul4x_mont_gather5 .p2align 5 -bn_mul4x_mont_gather5: +_bn_mul4x_mont_gather5: +_CET_ENDBR .byte 0x67 movq %rsp,%rax -L$mul4x_enter: -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - andl $0x80108,%r11d - cmpl $0x80108,%r11d - je L$mulx4x_enter -#endif pushq %rbx pushq %rbp @@ -1091,22 +1076,15 @@ L$inner4x: jmp L$sqr4x_sub_entry -.globl _bn_power5 -.private_extern _bn_power5 +.globl _bn_power5_nohw +.private_extern _bn_power5_nohw .p2align 5 -_bn_power5: +_bn_power5_nohw: _CET_ENDBR movq %rsp,%rax -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq _OPENSSL_ia32cap_P(%rip),%r11 - movl 8(%r11),%r11d - andl $0x80108,%r11d - cmpl $0x80108,%r11d - je L$powerx5_enter -#endif pushq %rbx pushq %rbp @@ -2073,13 +2051,15 @@ L$sqr4x_sub_entry: #ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX +.global _bn_mulx4x_mont_gather5 +.private_extern _bn_mulx4x_mont_gather5 .p2align 5 -bn_mulx4x_mont_gather5: +_bn_mulx4x_mont_gather5: +_CET_ENDBR movq %rsp,%rax -L$mulx4x_enter: pushq %rbx pushq %rbp @@ -2610,13 +2590,15 @@ L$mulx4x_inner: jmp L$sqrx4x_sub_entry +.global _bn_powerx5 +.private_extern _bn_powerx5 .p2align 5 -bn_power5: +_bn_powerx5: +_CET_ENDBR movq %rsp,%rax -L$powerx5_enter: pushq %rbx pushq %rbp diff --git a/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm b/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm index 8cd23d6a62..25858755c9 100644 --- a/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm +++ b/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm @@ -12,16 +12,14 @@ default rel section .text code align=64 -EXTERN OPENSSL_ia32cap_P - -global bn_mul_mont_gather5 +global bn_mul_mont_gather5_nohw ALIGN 64 -bn_mul_mont_gather5: +bn_mul_mont_gather5_nohw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_bn_mul_mont_gather5: +$L$SEH_begin_bn_mul_mont_gather5_nohw: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -35,16 +33,6 @@ _CET_ENDBR mov r9d,r9d mov rax,rsp - test r9d,7 - jnz NEAR $L$mul_enter -%ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - lea r11,[OPENSSL_ia32cap_P] - mov r11d,DWORD[8+r11] -%endif - jmp NEAR $L$mul4x_enter - -ALIGN 16 -$L$mul_enter: movd xmm5,DWORD[56+rsp] push rbx @@ -472,7 +460,8 @@ $L$mul_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_mul_mont_gather5: +$L$SEH_end_bn_mul_mont_gather5_nohw: +global bn_mul4x_mont_gather5 ALIGN 32 bn_mul4x_mont_gather5: @@ -489,15 +478,10 @@ $L$SEH_begin_bn_mul4x_mont_gather5: +_CET_ENDBR DB 0x67 mov rax,rsp -$L$mul4x_enter: -%ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - and r11d,0x80108 - cmp r11d,0x80108 - je NEAR $L$mulx4x_enter -%endif push rbx push rbp @@ -1124,14 +1108,14 @@ $L$inner4x: jmp NEAR $L$sqr4x_sub_entry -global bn_power5 +global bn_power5_nohw ALIGN 32 -bn_power5: +bn_power5_nohw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_bn_power5: +$L$SEH_begin_bn_power5_nohw: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -1144,13 +1128,6 @@ $L$SEH_begin_bn_power5: _CET_ENDBR mov rax,rsp -%ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - lea r11,[OPENSSL_ia32cap_P] - mov r11d,DWORD[8+r11] - and r11d,0x80108 - cmp r11d,0x80108 - je NEAR $L$powerx5_enter -%endif push rbx push rbp @@ -1275,7 +1252,7 @@ $L$power5_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_power5: +$L$SEH_end_bn_power5_nohw: global bn_sqr8x_internal @@ -2118,6 +2095,7 @@ $L$sqr4x_sub_entry: %ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX +global bn_mulx4x_mont_gather5 ALIGN 32 bn_mulx4x_mont_gather5: @@ -2134,9 +2112,9 @@ $L$SEH_begin_bn_mulx4x_mont_gather5: +_CET_ENDBR mov rax,rsp -$L$mulx4x_enter: push rbx push rbp @@ -2669,13 +2647,14 @@ $L$mulx4x_inner: jmp NEAR $L$sqrx4x_sub_entry +global bn_powerx5 ALIGN 32 -bn_power5: +bn_powerx5: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_bn_power5: +$L$SEH_begin_bn_powerx5: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -2685,9 +2664,9 @@ $L$SEH_begin_bn_power5: +_CET_ENDBR mov rax,rsp -$L$powerx5_enter: push rbx push rbp @@ -2816,7 +2795,7 @@ $L$powerx5_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_power5: +$L$SEH_end_bn_powerx5: global bn_sqrx8x_internal @@ -3810,25 +3789,25 @@ $L$common_seh_tail: section .pdata rdata align=4 ALIGN 4 - DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase - DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase - DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase + DD $L$SEH_begin_bn_mul_mont_gather5_nohw wrt ..imagebase + DD $L$SEH_end_bn_mul_mont_gather5_nohw wrt ..imagebase + DD $L$SEH_info_bn_mul_mont_gather5_nohw wrt ..imagebase DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase - DD $L$SEH_begin_bn_power5 wrt ..imagebase - DD $L$SEH_end_bn_power5 wrt ..imagebase - DD $L$SEH_info_bn_power5 wrt ..imagebase + DD $L$SEH_begin_bn_power5_nohw wrt ..imagebase + DD $L$SEH_end_bn_power5_nohw wrt ..imagebase + DD $L$SEH_info_bn_power5_nohw wrt ..imagebase %ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase - DD $L$SEH_begin_bn_power5 wrt ..imagebase - DD $L$SEH_end_bn_power5 wrt ..imagebase - DD $L$SEH_info_bn_power5 wrt ..imagebase + DD $L$SEH_begin_bn_powerx5 wrt ..imagebase + DD $L$SEH_end_bn_powerx5 wrt ..imagebase + DD $L$SEH_info_bn_powerx5 wrt ..imagebase %endif DD $L$SEH_begin_bn_gather5 wrt ..imagebase DD $L$SEH_end_bn_gather5 wrt ..imagebase @@ -3836,17 +3815,17 @@ ALIGN 4 section .xdata rdata align=8 ALIGN 8 -$L$SEH_info_bn_mul_mont_gather5: +$L$SEH_info_bn_mul_mont_gather5_nohw: DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase ALIGN 8 -$L$SEH_info_bn_mul4x_mont_gather5: +$L$SEH_info_bn_mul4x_mont_gather5_nohw: DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase ALIGN 8 -$L$SEH_info_bn_power5: +$L$SEH_info_bn_power5_nohw: DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase @@ -3857,7 +3836,7 @@ $L$SEH_info_bn_mulx4x_mont_gather5: DD mul_handler wrt ..imagebase DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase ALIGN 8 -$L$SEH_info_bn_power5: +$L$SEH_info_bn_powerx5: DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase From 8e44aa59e19b62da62a929366dd89348ae46ba2d Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Tue, 5 Aug 2025 05:31:35 -0700 Subject: [PATCH 07/34] Fix name for windows --- crypto/fipsmodule/bn/asm/x86_64-mont5.pl | 2 +- generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 6791eee341..6f737f7794 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -3719,7 +3719,7 @@ .rva mul_handler .rva .Lmul_body,.Lmul_body,.Lmul_epilogue # HandlerData[] .align 8 -.LSEH_info_bn_mul4x_mont_gather5_nohw: +.LSEH_info_bn_mul4x_mont_gather5: .byte 9,0,0,0 .rva mul_handler .rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] diff --git a/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm b/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm index 25858755c9..6d99b00485 100644 --- a/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm +++ b/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm @@ -3820,7 +3820,7 @@ $L$SEH_info_bn_mul_mont_gather5_nohw: DD mul_handler wrt ..imagebase DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase ALIGN 8 -$L$SEH_info_bn_mul4x_mont_gather5_nohw: +$L$SEH_info_bn_mul4x_mont_gather5: DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase From 486527155f58484e49cac4734fa66376b19b619b Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Tue, 5 Aug 2025 12:32:48 -0700 Subject: [PATCH 08/34] Fix function name --- crypto/fipsmodule/bn/asm/x86_64-mont5.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 6f737f7794..5e182e6d44 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -2560,7 +2560,7 @@ ___ } { ###################################################################### -# void bn_power5( +# void bn_powerx5( my $rptr="%rdi"; # BN_ULONG *rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # const BN_ULONG *table, From 4085be6f9a070cf968695fbe7971b3efcd268687 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Tue, 5 Aug 2025 13:00:10 -0700 Subject: [PATCH 09/34] AVX fix --- crypto/fipsmodule/bn/exponentiation.c | 12 +++++++++--- crypto/fipsmodule/bn/internal.h | 5 ++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index ae4ba1011d..860dfed0b6 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -177,9 +177,13 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width static void bn_mul_mont_gather5( BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { +#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mulx4x_mont_gather5_capable(num)) { bn_mulx4x_mont_gather5(rp, ap, table, np, n0, num, power); - } else if (bn_mul4x_mont_gather5_capable(num)) { + return; + } +#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + if (bn_mul4x_mont_gather5_capable(num)) { bn_mul4x_mont_gather5(rp, ap, table, np, n0, num, power); } else { bn_mul_mont_gather5_nohw(rp, ap, table, np, n0, num, power); @@ -200,11 +204,13 @@ static void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, int power) { assert(bn_power5_capable(num)); +#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_powerx5_capable(num)) { bn_powerx5(rp, ap, table, np, n0, num, power); - } else { - bn_power5_nohw(rp, ap, table, np, n0, num, power); + return; } +#endif + bn_power5_nohw(rp, ap, table, np, n0, num, power); } #endif // defined(OPENSSL_BN_ASM_MONT5) diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 1a2a477752..95d0ba5749 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -452,6 +452,7 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); +#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OPENSSL_INLINE int bn_mulx4x_mont_gather5_capable(int num) { return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); @@ -459,6 +460,7 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); +#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) void bn_mul_mont_gather5_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, @@ -483,13 +485,14 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); return (num & 7) == 0; } +#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OPENSSL_INLINE int bn_powerx5_capable(int num) { return bn_power5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); } void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); - +#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) #endif // !OPENSSL_NO_ASM && OPENSSL_X86_64 uint64_t bn_mont_n0(const BIGNUM *n); From 12073c37b462036fb394e00758b3e1de705bba9a Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Tue, 5 Aug 2025 13:40:46 -0700 Subject: [PATCH 10/34] Test moving macro guard --- crypto/fipsmodule/bn/bn_test.cc | 2 ++ crypto/fipsmodule/bn/internal.h | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index 20fe592822..faa2f5c4df 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc @@ -3029,12 +3029,14 @@ TEST_F(BNTest, BNMulMont5ABI) { } CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13); +#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mulx4x_mont_gather5_capable(words)) { CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); } +#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mul4x_mont_gather5_capable(words)) { CHECK_ABI(bn_mul4x_mont_gather5, r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 95d0ba5749..c25f0fe4d1 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -452,15 +452,16 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OPENSSL_INLINE int bn_mulx4x_mont_gather5_capable(int num) { + #if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return 0; + #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); } void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) void bn_mul_mont_gather5_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, From b3844eac3df4e6c6490fc877df7c60f5433bfe58 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 7 Aug 2025 11:57:15 -0700 Subject: [PATCH 11/34] Pull guards in --- crypto/fipsmodule/bn/internal.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index c25f0fe4d1..bb55a6b715 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -418,18 +418,25 @@ OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) { } int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) { +#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return 0; +#else return bn_mul4x_mont_capable(num) && bn_mulx_adx_capable(); +#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) } int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) { +#if !defined(MY_ASSEMBLER_IS_TOO_OOLD_FOR_512AVX) + return 0; +#else return (num >= 8) && ((num & 7) == 0); +#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) } int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, const BN_ULONG *np, const BN_ULONG *n0, size_t num); -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) #elif defined(OPENSSL_ARM) OPENSSL_INLINE int bn_mul8x_mont_neon_capable(size_t num) { return (num & 7) == 0 && CRYPTO_is_NEON_capable(); @@ -455,9 +462,10 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, OPENSSL_INLINE int bn_mulx4x_mont_gather5_capable(int num) { #if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; - #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + #else return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); + #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) } void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, @@ -486,14 +494,17 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); return (num & 7) == 0; } -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OPENSSL_INLINE int bn_powerx5_capable(int num) { +#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return 0; +#else return bn_power5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); + +#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) } void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) #endif // !OPENSSL_NO_ASM && OPENSSL_X86_64 uint64_t bn_mont_n0(const BIGNUM *n); From 45b2eb7f2d0ab2446afa22ee6ba90d86ccb7a7af Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 7 Aug 2025 12:13:14 -0700 Subject: [PATCH 12/34] Add default implementation --- crypto/fipsmodule/bn/internal.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index bb55a6b715..61e1a1f31b 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -503,8 +503,15 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) } +#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); +#else + void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { + assert(0); + } +#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) #endif // !OPENSSL_NO_ASM && OPENSSL_X86_64 uint64_t bn_mont_n0(const BIGNUM *n); From e5418f558546725e32b1209d8a2672da5f4b89f8 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 7 Aug 2025 12:24:39 -0700 Subject: [PATCH 13/34] Fix compiler warning --- crypto/fipsmodule/bn/internal.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 61e1a1f31b..78272b9319 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -503,15 +503,14 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) } -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); -#else +#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { assert(0); } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) +#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) #endif // !OPENSSL_NO_ASM && OPENSSL_X86_64 uint64_t bn_mont_n0(const BIGNUM *n); From 39cb8166deb64cd4b3b66388a19180eef3180b3f Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:25:01 -0700 Subject: [PATCH 14/34] Fix compiler warning --- crypto/fipsmodule/bn/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 78272b9319..d4ba9247d9 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -506,7 +506,7 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) - void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + OPENSSL_INLINE void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { assert(0); } From cb40a8a2656b8131a035194669b9aea9ceb72a14 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:35:07 -0700 Subject: [PATCH 15/34] Fix compiler warning --- crypto/fipsmodule/bn/exponentiation.c | 7 +++++++ crypto/fipsmodule/bn/internal.h | 7 +------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 860dfed0b6..3255c6f785 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -165,6 +165,13 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #if defined(OPENSSL_BN_ASM_MONT5) +#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) +void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { + assert(0); +} +#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + // bn_mul_mont_gather5 multiples loads index |power| of |table|, multiplies it // by |ap| modulo |np|, and stores the result in |rp|. The values are |num| // words long and represented in Montgomery form. |n0| is a pointer to the diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index d4ba9247d9..38f4e877f1 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -505,12 +505,7 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); } void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); -#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) - OPENSSL_INLINE void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, - const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { - assert(0); - } -#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + #endif // !OPENSSL_NO_ASM && OPENSSL_X86_64 uint64_t bn_mont_n0(const BIGNUM *n); From 79cc722d6a7c325e63c2f97c520a332a9a471f67 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 7 Aug 2025 20:09:56 -0700 Subject: [PATCH 16/34] Clean up header --- crypto/fipsmodule/bn/bn_test.cc | 3 --- crypto/fipsmodule/bn/internal.h | 12 ++++-------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index faa2f5c4df..74533a6c67 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc @@ -3028,15 +3028,12 @@ TEST_F(BNTest, BNMulMont5ABI) { bn_scatter5(r.data(), words, table.data(), i); } CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13); - -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mulx4x_mont_gather5_capable(words)) { CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mul4x_mont_gather5_capable(words)) { CHECK_ABI(bn_mul4x_mont_gather5, r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 38f4e877f1..bf01de38a3 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -431,9 +431,8 @@ int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) { #if !defined(MY_ASSEMBLER_IS_TOO_OOLD_FOR_512AVX) return 0; -#else - return (num >= 8) && ((num & 7) == 0); #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return (num >= 8) && ((num & 7) == 0); } int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, const BN_ULONG *np, const BN_ULONG *n0, size_t num); @@ -462,10 +461,9 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, OPENSSL_INLINE int bn_mulx4x_mont_gather5_capable(int num) { #if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; - #else + #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); - #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) } void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, @@ -495,13 +493,11 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); } OPENSSL_INLINE int bn_powerx5_capable(int num) { -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + #if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; -#else + #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_power5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); - -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) } void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power); From c4ecf917ee03a60e4f90178766b9593d1b525564 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 8 Aug 2025 17:09:53 -0700 Subject: [PATCH 17/34] Invert guard logic --- crypto/fipsmodule/bn/exponentiation.c | 11 +++++++++++ crypto/fipsmodule/bn/internal.h | 9 ++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 3255c6f785..87c066112e 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -166,6 +166,17 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #if defined(OPENSSL_BN_ASM_MONT5) #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) +void bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) { + assert(0); +} + +void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power) { + assert(0); +} + void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { assert(0); diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index bf01de38a3..221dc631c0 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -420,11 +420,10 @@ int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) { -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) +#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; -#else - return bn_mul4x_mont_capable(num) && bn_mulx_adx_capable(); #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return bn_mul4x_mont_capable(num) && bn_mulx_adx_capable(); } int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); @@ -459,7 +458,7 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, const BN_ULONG *n0, int num, int power); OPENSSL_INLINE int bn_mulx4x_mont_gather5_capable(int num) { - #if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() && @@ -493,7 +492,7 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); } OPENSSL_INLINE int bn_powerx5_capable(int num) { - #if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_power5_capable(num) && CRYPTO_is_ADX_capable() && From 92af303f4b7bd8ef88f8d4ba2a95a8f50f2e3c69 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 8 Aug 2025 19:21:47 -0700 Subject: [PATCH 18/34] Remove guards --- crypto/fipsmodule/bn/exponentiation.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 87c066112e..eb4b54d4e3 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -166,7 +166,7 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #if defined(OPENSSL_BN_ASM_MONT5) #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) -void bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, +int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num) { assert(0); } @@ -195,13 +195,9 @@ void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, static void bn_mul_mont_gather5( BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mulx4x_mont_gather5_capable(num)) { bn_mulx4x_mont_gather5(rp, ap, table, np, n0, num, power); - return; - } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) - if (bn_mul4x_mont_gather5_capable(num)) { + } else if (bn_mul4x_mont_gather5_capable(num)) { bn_mul4x_mont_gather5(rp, ap, table, np, n0, num, power); } else { bn_mul_mont_gather5_nohw(rp, ap, table, np, n0, num, power); @@ -222,13 +218,11 @@ static void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, int power) { assert(bn_power5_capable(num)); -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_powerx5_capable(num)) { bn_powerx5(rp, ap, table, np, n0, num, power); - return; + } else { + bn_power5_nohw(rp, ap, table, np, n0, num, power); } -#endif - bn_power5_nohw(rp, ap, table, np, n0, num, power); } #endif // defined(OPENSSL_BN_ASM_MONT5) From 3dafe95f7d880d8dfceb929162d6350bd0bb8f62 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 8 Aug 2025 23:34:42 -0700 Subject: [PATCH 19/34] Change to perror/abort pattern --- crypto/fipsmodule/bn/exponentiation.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index eb4b54d4e3..303fda3dc5 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -168,18 +168,21 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num) { - assert(0); + perror("bn_mulx4x_mont"); + abort(); } void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { - assert(0); + perror("bn_mulx4x_mont_gather5"); + abort(); } void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { - assert(0); + perror("bn_powerx5"); + abort(); } #endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) From 9b8c35504fd4c2e22e725daf5b4fe5646e442e05 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Sat, 9 Aug 2025 00:23:05 -0700 Subject: [PATCH 20/34] Move default implementations to more logical location --- crypto/fipsmodule/bn/asm/x86_64-mont5.pl | 2 +- crypto/fipsmodule/bn/exponentiation.c | 21 ----------------- crypto/fipsmodule/bn/generic.c | 29 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 6ca5766b19..7a6954d0ca 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -1061,7 +1061,7 @@ .size mul4x_internal,.-mul4x_internal ___ }}} -{{{ + {{{ ###################################################################### # void bn_power5_nohw( my $rptr="%rdi"; # BN_ULONG *rptr, diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 303fda3dc5..ae4ba1011d 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -165,27 +165,6 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #if defined(OPENSSL_BN_ASM_MONT5) -#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) -int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, - const BN_ULONG *np, const BN_ULONG *n0, size_t num) { - perror("bn_mulx4x_mont"); - abort(); -} - -void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, - const BN_ULONG *table, const BN_ULONG *np, - const BN_ULONG *n0, int num, int power) { - perror("bn_mulx4x_mont_gather5"); - abort(); -} - -void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, - const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { - perror("bn_powerx5"); - abort(); -} -#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) - // bn_mul_mont_gather5 multiples loads index |power| of |table|, multiplies it // by |ap| modulo |np|, and stores the result in |rp|. The values are |num| // words long and represented in Montgomery form. |n0| is a pointer to the diff --git a/crypto/fipsmodule/bn/generic.c b/crypto/fipsmodule/bn/generic.c index 247398fddb..63835eb7c3 100644 --- a/crypto/fipsmodule/bn/generic.c +++ b/crypto/fipsmodule/bn/generic.c @@ -622,3 +622,32 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, } #endif // !BN_ADD_ASM + +// Default implementations of hardware accelerated functions that throw errors if reached. +// Used to reduce the number of guards needed in code. +#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + +#if defined(OPENSSL_X86_64) && defined(OPENSSL_BN_ASM_MONT) +int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) { + perror("bn_mulx4x_mont"); + abort(); +} +#endif + +#if defined(OPENSSL_BN_ASM_MONT5) +void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power) { + perror("bn_mulx4x_mont_gather5"); + abort(); +} + +void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { + perror("bn_powerx5"); + abort(); +} +#endif + +#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) From d96d329e64c57c5aca83b35f344f0816c53e0f3a Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 15 Aug 2025 12:47:18 -0700 Subject: [PATCH 21/34] Fix guards and comments --- crypto/fipsmodule/bn/internal.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 221dc631c0..0398a56b93 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -422,15 +422,15 @@ int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) { #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) +#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_mul4x_mont_capable(num) && bn_mulx_adx_capable(); } int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) { -#if !defined(MY_ASSEMBLER_IS_TOO_OOLD_FOR_512AVX) +#if defined(MY_ASSEMBLER_IS_TOO_OOLD_FOR_512AVX) return 0; -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) +#endif // defined(MY_ASSEMBLER_IS_TOO_OOLD_FOR_512AVX) return (num >= 8) && ((num & 7) == 0); } int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, @@ -460,7 +460,7 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, OPENSSL_INLINE int bn_mulx4x_mont_gather5_capable(int num) { #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; - #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + #endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); } @@ -494,7 +494,7 @@ void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); OPENSSL_INLINE int bn_powerx5_capable(int num) { #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; - #endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + #endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_power5_capable(num) && CRYPTO_is_ADX_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); } From c369e8840ebe5bd977bd298e77fac565379ccf39 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 15 Aug 2025 16:55:55 -0700 Subject: [PATCH 22/34] Update sqr8x_mont to new style --- crypto/fipsmodule/bn/bn_test.cc | 4 ---- crypto/fipsmodule/bn/generic.c | 6 ++++++ crypto/fipsmodule/bn/montgomery.c | 11 ++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index 74533a6c67..5f8f400f0a 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc @@ -2959,14 +2959,12 @@ TEST_F(BNTest, BNMulMontABI) { b[0] = 42; #if defined(OPENSSL_X86_64) -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mulx4x_mont_capable(words)) { CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), a.data(), mont->N.d, mont->n0, words); } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mul4x_mont_capable(words)) { CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); @@ -2977,12 +2975,10 @@ TEST_F(BNTest, BNMulMontABI) { mont->n0, words); CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d, mont->n0, words); -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_sqr8x_mont_capable(words)) { CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), bn_mulx_adx_capable(), mont->N.d, mont->n0, words); } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) #elif defined(OPENSSL_ARM) if (bn_mul8x_mont_neon_capable(words)) { CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), b.data(), mont->N.d, diff --git a/crypto/fipsmodule/bn/generic.c b/crypto/fipsmodule/bn/generic.c index 63835eb7c3..5a5f61d0db 100644 --- a/crypto/fipsmodule/bn/generic.c +++ b/crypto/fipsmodule/bn/generic.c @@ -648,6 +648,12 @@ void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, perror("bn_powerx5"); abort(); } + +int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) { + perror("bn_sqr8x_mont"); + abort(); +} #endif #endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index c7ac15c18d..0a4432b7ae 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -627,18 +627,15 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a, int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num) { -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (ap == bp && bn_sqr8x_mont_capable(num)) { return bn_sqr8x_mont(rp, ap, bn_mulx_adx_capable(), np, n0, num); - } - if (bn_mulx4x_mont_capable(num)) { + } else if (bn_mulx4x_mont_capable(num)) { return bn_mulx4x_mont(rp, ap, bp, np, n0, num); - } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) - if (bn_mul4x_mont_capable(num)) { + } else if (bn_mul4x_mont_capable(num)) { return bn_mul4x_mont(rp, ap, bp, np, n0, num); + } else { + return bn_mul_mont_nohw(rp, ap, bp, np, n0, num); } - return bn_mul_mont_nohw(rp, ap, bp, np, n0, num); } #endif From 980bcffcbe1880a976325b9c7de0342b7cfad2d1 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Wed, 27 Aug 2025 14:05:00 -0700 Subject: [PATCH 23/34] Fix typo --- crypto/fipsmodule/bn/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 0398a56b93..f1afe6cb61 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -428,7 +428,7 @@ OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) { int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) { -#if defined(MY_ASSEMBLER_IS_TOO_OOLD_FOR_512AVX) +#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return 0; #endif // defined(MY_ASSEMBLER_IS_TOO_OOLD_FOR_512AVX) return (num >= 8) && ((num & 7) == 0); From 10a46a0d07f152a2980dc2a205c1b3a53bcfe66e Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 28 Aug 2025 11:16:24 -0700 Subject: [PATCH 24/34] Add implicit dispatch tests --- crypto/fipsmodule/bn/exponentiation.c | 5 ++ crypto/impl_dispatch_test.cc | 89 +++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index ae4ba1011d..c40460dd79 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -178,10 +178,13 @@ static void bn_mul_mont_gather5( BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { if (bn_mulx4x_mont_gather5_capable(num)) { + keccak_log_dispatch(15); bn_mulx4x_mont_gather5(rp, ap, table, np, n0, num, power); } else if (bn_mul4x_mont_gather5_capable(num)) { + keccak_log_dispatch(16); bn_mul4x_mont_gather5(rp, ap, table, np, n0, num, power); } else { + keccak_log_dispatch(17); bn_mul_mont_gather5_nohw(rp, ap, table, np, n0, num, power); } } @@ -201,8 +204,10 @@ static void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, { assert(bn_power5_capable(num)); if (bn_powerx5_capable(num)) { + keccak_log_dispatch(18); bn_powerx5(rp, ap, table, np, n0, num, power); } else { + keccak_log_dispatch(19); bn_power5_nohw(rp, ap, table, np, n0, num, power); } } diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc index c971419b83..8168a08d9a 100644 --- a/crypto/impl_dispatch_test.cc +++ b/crypto/impl_dispatch_test.cc @@ -140,6 +140,11 @@ class ImplDispatchTest : public ::testing::Test { bool is_assembler_too_old = false; bool is_assembler_too_old_avx512 = false; bool ifma_avx512 = false; + bool is_bn_mulx4x_mont_gather5 = false; + bool is_bn_mul4x_mont_gather5 = false; + bool is_bn_mul_mont_gather5_nohw = false; + bool is_power5 = false; + bool is_powerx5 = false; #else // AARCH64 bool aes_gcm_pmull_ = false; bool aes_gcm_8x_ = false; @@ -167,6 +172,11 @@ constexpr size_t kFlag_sha256_hw = 6; constexpr size_t kFlag_aesni_gcm_encrypt = 2; constexpr size_t kFlag_aes_gcm_encrypt_avx512 = 7; constexpr size_t kFlag_RSAZ_mod_exp_avx512_x2 = 8; +constexpr size_t kFlag_bn_mulx4x_mont_gather5 = 15; +constexpr size_t kFlag_bn_mul4x_mont_gather5 = 16; +constexpr size_t kFlag_bn_mul_mont_gather5_nohw = 17; +constexpr size_t kFlag_bn_powerx5 = 18; +constexpr size_t kFlag_bn_power5_nohw = 19; #else // AARCH64 constexpr size_t kFlag_aes_gcm_enc_kernel = 2; constexpr size_t kFlag_aesv8_gcm_8x_enc_128 = 7; @@ -179,6 +189,85 @@ constexpr size_t kFlag_sha3_keccak4_f1600_alt = 13; constexpr size_t kFlag_sha3_keccak4_f1600_alt2 = 14; #endif +#if defined(OPENSSL_BN_ASM_MONT5) +TEST_F(ImplDispatchTest, BN_mul_mont_gather5) { + for (size_t words : {4, 5, 6, 7, 8, 16, 32}) { + SCOPED_TRACE(words); + + bssl::UniquePtr m(BN_new()); + ASSERT_TRUE(m); + ASSERT_TRUE(BN_set_bit(m.get(), 0)); + ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1)); + bssl::UniquePtr mont( + BN_MONT_CTX_new_for_modulus(m.get(), ctx())); + ASSERT_TRUE(mont); + + std::vector r(words), a(words), b(words), table(words * 32); + a[0] = 1; + b[0] = 42; + + bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); + + is_bn_mulx4x_mont_gather5 = bn_mulx4x_mont_gather5_capable(words); + if (is_bn_mulx4x_mont_gather5) { + is_bn_mul4x_mont_gather5 = false; + is_bn_mul_mont_gather5_nohw = false; + } else { + is_bn_mul4x_mont_gather5 = bn_mul4x_mont_gather5_capable(words); + if (is_bn_mul4x_mont_gather5) { + is_bn_mul_mont_gather5_nohw = false; + } else { + is_bn_mul_mont_gather5_nohw = true; + } + } + + AssertFunctionsHit( + { + {kFlag_bn_mulx4x_mont_gather5, is_bn_mulx4x_mont_gather5}, + {kFlag_bn_mul4x_mont_gather5, is_bn_mul4x_mont_gather5}, + {kFlag_bn_mul_mont_gather5_nohw, is_bn_mul_mont_gather5_nohw}, + }, + [] { + bn_mul_mont_gather5(r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); + bn_mul_mont_gather5(r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); + }); + } +} + +TEST_F(ImplDispatchTest, BN_power5) { + for (size_t words : {4, 5, 6, 7, 8, 16, 32}) { + SCOPED_TRACE(words); + + bssl::UniquePtr m(BN_new()); + ASSERT_TRUE(m); + ASSERT_TRUE(BN_set_bit(m.get(), 0)); + ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1)); + bssl::UniquePtr mont( + BN_MONT_CTX_new_for_modulus(m.get(), ctx())); + ASSERT_TRUE(mont); + + std::vector r(words), a(words), b(words), table(words * 32); + a[0] = 1; + b[0] = 42; + + bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); + + is_power5 = bn_power5_capable(words); + is_powerx5 = bn_powerx5_capable(words); + + AssertFunctionsHit( + { + {kFlag_bn_powerx5, is_powerx5}, + {kFlag_bn_power5_nohw, !is_powerx5 && is_power5}, + }, + [] { + bn_power5(r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); + bn_power5(r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); + }); + } +} +#endif // defined(OPENSSL_BN_ASM_MONT5) + TEST_F(ImplDispatchTest, AEAD_AES_GCM) { AssertFunctionsHit( { From 6062827399f09ff1181d75e97dff8acf5221e45a Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Mon, 15 Sep 2025 11:35:41 -0700 Subject: [PATCH 25/34] Add dispatch function --- crypto/fipsmodule/bn/exponentiation.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index c40460dd79..7029459cf4 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -165,6 +165,12 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #if defined(OPENSSL_BN_ASM_MONT5) +static void bn_log_dispatch(size_t id) { +#if BORINGSSL_DISPATCH_TEST + BORINGSSL_function_hit[id] = 1; +#endif +} + // bn_mul_mont_gather5 multiples loads index |power| of |table|, multiplies it // by |ap| modulo |np|, and stores the result in |rp|. The values are |num| // words long and represented in Montgomery form. |n0| is a pointer to the @@ -178,13 +184,13 @@ static void bn_mul_mont_gather5( BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { if (bn_mulx4x_mont_gather5_capable(num)) { - keccak_log_dispatch(15); + bn_log_dispatch(15); bn_mulx4x_mont_gather5(rp, ap, table, np, n0, num, power); } else if (bn_mul4x_mont_gather5_capable(num)) { - keccak_log_dispatch(16); + bn_log_dispatch(16); bn_mul4x_mont_gather5(rp, ap, table, np, n0, num, power); } else { - keccak_log_dispatch(17); + bn_log_dispatch(17); bn_mul_mont_gather5_nohw(rp, ap, table, np, n0, num, power); } } @@ -204,10 +210,10 @@ static void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, { assert(bn_power5_capable(num)); if (bn_powerx5_capable(num)) { - keccak_log_dispatch(18); + bn_log_dispatch(18); bn_powerx5(rp, ap, table, np, n0, num, power); } else { - keccak_log_dispatch(19); + bn_log_dispatch(19); bn_power5_nohw(rp, ap, table, np, n0, num, power); } } From 0cf8b2d6bf00f03574564f89aa8a01d6b106b06c Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Tue, 16 Sep 2025 19:03:41 -0700 Subject: [PATCH 26/34] Change to single dispatch log function --- crypto/fipsmodule/bn/exponentiation.c | 17 ++++++----------- crypto/fipsmodule/cpucap/cpucap.c | 6 ++++++ crypto/fipsmodule/sha/keccak1600.c | 21 +++++++-------------- include/openssl/cpu.h | 5 +++++ 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 7029459cf4..4f08008481 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -107,6 +107,7 @@ * Hudson (tjh@cryptsoft.com). */ #include +#include #include #include @@ -165,12 +166,6 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #if defined(OPENSSL_BN_ASM_MONT5) -static void bn_log_dispatch(size_t id) { -#if BORINGSSL_DISPATCH_TEST - BORINGSSL_function_hit[id] = 1; -#endif -} - // bn_mul_mont_gather5 multiples loads index |power| of |table|, multiplies it // by |ap| modulo |np|, and stores the result in |rp|. The values are |num| // words long and represented in Montgomery form. |n0| is a pointer to the @@ -184,13 +179,13 @@ static void bn_mul_mont_gather5( BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { if (bn_mulx4x_mont_gather5_capable(num)) { - bn_log_dispatch(15); + log_dispatch(15); bn_mulx4x_mont_gather5(rp, ap, table, np, n0, num, power); } else if (bn_mul4x_mont_gather5_capable(num)) { - bn_log_dispatch(16); + log_dispatch(16); bn_mul4x_mont_gather5(rp, ap, table, np, n0, num, power); } else { - bn_log_dispatch(17); + log_dispatch(17); bn_mul_mont_gather5_nohw(rp, ap, table, np, n0, num, power); } } @@ -210,10 +205,10 @@ static void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, { assert(bn_power5_capable(num)); if (bn_powerx5_capable(num)) { - bn_log_dispatch(18); + log_dispatch(18); bn_powerx5(rp, ap, table, np, n0, num, power); } else { - bn_log_dispatch(19); + log_dispatch(19); bn_power5_nohw(rp, ap, table, np, n0, num, power); } } diff --git a/crypto/fipsmodule/cpucap/cpucap.c b/crypto/fipsmodule/cpucap/cpucap.c index 07874c7788..1cfcd8263b 100644 --- a/crypto/fipsmodule/cpucap/cpucap.c +++ b/crypto/fipsmodule/cpucap/cpucap.c @@ -92,6 +92,12 @@ HIDDEN uint32_t OPENSSL_armcap_P = 0; #if defined(BORINGSSL_DISPATCH_TEST) // This value must be explicitly initialized to zero. See similar comment above. HIDDEN uint8_t BORINGSSL_function_hit[15] = {0}; + +void log_dispatch(size_t id) { +#if BORINGSSL_DISPATCH_TEST + BORINGSSL_function_hit[id] = 1; +#endif +} #endif // BORINGSSL_DISPATCH_TEST // This variable is used only for testing purposes to ensure that the library diff --git a/crypto/fipsmodule/sha/keccak1600.c b/crypto/fipsmodule/sha/keccak1600.c index f97d03a6a9..bbe8682e76 100644 --- a/crypto/fipsmodule/sha/keccak1600.c +++ b/crypto/fipsmodule/sha/keccak1600.c @@ -8,6 +8,7 @@ */ #include +#include #include "internal.h" #include "../../internal.h" #include "../cpucap/internal.h" @@ -321,14 +322,6 @@ void Keccak1600_Squeeze(uint64_t A[KECCAK1600_ROWS][KECCAK1600_ROWS], uint8_t *o // Scalar implementation from OpenSSL provided by keccak1600-armv8.pl extern void KeccakF1600_hw(uint64_t state[25]); -#if defined(OPENSSL_AARCH64) -static void keccak_log_dispatch(size_t id) { -#if BORINGSSL_DISPATCH_TEST - BORINGSSL_function_hit[id] = 1; -#endif -} -#endif - void KeccakF1600(uint64_t A[KECCAK1600_ROWS][KECCAK1600_ROWS]) { // Dispatch logic for Keccak-x1 on AArch64: // @@ -354,21 +347,21 @@ void KeccakF1600(uint64_t A[KECCAK1600_ROWS][KECCAK1600_ROWS]) { #if defined(OPENSSL_AARCH64) #if defined(KECCAK1600_S2N_BIGNUM_ASM) if (CRYPTO_is_Neoverse_N1() || CRYPTO_is_Neoverse_V1() || CRYPTO_is_Neoverse_V2()) { - keccak_log_dispatch(10); // kFlag_sha3_keccak_f1600 + log_dispatch(10); // kFlag_sha3_keccak_f1600 sha3_keccak_f1600((uint64_t *)A, iotas); return; } #if defined(MY_ASSEMBLER_SUPPORTS_NEON_SHA3_EXTENSION) if (CRYPTO_is_ARMv8_SHA3_capable()) { - keccak_log_dispatch(11); // kFlag_sha3_keccak_f1600_alt + log_dispatch(11); // kFlag_sha3_keccak_f1600_alt sha3_keccak_f1600_alt((uint64_t *)A, iotas); return; } #endif #endif - keccak_log_dispatch(9); // kFlag_KeccakF1600_hw + log_dispatch(9); // kFlag_KeccakF1600_hw KeccakF1600_hw((uint64_t *) A); #elif defined(OPENSSL_X86_64) @@ -427,20 +420,20 @@ static void Keccak1600_x4(uint64_t A[4][KECCAK1600_ROWS][KECCAK1600_ROWS]) { // (which has its own dispatch logic). #if defined(KECCAK1600_S2N_BIGNUM_ASM) && defined(OPENSSL_AARCH64) if (CRYPTO_is_Neoverse_N1()) { - keccak_log_dispatch(13); // kFlag_sha3_keccak4_f1600_alt + log_dispatch(13); // kFlag_sha3_keccak4_f1600_alt sha3_keccak4_f1600_alt((uint64_t *)A, iotas); return; } #if defined(MY_ASSEMBLER_SUPPORTS_NEON_SHA3_EXTENSION) if (CRYPTO_is_Neoverse_V1() || CRYPTO_is_Neoverse_V2()) { - keccak_log_dispatch(14); // kFlag_sha3_keccak4_f1600_alt2 + log_dispatch(14); // kFlag_sha3_keccak4_f1600_alt2 sha3_keccak4_f1600_alt2((uint64_t *)A, iotas); return; } if (CRYPTO_is_ARMv8_SHA3_capable()) { - keccak_log_dispatch(12); // kFlag_sha3_keccak2_f1600 + log_dispatch(12); // kFlag_sha3_keccak2_f1600 // Use 2-fold function twice: A[0:1] and A[2:3] sha3_keccak2_f1600((uint64_t *)&A[0], iotas); sha3_keccak2_f1600((uint64_t *)&A[2], iotas); diff --git a/include/openssl/cpu.h b/include/openssl/cpu.h index d865020c1c..bb2d9a31cf 100644 --- a/include/openssl/cpu.h +++ b/include/openssl/cpu.h @@ -16,3 +16,8 @@ // TODO(davidben): Remove this header. #include "crypto.h" + +#if defined(BORINGSSL_DISPATCH_TEST) +__attribute__((unused)) +void log_dispatch(size_t id); +#endif From 03d8c2dd6a3ff7a6af7c80f00dbc4e6a6f0deae5 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Tue, 16 Sep 2025 19:11:59 -0700 Subject: [PATCH 27/34] Add ctx --- crypto/impl_dispatch_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc index 8168a08d9a..5b440b68ce 100644 --- a/crypto/impl_dispatch_test.cc +++ b/crypto/impl_dispatch_test.cc @@ -196,6 +196,8 @@ TEST_F(ImplDispatchTest, BN_mul_mont_gather5) { bssl::UniquePtr m(BN_new()); ASSERT_TRUE(m); + bssl::UniquePtr ctx(BN_CTX_new()); + BN_CTX_start(ctx.get()); ASSERT_TRUE(BN_set_bit(m.get(), 0)); ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1)); bssl::UniquePtr mont( From d993aeb065e62eb6348699a2585569561083a494 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:35:41 -0700 Subject: [PATCH 28/34] Header guard --- include/openssl/cpu.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/openssl/cpu.h b/include/openssl/cpu.h index bb2d9a31cf..de6f991c15 100644 --- a/include/openssl/cpu.h +++ b/include/openssl/cpu.h @@ -14,6 +14,8 @@ // This header is provided for compatibility with older revisions of BoringSSL. // TODO(davidben): Remove this header. +#ifndef OPENSSL_HEADER_CPU_H +#define OPENSSL_HEADER_CPU_H #include "crypto.h" @@ -21,3 +23,5 @@ __attribute__((unused)) void log_dispatch(size_t id); #endif + +#endif // OPENSSL_HEADER_CPU_H From b18b2509fa4003c57766fe02a0c7b420426e8b98 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:43:36 -0700 Subject: [PATCH 29/34] Add capture --- crypto/impl_dispatch_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc index 5b440b68ce..6d0f8524f4 100644 --- a/crypto/impl_dispatch_test.cc +++ b/crypto/impl_dispatch_test.cc @@ -201,7 +201,7 @@ TEST_F(ImplDispatchTest, BN_mul_mont_gather5) { ASSERT_TRUE(BN_set_bit(m.get(), 0)); ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1)); bssl::UniquePtr mont( - BN_MONT_CTX_new_for_modulus(m.get(), ctx())); + BN_MONT_CTX_new_for_modulus(m.get(), ctx.get())); ASSERT_TRUE(mont); std::vector r(words), a(words), b(words), table(words * 32); @@ -229,7 +229,7 @@ TEST_F(ImplDispatchTest, BN_mul_mont_gather5) { {kFlag_bn_mul4x_mont_gather5, is_bn_mul4x_mont_gather5}, {kFlag_bn_mul_mont_gather5_nohw, is_bn_mul_mont_gather5_nohw}, }, - [] { + [r, table, m, mont, words] { bn_mul_mont_gather5(r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); bn_mul_mont_gather5(r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); }); From c4313266b3879c7e8e4246b7aa8be4bd0304045b Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 18 Sep 2025 13:57:49 -0700 Subject: [PATCH 30/34] Remove guard --- include/openssl/cpu.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/openssl/cpu.h b/include/openssl/cpu.h index de6f991c15..742a7a1d3a 100644 --- a/include/openssl/cpu.h +++ b/include/openssl/cpu.h @@ -19,9 +19,7 @@ #include "crypto.h" -#if defined(BORINGSSL_DISPATCH_TEST) __attribute__((unused)) void log_dispatch(size_t id); -#endif #endif // OPENSSL_HEADER_CPU_H From 5e4cf239e367027de63bc6cfe07b5970926a7c5f Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 18 Sep 2025 14:39:57 -0700 Subject: [PATCH 31/34] Change scope of function defininition --- crypto/fipsmodule/cpucap/cpucap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/fipsmodule/cpucap/cpucap.c b/crypto/fipsmodule/cpucap/cpucap.c index 1cfcd8263b..014096a56e 100644 --- a/crypto/fipsmodule/cpucap/cpucap.c +++ b/crypto/fipsmodule/cpucap/cpucap.c @@ -92,13 +92,13 @@ HIDDEN uint32_t OPENSSL_armcap_P = 0; #if defined(BORINGSSL_DISPATCH_TEST) // This value must be explicitly initialized to zero. See similar comment above. HIDDEN uint8_t BORINGSSL_function_hit[15] = {0}; +#endif // BORINGSSL_DISPATCH_TEST void log_dispatch(size_t id) { #if BORINGSSL_DISPATCH_TEST BORINGSSL_function_hit[id] = 1; #endif } -#endif // BORINGSSL_DISPATCH_TEST // This variable is used only for testing purposes to ensure that the library // constructor is executed and the capability variable is initialized. From 13e40c13844c71a42b69f4627c4ae3d1bda5b948 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:06:05 -0700 Subject: [PATCH 32/34] Add header file for bn functions --- crypto/impl_dispatch_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc index 6d0f8524f4..6509e988dc 100644 --- a/crypto/impl_dispatch_test.cc +++ b/crypto/impl_dispatch_test.cc @@ -31,8 +31,8 @@ #include "fipsmodule/cpucap/internal.h" #include "fipsmodule/modes/internal.h" #include "fipsmodule/bn/rsaz_exp.h" +#include "fipsmodule/bn/internal.h" #include "fipsmodule/sha/internal.h" - #include "test/file_test.h" class ImplDispatchTest : public ::testing::Test { From c7e253fca9da1a7750f7149dffab2d734cff0ab1 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:59:52 -0700 Subject: [PATCH 33/34] Comment out dynamic distpatch tests using static decls --- crypto/impl_dispatch_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc index 6509e988dc..eef69a7a31 100644 --- a/crypto/impl_dispatch_test.cc +++ b/crypto/impl_dispatch_test.cc @@ -190,6 +190,7 @@ constexpr size_t kFlag_sha3_keccak4_f1600_alt2 = 14; #endif #if defined(OPENSSL_BN_ASM_MONT5) +/** TEST_F(ImplDispatchTest, BN_mul_mont_gather5) { for (size_t words : {4, 5, 6, 7, 8, 16, 32}) { SCOPED_TRACE(words); @@ -235,7 +236,9 @@ TEST_F(ImplDispatchTest, BN_mul_mont_gather5) { }); } } +*/ +/** TEST_F(ImplDispatchTest, BN_power5) { for (size_t words : {4, 5, 6, 7, 8, 16, 32}) { SCOPED_TRACE(words); @@ -268,6 +271,7 @@ TEST_F(ImplDispatchTest, BN_power5) { }); } } +*/ #endif // defined(OPENSSL_BN_ASM_MONT5) TEST_F(ImplDispatchTest, AEAD_AES_GCM) { From e176f58c43127d6d0d49087a48cac14ba62f8d07 Mon Sep 17 00:00:00 2001 From: Erin Chapman <13040499+m271828@users.noreply.github.com> Date: Fri, 19 Sep 2025 12:10:28 -0700 Subject: [PATCH 34/34] Add unused attribute --- crypto/impl_dispatch_test.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc index eef69a7a31..87c3a193f8 100644 --- a/crypto/impl_dispatch_test.cc +++ b/crypto/impl_dispatch_test.cc @@ -172,10 +172,15 @@ constexpr size_t kFlag_sha256_hw = 6; constexpr size_t kFlag_aesni_gcm_encrypt = 2; constexpr size_t kFlag_aes_gcm_encrypt_avx512 = 7; constexpr size_t kFlag_RSAZ_mod_exp_avx512_x2 = 8; +__attribute__((unused)) constexpr size_t kFlag_bn_mulx4x_mont_gather5 = 15; +__attribute__((unused)) constexpr size_t kFlag_bn_mul4x_mont_gather5 = 16; +__attribute__((unused)) constexpr size_t kFlag_bn_mul_mont_gather5_nohw = 17; +__attribute__((unused)) constexpr size_t kFlag_bn_powerx5 = 18; +__attribute__((unused)) constexpr size_t kFlag_bn_power5_nohw = 19; #else // AARCH64 constexpr size_t kFlag_aes_gcm_enc_kernel = 2;