diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 2d5f653032..7a6954d0ca 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl @@ -54,7 +54,7 @@ $addx = 1; for (@ARGV) { $addx = 0 if (/-DMY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX/); } -# int bn_mul_mont_gather5( +# int bn_mul_mont_gather5_nohw( $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, $bp="%rdx"; # const BN_ULONG *bp, @@ -76,31 +76,15 @@ $code=<<___; .text -.extern OPENSSL_ia32cap_P - -.globl bn_mul_mont_gather5 -.type bn_mul_mont_gather5,\@function,6 +.globl bn_mul_mont_gather5_nohw +.type bn_mul_mont_gather5_nohw,\@function,6 .align 64 -bn_mul_mont_gather5: +bn_mul_mont_gather5_nohw: .cfi_startproc _CET_ENDBR mov ${num}d,${num}d mov %rsp,%rax .cfi_def_cfa_register %rax - test \$7,${num}d - jnz .Lmul_enter -___ -$code.=<<___ if ($addx); -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq OPENSSL_ia32cap_P(%rip),%r11 - mov 8(%r11),%r11d -#endif -___ -$code.=<<___; - jmp .Lmul4x_enter - -.align 16 -.Lmul_enter: movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument push %rbx .cfi_push %rbx @@ -460,29 +444,21 @@ .Lmul_epilogue: ret .cfi_endproc -.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.size bn_mul_mont_gather5_nohw,.-bn_mul_mont_gather5_nohw ___ {{{ my @A=("%r10","%r11"); my @N=("%r13","%rdi"); $code.=<<___; +.global bn_mul4x_mont_gather5 .type bn_mul4x_mont_gather5,\@function,6 .align 32 bn_mul4x_mont_gather5: .cfi_startproc + _CET_ENDBR .byte 0x67 mov %rsp,%rax .cfi_def_cfa_register %rax -.Lmul4x_enter: -___ -$code.=<<___ if ($addx); -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - and \$0x80108,%r11d - cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 - je .Lmulx4x_enter -#endif -___ -$code.=<<___; push %rbx .cfi_push %rbx push %rbp @@ -1087,7 +1063,7 @@ }}} {{{ ###################################################################### -# void bn_power5( +# void bn_power5_nohw( my $rptr="%rdi"; # BN_ULONG 
*rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # const BN_ULONG *table, @@ -1102,25 +1078,14 @@ my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); $code.=<<___; -.globl bn_power5 -.type bn_power5,\@function,6 +.globl bn_power5_nohw +.type bn_power5_nohw,\@function,6 .align 32 -bn_power5: +bn_power5_nohw: .cfi_startproc _CET_ENDBR mov %rsp,%rax .cfi_def_cfa_register %rax -___ -$code.=<<___ if ($addx); -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq OPENSSL_ia32cap_P(%rip),%r11 - mov 8(%r11),%r11d - and \$0x80108,%r11d - cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 - je .Lpowerx5_enter -#endif -___ -$code.=<<___; push %rbx .cfi_push %rbx push %rbp @@ -1243,7 +1208,7 @@ .Lpower5_epilogue: ret .cfi_endproc -.size bn_power5,.-bn_power5 +.size bn_power5_nohw,.-bn_power5_nohw .globl bn_sqr8x_internal .hidden bn_sqr8x_internal @@ -2119,13 +2084,14 @@ $code.=<<___; #ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX +.global bn_mulx4x_mont_gather5 .type bn_mulx4x_mont_gather5,\@function,6 .align 32 bn_mulx4x_mont_gather5: .cfi_startproc + _CET_ENDBR mov %rsp,%rax .cfi_def_cfa_register %rax -.Lmulx4x_enter: push %rbx .cfi_push %rbx push %rbp @@ -2594,7 +2560,7 @@ ___ } { ###################################################################### -# void bn_power5( +# void bn_powerx5( my $rptr="%rdi"; # BN_ULONG *rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # const BN_ULONG *table, @@ -2609,13 +2575,14 @@ my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); $code.=<<___; +.global bn_powerx5 .type bn_powerx5,\@function,6 .align 32 bn_powerx5: .cfi_startproc + _CET_ENDBR mov %rsp,%rax .cfi_def_cfa_register %rax -.Lpowerx5_enter: push %rbx .cfi_push %rbx push %rbp @@ -3717,17 +3684,17 @@ .section .pdata .align 4 - .rva .LSEH_begin_bn_mul_mont_gather5 - .rva .LSEH_end_bn_mul_mont_gather5 - .rva .LSEH_info_bn_mul_mont_gather5 + .rva .LSEH_begin_bn_mul_mont_gather5_nohw + .rva .LSEH_end_bn_mul_mont_gather5_nohw + .rva .LSEH_info_bn_mul_mont_gather5_nohw .rva 
.LSEH_begin_bn_mul4x_mont_gather5 .rva .LSEH_end_bn_mul4x_mont_gather5 .rva .LSEH_info_bn_mul4x_mont_gather5 - .rva .LSEH_begin_bn_power5 - .rva .LSEH_end_bn_power5 - .rva .LSEH_info_bn_power5 + .rva .LSEH_begin_bn_power5_nohw + .rva .LSEH_end_bn_power5_nohw + .rva .LSEH_info_bn_power5_nohw ___ $code.=<<___ if ($addx); #ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX @@ -3747,7 +3714,7 @@ .section .xdata .align 4 -.LSEH_info_bn_mul_mont_gather5: +.LSEH_info_bn_mul_mont_gather5_nohw: .byte 9,0,0,0 .rva mul_handler .rva .Lmul_body,.Lmul_body,.Lmul_epilogue # HandlerData[] @@ -3757,7 +3724,7 @@ .rva mul_handler .rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] .align 4 -.LSEH_info_bn_power5: +.LSEH_info_bn_power5_nohw: .byte 9,0,0,0 .rva mul_handler .rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # HandlerData[] diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index ea6ba285f8..5f8f400f0a 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc @@ -2959,14 +2959,12 @@ TEST_F(BNTest, BNMulMontABI) { b[0] = 42; #if defined(OPENSSL_X86_64) -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mulx4x_mont_capable(words)) { CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), a.data(), mont->N.d, mont->n0, words); } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_mul4x_mont_capable(words)) { CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); @@ -2977,12 +2975,10 @@ TEST_F(BNTest, BNMulMontABI) { mont->n0, words); CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d, mont->n0, words); -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (bn_sqr8x_mont_capable(words)) { CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), bn_mulx_adx_capable(), mont->N.d, mont->n0, words); } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) #elif defined(OPENSSL_ARM) if 
(bn_mul8x_mont_neon_capable(words)) { CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), b.data(), mont->N.d, @@ -3028,18 +3024,35 @@ TEST_F(BNTest, BNMulMont5ABI) { bn_scatter5(r.data(), words, table.data(), i); } CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13); - - CHECK_ABI(bn_mul_mont_gather5, r.data(), r.data(), table.data(), m->d, + if (bn_mulx4x_mont_gather5_capable(words)) { + CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), r.data(), table.data(), m->d, + mont->n0, words, 13); + CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), a.data(), table.data(), m->d, + mont->n0, words, 13); + } + if (bn_mul4x_mont_gather5_capable(words)) { + CHECK_ABI(bn_mul4x_mont_gather5, r.data(), r.data(), table.data(), m->d, + mont->n0, words, 13); + CHECK_ABI(bn_mul4x_mont_gather5, r.data(), a.data(), table.data(), m->d, + mont->n0, words, 13); + } + CHECK_ABI(bn_mul_mont_gather5_nohw, r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); - CHECK_ABI(bn_mul_mont_gather5, r.data(), a.data(), table.data(), m->d, + CHECK_ABI(bn_mul_mont_gather5_nohw, r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); - if (words % 8 == 0) { - CHECK_ABI(bn_power5, r.data(), r.data(), table.data(), m->d, mont->n0, + if (bn_powerx5_capable(words)) { + CHECK_ABI(bn_powerx5, r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); - CHECK_ABI(bn_power5, r.data(), a.data(), table.data(), m->d, mont->n0, + CHECK_ABI(bn_powerx5, r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); } + if (bn_power5_capable(words)) { + CHECK_ABI(bn_power5_nohw, r.data(), r.data(), table.data(), m->d, + mont->n0, words, 13); + CHECK_ABI(bn_power5_nohw, r.data(), a.data(), table.data(), m->d, + mont->n0, words, 13); + } } } #endif // OPENSSL_BN_ASM_MONT5 && SUPPORTS_ABI_TEST diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 8713715b03..4f08008481 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ 
-107,6 +107,7 @@ * Hudson (tjh@cryptsoft.com). */ #include +#include #include #include @@ -163,6 +164,56 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width #endif } +#if defined(OPENSSL_BN_ASM_MONT5) + +// bn_mul_mont_gather5 loads index |power| of |table|, multiplies it +// by |ap| modulo |np|, and stores the result in |rp|. The values are |num| +// words long and represented in Montgomery form. |n0| is a pointer to the +// corresponding field in |BN_MONT_CTX|. |table| must be aligned to at least +// 16 bytes. |power| must be less than 32 and is treated as secret. +// +// WARNING: This function implements Almost Montgomery Multiplication from +// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced. +// However, even if they are fully reduced, the output may not be. +static void bn_mul_mont_gather5( + BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power) { + if (bn_mulx4x_mont_gather5_capable(num)) { + log_dispatch(15); + bn_mulx4x_mont_gather5(rp, ap, table, np, n0, num, power); + } else if (bn_mul4x_mont_gather5_capable(num)) { + log_dispatch(16); + bn_mul4x_mont_gather5(rp, ap, table, np, n0, num, power); + } else { + log_dispatch(17); + bn_mul_mont_gather5_nohw(rp, ap, table, np, n0, num, power); + } +} + +// bn_power5 squares |ap| five times and multiplies it by the value stored at +// index |power| of |table|, modulo |np|. It stores the result in |rp|. The +// values are |num| words long and represented in Montgomery form. |n0| is a +// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible +// by 8. |power| must be less than 32 and is treated as secret. +// +// WARNING: This function implements Almost Montgomery Multiplication from +// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced. +// However, even if they are fully reduced, the output may not be. 
+static void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, + int power) +{ + assert(bn_power5_capable(num)); + if (bn_powerx5_capable(num)) { + log_dispatch(18); + bn_powerx5(rp, ap, table, np, n0, num, power); + } else { + log_dispatch(19); + bn_power5_nohw(rp, ap, table, np, n0, num, power); + } +} + +#endif // defined(OPENSSL_BN_ASM_MONT5) int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int i, bits, ret = 0; @@ -1122,7 +1173,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, // Scan the exponent one window at a time starting from the most // significant bits. - if (top & 7) { + if (!bn_power5_capable(top)) { while (bits >= 0) { for (wvalue = 0, i = 0; i < 5; i++, bits--) { wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); diff --git a/crypto/fipsmodule/bn/generic.c b/crypto/fipsmodule/bn/generic.c index 247398fddb..5a5f61d0db 100644 --- a/crypto/fipsmodule/bn/generic.c +++ b/crypto/fipsmodule/bn/generic.c @@ -622,3 +622,38 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, } #endif // !BN_ADD_ASM + +// Stub definitions of hardware-accelerated functions that call perror() and abort() if reached. +// Used to reduce the number of preprocessor guards needed in calling code. 
+#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + +#if defined(OPENSSL_X86_64) && defined(OPENSSL_BN_ASM_MONT) +int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) { + perror("bn_mulx4x_mont"); + abort(); +} +#endif + +#if defined(OPENSSL_BN_ASM_MONT5) +void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power) { + perror("bn_mulx4x_mont_gather5"); + abort(); +} + +void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power) { + perror("bn_powerx5"); + abort(); +} + +int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) { + perror("bn_sqr8x_mont"); + abort(); +} +#endif + +#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index a52bb5782f..b837aca823 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -419,18 +419,23 @@ OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) { } int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) { +#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return 0; +#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return bn_mul4x_mont_capable(num) && bn_mulx_adx_capable(); } int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) { +#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return 0; +#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) return (num >= 8) && ((num & 7) == 0); } int bn_sqr8x_mont(BN_ULONG 
*rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, const BN_ULONG *np, const BN_ULONG *n0, size_t num); -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) #elif defined(OPENSSL_ARM) OPENSSL_INLINE int bn_mul8x_mont_neon_capable(size_t num) { return (num & 7) == 0 && CRYPTO_is_NEON_capable(); @@ -446,18 +451,27 @@ int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) #define OPENSSL_BN_ASM_MONT5 -// bn_mul_mont_gather5 multiples loads index |power| of |table|, multiplies it -// by |ap| modulo |np|, and stores the result in |rp|. The values are |num| -// words long and represented in Montgomery form. |n0| is a pointer to the -// corresponding field in |BN_MONT_CTX|. |table| must be aligned to at least -// 16 bytes. |power| must be less than 32 and is treated as secret. -// -// WARNING: This function implements Almost Montgomery Multiplication from -// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced. -// However, even if they are fully reduced, the output may not be. 
-void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, - const BN_ULONG *table, const BN_ULONG *np, - const BN_ULONG *n0, int num, int power); + OPENSSL_INLINE int bn_mul4x_mont_gather5_capable(int num) { + return (num & 7) == 0; + } + void bn_mul4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + + OPENSSL_INLINE int bn_mulx4x_mont_gather5_capable(int num) { + #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return 0; + #endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() && + CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); + } + void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + + void bn_mul_mont_gather5_nohw(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); // bn_scatter5 stores |inp| to index |power| of |table|. |inp| and each entry of // |table| are |num| words long. |power| must be less than 32 and is treated as @@ -471,17 +485,23 @@ void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table, // is treated as secret. |table| must be aligned to at least 16 bytes. void bn_gather5(BN_ULONG *out, size_t num, const BN_ULONG *table, size_t power); -// bn_power5 squares |ap| five times and multiplies it by the value stored at -// index |power| of |table|, modulo |np|. It stores the result in |rp|. The -// values are |num| words long and represented in Montgomery form. |n0| is a -// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible -// by 8. |power| must be less than 32 and is treated as secret. -// -// WARNING: This function implements Almost Montgomery Multiplication from -// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced. 
-// However, even if they are fully reduced, the output may not be. -void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, - const BN_ULONG *np, const BN_ULONG *n0, int num, int power); + void bn_power5_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power); + + OPENSSL_INLINE int bn_power5_capable(int num) { + return (num & 7) == 0; + } + + OPENSSL_INLINE int bn_powerx5_capable(int num) { + #if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return 0; + #endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) + return bn_power5_capable(num) && CRYPTO_is_ADX_capable() && + CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable(); + } + void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power); + #endif // !OPENSSL_NO_ASM && OPENSSL_X86_64 uint64_t bn_mont_n0(const BIGNUM *n); diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index c7ac15c18d..0a4432b7ae 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -627,18 +627,15 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a, int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num) { -#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) if (ap == bp && bn_sqr8x_mont_capable(num)) { return bn_sqr8x_mont(rp, ap, bn_mulx_adx_capable(), np, n0, num); - } - if (bn_mulx4x_mont_capable(num)) { + } else if (bn_mulx4x_mont_capable(num)) { return bn_mulx4x_mont(rp, ap, bp, np, n0, num); - } -#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) - if (bn_mul4x_mont_capable(num)) { + } else if (bn_mul4x_mont_capable(num)) { return bn_mul4x_mont(rp, ap, bp, np, n0, num); + } else { + return bn_mul_mont_nohw(rp, ap, bp, np, n0, num); } - return bn_mul_mont_nohw(rp, ap, bp, np, n0, num); } #endif diff --git 
a/crypto/fipsmodule/cpucap/cpucap.c b/crypto/fipsmodule/cpucap/cpucap.c index 07874c7788..014096a56e 100644 --- a/crypto/fipsmodule/cpucap/cpucap.c +++ b/crypto/fipsmodule/cpucap/cpucap.c @@ -94,6 +94,12 @@ HIDDEN uint32_t OPENSSL_armcap_P = 0; HIDDEN uint8_t BORINGSSL_function_hit[15] = {0}; #endif // BORINGSSL_DISPATCH_TEST +void log_dispatch(size_t id) { +#if BORINGSSL_DISPATCH_TEST + BORINGSSL_function_hit[id] = 1; +#endif +} + // This variable is used only for testing purposes to ensure that the library // constructor is executed and the capability variable is initialized. HIDDEN uint8_t OPENSSL_cpucap_initialized = 0; diff --git a/crypto/fipsmodule/sha/keccak1600.c b/crypto/fipsmodule/sha/keccak1600.c index f97d03a6a9..bbe8682e76 100644 --- a/crypto/fipsmodule/sha/keccak1600.c +++ b/crypto/fipsmodule/sha/keccak1600.c @@ -8,6 +8,7 @@ */ #include +#include #include "internal.h" #include "../../internal.h" #include "../cpucap/internal.h" @@ -321,14 +322,6 @@ void Keccak1600_Squeeze(uint64_t A[KECCAK1600_ROWS][KECCAK1600_ROWS], uint8_t *o // Scalar implementation from OpenSSL provided by keccak1600-armv8.pl extern void KeccakF1600_hw(uint64_t state[25]); -#if defined(OPENSSL_AARCH64) -static void keccak_log_dispatch(size_t id) { -#if BORINGSSL_DISPATCH_TEST - BORINGSSL_function_hit[id] = 1; -#endif -} -#endif - void KeccakF1600(uint64_t A[KECCAK1600_ROWS][KECCAK1600_ROWS]) { // Dispatch logic for Keccak-x1 on AArch64: // @@ -354,21 +347,21 @@ void KeccakF1600(uint64_t A[KECCAK1600_ROWS][KECCAK1600_ROWS]) { #if defined(OPENSSL_AARCH64) #if defined(KECCAK1600_S2N_BIGNUM_ASM) if (CRYPTO_is_Neoverse_N1() || CRYPTO_is_Neoverse_V1() || CRYPTO_is_Neoverse_V2()) { - keccak_log_dispatch(10); // kFlag_sha3_keccak_f1600 + log_dispatch(10); // kFlag_sha3_keccak_f1600 sha3_keccak_f1600((uint64_t *)A, iotas); return; } #if defined(MY_ASSEMBLER_SUPPORTS_NEON_SHA3_EXTENSION) if (CRYPTO_is_ARMv8_SHA3_capable()) { - keccak_log_dispatch(11); // kFlag_sha3_keccak_f1600_alt + 
log_dispatch(11); // kFlag_sha3_keccak_f1600_alt sha3_keccak_f1600_alt((uint64_t *)A, iotas); return; } #endif #endif - keccak_log_dispatch(9); // kFlag_KeccakF1600_hw + log_dispatch(9); // kFlag_KeccakF1600_hw KeccakF1600_hw((uint64_t *) A); #elif defined(OPENSSL_X86_64) @@ -427,20 +420,20 @@ static void Keccak1600_x4(uint64_t A[4][KECCAK1600_ROWS][KECCAK1600_ROWS]) { // (which has its own dispatch logic). #if defined(KECCAK1600_S2N_BIGNUM_ASM) && defined(OPENSSL_AARCH64) if (CRYPTO_is_Neoverse_N1()) { - keccak_log_dispatch(13); // kFlag_sha3_keccak4_f1600_alt + log_dispatch(13); // kFlag_sha3_keccak4_f1600_alt sha3_keccak4_f1600_alt((uint64_t *)A, iotas); return; } #if defined(MY_ASSEMBLER_SUPPORTS_NEON_SHA3_EXTENSION) if (CRYPTO_is_Neoverse_V1() || CRYPTO_is_Neoverse_V2()) { - keccak_log_dispatch(14); // kFlag_sha3_keccak4_f1600_alt2 + log_dispatch(14); // kFlag_sha3_keccak4_f1600_alt2 sha3_keccak4_f1600_alt2((uint64_t *)A, iotas); return; } if (CRYPTO_is_ARMv8_SHA3_capable()) { - keccak_log_dispatch(12); // kFlag_sha3_keccak2_f1600 + log_dispatch(12); // kFlag_sha3_keccak2_f1600 // Use 2-fold function twice: A[0:1] and A[2:3] sha3_keccak2_f1600((uint64_t *)&A[0], iotas); sha3_keccak2_f1600((uint64_t *)&A[2], iotas); diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc index c971419b83..87c3a193f8 100644 --- a/crypto/impl_dispatch_test.cc +++ b/crypto/impl_dispatch_test.cc @@ -31,8 +31,8 @@ #include "fipsmodule/cpucap/internal.h" #include "fipsmodule/modes/internal.h" #include "fipsmodule/bn/rsaz_exp.h" +#include "fipsmodule/bn/internal.h" #include "fipsmodule/sha/internal.h" - #include "test/file_test.h" class ImplDispatchTest : public ::testing::Test { @@ -140,6 +140,11 @@ class ImplDispatchTest : public ::testing::Test { bool is_assembler_too_old = false; bool is_assembler_too_old_avx512 = false; bool ifma_avx512 = false; + bool is_bn_mulx4x_mont_gather5 = false; + bool is_bn_mul4x_mont_gather5 = false; + bool is_bn_mul_mont_gather5_nohw = 
false; + bool is_power5 = false; + bool is_powerx5 = false; #else // AARCH64 bool aes_gcm_pmull_ = false; bool aes_gcm_8x_ = false; @@ -167,6 +172,16 @@ constexpr size_t kFlag_sha256_hw = 6; constexpr size_t kFlag_aesni_gcm_encrypt = 2; constexpr size_t kFlag_aes_gcm_encrypt_avx512 = 7; constexpr size_t kFlag_RSAZ_mod_exp_avx512_x2 = 8; +__attribute__((unused)) +constexpr size_t kFlag_bn_mulx4x_mont_gather5 = 15; +__attribute__((unused)) +constexpr size_t kFlag_bn_mul4x_mont_gather5 = 16; +__attribute__((unused)) +constexpr size_t kFlag_bn_mul_mont_gather5_nohw = 17; +__attribute__((unused)) +constexpr size_t kFlag_bn_powerx5 = 18; +__attribute__((unused)) +constexpr size_t kFlag_bn_power5_nohw = 19; #else // AARCH64 constexpr size_t kFlag_aes_gcm_enc_kernel = 2; constexpr size_t kFlag_aesv8_gcm_8x_enc_128 = 7; @@ -179,6 +194,91 @@ constexpr size_t kFlag_sha3_keccak4_f1600_alt = 13; constexpr size_t kFlag_sha3_keccak4_f1600_alt2 = 14; #endif +#if defined(OPENSSL_BN_ASM_MONT5) +/** +TEST_F(ImplDispatchTest, BN_mul_mont_gather5) { + for (size_t words : {4, 5, 6, 7, 8, 16, 32}) { + SCOPED_TRACE(words); + + bssl::UniquePtr m(BN_new()); + ASSERT_TRUE(m); + bssl::UniquePtr ctx(BN_CTX_new()); + BN_CTX_start(ctx.get()); + ASSERT_TRUE(BN_set_bit(m.get(), 0)); + ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1)); + bssl::UniquePtr mont( + BN_MONT_CTX_new_for_modulus(m.get(), ctx.get())); + ASSERT_TRUE(mont); + + std::vector r(words), a(words), b(words), table(words * 32); + a[0] = 1; + b[0] = 42; + + bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); + + is_bn_mulx4x_mont_gather5 = bn_mulx4x_mont_gather5_capable(words); + if (is_bn_mulx4x_mont_gather5) { + is_bn_mul4x_mont_gather5 = false; + is_bn_mul_mont_gather5_nohw = false; + } else { + is_bn_mul4x_mont_gather5 = bn_mul4x_mont_gather5_capable(words); + if (is_bn_mul4x_mont_gather5) { + is_bn_mul_mont_gather5_nohw = false; + } else { + is_bn_mul_mont_gather5_nohw = true; + } + } + + 
AssertFunctionsHit( + { + {kFlag_bn_mulx4x_mont_gather5, is_bn_mulx4x_mont_gather5}, + {kFlag_bn_mul4x_mont_gather5, is_bn_mul4x_mont_gather5}, + {kFlag_bn_mul_mont_gather5_nohw, is_bn_mul_mont_gather5_nohw}, + }, + [r, table, m, mont, words] { + bn_mul_mont_gather5(r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); + bn_mul_mont_gather5(r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); + }); + } +} +*/ + +/** +TEST_F(ImplDispatchTest, BN_power5) { + for (size_t words : {4, 5, 6, 7, 8, 16, 32}) { + SCOPED_TRACE(words); + + bssl::UniquePtr m(BN_new()); + ASSERT_TRUE(m); + ASSERT_TRUE(BN_set_bit(m.get(), 0)); + ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1)); + bssl::UniquePtr mont( + BN_MONT_CTX_new_for_modulus(m.get(), ctx())); + ASSERT_TRUE(mont); + + std::vector r(words), a(words), b(words), table(words * 32); + a[0] = 1; + b[0] = 42; + + bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); + + is_power5 = bn_power5_capable(words); + is_powerx5 = bn_powerx5_capable(words); + + AssertFunctionsHit( + { + {kFlag_bn_powerx5, is_powerx5}, + {kFlag_bn_power5_nohw, !is_powerx5 && is_power5}, + }, + [] { + bn_power5(r.data(), r.data(), table.data(), m->d, mont->n0, words, 13); + bn_power5(r.data(), a.data(), table.data(), m->d, mont->n0, words, 13); + }); + } +} +*/ +#endif // defined(OPENSSL_BN_ASM_MONT5) + TEST_F(ImplDispatchTest, AEAD_AES_GCM) { AssertFunctionsHit( { diff --git a/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S b/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S index 4450c3ef72..e42d7cffbb 100644 --- a/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S +++ b/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S @@ -6,29 +6,16 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) .text -.extern OPENSSL_ia32cap_P -.hidden OPENSSL_ia32cap_P - -.globl bn_mul_mont_gather5 -.hidden bn_mul_mont_gather5 -.type bn_mul_mont_gather5,@function +.globl 
bn_mul_mont_gather5_nohw +.hidden bn_mul_mont_gather5_nohw +.type bn_mul_mont_gather5_nohw,@function .align 64 -bn_mul_mont_gather5: +bn_mul_mont_gather5_nohw: .cfi_startproc _CET_ENDBR movl %r9d,%r9d movq %rsp,%rax .cfi_def_cfa_register %rax - testl $7,%r9d - jnz .Lmul_enter -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq OPENSSL_ia32cap_P(%rip),%r11 - movl 8(%r11),%r11d -#endif - jmp .Lmul4x_enter - -.align 16 -.Lmul_enter: movd 8(%rsp),%xmm5 pushq %rbx .cfi_offset %rbx,-16 @@ -454,20 +441,17 @@ _CET_ENDBR .Lmul_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.size bn_mul_mont_gather5_nohw,.-bn_mul_mont_gather5_nohw +.global bn_mul4x_mont_gather5 +.hidden bn_mul4x_mont_gather5 .type bn_mul4x_mont_gather5,@function .align 32 bn_mul4x_mont_gather5: .cfi_startproc +_CET_ENDBR .byte 0x67 movq %rsp,%rax .cfi_def_cfa_register %rax -.Lmul4x_enter: -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - andl $0x80108,%r11d - cmpl $0x80108,%r11d - je .Lmulx4x_enter -#endif pushq %rbx .cfi_offset %rbx,-16 pushq %rbp @@ -1092,22 +1076,15 @@ mul4x_internal: jmp .Lsqr4x_sub_entry .cfi_endproc .size mul4x_internal,.-mul4x_internal -.globl bn_power5 -.hidden bn_power5 -.type bn_power5,@function +.globl bn_power5_nohw +.hidden bn_power5_nohw +.type bn_power5_nohw,@function .align 32 -bn_power5: +bn_power5_nohw: .cfi_startproc _CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq OPENSSL_ia32cap_P(%rip),%r11 - movl 8(%r11),%r11d - andl $0x80108,%r11d - cmpl $0x80108,%r11d - je .Lpowerx5_enter -#endif pushq %rbx .cfi_offset %rbx,-16 pushq %rbp @@ -1230,7 +1207,7 @@ _CET_ENDBR .Lpower5_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size bn_power5,.-bn_power5 +.size bn_power5_nohw,.-bn_power5_nohw .globl bn_sqr8x_internal .hidden bn_sqr8x_internal @@ -2074,13 +2051,15 @@ __bn_post4x_internal: .cfi_endproc .size __bn_post4x_internal,.-__bn_post4x_internal #ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX +.global 
bn_mulx4x_mont_gather5 +.hidden bn_mulx4x_mont_gather5 .type bn_mulx4x_mont_gather5,@function .align 32 bn_mulx4x_mont_gather5: .cfi_startproc +_CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax -.Lmulx4x_enter: pushq %rbx .cfi_offset %rbx,-16 pushq %rbp @@ -2611,13 +2590,15 @@ mulx4x_internal: jmp .Lsqrx4x_sub_entry .cfi_endproc .size mulx4x_internal,.-mulx4x_internal +.global bn_powerx5 +.hidden bn_powerx5 .type bn_powerx5,@function .align 32 bn_powerx5: .cfi_startproc +_CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax -.Lpowerx5_enter: pushq %rbx .cfi_offset %rbx,-16 pushq %rbp diff --git a/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S b/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S index cd7c7dadf1..31e468a4f4 100644 --- a/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S +++ b/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S @@ -6,28 +6,16 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) .text - - -.globl _bn_mul_mont_gather5 -.private_extern _bn_mul_mont_gather5 +.globl _bn_mul_mont_gather5_nohw +.private_extern _bn_mul_mont_gather5_nohw .p2align 6 -_bn_mul_mont_gather5: +_bn_mul_mont_gather5_nohw: _CET_ENDBR movl %r9d,%r9d movq %rsp,%rax - testl $7,%r9d - jnz L$mul_enter -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq _OPENSSL_ia32cap_P(%rip),%r11 - movl 8(%r11),%r11d -#endif - jmp L$mul4x_enter - -.p2align 4 -L$mul_enter: movd 8(%rsp),%xmm5 pushq %rbx @@ -454,19 +442,16 @@ L$mul_epilogue: .byte 0xf3,0xc3 +.global _bn_mul4x_mont_gather5 +.private_extern _bn_mul4x_mont_gather5 .p2align 5 -bn_mul4x_mont_gather5: +_bn_mul4x_mont_gather5: +_CET_ENDBR .byte 0x67 movq %rsp,%rax -L$mul4x_enter: -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - andl $0x80108,%r11d - cmpl $0x80108,%r11d - je L$mulx4x_enter -#endif pushq %rbx pushq %rbp @@ -1091,22 +1076,15 @@ L$inner4x: jmp L$sqr4x_sub_entry -.globl _bn_power5 -.private_extern _bn_power5 +.globl _bn_power5_nohw +.private_extern 
_bn_power5_nohw .p2align 5 -_bn_power5: +_bn_power5_nohw: _CET_ENDBR movq %rsp,%rax -#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - leaq _OPENSSL_ia32cap_P(%rip),%r11 - movl 8(%r11),%r11d - andl $0x80108,%r11d - cmpl $0x80108,%r11d - je L$powerx5_enter -#endif pushq %rbx pushq %rbp @@ -2073,13 +2051,15 @@ L$sqr4x_sub_entry: #ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX +.global _bn_mulx4x_mont_gather5 +.private_extern _bn_mulx4x_mont_gather5 .p2align 5 -bn_mulx4x_mont_gather5: +_bn_mulx4x_mont_gather5: +_CET_ENDBR movq %rsp,%rax -L$mulx4x_enter: pushq %rbx pushq %rbp @@ -2610,13 +2590,15 @@ L$mulx4x_inner: jmp L$sqrx4x_sub_entry +.global _bn_powerx5 +.private_extern _bn_powerx5 .p2align 5 -bn_powerx5: +_bn_powerx5: +_CET_ENDBR movq %rsp,%rax -L$powerx5_enter: pushq %rbx pushq %rbp diff --git a/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm b/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm index a3bb9883fb..6b868427db 100644 --- a/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm +++ b/generated-src/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm @@ -12,16 +12,14 @@ default rel section .text code align=64 -EXTERN OPENSSL_ia32cap_P - -global bn_mul_mont_gather5 +global bn_mul_mont_gather5_nohw ALIGN 64 -bn_mul_mont_gather5: +bn_mul_mont_gather5_nohw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_bn_mul_mont_gather5: +$L$SEH_begin_bn_mul_mont_gather5_nohw: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -35,16 +33,6 @@ _CET_ENDBR mov r9d,r9d mov rax,rsp - test r9d,7 - jnz NEAR $L$mul_enter -%ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - lea r11,[OPENSSL_ia32cap_P] - mov r11d,DWORD[8+r11] -%endif - jmp NEAR $L$mul4x_enter - -ALIGN 16 -$L$mul_enter: movd xmm5,DWORD[56+rsp] push rbx @@ -472,7 +460,8 @@ $L$mul_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_mul_mont_gather5: +$L$SEH_end_bn_mul_mont_gather5_nohw: +global bn_mul4x_mont_gather5 ALIGN 32 bn_mul4x_mont_gather5: @@ -489,15 +478,10 @@ 
$L$SEH_begin_bn_mul4x_mont_gather5: +_CET_ENDBR DB 0x67 mov rax,rsp -$L$mul4x_enter: -%ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - and r11d,0x80108 - cmp r11d,0x80108 - je NEAR $L$mulx4x_enter -%endif push rbx push rbp @@ -1124,14 +1108,14 @@ $L$inner4x: jmp NEAR $L$sqr4x_sub_entry -global bn_power5 +global bn_power5_nohw ALIGN 32 -bn_power5: +bn_power5_nohw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_bn_power5: +$L$SEH_begin_bn_power5_nohw: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -1144,13 +1128,6 @@ $L$SEH_begin_bn_power5: _CET_ENDBR mov rax,rsp -%ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX - lea r11,[OPENSSL_ia32cap_P] - mov r11d,DWORD[8+r11] - and r11d,0x80108 - cmp r11d,0x80108 - je NEAR $L$powerx5_enter -%endif push rbx push rbp @@ -1275,7 +1252,7 @@ $L$power5_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_power5: +$L$SEH_end_bn_power5_nohw: global bn_sqr8x_internal @@ -2118,6 +2095,7 @@ $L$sqr4x_sub_entry: %ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX +global bn_mulx4x_mont_gather5 ALIGN 32 bn_mulx4x_mont_gather5: @@ -2134,9 +2112,9 @@ $L$SEH_begin_bn_mulx4x_mont_gather5: +_CET_ENDBR mov rax,rsp -$L$mulx4x_enter: push rbx push rbp @@ -2669,6 +2647,7 @@ $L$mulx4x_inner: jmp NEAR $L$sqrx4x_sub_entry +global bn_powerx5 ALIGN 32 bn_powerx5: @@ -2685,9 +2664,9 @@ $L$SEH_begin_bn_powerx5: +_CET_ENDBR mov rax,rsp -$L$powerx5_enter: push rbx push rbp @@ -3810,17 +3789,17 @@ $L$common_seh_tail: section .pdata rdata align=4 ALIGN 4 - DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase - DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase - DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase + DD $L$SEH_begin_bn_mul_mont_gather5_nohw wrt ..imagebase + DD $L$SEH_end_bn_mul_mont_gather5_nohw wrt ..imagebase + DD $L$SEH_info_bn_mul_mont_gather5_nohw wrt ..imagebase DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase 
- DD $L$SEH_begin_bn_power5 wrt ..imagebase - DD $L$SEH_end_bn_power5 wrt ..imagebase - DD $L$SEH_info_bn_power5 wrt ..imagebase + DD $L$SEH_begin_bn_power5_nohw wrt ..imagebase + DD $L$SEH_end_bn_power5_nohw wrt ..imagebase + DD $L$SEH_info_bn_power5_nohw wrt ..imagebase %ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase @@ -3836,7 +3815,7 @@ ALIGN 4 section .xdata rdata align=4 ALIGN 4 -$L$SEH_info_bn_mul_mont_gather5: +$L$SEH_info_bn_mul_mont_gather5_nohw: DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase @@ -3846,7 +3825,7 @@ $L$SEH_info_bn_mul4x_mont_gather5: DD mul_handler wrt ..imagebase DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase ALIGN 4 -$L$SEH_info_bn_power5: +$L$SEH_info_bn_power5_nohw: DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase diff --git a/include/openssl/cpu.h b/include/openssl/cpu.h index d865020c1c..742a7a1d3a 100644 --- a/include/openssl/cpu.h +++ b/include/openssl/cpu.h @@ -14,5 +14,12 @@ // This header is provided for compatibility with older revisions of BoringSSL. // TODO(davidben): Remove this header. +#ifndef OPENSSL_HEADER_CPU_H +#define OPENSSL_HEADER_CPU_H #include "crypto.h" + +__attribute__((unused)) +void log_dispatch(size_t id); + +#endif // OPENSSL_HEADER_CPU_H