Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
bd3f1a2
Remove dynamic dispatching, update names to match BoringSSL
m271828 Aug 1, 2025
28c2368
Move dispatching into C
m271828 Aug 1, 2025
d6720b2
Fix inconsistent name
m271828 Aug 2, 2025
63c621b
Accidentally removed guard
m271828 Aug 2, 2025
2037e02
Merge branch 'main' into move_x86_64-mont5_dispatching
m271828 Aug 2, 2025
e42eb0a
Add generated-src files
m271828 Aug 5, 2025
8a6ce05
Merge branch 'main' into move_x86_64-mont5_dispatching
m271828 Aug 5, 2025
d305b22
Generate for other platforms
m271828 Aug 5, 2025
8e44aa5
Fix name for windows
m271828 Aug 5, 2025
4865271
Fix function name
m271828 Aug 5, 2025
4085be6
AVX fix
m271828 Aug 5, 2025
12073c3
Test moving macro guard
m271828 Aug 5, 2025
b3844ea
Pull guards in
m271828 Aug 7, 2025
45b2eb7
Add default implementation
m271828 Aug 7, 2025
e5418f5
Fix compiler warning
m271828 Aug 7, 2025
39cb816
Fix compiler warning
m271828 Aug 7, 2025
cb40a8a
Fix compiler warning
m271828 Aug 7, 2025
79cc722
Clean up header
m271828 Aug 8, 2025
c4ecf91
Invert guard logic
m271828 Aug 9, 2025
d0dd56f
Merge branch 'main' into move_x86_64-mont5_dispatching
m271828 Aug 9, 2025
92af303
Remove guards
m271828 Aug 9, 2025
3dafe95
Change to perror/abort pattern
m271828 Aug 9, 2025
9b8c355
Move default implementations to more logical location
m271828 Aug 9, 2025
d96d329
Fix guards and comments
m271828 Aug 15, 2025
4542f58
Merge branch 'main' into move_x86_64-mont5_dispatching
m271828 Aug 15, 2025
c369e88
Update sqr8x_mont to new style
m271828 Aug 15, 2025
980bcff
Fix typo
m271828 Aug 27, 2025
10a46a0
Add implicit dispatch tests
m271828 Aug 28, 2025
6062827
Add dispatch function
m271828 Sep 15, 2025
96a03ef
Merge branch 'main' into move_x86_64-mont5_dispatching
m271828 Sep 15, 2025
0cf8b2d
Change to single dispatch log function
m271828 Sep 17, 2025
03d8c2d
Add ctx
m271828 Sep 17, 2025
3e143d4
Merge branch 'main' into move_x86_64-mont5_dispatching
m271828 Sep 17, 2025
d993aeb
Header guard
m271828 Sep 17, 2025
b18b250
Add capture
m271828 Sep 17, 2025
c431326
Remove guard
m271828 Sep 18, 2025
5e4cf23
Change scope of function defininition
m271828 Sep 18, 2025
13e40c1
Add header file for bn functions
m271828 Sep 18, 2025
c7e253f
Comment out dynamic distpatch tests using static decls
m271828 Sep 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 25 additions & 58 deletions crypto/fipsmodule/bn/asm/x86_64-mont5.pl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
$addx = 1;
for (@ARGV) { $addx = 0 if (/-DMY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX/); }

# int bn_mul_mont_gather5(
# int bn_mul_mont_gather5_nohw(
$rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap,
$bp="%rdx"; # const BN_ULONG *bp,
Expand All @@ -76,31 +76,15 @@
$code=<<___;
.text

.extern OPENSSL_ia32cap_P

.globl bn_mul_mont_gather5
.type bn_mul_mont_gather5,\@function,6
.globl bn_mul_mont_gather5_nohw
.type bn_mul_mont_gather5_nohw,\@function,6
.align 64
bn_mul_mont_gather5:
bn_mul_mont_gather5_nohw:
.cfi_startproc
_CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
test \$7,${num}d
jnz .Lmul_enter
___
$code.=<<___ if ($addx);
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
leaq OPENSSL_ia32cap_P(%rip),%r11
mov 8(%r11),%r11d
#endif
___
$code.=<<___;
jmp .Lmul4x_enter

.align 16
.Lmul_enter:
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
push %rbx
.cfi_push %rbx
Expand Down Expand Up @@ -460,29 +444,21 @@
.Lmul_epilogue:
ret
.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
.size bn_mul_mont_gather5_nohw,.-bn_mul_mont_gather5_nohw
___
{{{
my @A=("%r10","%r11");
my @N=("%r13","%rdi");
$code.=<<___;
.global bn_mul4x_mont_gather5
.type bn_mul4x_mont_gather5,\@function,6
.align 32
bn_mul4x_mont_gather5:
.cfi_startproc
_CET_ENDBR
.byte 0x67
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
and \$0x80108,%r11d
cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
je .Lmulx4x_enter
#endif
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -1087,7 +1063,7 @@
}}}
{{{
######################################################################
# void bn_power5(
# void bn_power5_nohw(
my $rptr="%rdi"; # BN_ULONG *rptr,
my $aptr="%rsi"; # const BN_ULONG *aptr,
my $bptr="%rdx"; # const BN_ULONG *table,
Expand All @@ -1102,25 +1078,14 @@
my ($a0,$a1,$ai)=("%r14","%r15","%rbx");

$code.=<<___;
.globl bn_power5
.type bn_power5,\@function,6
.globl bn_power5_nohw
.type bn_power5_nohw,\@function,6
.align 32
bn_power5:
bn_power5_nohw:
.cfi_startproc
_CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
___
$code.=<<___ if ($addx);
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
leaq OPENSSL_ia32cap_P(%rip),%r11
mov 8(%r11),%r11d
and \$0x80108,%r11d
cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
je .Lpowerx5_enter
#endif
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -1243,7 +1208,7 @@
.Lpower5_epilogue:
ret
.cfi_endproc
.size bn_power5,.-bn_power5
.size bn_power5_nohw,.-bn_power5_nohw

.globl bn_sqr8x_internal
.hidden bn_sqr8x_internal
Expand Down Expand Up @@ -2119,13 +2084,14 @@

$code.=<<___;
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
.global bn_mulx4x_mont_gather5
.type bn_mulx4x_mont_gather5,\@function,6
.align 32
bn_mulx4x_mont_gather5:
.cfi_startproc
_CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -2594,7 +2560,7 @@
___
} {
######################################################################
# void bn_power5(
# void bn_powerx5(
my $rptr="%rdi"; # BN_ULONG *rptr,
my $aptr="%rsi"; # const BN_ULONG *aptr,
my $bptr="%rdx"; # const BN_ULONG *table,
Expand All @@ -2609,13 +2575,14 @@
my ($a0,$a1,$ai)=("%r14","%r15","%rbx");

$code.=<<___;
.global bn_powerx5
.type bn_powerx5,\@function,6
.align 32
bn_powerx5:
.cfi_startproc
_CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lpowerx5_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand Down Expand Up @@ -3717,17 +3684,17 @@

.section .pdata
.align 4
.rva .LSEH_begin_bn_mul_mont_gather5
.rva .LSEH_end_bn_mul_mont_gather5
.rva .LSEH_info_bn_mul_mont_gather5
.rva .LSEH_begin_bn_mul_mont_gather5_nohw
.rva .LSEH_end_bn_mul_mont_gather5_nohw
.rva .LSEH_info_bn_mul_mont_gather5_nohw

.rva .LSEH_begin_bn_mul4x_mont_gather5
.rva .LSEH_end_bn_mul4x_mont_gather5
.rva .LSEH_info_bn_mul4x_mont_gather5

.rva .LSEH_begin_bn_power5
.rva .LSEH_end_bn_power5
.rva .LSEH_info_bn_power5
.rva .LSEH_begin_bn_power5_nohw
.rva .LSEH_end_bn_power5_nohw
.rva .LSEH_info_bn_power5_nohw
___
$code.=<<___ if ($addx);
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
Expand All @@ -3747,7 +3714,7 @@

.section .xdata
.align 4
.LSEH_info_bn_mul_mont_gather5:
.LSEH_info_bn_mul_mont_gather5_nohw:
.byte 9,0,0,0
.rva mul_handler
.rva .Lmul_body,.Lmul_body,.Lmul_epilogue # HandlerData[]
Expand All @@ -3757,7 +3724,7 @@
.rva mul_handler
.rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[]
.align 4
.LSEH_info_bn_power5:
.LSEH_info_bn_power5_nohw:
.byte 9,0,0,0
.rva mul_handler
.rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # HandlerData[]
Expand Down
33 changes: 23 additions & 10 deletions crypto/fipsmodule/bn/bn_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2959,14 +2959,12 @@ TEST_F(BNTest, BNMulMontABI) {
b[0] = 42;

#if defined(OPENSSL_X86_64)
#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
if (bn_mulx4x_mont_capable(words)) {
CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), b.data(), mont->N.d,
mont->n0, words);
CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
}
#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
if (bn_mul4x_mont_capable(words)) {
CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), b.data(), mont->N.d,
mont->n0, words);
Expand All @@ -2977,12 +2975,10 @@ TEST_F(BNTest, BNMulMontABI) {
mont->n0, words);
CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
#if !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
if (bn_sqr8x_mont_capable(words)) {
CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), bn_mulx_adx_capable(),
mont->N.d, mont->n0, words);
}
#endif // !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
#elif defined(OPENSSL_ARM)
if (bn_mul8x_mont_neon_capable(words)) {
CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), b.data(), mont->N.d,
Expand Down Expand Up @@ -3028,18 +3024,35 @@ TEST_F(BNTest, BNMulMont5ABI) {
bn_scatter5(r.data(), words, table.data(), i);
}
CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13);

CHECK_ABI(bn_mul_mont_gather5, r.data(), r.data(), table.data(), m->d,
if (bn_mulx4x_mont_gather5_capable(words)) {
CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), r.data(), table.data(), m->d,
mont->n0, words, 13);
CHECK_ABI(bn_mulx4x_mont_gather5, r.data(), a.data(), table.data(), m->d,
mont->n0, words, 13);
}
if (bn_mul4x_mont_gather5_capable(words)) {
CHECK_ABI(bn_mul4x_mont_gather5, r.data(), r.data(), table.data(), m->d,
mont->n0, words, 13);
CHECK_ABI(bn_mul4x_mont_gather5, r.data(), a.data(), table.data(), m->d,
mont->n0, words, 13);
}
CHECK_ABI(bn_mul_mont_gather5_nohw, r.data(), r.data(), table.data(), m->d,
mont->n0, words, 13);
CHECK_ABI(bn_mul_mont_gather5, r.data(), a.data(), table.data(), m->d,
CHECK_ABI(bn_mul_mont_gather5_nohw, r.data(), a.data(), table.data(), m->d,
mont->n0, words, 13);

if (words % 8 == 0) {
CHECK_ABI(bn_power5, r.data(), r.data(), table.data(), m->d, mont->n0,
if (bn_powerx5_capable(words)) {
CHECK_ABI(bn_powerx5, r.data(), r.data(), table.data(), m->d, mont->n0,
words, 13);
CHECK_ABI(bn_power5, r.data(), a.data(), table.data(), m->d, mont->n0,
CHECK_ABI(bn_powerx5, r.data(), a.data(), table.data(), m->d, mont->n0,
words, 13);
}
if (bn_power5_capable(words)) {
CHECK_ABI(bn_power5_nohw, r.data(), r.data(), table.data(), m->d,
mont->n0, words, 13);
CHECK_ABI(bn_power5_nohw, r.data(), a.data(), table.data(), m->d,
mont->n0, words, 13);
}
}
}
#endif // OPENSSL_BN_ASM_MONT5 && SUPPORTS_ABI_TEST
Expand Down
53 changes: 52 additions & 1 deletion crypto/fipsmodule/bn/exponentiation.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
* Hudson ([email protected]). */

#include <openssl/bn.h>
#include <openssl/cpu.h>

#include <assert.h>
#include <limits.h>
Expand Down Expand Up @@ -163,6 +164,56 @@ static void exponentiation_s2n_bignum_copy_from_prebuf(BN_ULONG *dest, int width
#endif
}

#if defined(OPENSSL_BN_ASM_MONT5)

// bn_mul_mont_gather5 loads the entry at index |power| of |table|, multiplies
// it by |ap| modulo |np|, and stores the result in |rp|. The values are |num|
// words long and represented in Montgomery form. |n0| is a pointer to the
// corresponding field in |BN_MONT_CTX|. |table| must be aligned to at least
// 16 bytes. |power| must be less than 32 and is treated as secret.
//
// This is a C-level dispatcher: it asks each |*_capable| predicate in turn and
// forwards the unchanged argument list to the first implementation that
// accepts |num|, falling back to the |_nohw| variant.
//
// WARNING: This function implements Almost Montgomery Multiplication from
// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced.
// However, even if they are fully reduced, the output may not be.
static void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
                                const BN_ULONG *table, const BN_ULONG *np,
                                const BN_ULONG *n0, int num, int power) {
  // First choice: the mulx4x variant.
  // NOTE(review): the numeric argument to |log_dispatch| presumably identifies
  // the selected implementation -- confirm against its definition.
  if (bn_mulx4x_mont_gather5_capable(num)) {
    log_dispatch(15);
    bn_mulx4x_mont_gather5(rp, ap, table, np, n0, num, power);
    return;
  }

  // Second choice: the mul4x variant.
  if (bn_mul4x_mont_gather5_capable(num)) {
    log_dispatch(16);
    bn_mul4x_mont_gather5(rp, ap, table, np, n0, num, power);
    return;
  }

  // Neither predicate accepted |num|; use the |_nohw| variant.
  log_dispatch(17);
  bn_mul_mont_gather5_nohw(rp, ap, table, np, n0, num, power);
}

// bn_power5 squares |ap| five times and multiplies it by the value stored at
// index |power| of |table|, modulo |np|. It stores the result in |rp|. The
// values are |num| words long and represented in Montgomery form. |n0| is a
// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible
// by 8. |power| must be less than 32 and is treated as secret.
//
// This is a C-level dispatcher: callers must only invoke it when
// |bn_power5_capable(num)| holds (asserted below). It forwards the unchanged
// argument list to |bn_powerx5| when |bn_powerx5_capable(num)| is true and to
// |bn_power5_nohw| otherwise.
//
// WARNING: This function implements Almost Montgomery Multiplication from
// https://eprint.iacr.org/2011/239. The inputs do not need to be fully reduced.
// However, even if they are fully reduced, the output may not be.
static void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
                      const BN_ULONG *np, const BN_ULONG *n0, int num,
                      int power) {
  assert(bn_power5_capable(num));

  // Handle the fallback first so the preferred path reads straight through.
  if (!bn_powerx5_capable(num)) {
    log_dispatch(19);
    bn_power5_nohw(rp, ap, table, np, n0, num, power);
    return;
  }

  log_dispatch(18);
  bn_powerx5(rp, ap, table, np, n0, num, power);
}

#endif // defined(OPENSSL_BN_ASM_MONT5)

int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
int i, bits, ret = 0;
Expand Down Expand Up @@ -1122,7 +1173,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,

// Scan the exponent one window at a time starting from the most
// significant bits.
if (top & 7) {
if (!bn_power5_capable(top)) {
while (bits >= 0) {
for (wvalue = 0, i = 0; i < 5; i++, bits--) {
wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
Expand Down
35 changes: 35 additions & 0 deletions crypto/fipsmodule/bn/generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -622,3 +622,38 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
}

#endif // !BN_ADD_ASM

// Default implementations of hardware-accelerated functions. Each one reports
// its own name with perror() and then calls abort() if it is ever reached.
// They are used to reduce the number of guards needed in code.
#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)

#if defined(OPENSSL_X86_64) && defined(OPENSSL_BN_ASM_MONT)
// Placeholder for |bn_mulx4x_mont|, compiled only inside the enclosing
// MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX guard. It ignores every argument, reports
// its own name through perror(), and terminates the process with abort(), so
// it never returns a value.
int bn_mulx4x_mont(BN_ULONG *rp,
                   const BN_ULONG *ap,
                   const BN_ULONG *bp,
                   const BN_ULONG *np,
                   const BN_ULONG *n0,
                   size_t num) {
  perror("bn_mulx4x_mont");
  abort();
}
#endif

#if defined(OPENSSL_BN_ASM_MONT5)
// Placeholder for |bn_mulx4x_mont_gather5|, compiled only inside the enclosing
// MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX guard. It ignores every argument, reports
// its own name through perror(), and terminates the process with abort().
void bn_mulx4x_mont_gather5(BN_ULONG *rp,
                            const BN_ULONG *ap,
                            const BN_ULONG *table,
                            const BN_ULONG *np,
                            const BN_ULONG *n0,
                            int num,
                            int power) {
  perror("bn_mulx4x_mont_gather5");
  abort();
}

// Placeholder for |bn_powerx5|, compiled only inside the enclosing
// MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX guard. It ignores every argument, reports
// its own name through perror(), and terminates the process with abort().
void bn_powerx5(BN_ULONG *rp,
                const BN_ULONG *ap,
                const BN_ULONG *table,
                const BN_ULONG *np,
                const BN_ULONG *n0,
                int num,
                int power) {
  perror("bn_powerx5");
  abort();
}

// Placeholder for |bn_sqr8x_mont|, compiled only inside the enclosing
// MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX guard. It ignores every argument, reports
// its own name through perror(), and terminates the process with abort(), so
// it never returns a value.
//
// NOTE(review): this stub sits inside the OPENSSL_BN_ASM_MONT5 guard, whereas
// the |bn_mulx4x_mont| stub is guarded by OPENSSL_BN_ASM_MONT -- confirm that
// placement is intended.
int bn_sqr8x_mont(BN_ULONG *rp,
                  const BN_ULONG *ap,
                  BN_ULONG mulx_adx_capable,
                  const BN_ULONG *np,
                  const BN_ULONG *n0,
                  size_t num) {
  perror("bn_sqr8x_mont");
  abort();
}
#endif

#endif // defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
Loading
Loading