@@ -82,22 +82,15 @@ where
82
82
assert ! ( N <= 64 , "number of elements can't be greater than 64" ) ;
83
83
}
84
84
85
- // LLVM assumes bit order should match endianness
86
- let bitmask = if cfg ! ( target_endian = "big" ) {
87
- let rev = self . reverse_bits ( ) ;
88
- if N < 64 {
89
- // Shift things back to the right
90
- rev >> ( 64 - N )
91
- } else {
92
- rev
93
- }
94
- } else {
95
- self
96
- } ;
97
-
98
85
#[ inline]
99
- unsafe fn select_impl < T , U , const M : usize , const N : usize > (
86
+ unsafe fn select_impl <
87
+ T ,
88
+ U : core:: ops:: Shr < usize , Output = U > ,
89
+ const M : usize ,
90
+ const N : usize ,
91
+ > (
100
92
bitmask : U ,
93
+ bitmask_reversed : U ,
101
94
true_values : Simd < T , N > ,
102
95
false_values : Simd < T , N > ,
103
96
) -> Simd < T , N >
@@ -110,6 +103,13 @@ where
110
103
let true_values = true_values. resize :: < M > ( default) ;
111
104
let false_values = false_values. resize :: < M > ( default) ;
112
105
106
+ // LLVM assumes bit order should match endianness
107
+ let bitmask = if cfg ! ( target_endian = "big" ) {
108
+ bitmask_reversed >> ( M - N )
109
+ } else {
110
+ bitmask
111
+ } ;
112
+
113
113
// Safety: the caller guarantees that the size of U matches M
114
114
let selected = unsafe {
115
115
core:: intrinsics:: simd:: simd_select_bitmask ( bitmask, true_values, false_values)
@@ -120,17 +120,49 @@ where
120
120
121
121
// TODO modify simd_select_bitmask to truncate input, making this unnecessary
122
122
if N <= 8 {
123
+ let bitmask = self as u8 ;
123
124
// Safety: bitmask matches length
124
- unsafe { select_impl :: < T , u8 , 8 , N > ( bitmask as u8 , true_values, false_values) }
125
+ unsafe {
126
+ select_impl :: < T , u8 , 8 , N > (
127
+ bitmask,
128
+ bitmask. reverse_bits ( ) ,
129
+ true_values,
130
+ false_values,
131
+ )
132
+ }
125
133
} else if N <= 16 {
134
+ let bitmask = self as u16 ;
126
135
// Safety: bitmask matches length
127
- unsafe { select_impl :: < T , u16 , 16 , N > ( bitmask as u16 , true_values, false_values) }
136
+ unsafe {
137
+ select_impl :: < T , u16 , 16 , N > (
138
+ bitmask,
139
+ bitmask. reverse_bits ( ) ,
140
+ true_values,
141
+ false_values,
142
+ )
143
+ }
128
144
} else if N <= 32 {
145
+ let bitmask = self as u32 ;
129
146
// Safety: bitmask matches length
130
- unsafe { select_impl :: < T , u32 , 32 , N > ( bitmask as u32 , true_values, false_values) }
147
+ unsafe {
148
+ select_impl :: < T , u32 , 32 , N > (
149
+ bitmask,
150
+ bitmask. reverse_bits ( ) ,
151
+ true_values,
152
+ false_values,
153
+ )
154
+ }
131
155
} else {
156
+ let bitmask = self as u64 ;
132
157
// Safety: bitmask matches length
133
- unsafe { select_impl :: < T , u64 , 64 , N > ( bitmask, true_values, false_values) }
158
+ unsafe {
159
+ select_impl :: < T , u64 , 64 , N > (
160
+ bitmask,
161
+ bitmask. reverse_bits ( ) ,
162
+ true_values,
163
+ false_values,
164
+ )
165
+ }
134
166
}
135
167
}
136
168
}
0 commit comments