@@ -148,6 +148,9 @@ Return Value:
148
148
// instead.
149
149
normal = _mm_min_epi16 (normal, MaximumExponent);
150
150
normal = _mm_max_epi16 (normal, MinimumExponent);
151
+ #elif defined(MLAS_LSX_INTRINSICS)
152
+ normal = __lsx_vmin_h (normal, MaximumExponent);
153
+ normal = __lsx_vmax_h (normal, MinimumExponent);
151
154
#else
152
155
normal = MlasMinimumInt32x4 (normal, MaximumExponent);
153
156
normal = MlasMaximumInt32x4 (normal, MinimumExponent);
@@ -215,6 +218,8 @@ Return Value:
215
218
// N.B. SSE2 lacks a broadcast load instruction, so avoid a shuffle
216
219
// and use zeroes for the upper elements.
217
220
Vector = _mm_load_ss (Input);
221
+ #elif defined(MLAS_LSX_INTRINSICS)
222
+ Vector = (MLAS_FLOAT32X4)__lsx_vldrepl_w (Input, 0 );
218
223
#else
219
224
Vector = MlasBroadcastFloat32x4 (Input);
220
225
#endif
@@ -467,6 +472,8 @@ Return Value:
467
472
// N.B. SSE2 lacks a broadcast load instruction, so avoid a shuffle and
468
473
// use zeroes for the upper elements.
469
474
MLAS_FLOAT32X4 Vector = _mm_load_ss (Input);
475
+ #elif defined(MLAS_LSX_INTRINSICS)
476
+ MLAS_FLOAT32X4 Vector = (MLAS_FLOAT32X4)__lsx_vldrepl_w (Input, 0 );
470
477
#else
471
478
MLAS_FLOAT32X4 Vector = MlasBroadcastFloat32x4 (Input);
472
479
#endif
@@ -849,7 +856,7 @@ Return Value:
849
856
// Find the maximum value for the row.
850
857
//
851
858
852
- #if defined(MLAS_TARGET_AMD64)
859
+ #if defined(MLAS_TARGET_AMD64) || defined(MLAS_TARGET_LARCH64)
853
860
float Maximum = GetMlasPlatform ().ReduceMaximumF32Kernel (Input, D);
854
861
#else
855
862
float Maximum = MlasReduceMaximumF32Kernel (Input, D);
@@ -874,7 +881,7 @@ Return Value:
874
881
875
882
float Parameters[] = { NegativeMaximum, std::log (Accumulation)};
876
883
877
- #if defined(MLAS_TARGET_AMD64)
884
+ #if defined(MLAS_TARGET_AMD64) || defined(MLAS_TARGET_LARCH64)
878
885
GetMlasPlatform ().ComputeLogSoftmaxOutputF32Kernel (Input, Output, D, Parameters);
879
886
#else
880
887
MlasComputeLogSoftmaxOutputF32Kernel (Input, Output, D, Parameters);
@@ -899,7 +906,7 @@ Return Value:
899
906
900
907
float Parameters[] = { 1 .0f / Accumulation };
901
908
902
- #if defined(MLAS_TARGET_AMD64)
909
+ #if defined(MLAS_TARGET_AMD64) || defined(MLAS_TARGET_LARCH64)
903
910
GetMlasPlatform ().ComputeSoftmaxOutputF32Kernel (Output, D, Parameters);
904
911
#else
905
912
MlasComputeSoftmaxOutputF32Kernel (Output, D, Parameters);
0 commit comments