diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h index 4de1137f9..0343adf8d 100644 --- a/celt/_kiss_fft_guts.h +++ b/celt/_kiss_fft_guts.h @@ -102,7 +102,7 @@ #if defined(OPUS_ARM_INLINE_EDSP) #include "arm/kiss_fft_armv5e.h" #endif -#if defined(__mips_dsp) && __mips == 32 +#if defined(__mips) #include "mips/kiss_fft_mipsr1.h" #endif diff --git a/celt/arch.h b/celt/arch.h index dd095b218..1b722a44e 100644 --- a/celt/arch.h +++ b/celt/arch.h @@ -227,9 +227,10 @@ typedef opus_val16 celt_coef; #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) -static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { +static OPUS_INLINE opus_int16 SAT16_default(opus_int32 x) { return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; } +#define SAT16(x) SAT16_default(x) #ifdef FIXED_DEBUG #include "fixed_debug.h" diff --git a/celt/celt.c b/celt/celt.c index cc2c8c8e7..3838684fa 100644 --- a/celt/celt.c +++ b/celt/celt.c @@ -54,7 +54,7 @@ #define PACKAGE_VERSION "unknown" #endif -#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32 +#if defined(FIXED_POINT) && defined(__mips) #include "mips/celt_mipsr1.h" #endif diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c index 2a98b7e9d..43dffbdf4 100644 --- a/celt/celt_lpc.c +++ b/celt/celt_lpc.c @@ -59,8 +59,15 @@ int p for (i = 0; i < p; i++) { /* Sum up this iteration's reflection coefficient */ opus_val32 rr = 0; +#if defined (FIXED_POINT) && OPUS_FAST_INT64 + opus_int64 acc = 0; + for (j = 0; j < i; j++) + acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]); + rr = (opus_val32)SHR(acc, 31); +#else for (j = 0; j < i; j++) rr += MULT32_32_Q31(lpc[j],ac[i - j]); +#endif rr += SHR32(ac[i + 1],6); r = -frac_div32(SHL32(rr,6), error); /* Update LPC coefficients and total error */ diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h index f499f4f89..d750e6ce8 100644 --- a/celt/fixed_generic.h +++ b/celt/fixed_generic.h @@ -129,7 +129,8 @@ #define SHR(a,shift) ((a) >> (shift)) #define 
SHL(a,shift) SHL32(a,shift) #define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift)) -#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) +#define SATURATE_generic(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) +#define SATURATE(x,a) SATURATE_generic((x), (a)) #define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x))) @@ -200,10 +201,6 @@ /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) -#if defined(__mips_dsp) && __mips == 32 -#include "mips/fixed_generic_mipsr1.h" -#endif - static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) { x = PSHR32(x, SIG_SHIFT); @@ -213,4 +210,8 @@ static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) } #define SIG2WORD16(x) (SIG2WORD16_generic(x)) +#if defined(__mips) +#include "mips/fixed_generic_mipsr1.h" +#endif + #endif diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c index 7c49a4f32..24400e862 100644 --- a/celt/kiss_fft.c +++ b/celt/kiss_fft.c @@ -535,6 +535,7 @@ void opus_fft_free(const kiss_fft_state *cfg, int arch) #endif /* CUSTOM_MODES */ #ifdef FIXED_POINT +#ifndef OVERRIDE_fft_downshift static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { int shift; shift = IMIN(step, *total); @@ -553,6 +554,7 @@ static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { } } } +#endif /* OVERRIDE_fft_downshift */ #else #define fft_downshift(x, N, total, step) #endif diff --git a/celt/mdct.c b/celt/mdct.c index a60081b5c..2b8a0bce9 100644 --- a/celt/mdct.c +++ b/celt/mdct.c @@ -53,7 +53,7 @@ #include "mathops.h" #include "stack_alloc.h" -#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32 +#if defined(FIXED_POINT) && defined(__mips) && __mips == 32 #include "mips/mdct_mipsr1.h" #endif diff --git a/celt/mips/celt_mipsr1.h b/celt/mips/celt_mipsr1.h index 7fa8d4358..8a5d0c90a 100644 --- a/celt/mips/celt_mipsr1.h +++ b/celt/mips/celt_mipsr1.h @@ -36,6 +36,16 @@ 
#define CELT_C +#if defined (__mips_dsp) && __mips == 32 + +#define OVERRIDE_COMB_FILTER_CONST +#define OVERRIDE_comb_filter +#elif defined(__mips_isa_rev) && __mips_isa_rev < 6 + +#define OVERRIDE_COMB_FILTER_CONST +#define OVERRIDE_comb_filter +#endif + #include "os_support.h" #include "mdct.h" #include @@ -53,8 +63,43 @@ #include "celt_lpc.h" #include "vq.h" -#define OVERRIDE_COMB_FILTER_CONST -#define OVERRIDE_comb_filter +#if defined (__mips_dsp) && __mips == 32 + +#define MIPS_MULT __builtin_mips_mult +#define MIPS_MADD __builtin_mips_madd +#define MIPS_EXTR __builtin_mips_extr_w + +#elif defined(__mips_isa_rev) && __mips_isa_rev < 6 + +static inline long long MIPS_MULT(int a, int b) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b) + : + ); + return acc; +} + +static inline long long MIPS_MADD(long long acc, int a, int b) { + asm volatile ( + "madd %[a], %[b] \n" + : [acc] "+x"(acc) + : [a] "r"(a), [b] "r"(b) + : + ); + return acc; +} + +static inline opus_val32 MIPS_EXTR(long long acc, int shift) { + return (opus_val32)(acc >> shift); +} + +#endif + +#if defined (OVERRIDE_comb_filter) void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, const opus_val16 *window, int overlap, int arch) @@ -101,13 +146,13 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, f = MULT16_16_Q15(window[i],window[i]); x0= x[i-T1+2]; - acc = __builtin_mips_mult((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1])); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2])); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g10), (int)x2); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1)); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g12), 
(int)ADD32(x4,x0)); - res = __builtin_mips_extr_w(acc, 15); + acc = MIPS_MULT((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1])); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2])); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g10), (int)x2); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1)); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0)); + res = MIPS_EXTR(acc, 15); y[i] = x[i] + res; @@ -136,10 +181,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, long long acc; x0=x[i-T1+2]; - acc = __builtin_mips_mult((int)g10, (int)x2); - acc = __builtin_mips_madd(acc, (int)g11, (int)ADD32(x3,x1)); - acc = __builtin_mips_madd(acc, (int)g12, (int)ADD32(x4,x0)); - res = __builtin_mips_extr_w(acc, 15); + acc = MIPS_MULT((int)g10, (int)x2); + acc = MIPS_MADD(acc, (int)g11, (int)ADD32(x3,x1)); + acc = MIPS_MADD(acc, (int)g12, (int)ADD32(x4,x0)); + res = MIPS_EXTR(acc, 15); y[i] = x[i] + res; x4=x3; @@ -148,5 +193,6 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, x1=x0; } } +#endif /* OVERRIDE_comb_filter */ #endif /* CELT_MIPSR1_H__ */ diff --git a/celt/mips/fixed_generic_mipsr1.h b/celt/mips/fixed_generic_mipsr1.h index 42f0e4047..1888e3004 100644 --- a/celt/mips/fixed_generic_mipsr1.h +++ b/celt/mips/fixed_generic_mipsr1.h @@ -33,34 +33,10 @@ #ifndef CELT_FIXED_GENERIC_MIPSR1_H #define CELT_FIXED_GENERIC_MIPSR1_H -#undef MULT16_32_Q15_ADD -static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) { - long long acc = __builtin_mips_mult(a, b); - acc = __builtin_mips_madd(acc, c, d); - return __builtin_mips_extr_w(acc, 15); -} - -#undef MULT16_32_Q15_SUB -static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) { - long long acc = __builtin_mips_mult(a, b); - acc = __builtin_mips_msub(acc, c, d); - return __builtin_mips_extr_w(acc, 15); -} - 
-#undef MULT16_16_Q15_ADD -static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) { - long long acc = __builtin_mips_mult(a, b); - acc = __builtin_mips_madd(acc, c, d); - return __builtin_mips_extr_w(acc, 15); -} - -#undef MULT16_16_Q15_SUB -static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) { - long long acc = __builtin_mips_mult(a, b); - acc = __builtin_mips_msub(acc, c, d); - return __builtin_mips_extr_w(acc, 15); -} +#if defined (__mips_dsp) && __mips == 32 +typedef short v2i16 __attribute__((vector_size(4))); +typedef char v2i8 __attribute__((vector_size(4))); #undef MULT16_32_Q16 static inline int MULT16_32_Q16(int a, int b) @@ -103,4 +79,104 @@ static inline int MULT16_16_P15(int a, int b) return __builtin_mips_shra_r_w(r, 15); } +#define OVERRIDE_CELT_MAXABS16 +static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len) +{ + int i; + v2i16 v2max = (v2i16){ 0, 0 }; + v2i16 x01, x23; + const v2i16 *x2; + opus_val16 maxlo, maxhi; + int loops; + + if ((long)x & 2 && len > 0) { + v2max = (v2i16){ 0, ABS16(*x) }; + x++; + len--; + } + x2 = __builtin_assume_aligned(x, 4); + loops = len / 4; + + for (i = 0; i < loops; i++) + { + x01 = *x2++; + x23 = *x2++; + x01 = __builtin_mips_absq_s_ph(x01); + x23 = __builtin_mips_absq_s_ph(x23); + __builtin_mips_cmp_lt_ph(v2max, x01); + v2max = __builtin_mips_pick_ph(x01, v2max); + __builtin_mips_cmp_lt_ph(v2max, x23); + v2max = __builtin_mips_pick_ph(x23, v2max); + } + + switch (len & 3) { + case 3: + x01 = __builtin_mips_absq_s_ph(*x2); + __builtin_mips_cmp_lt_ph(v2max, x01); + v2max = __builtin_mips_pick_ph(x01, v2max); + maxlo = EXTRACT16((opus_val32)v2max); + maxhi = EXTRACT16((opus_val32)v2max >> 16); + maxlo = MAX16(MAX16(maxlo, maxhi), ABS16(x[len - 1])); + break; + case 2: + x01 = __builtin_mips_absq_s_ph(*x2); + __builtin_mips_cmp_lt_ph(v2max, x01); + v2max = __builtin_mips_pick_ph(x01, v2max); + maxlo = EXTRACT16((opus_val32)v2max); + maxhi = EXTRACT16((opus_val32)v2max >> 16); + 
maxlo = MAX16(maxlo, maxhi); + break; + case 1: + maxlo = EXTRACT16((opus_val32)v2max); + maxhi = EXTRACT16((opus_val32)v2max >> 16); + return MAX16(MAX16(maxlo, maxhi), ABS16(x[len - 1])); + break; + case 0: + maxlo = EXTRACT16((opus_val32)v2max); + maxhi = EXTRACT16((opus_val32)v2max >> 16); + maxlo = MAX16(maxlo, maxhi); + break; + default: + __builtin_unreachable(); + } + /* C version might return 0x8000, this one can't + * because abs is saturated here. Since result + * used only for determine dynamic range + * in ilog2-like context it's worth to add 1 + * for proper magnitude whether saturated + */ + return (opus_val32)maxlo + 1; +} + +#undef SATURATE +static OPUS_INLINE int SATURATE(int x, int a) +{ + if (__builtin_constant_p(a) && __builtin_popcount(a + 1) == 1) { + const int shift = __builtin_clz(a + 1); + int ret = __builtin_mips_shll_s_w(x, shift); + return ret >> shift; + } + return SATURATE_generic(x, a); +} + +#undef SATURATE16 +#define SATURATE16(x) EXTRACT16(SATURATE(x, 32767)) + +#undef SAT16 +#define SAT16(x) EXTRACT16(SATURATE(x, 32767)) + +#undef SIG2WORD16 +static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + return SATURATE16(x); +} + +#elif __mips == 32 + +#undef MULT16_32_Q16 +#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)(SHL32((a), 16))*(b),32)) + +#endif + #endif /* CELT_FIXED_GENERIC_MIPSR1_H */ diff --git a/celt/mips/kiss_fft_mipsr1.h b/celt/mips/kiss_fft_mipsr1.h index bdb5df804..215dd0d3c 100644 --- a/celt/mips/kiss_fft_mipsr1.h +++ b/celt/mips/kiss_fft_mipsr1.h @@ -32,17 +32,14 @@ #ifdef FIXED_POINT -#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d)) -#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d)) +#if __mips == 32 && defined (__mips_dsp) -#undef S_MUL_ADD static inline int S_MUL_ADD(int a, int b, int c, int d) { long long acc = __builtin_mips_mult(a, b); acc = __builtin_mips_madd(acc, c, d); return __builtin_mips_extr_w(acc, 15); } -#undef S_MUL_SUB static inline int S_MUL_SUB(int 
a, int b, int c, int d) { long long acc = __builtin_mips_mult(a, b); acc = __builtin_mips_msub(acc, c, d); @@ -76,9 +73,66 @@ static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { return m; } -#endif /* FIXED_POINT */ +#define OVERRIDE_kf_bfly5 + +#elif __mips == 32 && defined(__mips_isa_rev) && __mips_isa_rev < 6 + +static inline int S_MUL_ADD(int a, int b, int c, int d) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + "madd %[c], %[d] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) + : + ); + return (int)(acc >> 15); +} + +static inline int S_MUL_SUB(int a, int b, int c, int d) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + "msub %[c], %[d] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) + : + ); + return (int)(acc >> 15); +} + +#undef C_MUL +# define C_MUL(m,a,b) (m=C_MUL_fun(a,b)) +static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { + kiss_fft_cpx m; + + m.r = S_MUL_SUB(a.r, b.r, a.i, b.i); + m.i = S_MUL_ADD(a.r, b.i, a.i, b.r); + + return m; +} + +#undef C_MULC +# define C_MULC(m,a,b) (m=C_MULC_fun(a,b)) +static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { + kiss_fft_cpx m; + + m.r = S_MUL_ADD(a.r, b.r, a.i, b.i); + m.i = S_MUL_SUB(a.i, b.r, a.r, b.i); + + return m; +} #define OVERRIDE_kf_bfly5 + +#endif + +#endif /* FIXED_POINT */ + +#if defined(OVERRIDE_kf_bfly5) + static void kf_bfly5( kiss_fft_cpx * Fout, const size_t fstride, @@ -157,5 +211,61 @@ static void kf_bfly5( } } +#endif /* defined(OVERRIDE_kf_bfly5) */ + +#define OVERRIDE_fft_downshift +/* Just unroll tight loop, should be ok for any mips */ +static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { + int shift; + shift = IMIN(step, *total); + *total -= shift; + if (shift == 1) { + int i; + for (i = 0; i < N - 1; i += 2) { + x[i].r = SHR32(x[i].r, 1); + x[i].i = SHR32(x[i].i, 1); + x[i+1].r = SHR32(x[i+1].r, 1); + x[i+1].i = 
SHR32(x[i+1].i, 1); + } + if (N & 1) { + x[i].r = SHR32(x[i].r, 1); + x[i].i = SHR32(x[i].i, 1); + } + } else if (shift > 0) { + int i; + for (i = 0; i < N - 3; i += 4) { + x[i].r = PSHR32(x[i].r, shift); + x[i].i = PSHR32(x[i].i, shift); + x[i+1].r = PSHR32(x[i+1].r, shift); + x[i+1].i = PSHR32(x[i+1].i, shift); + x[i+2].r = PSHR32(x[i+2].r, shift); + x[i+2].i = PSHR32(x[i+2].i, shift); + x[i+3].r = PSHR32(x[i+3].r, shift); + x[i+3].i = PSHR32(x[i+3].i, shift); + } + switch (N & 3) { + case 3: + x[i].r = PSHR32(x[i].r, shift); + x[i].i = PSHR32(x[i].i, shift); + x[i+1].r = PSHR32(x[i+1].r, shift); + x[i+1].i = PSHR32(x[i+1].i, shift); + x[i+2].r = PSHR32(x[i+2].r, shift); + x[i+2].i = PSHR32(x[i+2].i, shift); + break; + case 2: + x[i].r = PSHR32(x[i].r, shift); + x[i].i = PSHR32(x[i].i, shift); + x[i+1].r = PSHR32(x[i+1].r, shift); + x[i+1].i = PSHR32(x[i+1].i, shift); + break; + case 1: + x[i].r = PSHR32(x[i].r, shift); + x[i].i = PSHR32(x[i].i, shift); + break; + case 0: + break; + } + } +} #endif /* KISS_FFT_MIPSR1_H */ diff --git a/celt/mips/mdct_mipsr1.h b/celt/mips/mdct_mipsr1.h index c8accc093..bc349cc25 100644 --- a/celt/mips/mdct_mipsr1.h +++ b/celt/mips/mdct_mipsr1.h @@ -55,6 +55,7 @@ #include "mathops.h" #include "stack_alloc.h" +#if defined (__mips_dsp) static inline int S_MUL_ADD_PSR(int a, int b, int c, int d, int shift) { long long acc = __builtin_mips_mult(a, b); acc = __builtin_mips_madd(acc, c, d); @@ -67,8 +68,45 @@ static inline int S_MUL_SUB_PSR(int a, int b, int c, int d, int shift) { return __builtin_mips_extr_w(acc, 15+shift); } -/* Forward MDCT trashes the input array */ #define OVERRIDE_clt_mdct_forward +#define OVERRIDE_clt_mdct_backward + +#elif defined(__mips_isa_rev) && __mips_isa_rev < 6 + +static inline int S_MUL_ADD_PSR(int a, int b, int c, int d, int shift) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + "madd %[c], %[d] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) + : + ); + return 
(int)(acc >> (15 + shift)); +} + +static inline int S_MUL_SUB_PSR(int a, int b, int c, int d, int shift) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + "msub %[c], %[d] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) + : + ); + return (int)(acc >> (15 + shift)); +} + +#define OVERRIDE_clt_mdct_forward +#define OVERRIDE_clt_mdct_backward + +#endif + +#if defined (OVERRIDE_clt_mdct_forward) + +/* Forward MDCT trashes the input array */ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const celt_coef *window, int overlap, int shift, int stride, int arch) { @@ -213,7 +251,10 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar RESTORE_STACK; } -#define OVERRIDE_clt_mdct_backward +#endif /* OVERRIDE_clt_mdct_forward */ + +#if defined(OVERRIDE_clt_mdct_backward) + void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const celt_coef * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) { @@ -336,4 +377,7 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala } } } + +#endif /* OVERRIDE_clt_mdct_backward */ + #endif /* MDCT_MIPSR1_H__ */ diff --git a/celt/mips/pitch_mipsr1.h b/celt/mips/pitch_mipsr1.h index 6cbdd78d3..9b73a126b 100644 --- a/celt/mips/pitch_mipsr1.h +++ b/celt/mips/pitch_mipsr1.h @@ -34,28 +34,223 @@ #ifndef PITCH_MIPSR1_H #define PITCH_MIPSR1_H +#include "fixed_generic_mipsr1.h" + +#if defined (__mips_dsp) && __mips == 32 + +#define accumulator_t opus_int64 +#define MIPS_MAC(acc,a,b) \ + __builtin_mips_madd((acc), (int)(a), (int)(b)) + +#define MIPS_MAC16x16_2X(acc,a2x,b2x) \ + __builtin_mips_dpaq_s_w_ph((acc), (a2x), (b2x)) + +#define OVERRIDE_CELT_INNER_PROD #define OVERRIDE_DUAL_INNER_PROD +#define OVERRIDE_XCORR_KERNEL + +#else /* any other MIPS */ + +/* using madd is slower due to single accumulator */ +#define accumulator_t 
opus_int32 +#define MIPS_MAC MAC16_16 + +#define OVERRIDE_CELT_INNER_PROD +#define OVERRIDE_DUAL_INNER_PROD +#define OVERRIDE_XCORR_KERNEL + +#endif /* any other MIPS */ + + +#if defined(OVERRIDE_CELT_INNER_PROD) + +static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, + const opus_val16 *y, int N, int arch) +{ + int j; + accumulator_t acc = 0; + +#if defined (MIPS_MAC16x16_2X) + const v2i16 *x2x; + const v2i16 *y2x; + int loops; + + /* misaligned */ + if (((long)x | (long)y) & 3) + goto fallback; + + x2x = __builtin_assume_aligned(x, 4); + y2x = __builtin_assume_aligned(y, 4); + loops = N / 8; + for (j = 0; j < loops; j++) + { + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); + acc = MIPS_MAC16x16_2X(acc, x2x[3], y2x[3]); + x2x += 4; y2x += 4; + } + + switch (N & 7) { + case 7: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); + acc = MIPS_MAC(acc, x[N-1], y[N-1]); + break; + case 6: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); + break; + case 5: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + acc = MIPS_MAC(acc, x[N-1], y[N-1]); + break; + case 4: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + break; + case 3: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC(acc, x[N-1], y[N-1]); + break; + case 2: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + break; + case 1: + acc = MIPS_MAC(acc, x[N-1], y[N-1]); + break; + case 0: + break; + } + return __builtin_mips_extr_w(acc, 1); + +fallback: +#endif + for (j = 0; j < N - 3; j += 4) + { + acc = MIPS_MAC(acc, x[j], y[j]); + acc = MIPS_MAC(acc, x[j+1], y[j+1]); + acc = MIPS_MAC(acc, x[j+2], y[j+2]); + acc = MIPS_MAC(acc, 
x[j+3], y[j+3]); + } + + switch (N & 3) { + case 3: + acc = MIPS_MAC(acc, x[j], y[j]); + acc = MIPS_MAC(acc, x[j+1], y[j+1]); + acc = MIPS_MAC(acc, x[j+2], y[j+2]); + break; + case 2: + acc = MIPS_MAC(acc, x[j], y[j]); + acc = MIPS_MAC(acc, x[j+1], y[j+1]); + break; + case 1: + acc = MIPS_MAC(acc, x[j], y[j]); + break; + case 0: + break; + } + + (void)arch; + + return (opus_val32)acc; +} +#endif /* OVERRIDE_CELT_INNER_PROD */ + +#if defined(OVERRIDE_DUAL_INNER_PROD) static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2, int arch) { int j; - long long acc1 = 0; - long long acc2 = 0; + accumulator_t acc1 = 0; + accumulator_t acc2 = 0; - (void)arch; +#if defined (MIPS_MAC16x16_2X) + const v2i16 *x2x; + const v2i16 *y01_2x; + const v2i16 *y02_2x; + + /* misaligned */ + if (((long)x | (long)y01 | (long)y02) & 3) + goto fallback; + x2x = __builtin_assume_aligned(x, 4); + y01_2x = __builtin_assume_aligned(y01, 4); + y02_2x = __builtin_assume_aligned(y02, 4); + N /= 2; + + for (j = 0; j < N - 3; j += 4) + { + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+2], y01_2x[j+2]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+2], y02_2x[j+2]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+3], y01_2x[j+3]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+3], y02_2x[j+3]); + } + + switch (N & 3) { + case 3: + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+2], y01_2x[j+2]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+2], y02_2x[j+2]); + break; + case 2: + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); + acc2 = 
MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); + break; + case 1: + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); + break; + case 0: + break; + } + + *xy1 = __builtin_mips_extr_w(acc1, 1); + *xy2 = __builtin_mips_extr_w(acc2, 1); + return; + +fallback: +#endif /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ - for (j=0;j=0;i--) - { - celt_norm x1, x2; - x1 = Xptr[0]; - x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); - *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); - } -} - -#define OVERRIDE_renormalise_vector -void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch) -{ - int i; -#ifdef FIXED_POINT - int k; -#endif - long long acc = EPSILON; - opus_val32 E; - opus_val16 g; - opus_val32 t; - celt_norm *xptr = X; - int X0, X1; - - (void)arch; - - /*if(N %4) - printf("error");*/ - for (i=0;i>1; -#endif - t = VSHR32(E, 2*(k-7)); - g = MULT32_32_Q31(celt_rsqrt_norm(t),gain); - - xptr = X; - for (i=0;isShape; - opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; - opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; - opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; - opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; - opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; - VARDECL( opus_int16, x_windowed ); - const opus_int16 *x_ptr, *pitch_res_ptr; - SAVE_STACK; - - /* Point to start of first LPC analysis block */ - x_ptr = x - psEnc->sCmn.la_shape; - - /****************/ - /* GAIN CONTROL */ - /****************/ - SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; - - /* 
Input quality is the average of the quality in the lowest two VAD bands */ - psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] - + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); - - /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ - psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - - SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 ); - - /* Reduce coding SNR during low speech activity */ - if( psEnc->sCmn.useCBR == 0 ) { - b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; - b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ - silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce gains for periodic signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); - } else { - /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ), - SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); - } - - /*************************/ - /* SPARSENESS PROCESSING */ - /*************************/ - /* Set quantizer offset */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Initially set to 0; may be overruled in process_gains(..) 
*/ - psEnc->sCmn.indices.quantOffsetType = 0; - psEncCtrl->sparseness_Q8 = 0; - } else { - /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ - nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); - energy_variation_Q7 = 0; - log_energy_prev_Q7 = 0; - pitch_res_ptr = pitch_res; - for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { - silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); - nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ - - log_energy_Q7 = silk_lin2log( nrg ); - if( k > 0 ) { - energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 ); - } - log_energy_prev_Q7 = log_energy_Q7; - pitch_res_ptr += nSamples; - } - - psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - - SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); - - /* Set quantization offset depending on sparseness measure */ - if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - - /* Increase coding SNR for sparse signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); - } - - /*******************************/ - /* Control bandwidth expansion */ - /*******************************/ - /* More BWE for signals with high prediction gain */ - strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); - BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), - silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); - delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), - SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); - BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); - 
BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); - /* BWExp1 will be applied after BWExp2, so make it relative */ - BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) ); - } else { - warping_Q16 = 0; - } - - /********************************************/ - /* Compute noise shaping AR coefs and gains */ - /********************************************/ - ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Apply window: sine slope followed by flat part followed by cosine slope */ - opus_int shift, slope_part, flat_part; - flat_part = psEnc->sCmn.fs_kHz * 3; - slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); - - silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part ); - shift = slope_part; - silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); - shift += flat_part; - silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part ); - - /* Update pointer: next LPC analysis block */ - x_ptr += psEnc->sCmn.subfr_length; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder, arch ); - } else { - /* Calculate regular auto correlation */ - silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); - } - - /* Add white noise, as a fraction of energy */ - auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), - SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); - - /* 
Calculate the reflection coefficients using schur */ - nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); - silk_assert( nrg >= 0 ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); - - Qnrg = -scale; /* range: -12...30*/ - silk_assert( Qnrg >= -12 ); - silk_assert( Qnrg <= 30 ); - - /* Make sure that Qnrg is an even number */ - if( Qnrg & 1 ) { - Qnrg -= 1; - nrg >>= 1; - } - - tmp32 = silk_SQRT_APPROX( nrg ); - Qnrg >>= 1; /* range: -6...15*/ - - psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \ - silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) )); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Adjust gain for warping */ - gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; - } else { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - } - } - - /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); - - /* Compute noise shaping filter coefficients */ - silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); - - /* Bandwidth expansion for analysis filter shaping */ - silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); - silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); - - /* Ratio of prediction gains, in energy domain */ - pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder, arch ); - nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder, arch ); - - /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f 
+ 0.7f * pre_nrg / nrg;*/ - pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); - psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); - - /* Convert to monic warped prediction coefficients and limit absolute values */ - limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); - - /* Convert from Q24 to Q13 and store in int16 */ - for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { - psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); - psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); - } - } - - /*****************/ - /* Gain tweaking */ - /*****************/ - /* Increase gains during low speech activity and put lower limit on gains */ - gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) ); - gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) ); - silk_assert( gain_mult_Q16 > 0 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); - } - - gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), - psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); - } - - /************************************************/ - /* Control low-frequency shaping and noise tilt */ - 
/************************************************/ - /* Less low frequency shaping for noisy inputs */ - strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ), - SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) ); - strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ - /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ - opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] ); - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); - psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - } - silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/ - Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) - - silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ), - silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) ); - } else { - b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/ - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - - silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 ); - psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { - 
psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ]; - } - Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 ); - } - - /****************************/ - /* HARMONIC SHAPING CONTROL */ - /****************************/ - /* Control boosting of harmonic frequencies */ - HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), - psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); - - /* More harmonic boost for noisy input signals */ - HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, - SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); - - if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* More harmonic noise shaping for high bitrates or noisy input */ - HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), - SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ), - psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) ); - - /* Less harmonic noise shaping for less periodic signals */ - HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ), - silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) ); - } else { - HarmShapeGain_Q16 = 0; - } - - /*************************/ - /* Smooth over subframes */ - /*************************/ - for( k = 0; k < MAX_NB_SUBFR; k++ ) { - psShapeSt->HarmBoost_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->HarmShapeGain_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->Tilt_smth_Q16 = - silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( 
SUBFR_SMTH_COEF, 16 ) ); - - psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); - psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); - psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); - } - RESTORE_STACK; -} diff --git a/silk/fixed/mips/prefilter_FIX_mipsr1.h b/silk/fixed/mips/prefilter_FIX_mipsr1.h deleted file mode 100644 index e50df1685..000000000 --- a/silk/fixed/mips/prefilter_FIX_mipsr1.h +++ /dev/null @@ -1,184 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifndef __PREFILTER_FIX_MIPSR1_H__ -#define __PREFILTER_FIX_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX -void silk_warped_LPC_analysis_filter_FIX( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order, /* I Filter order (even) */ - int arch -) -{ - opus_int n, i; - opus_int32 acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4; - opus_int32 state_cur, state_next; - - (void)arch; - - /* Order must be even */ - /* Length must be even */ - - silk_assert( ( order & 1 ) == 0 ); - silk_assert( ( length & 1 ) == 0 ); - - for( n = 0; n < length; n+=2 ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state_cur = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state_next = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); - - - /* Output of lowpass section */ - tmp4 
= silk_SMLAWB( state_cur, state_next, lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 ); - /* Output of allpass section */ - tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); - state[ 1 ] = tmp4; - acc_Q22 = silk_RSHIFT( order, 1 ); - acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] ); - - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state_cur = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state_next = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - - - /* Output of allpass section */ - tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 ); - state[ i ] = tmp3; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); - state[ i + 1 ] = tmp4; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] ); - } - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - - state[ order ] = tmp3; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] ); - res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 ); - } -} - - - -/* Prefilter for finding Quantizer input signal */ -#define OVERRIDE_silk_prefilt_FIX -static inline void silk_prefilt_FIX( - silk_prefilter_state_FIX *P, /* I/O state */ - opus_int32 st_res_Q12[], /* I short term residual signal */ - opus_int32 xw_Q3[], /* O prefiltered signal */ - opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coefficients */ - opus_int Tilt_Q14, /* I Tilt shaping coefficients */ - opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coefficients */ - opus_int lag, /* I Lag for harmonic 
shaping */ - opus_int length /* I Length of signals */ -) -{ - opus_int i, idx, LTP_shp_buf_idx; - opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; - opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; - opus_int16 *LTP_shp_buf; - - /* To speed up use temp variables instead of using the struct */ - LTP_shp_buf = P->sLTP_shp; - LTP_shp_buf_idx = P->sLTP_shp_buf_idx; - sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; - sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; - - if( lag > 0 ) { - for( i = 0; i < length; i++ ) { - /* unrolled loop */ - silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); - idx = lag + LTP_shp_buf_idx; - n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); - } - } - else - { - for( i = 0; i < length; i++ ) { - - n_LTP_Q12 = 0; - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = 
(opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 ); - } - } - - /* Copy temp variable back to state */ - P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; - P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; - P->sLTP_shp_buf_idx = LTP_shp_buf_idx; -} - -#endif /* __PREFILTER_FIX_MIPSR1_H__ */ diff --git a/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h index 66eb2ed26..9d9cb0551 100644 --- a/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h +++ b/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h @@ -34,14 +34,36 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" +#if defined (__mips_dsp) && __mips == 32 + +#define MIPS_MADD __builtin_mips_madd +#define MIPS_SHILO __builtin_mips_shilo + +#define OVERRIDE_silk_warped_autocorrelation_FIX_c + +#elif defined (__mips_isa_rev) && __mips == 32 + +static inline long long MIPS_MADD(long long acc, int a, int b) { + return acc + (long long)a * b; +} + +static inline long long MIPS_SHILO(long long acc, int sh) { + return (sh >= 0) ? 
(acc >> sh) : (acc << -sh); +} + +#define OVERRIDE_silk_warped_autocorrelation_FIX_c + +#endif + +/* Autocorrelations for a warped frequency axis */ +#if defined (OVERRIDE_silk_warped_autocorrelation_FIX_c) + #undef QC #define QC 10 #undef QS #define QS 14 -/* Autocorrelations for a warped frequency axis */ -#define OVERRIDE_silk_warped_autocorrelation_FIX_c void silk_warped_autocorrelation_FIX_c( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ @@ -79,39 +101,39 @@ void silk_warped_autocorrelation_FIX_c( for( i = 0; i < order; i += 2 ) { /* Output of allpass section */ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, start_1); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp1_QS, start_1); tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp3_QS, start_2); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp3_QS, start_2); tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp5_QS, start_3); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp5_QS, start_3); tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 ); state_QS[ i ] = tmp7_QS; - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp7_QS, state_QS[0]); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp7_QS, state_QS[0]); /* Output of allpass section */ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, start_1); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp2_QS, start_1); tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp4_QS, start_2); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp4_QS, start_2); tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 
); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp6_QS, start_3); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp6_QS, start_3); tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 ); state_QS[ i + 1 ] = tmp8_QS; - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]); } state_QS[ order ] = tmp7_QS; - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, start_1); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp3_QS, start_2); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp5_QS, start_3); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp1_QS, start_1); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp3_QS, start_2); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp5_QS, start_3); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]); } for(;n< length; n++ ) { @@ -124,19 +146,19 @@ void silk_warped_autocorrelation_FIX_c( /* Output of allpass section */ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); state_QS[ i ] = tmp1_QS; - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]); /* Output of allpass section */ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); state_QS[ i + 1 ] = tmp2_QS; - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]); } state_QS[ order ] = tmp1_QS; - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]); } temp64 = corr_QC[ 0 ]; - temp64 = __builtin_mips_shilo(temp64, val); + temp64 = MIPS_SHILO(temp64, val); lsh = 
silk_CLZ64( temp64 ) - 35; lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); @@ -145,21 +167,23 @@ void silk_warped_autocorrelation_FIX_c( if( lsh >= 0 ) { for( i = 0; i < order + 1; i++ ) { temp64 = corr_QC[ i ]; - //temp64 = __builtin_mips_shilo(temp64, val); + //temp64 = MIPS_SHILO(temp64, val); temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); - corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); + corr[ i ] = (opus_int32)silk_CHECK_FIT32( MIPS_SHILO( temp64, -lsh ) ); } } else { for( i = 0; i < order + 1; i++ ) { temp64 = corr_QC[ i ]; - //temp64 = __builtin_mips_shilo(temp64, val); + //temp64 = MIPS_SHILO(temp64, val); temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); - corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); + corr[ i ] = (opus_int32)silk_CHECK_FIT32( MIPS_SHILO( temp64, -lsh ) ); } } - corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val); + corr_QC[ 0 ] = MIPS_SHILO(corr_QC[ 0 ], val); silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ } +#endif /* OVERRIDE_silk_warped_autocorrelation_FIX_c */ + #endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */ diff --git a/silk/fixed/noise_shape_analysis_FIX.c b/silk/fixed/noise_shape_analysis_FIX.c index d90d7ca58..49ea59e99 100644 --- a/silk/fixed/noise_shape_analysis_FIX.c +++ b/silk/fixed/noise_shape_analysis_FIX.c @@ -128,15 +128,9 @@ static OPUS_INLINE void limit_warped_coefs( silk_assert( 0 ); } -/* Disable MIPS DSP version until it's updated. 
*/ -#if 0 && defined(__mips_dsp) && __mips == 32 -#include "mips/noise_shape_analysis_FIX_mipsr1.h" -#endif - /**************************************************************/ /* Compute noise shaping coefficients and initial gain values */ /**************************************************************/ -#ifndef OVERRIDE_silk_noise_shape_analysis_FIX void silk_noise_shape_analysis_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ @@ -404,4 +398,3 @@ void silk_noise_shape_analysis_FIX( } RESTORE_STACK; } -#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */ diff --git a/silk/fixed/warped_autocorrelation_FIX.c b/silk/fixed/warped_autocorrelation_FIX.c index e185df59c..00bd9ff64 100644 --- a/silk/fixed/warped_autocorrelation_FIX.c +++ b/silk/fixed/warped_autocorrelation_FIX.c @@ -31,7 +31,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" -#if defined(__mips_dsp) && __mips == 32 +#if defined(__mips) #include "mips/warped_autocorrelation_FIX_mipsr1.h" #endif diff --git a/silk/macros.h b/silk/macros.h index 099f84e51..26cfeb61a 100644 --- a/silk/macros.h +++ b/silk/macros.h @@ -104,7 +104,7 @@ POSSIBILITY OF SUCH DAMAGE. (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) -#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32 +#if defined(FIXED_POINT) && defined(__mips) #include "mips/macros_mipsr1.h" #endif diff --git a/silk/mips/NSQ_del_dec_mipsr1.h b/silk/mips/NSQ_del_dec_mipsr1.h deleted file mode 100644 index 26e0df672..000000000 --- a/silk/mips/NSQ_del_dec_mipsr1.h +++ /dev/null @@ -1,410 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef NSQ_DEL_DEC_MIPSR1_H__ -#define NSQ_DEL_DEC_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -#define OVERRIDE_silk_noise_shape_quantizer_del_dec -static inline void silk_noise_shape_quantizer_del_dec( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ - opus_int decisionDelay, /* I */ - int arch /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; - opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; - opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; 
- NSQ_sample_struct psSampleState[ MAX_DEL_DEC_STATES ][ 2 ]; - NSQ_del_dec_struct *psDD; - NSQ_sample_struct *psSS; - opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4; - opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6; - opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13; - opus_int16 a_Q12_14, a_Q12_15; - - opus_int32 cur, prev, next; - - /*Unused.*/ - (void)arch; - - //Initialize b_Q14 variables - b_Q14_0 = b_Q14[ 0 ]; - b_Q14_1 = b_Q14[ 1 ]; - b_Q14_2 = b_Q14[ 2 ]; - b_Q14_3 = b_Q14[ 3 ]; - b_Q14_4 = b_Q14[ 4 ]; - - //Initialize a_Q12 variables - a_Q12_0 = a_Q12[0]; - a_Q12_1 = a_Q12[1]; - a_Q12_2 = a_Q12[2]; - a_Q12_3 = a_Q12[3]; - a_Q12_4 = a_Q12[4]; - a_Q12_5 = a_Q12[5]; - a_Q12_6 = a_Q12[6]; - a_Q12_7 = a_Q12[7]; - a_Q12_8 = a_Q12[8]; - a_Q12_9 = a_Q12[9]; - a_Q12_10 = a_Q12[10]; - a_Q12_11 = a_Q12[11]; - a_Q12_12 = a_Q12[12]; - a_Q12_13 = a_Q12[13]; - a_Q12_14 = a_Q12[14]; - a_Q12_15 = a_Q12[15]; - - long long temp64; - - silk_assert( nStatesDelayedDecision > 0 ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - for( i = 0; i < length; i++ ) { - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - temp64 = __builtin_mips_mult(pred_lag_ptr[ 0 ], b_Q14_0 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -1 ], b_Q14_1 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -2 ], b_Q14_2 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -3 ], b_Q14_3 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -4 ], b_Q14_4 ); - temp64 += 32768; - LTP_pred_Q14 = __builtin_mips_extr_w(temp64, 16); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ - 
pred_lag_ptr++; - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - /* Delayed decision state */ - psDD = &psDelDec[ k ]; - - /* Sample state */ - psSS = psSampleState[ k ]; - - /* Generate dither */ - psDD->Seed = silk_RAND( psDD->Seed ); - - /* Pointer used in short term prediction and shaping */ - psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; - /* Short-term prediction */ - silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); - temp64 = __builtin_mips_mult(psLPC_Q14[ 0 ], a_Q12_0 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -1 ], a_Q12_1 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -2 ], a_Q12_2 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -3 ], a_Q12_3 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -4 ], a_Q12_4 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -5 ], a_Q12_5 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -6 ], a_Q12_6 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -7 ], a_Q12_7 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -8 ], a_Q12_8 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -9 ], a_Q12_9 ); - if( predictLPCOrder == 16 ) { - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -10 ], a_Q12_10 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -11 ], a_Q12_11 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -12 ], a_Q12_12 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -13 ], a_Q12_13 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -14 ], a_Q12_14 ); - temp64 = __builtin_mips_madd( temp64, 
psLPC_Q14[ -15 ], a_Q12_15 ); - } - temp64 += 32768; - LPC_pred_Q14 = __builtin_mips_extr_w(temp64, 16); - - LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ 0 ] = tmp2; - - temp64 = __builtin_mips_mult(tmp2, AR_shp_Q13[ 0 ] ); - - prev = psDD->sAR2_Q14[ 1 ]; - - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - cur = psDD->sAR2_Q14[ j ]; - next = psDD->sAR2_Q14[ j+1 ]; - /* Output of allpass section */ - tmp2 = silk_SMLAWB( prev, cur - tmp1, warping_Q16 ); - psDD->sAR2_Q14[ j - 1 ] = tmp1; - temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ j - 1 ] ); - temp64 = __builtin_mips_madd( temp64, tmp2, AR_shp_Q13[ j ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( cur, next - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ j + 0 ] = tmp2; - prev = next; - } - psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); - temp64 += 32768; - n_AR_Q14 = __builtin_mips_extr_w(temp64, 16); - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ - - n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ - tmp2 = silk_ADD32( n_LTP_Q14, 
LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ - - r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( psDD->Seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); - - if( rd1_Q10 < rd2_Q10 ) { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 0 ].Q_Q10 = q1_Q10; - psSS[ 1 ].Q_Q10 = q2_Q10; - } else { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 1 ].RD_Q10 = 
silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 0 ].Q_Q10 = q2_Q10; - psSS[ 1 ].Q_Q10 = q1_Q10; - } - - /* Update states for best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 0 ].xq_Q14 = xq_Q14; - - /* Update states for second best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 1 ].xq_Q14 = xq_Q14; - } - - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY; - if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY; - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY; - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; - for( k = 0; k < nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { - psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 
].RD_Q10, silk_int32_MAX >> 4 ); - psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); - silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); - } - } - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; - RDmin_ind = k; - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, - ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); - silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); - } - - /* Write samples from winner to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Update states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psSS = &psSampleState[ k ][ 0 ]; - psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = 
psSS->xq_Q14; - psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; - psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; - psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); - psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; - psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); - psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; - psDD->RD_Q10 = psSS->RD_Q10; - } - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - } -} - -#endif /* NSQ_DEL_DEC_MIPSR1_H__ */ diff --git a/silk/mips/NSQ_mips.h b/silk/mips/NSQ_mips.h new file mode 100644 index 000000000..fd691ee33 --- /dev/null +++ b/silk/mips/NSQ_mips.h @@ -0,0 +1,137 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef NSQ_MIPS_H__ +#define NSQ_MIPS_H__ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main.h" +#include "macros.h" + +#if defined (__mips_dsp) && __mips == 32 + +#define MIPS_MULT __builtin_mips_mult +#define MIPS_MADD __builtin_mips_madd +#define MIPS_EXTR_R __builtin_mips_extr_r_w + +#define OVERRIDE_silk_noise_shape_quantizer_short_prediction +/* suddenly performance is worse */ +#define dont_OVERRIDE_silk_NSQ_noise_shape_feedback_loop + +/* gets worst performance result */ +#elif defined(__mips_isa_rev) && __mips == 32 + +static inline long long MIPS_MULT(int a, int b) { + return (long long)a * b; +} + +static inline long long MIPS_MADD(long long acc, int a, int b) { + return acc + (long long)a * b; +} + +static inline opus_val32 MIPS_EXTR_R(long long acc, int shift) { + return (opus_val32)((acc + (1 << shift) / 2) >> shift); +} + +#define OVERRIDE_silk_noise_shape_quantizer_short_prediction +#define OVERRIDE_silk_NSQ_noise_shape_feedback_loop + +#endif + +#if defined(OVERRIDE_silk_noise_shape_quantizer_short_prediction) + +static OPUS_INLINE opus_int32 silk_noise_shape_quantizer_short_prediction_mips(const opus_int32 *buf32, const opus_int16 *coef16, opus_int order) +{ + opus_int64 out; + silk_assert( order == 10 || order == 16 ); + + out = MIPS_MULT( buf32[ 0 ], coef16[ 0 ] ); + out = MIPS_MADD( out, buf32[ -1 ], coef16[ 1 ] ); + out = MIPS_MADD( 
out, buf32[ -2 ], coef16[ 2 ] ); + out = MIPS_MADD( out, buf32[ -3 ], coef16[ 3 ] ); + out = MIPS_MADD( out, buf32[ -4 ], coef16[ 4 ] ); + out = MIPS_MADD( out, buf32[ -5 ], coef16[ 5 ] ); + out = MIPS_MADD( out, buf32[ -6 ], coef16[ 6 ] ); + out = MIPS_MADD( out, buf32[ -7 ], coef16[ 7 ] ); + out = MIPS_MADD( out, buf32[ -8 ], coef16[ 8 ] ); + out = MIPS_MADD( out, buf32[ -9 ], coef16[ 9 ] ); + + if( order == 16 ) + { + out = MIPS_MADD( out, buf32[ -10 ], coef16[ 10 ] ); + out = MIPS_MADD( out, buf32[ -11 ], coef16[ 11 ] ); + out = MIPS_MADD( out, buf32[ -12 ], coef16[ 12 ] ); + out = MIPS_MADD( out, buf32[ -13 ], coef16[ 13 ] ); + out = MIPS_MADD( out, buf32[ -14 ], coef16[ 14 ] ); + out = MIPS_MADD( out, buf32[ -15 ], coef16[ 15 ] ); + } + return MIPS_EXTR_R(out, 16); +} + +#undef silk_noise_shape_quantizer_short_prediction +#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) ((void)arch,silk_noise_shape_quantizer_short_prediction_mips(in, coef, order)) + +#endif /* OVERRIDE_silk_noise_shape_quantizer_short_prediction */ + + +#if defined(OVERRIDE_silk_NSQ_noise_shape_feedback_loop) + +static OPUS_INLINE opus_int32 silk_NSQ_noise_shape_feedback_loop_mips(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order) +{ + opus_int32 out; + opus_int32 tmp1, tmp2; + opus_int j; + + tmp2 = data0[0]; + tmp1 = data1[0]; + data1[0] = tmp2; + + out = MIPS_MULT(tmp2, coef[0]); + + for (j = 2; j < order; j += 2) { + tmp2 = data1[j - 1]; + data1[j - 1] = tmp1; + out = MIPS_MADD(out, tmp1, coef[j - 1]); + tmp1 = data1[j + 0]; + data1[j + 0] = tmp2; + out = MIPS_MADD(out, tmp2, coef[j]); + } + data1[order - 1] = tmp1; + out = MIPS_MADD(out, tmp1, coef[order - 1]); + /* silk_SMLAWB: shift right by 16 && Q11 -> Q12: shift left by 1 */ + return MIPS_EXTR_R( out, (16 - 1) ); +} + +#undef silk_NSQ_noise_shape_feedback_loop +#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) 
((void)arch,silk_NSQ_noise_shape_feedback_loop_mips(data0, data1, coef, order)) + +#endif /* OVERRIDE_silk_NSQ_noise_shape_feedback_loop */ + +#endif /* NSQ_MIPS_H__ */ diff --git a/silk/mips/macros_mipsr1.h b/silk/mips/macros_mipsr1.h index 0393a33fa..c9c01f3df 100644 --- a/silk/mips/macros_mipsr1.h +++ b/silk/mips/macros_mipsr1.h @@ -29,10 +29,8 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef SILK_MACROS_MIPSR1_H__ #define SILK_MACROS_MIPSR1_H__ -static inline int mips_clz(opus_uint32 x) -{ - return x ? __builtin_clz(x) : 32; -} + +#if defined (__mips_dsp) && __mips == 32 #undef silk_SMULWB static inline int silk_SMULWB(int a, int b) @@ -74,6 +72,46 @@ static inline int silk_SMLAWW(int a, int b, int c) return res; } +#undef silk_ADD_SAT32 +static inline int silk_ADD_SAT32(int a, int b) +{ + return __builtin_mips_addq_s_w(a, b); +} + +#undef silk_SUB_SAT32 +static inline int silk_SUB_SAT32(int a, int b) +{ + return __builtin_mips_subq_s_w(a, b); +} + +#elif defined (__mips_isa_rev) && __mips == 32 + +#undef silk_SMULWB +static inline int silk_SMULWB(int a, int b) +{ + long long ac = (long long)a * (int)(b << 16); + + return ac >> 32; +} + +/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ +#undef silk_SMLAWB +static inline int silk_SMLAWB(int a, int b, int c) +{ + long long ac = (long long)b * (int)(c << 16); + + return a + (ac >> 32); +} + +#endif + +#if defined (__mips_isa_rev) /* MIPS32r1+ */ + +static inline int mips_clz(opus_uint32 x) +{ + return x ?
__builtin_clz(x) : 32; +} + #define OVERRIDE_silk_CLZ16 static inline opus_int32 silk_CLZ16(opus_int16 in16) { @@ -92,4 +130,6 @@ static inline opus_int32 silk_CLZ32(opus_int32 in32) return re32; } +#endif /* __mips_isa_rev */ + #endif /* SILK_MACROS_MIPSR1_H__ */ diff --git a/silk/mips/sigproc_fix_mipsr1.h b/silk/mips/sigproc_fix_mipsr1.h index 51520c0a6..2d7615aa8 100644 --- a/silk/mips/sigproc_fix_mipsr1.h +++ b/silk/mips/sigproc_fix_mipsr1.h @@ -57,4 +57,10 @@ static inline int silk_RSHIFT_ROUND(int a, int shift) return r; } +#undef silk_ADD_POS_SAT32 +static inline int silk_ADD_POS_SAT32(int a, int b) +{ + return __builtin_mips_addq_s_w(a, b); +} + #endif /* SILK_SIGPROC_FIX_MIPSR1_H */ diff --git a/silk_headers.mk b/silk_headers.mk index 2588067c7..0586ea1ce 100644 --- a/silk_headers.mk +++ b/silk_headers.mk @@ -34,11 +34,10 @@ silk/arm/NSQ_neon.h \ silk/fixed/main_FIX.h \ silk/fixed/structs_FIX.h \ silk/fixed/arm/warped_autocorrelation_FIX_arm.h \ -silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h \ silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h \ silk/float/main_FLP.h \ silk/float/structs_FLP.h \ silk/float/SigProc_FLP.h \ silk/mips/macros_mipsr1.h \ -silk/mips/NSQ_del_dec_mipsr1.h \ +silk/mips/NSQ_mips.h \ silk/mips/sigproc_fix_mipsr1.h