diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h index 4de1137f9..0343adf8d 100644 --- a/celt/_kiss_fft_guts.h +++ b/celt/_kiss_fft_guts.h @@ -102,7 +102,7 @@ #if defined(OPUS_ARM_INLINE_EDSP) #include "arm/kiss_fft_armv5e.h" #endif -#if defined(__mips_dsp) && __mips == 32 +#if defined(__mips) #include "mips/kiss_fft_mipsr1.h" #endif diff --git a/celt/arch.h b/celt/arch.h index dd095b218..1b722a44e 100644 --- a/celt/arch.h +++ b/celt/arch.h @@ -227,9 +227,10 @@ typedef opus_val16 celt_coef; #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) -static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { +static OPUS_INLINE opus_int16 SAT16_default(opus_int32 x) { return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; } +#define SAT16(x) SAT16_default(x) #ifdef FIXED_DEBUG #include "fixed_debug.h" diff --git a/celt/celt.c b/celt/celt.c index cc2c8c8e7..3838684fa 100644 --- a/celt/celt.c +++ b/celt/celt.c @@ -54,7 +54,7 @@ #define PACKAGE_VERSION "unknown" #endif -#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32 +#if defined(FIXED_POINT) && defined(__mips) #include "mips/celt_mipsr1.h" #endif diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c index 2a98b7e9d..43dffbdf4 100644 --- a/celt/celt_lpc.c +++ b/celt/celt_lpc.c @@ -59,8 +59,15 @@ int p for (i = 0; i < p; i++) { /* Sum up this iteration's reflection coefficient */ opus_val32 rr = 0; +#if defined (FIXED_POINT) && OPUS_FAST_INT64 + opus_int64 acc = 0; + for (j = 0; j < i; j++) + acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]); + rr = (opus_val32)SHR(acc, 31); +#else for (j = 0; j < i; j++) rr += MULT32_32_Q31(lpc[j],ac[i - j]); +#endif rr += SHR32(ac[i + 1],6); r = -frac_div32(SHL32(rr,6), error); /* Update LPC coefficients and total error */ diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h index f499f4f89..d750e6ce8 100644 --- a/celt/fixed_generic.h +++ b/celt/fixed_generic.h @@ -129,7 +129,8 @@ #define SHR(a,shift) ((a) >> (shift)) #define 
SHL(a,shift) SHL32(a,shift) #define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift)) -#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) +#define SATURATE_generic(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) +#define SATURATE(x,a) SATURATE_generic((x), (a)) #define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x))) @@ -200,10 +201,6 @@ /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) -#if defined(__mips_dsp) && __mips == 32 -#include "mips/fixed_generic_mipsr1.h" -#endif - static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) { x = PSHR32(x, SIG_SHIFT); @@ -213,4 +210,8 @@ static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) } #define SIG2WORD16(x) (SIG2WORD16_generic(x)) +#if defined(__mips) +#include "mips/fixed_generic_mipsr1.h" +#endif + #endif diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c index 7c49a4f32..24400e862 100644 --- a/celt/kiss_fft.c +++ b/celt/kiss_fft.c @@ -535,6 +535,7 @@ void opus_fft_free(const kiss_fft_state *cfg, int arch) #endif /* CUSTOM_MODES */ #ifdef FIXED_POINT +#ifndef OVERRIDE_fft_downshift static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { int shift; shift = IMIN(step, *total); @@ -553,6 +554,7 @@ static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { } } } +#endif /* OVERRIDE_fft_downshift */ #else #define fft_downshift(x, N, total, step) #endif diff --git a/celt/mdct.c b/celt/mdct.c index a60081b5c..2b8a0bce9 100644 --- a/celt/mdct.c +++ b/celt/mdct.c @@ -53,7 +53,7 @@ #include "mathops.h" #include "stack_alloc.h" -#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32 +#if defined(FIXED_POINT) && defined(__mips) && __mips == 32 #include "mips/mdct_mipsr1.h" #endif diff --git a/celt/mips/celt_mipsr1.h b/celt/mips/celt_mipsr1.h index 7fa8d4358..8a5d0c90a 100644 --- a/celt/mips/celt_mipsr1.h +++ b/celt/mips/celt_mipsr1.h @@ -36,6 +36,16 @@ 
#define CELT_C +#if defined (__mips_dsp) && __mips == 32 + +#define OVERRIDE_COMB_FILTER_CONST +#define OVERRIDE_comb_filter +#elif defined(__mips_isa_rev) && __mips_isa_rev < 6 + +#define OVERRIDE_COMB_FILTER_CONST +#define OVERRIDE_comb_filter +#endif + #include "os_support.h" #include "mdct.h" #include @@ -53,8 +63,43 @@ #include "celt_lpc.h" #include "vq.h" -#define OVERRIDE_COMB_FILTER_CONST -#define OVERRIDE_comb_filter +#if defined (__mips_dsp) && __mips == 32 + +#define MIPS_MULT __builtin_mips_mult +#define MIPS_MADD __builtin_mips_madd +#define MIPS_EXTR __builtin_mips_extr_w + +#elif defined(__mips_isa_rev) && __mips_isa_rev < 6 + +static inline long long MIPS_MULT(int a, int b) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b) + : + ); + return acc; +} + +static inline long long MIPS_MADD(long long acc, int a, int b) { + asm volatile ( + "madd %[a], %[b] \n" + : [acc] "+x"(acc) + : [a] "r"(a), [b] "r"(b) + : + ); + return acc; +} + +static inline opus_val32 MIPS_EXTR(long long acc, int shift) { + return (opus_val32)(acc >> shift); +} + +#endif + +#if defined (OVERRIDE_comb_filter) void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, const opus_val16 *window, int overlap, int arch) @@ -101,13 +146,13 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, f = MULT16_16_Q15(window[i],window[i]); x0= x[i-T1+2]; - acc = __builtin_mips_mult((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1])); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2])); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g10), (int)x2); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1)); - acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g12), 
(int)ADD32(x4,x0)); - res = __builtin_mips_extr_w(acc, 15); + acc = MIPS_MULT((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1])); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2])); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g10), (int)x2); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1)); + acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0)); + res = MIPS_EXTR(acc, 15); y[i] = x[i] + res; @@ -136,10 +181,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, long long acc; x0=x[i-T1+2]; - acc = __builtin_mips_mult((int)g10, (int)x2); - acc = __builtin_mips_madd(acc, (int)g11, (int)ADD32(x3,x1)); - acc = __builtin_mips_madd(acc, (int)g12, (int)ADD32(x4,x0)); - res = __builtin_mips_extr_w(acc, 15); + acc = MIPS_MULT((int)g10, (int)x2); + acc = MIPS_MADD(acc, (int)g11, (int)ADD32(x3,x1)); + acc = MIPS_MADD(acc, (int)g12, (int)ADD32(x4,x0)); + res = MIPS_EXTR(acc, 15); y[i] = x[i] + res; x4=x3; @@ -148,5 +193,6 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, x1=x0; } } +#endif /* OVERRIDE_comb_filter */ #endif /* CELT_MIPSR1_H__ */ diff --git a/celt/mips/fixed_generic_mipsr1.h b/celt/mips/fixed_generic_mipsr1.h index 42f0e4047..1888e3004 100644 --- a/celt/mips/fixed_generic_mipsr1.h +++ b/celt/mips/fixed_generic_mipsr1.h @@ -33,34 +33,10 @@ #ifndef CELT_FIXED_GENERIC_MIPSR1_H #define CELT_FIXED_GENERIC_MIPSR1_H -#undef MULT16_32_Q15_ADD -static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) { - long long acc = __builtin_mips_mult(a, b); - acc = __builtin_mips_madd(acc, c, d); - return __builtin_mips_extr_w(acc, 15); -} - -#undef MULT16_32_Q15_SUB -static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) { - long long acc = __builtin_mips_mult(a, b); - acc = __builtin_mips_msub(acc, c, d); - return __builtin_mips_extr_w(acc, 15); -} - 
-#undef MULT16_16_Q15_ADD -static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) { - long long acc = __builtin_mips_mult(a, b); - acc = __builtin_mips_madd(acc, c, d); - return __builtin_mips_extr_w(acc, 15); -} - -#undef MULT16_16_Q15_SUB -static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) { - long long acc = __builtin_mips_mult(a, b); - acc = __builtin_mips_msub(acc, c, d); - return __builtin_mips_extr_w(acc, 15); -} +#if defined (__mips_dsp) && __mips == 32 +typedef short v2i16 __attribute__((vector_size(4))); +typedef char v2i8 __attribute__((vector_size(4))); #undef MULT16_32_Q16 static inline int MULT16_32_Q16(int a, int b) @@ -103,4 +79,104 @@ static inline int MULT16_16_P15(int a, int b) return __builtin_mips_shra_r_w(r, 15); } +#define OVERRIDE_CELT_MAXABS16 +static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len) +{ + int i; + v2i16 v2max = (v2i16){ 0, 0 }; + v2i16 x01, x23; + const v2i16 *x2; + opus_val16 maxlo, maxhi; + int loops; + + if ((long)x & 2 && len > 0) { + v2max = (v2i16){ 0, ABS16(*x) }; + x++; + len--; + } + x2 = __builtin_assume_aligned(x, 4); + loops = len / 4; + + for (i = 0; i < loops; i++) + { + x01 = *x2++; + x23 = *x2++; + x01 = __builtin_mips_absq_s_ph(x01); + x23 = __builtin_mips_absq_s_ph(x23); + __builtin_mips_cmp_lt_ph(v2max, x01); + v2max = __builtin_mips_pick_ph(x01, v2max); + __builtin_mips_cmp_lt_ph(v2max, x23); + v2max = __builtin_mips_pick_ph(x23, v2max); + } + + switch (len & 3) { + case 3: + x01 = __builtin_mips_absq_s_ph(*x2); + __builtin_mips_cmp_lt_ph(v2max, x01); + v2max = __builtin_mips_pick_ph(x01, v2max); + maxlo = EXTRACT16((opus_val32)v2max); + maxhi = EXTRACT16((opus_val32)v2max >> 16); + maxlo = MAX16(MAX16(maxlo, maxhi), ABS16(x[len - 1])); + break; + case 2: + x01 = __builtin_mips_absq_s_ph(*x2); + __builtin_mips_cmp_lt_ph(v2max, x01); + v2max = __builtin_mips_pick_ph(x01, v2max); + maxlo = EXTRACT16((opus_val32)v2max); + maxhi = EXTRACT16((opus_val32)v2max >> 16); + 
maxlo = MAX16(maxlo, maxhi); + break; + case 1: + maxlo = EXTRACT16((opus_val32)v2max); + maxhi = EXTRACT16((opus_val32)v2max >> 16); + return MAX16(MAX16(maxlo, maxhi), ABS16(x[len - 1])); + break; + case 0: + maxlo = EXTRACT16((opus_val32)v2max); + maxhi = EXTRACT16((opus_val32)v2max >> 16); + maxlo = MAX16(maxlo, maxhi); + break; + default: + __builtin_unreachable(); + } + /* C version might return 0x8000, this one can't + * because abs is saturated here. Since result + * used only for determine dynamic range + * in ilog2-like context it's worth to add 1 + * for proper magnitude whether saturated + */ + return (opus_val32)maxlo + 1; +} + +#undef SATURATE +static OPUS_INLINE int SATURATE(int x, int a) +{ + if (__builtin_constant_p(a) && __builtin_popcount(a + 1) == 1) { + const int shift = __builtin_clz(a + 1); + int ret = __builtin_mips_shll_s_w(x, shift); + return ret >> shift; + } + return SATURATE_generic(x, a); +} + +#undef SATURATE16 +#define SATURATE16(x) EXTRACT16(SATURATE(x, 32767)) + +#undef SAT16 +#define SAT16(x) EXTRACT16(SATURATE(x, 32767)) + +#undef SIG2WORD16 +static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + return SATURATE16(x); +} + +#elif __mips == 32 + +#undef MULT16_32_Q16 +#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)(SHL32((a), 16))*(b),32)) + +#endif + #endif /* CELT_FIXED_GENERIC_MIPSR1_H */ diff --git a/celt/mips/kiss_fft_mipsr1.h b/celt/mips/kiss_fft_mipsr1.h index bdb5df804..215dd0d3c 100644 --- a/celt/mips/kiss_fft_mipsr1.h +++ b/celt/mips/kiss_fft_mipsr1.h @@ -32,17 +32,14 @@ #ifdef FIXED_POINT -#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d)) -#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d)) +#if __mips == 32 && defined (__mips_dsp) -#undef S_MUL_ADD static inline int S_MUL_ADD(int a, int b, int c, int d) { long long acc = __builtin_mips_mult(a, b); acc = __builtin_mips_madd(acc, c, d); return __builtin_mips_extr_w(acc, 15); } -#undef S_MUL_SUB static inline int S_MUL_SUB(int 
a, int b, int c, int d) { long long acc = __builtin_mips_mult(a, b); acc = __builtin_mips_msub(acc, c, d); @@ -76,9 +73,66 @@ static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { return m; } -#endif /* FIXED_POINT */ +#define OVERRIDE_kf_bfly5 + +#elif __mips == 32 && defined(__mips_isa_rev) && __mips_isa_rev < 6 + +static inline int S_MUL_ADD(int a, int b, int c, int d) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + "madd %[c], %[d] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) + : + ); + return (int)(acc >> 15); +} + +static inline int S_MUL_SUB(int a, int b, int c, int d) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + "msub %[c], %[d] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) + : + ); + return (int)(acc >> 15); +} + +#undef C_MUL +# define C_MUL(m,a,b) (m=C_MUL_fun(a,b)) +static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { + kiss_fft_cpx m; + + m.r = S_MUL_SUB(a.r, b.r, a.i, b.i); + m.i = S_MUL_ADD(a.r, b.i, a.i, b.r); + + return m; +} + +#undef C_MULC +# define C_MULC(m,a,b) (m=C_MULC_fun(a,b)) +static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { + kiss_fft_cpx m; + + m.r = S_MUL_ADD(a.r, b.r, a.i, b.i); + m.i = S_MUL_SUB(a.i, b.r, a.r, b.i); + + return m; +} #define OVERRIDE_kf_bfly5 + +#endif + +#endif /* FIXED_POINT */ + +#if defined(OVERRIDE_kf_bfly5) + static void kf_bfly5( kiss_fft_cpx * Fout, const size_t fstride, @@ -157,5 +211,61 @@ static void kf_bfly5( } } +#endif /* defined(OVERRIDE_kf_bfly5) */ + +#define OVERRIDE_fft_downshift +/* Just unroll tight loop, should be ok for any mips */ +static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { + int shift; + shift = IMIN(step, *total); + *total -= shift; + if (shift == 1) { + int i; + for (i = 0; i < N - 1; i += 2) { + x[i].r = SHR32(x[i].r, 1); + x[i].i = SHR32(x[i].i, 1); + x[i+1].r = SHR32(x[i+1].r, 1); + x[i+1].i = 
SHR32(x[i+1].i, 1); + } + if (N & 1) { + x[i].r = SHR32(x[i].r, 1); + x[i].i = SHR32(x[i].i, 1); + } + } else if (shift > 0) { + int i; + for (i = 0; i < N - 3; i += 4) { + x[i].r = PSHR32(x[i].r, shift); + x[i].i = PSHR32(x[i].i, shift); + x[i+1].r = PSHR32(x[i+1].r, shift); + x[i+1].i = PSHR32(x[i+1].i, shift); + x[i+2].r = PSHR32(x[i+2].r, shift); + x[i+2].i = PSHR32(x[i+2].i, shift); + x[i+3].r = PSHR32(x[i+3].r, shift); + x[i+3].i = PSHR32(x[i+3].i, shift); + } + switch (N & 3) { + case 3: + x[i].r = PSHR32(x[i].r, shift); + x[i].i = PSHR32(x[i].i, shift); + x[i+1].r = PSHR32(x[i+1].r, shift); + x[i+1].i = PSHR32(x[i+1].i, shift); + x[i+2].r = PSHR32(x[i+2].r, shift); + x[i+2].i = PSHR32(x[i+2].i, shift); + break; + case 2: + x[i].r = PSHR32(x[i].r, shift); + x[i].i = PSHR32(x[i].i, shift); + x[i+1].r = PSHR32(x[i+1].r, shift); + x[i+1].i = PSHR32(x[i+1].i, shift); + break; + case 1: + x[i].r = PSHR32(x[i].r, shift); + x[i].i = PSHR32(x[i].i, shift); + break; + case 0: + break; + } + } +} #endif /* KISS_FFT_MIPSR1_H */ diff --git a/celt/mips/mdct_mipsr1.h b/celt/mips/mdct_mipsr1.h index c8accc093..bc349cc25 100644 --- a/celt/mips/mdct_mipsr1.h +++ b/celt/mips/mdct_mipsr1.h @@ -55,6 +55,7 @@ #include "mathops.h" #include "stack_alloc.h" +#if defined (__mips_dsp) static inline int S_MUL_ADD_PSR(int a, int b, int c, int d, int shift) { long long acc = __builtin_mips_mult(a, b); acc = __builtin_mips_madd(acc, c, d); @@ -67,8 +68,45 @@ static inline int S_MUL_SUB_PSR(int a, int b, int c, int d, int shift) { return __builtin_mips_extr_w(acc, 15+shift); } -/* Forward MDCT trashes the input array */ #define OVERRIDE_clt_mdct_forward +#define OVERRIDE_clt_mdct_backward + +#elif defined(__mips_isa_rev) && __mips_isa_rev < 6 + +static inline int S_MUL_ADD_PSR(int a, int b, int c, int d, int shift) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + "madd %[c], %[d] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) + : + ); + return 
(int)(acc >> (15 + shift)); +} + +static inline int S_MUL_SUB_PSR(int a, int b, int c, int d, int shift) { + long long acc; + + asm volatile ( + "mult %[a], %[b] \n" + "msub %[c], %[d] \n" + : [acc] "=x"(acc) + : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) + : + ); + return (int)(acc >> (15 + shift)); +} + +#define OVERRIDE_clt_mdct_forward +#define OVERRIDE_clt_mdct_backward + +#endif + +#if defined (OVERRIDE_clt_mdct_forward) + +/* Forward MDCT trashes the input array */ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const celt_coef *window, int overlap, int shift, int stride, int arch) { @@ -213,7 +251,10 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar RESTORE_STACK; } -#define OVERRIDE_clt_mdct_backward +#endif /* OVERRIDE_clt_mdct_forward */ + +#if defined(OVERRIDE_clt_mdct_backward) + void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const celt_coef * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) { @@ -336,4 +377,7 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala } } } + +#endif /* OVERRIDE_clt_mdct_backward */ + #endif /* MDCT_MIPSR1_H__ */ diff --git a/celt/mips/pitch_mipsr1.h b/celt/mips/pitch_mipsr1.h index 6cbdd78d3..9b73a126b 100644 --- a/celt/mips/pitch_mipsr1.h +++ b/celt/mips/pitch_mipsr1.h @@ -34,28 +34,223 @@ #ifndef PITCH_MIPSR1_H #define PITCH_MIPSR1_H +#include "fixed_generic_mipsr1.h" + +#if defined (__mips_dsp) && __mips == 32 + +#define accumulator_t opus_int64 +#define MIPS_MAC(acc,a,b) \ + __builtin_mips_madd((acc), (int)(a), (int)(b)) + +#define MIPS_MAC16x16_2X(acc,a2x,b2x) \ + __builtin_mips_dpaq_s_w_ph((acc), (a2x), (b2x)) + +#define OVERRIDE_CELT_INNER_PROD #define OVERRIDE_DUAL_INNER_PROD +#define OVERRIDE_XCORR_KERNEL + +#else /* any other MIPS */ + +/* using madd is slower due to single accumulator */ +#define accumulator_t 
opus_int32 +#define MIPS_MAC MAC16_16 + +#define OVERRIDE_CELT_INNER_PROD +#define OVERRIDE_DUAL_INNER_PROD +#define OVERRIDE_XCORR_KERNEL + +#endif /* any other MIPS */ + + +#if defined(OVERRIDE_CELT_INNER_PROD) + +static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, + const opus_val16 *y, int N, int arch) +{ + int j; + accumulator_t acc = 0; + +#if defined (MIPS_MAC16x16_2X) + const v2i16 *x2x; + const v2i16 *y2x; + int loops; + + /* misaligned */ + if (((long)x | (long)y) & 3) + goto fallback; + + x2x = __builtin_assume_aligned(x, 4); + y2x = __builtin_assume_aligned(y, 4); + loops = N / 8; + for (j = 0; j < loops; j++) + { + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); + acc = MIPS_MAC16x16_2X(acc, x2x[3], y2x[3]); + x2x += 4; y2x += 4; + } + + switch (N & 7) { + case 7: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); + acc = MIPS_MAC(acc, x[N-1], y[N-1]); + break; + case 6: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); + break; + case 5: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + acc = MIPS_MAC(acc, x[N-1], y[N-1]); + break; + case 4: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); + break; + case 3: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + acc = MIPS_MAC(acc, x[N-1], y[N-1]); + break; + case 2: + acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); + break; + case 1: + acc = MIPS_MAC(acc, x[N-1], y[N-1]); + break; + case 0: + break; + } + return __builtin_mips_extr_w(acc, 1); + +fallback: +#endif + for (j = 0; j < N - 3; j += 4) + { + acc = MIPS_MAC(acc, x[j], y[j]); + acc = MIPS_MAC(acc, x[j+1], y[j+1]); + acc = MIPS_MAC(acc, x[j+2], y[j+2]); + acc = MIPS_MAC(acc, 
x[j+3], y[j+3]); + } + + switch (N & 3) { + case 3: + acc = MIPS_MAC(acc, x[j], y[j]); + acc = MIPS_MAC(acc, x[j+1], y[j+1]); + acc = MIPS_MAC(acc, x[j+2], y[j+2]); + break; + case 2: + acc = MIPS_MAC(acc, x[j], y[j]); + acc = MIPS_MAC(acc, x[j+1], y[j+1]); + break; + case 1: + acc = MIPS_MAC(acc, x[j], y[j]); + break; + case 0: + break; + } + + (void)arch; + + return (opus_val32)acc; +} +#endif /* OVERRIDE_CELT_INNER_PROD */ + +#if defined(OVERRIDE_DUAL_INNER_PROD) static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2, int arch) { int j; - long long acc1 = 0; - long long acc2 = 0; + accumulator_t acc1 = 0; + accumulator_t acc2 = 0; - (void)arch; +#if defined (MIPS_MAC16x16_2X) + const v2i16 *x2x; + const v2i16 *y01_2x; + const v2i16 *y02_2x; + + /* misaligned */ + if (((long)x | (long)y01 | (long)y02) & 3) + goto fallback; + x2x = __builtin_assume_aligned(x, 4); + y01_2x = __builtin_assume_aligned(y01, 4); + y02_2x = __builtin_assume_aligned(y02, 4); + N /= 2; + + for (j = 0; j < N - 3; j += 4) + { + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+2], y01_2x[j+2]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+2], y02_2x[j+2]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+3], y01_2x[j+3]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+3], y02_2x[j+3]); + } + + switch (N & 3) { + case 3: + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+2], y01_2x[j+2]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+2], y02_2x[j+2]); + break; + case 2: + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); + acc2 = 
MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); + break; + case 1: + acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); + acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); + break; + case 0: + break; + } + + *xy1 = __builtin_mips_extr_w(acc1, 1); + *xy2 = __builtin_mips_extr_w(acc2, 1); + return; + +fallback: +#endif /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ - for (j=0;j=0;i--) - { - celt_norm x1, x2; - x1 = Xptr[0]; - x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); - *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); - } -} - -#define OVERRIDE_renormalise_vector -void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch) -{ - int i; -#ifdef FIXED_POINT - int k; -#endif - long long acc = EPSILON; - opus_val32 E; - opus_val16 g; - opus_val32 t; - celt_norm *xptr = X; - int X0, X1; - - (void)arch; - - /*if(N %4) - printf("error");*/ - for (i=0;i>1; -#endif - t = VSHR32(E, 2*(k-7)); - g = MULT32_32_Q31(celt_rsqrt_norm(t),gain); - - xptr = X; - for (i=0;isShape; - opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; - opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; - opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; - opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; - opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; - VARDECL( opus_int16, x_windowed ); - const opus_int16 *x_ptr, *pitch_res_ptr; - SAVE_STACK; - - /* Point to start of first LPC analysis block */ - x_ptr = x - psEnc->sCmn.la_shape; - - /****************/ - /* GAIN CONTROL */ - /****************/ - SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; - - /* 
Input quality is the average of the quality in the lowest two VAD bands */ - psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] - + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); - - /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ - psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - - SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 ); - - /* Reduce coding SNR during low speech activity */ - if( psEnc->sCmn.useCBR == 0 ) { - b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; - b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ - silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce gains for periodic signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); - } else { - /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ), - SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); - } - - /*************************/ - /* SPARSENESS PROCESSING */ - /*************************/ - /* Set quantizer offset */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Initially set to 0; may be overruled in process_gains(..) 
*/ - psEnc->sCmn.indices.quantOffsetType = 0; - psEncCtrl->sparseness_Q8 = 0; - } else { - /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ - nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); - energy_variation_Q7 = 0; - log_energy_prev_Q7 = 0; - pitch_res_ptr = pitch_res; - for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { - silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); - nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ - - log_energy_Q7 = silk_lin2log( nrg ); - if( k > 0 ) { - energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 ); - } - log_energy_prev_Q7 = log_energy_Q7; - pitch_res_ptr += nSamples; - } - - psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - - SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); - - /* Set quantization offset depending on sparseness measure */ - if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - - /* Increase coding SNR for sparse signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); - } - - /*******************************/ - /* Control bandwidth expansion */ - /*******************************/ - /* More BWE for signals with high prediction gain */ - strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); - BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), - silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); - delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), - SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); - BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); - 
BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); - /* BWExp1 will be applied after BWExp2, so make it relative */ - BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) ); - } else { - warping_Q16 = 0; - } - - /********************************************/ - /* Compute noise shaping AR coefs and gains */ - /********************************************/ - ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Apply window: sine slope followed by flat part followed by cosine slope */ - opus_int shift, slope_part, flat_part; - flat_part = psEnc->sCmn.fs_kHz * 3; - slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); - - silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part ); - shift = slope_part; - silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); - shift += flat_part; - silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part ); - - /* Update pointer: next LPC analysis block */ - x_ptr += psEnc->sCmn.subfr_length; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder, arch ); - } else { - /* Calculate regular auto correlation */ - silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); - } - - /* Add white noise, as a fraction of energy */ - auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), - SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); - - /* 
Calculate the reflection coefficients using schur */ - nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); - silk_assert( nrg >= 0 ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); - - Qnrg = -scale; /* range: -12...30*/ - silk_assert( Qnrg >= -12 ); - silk_assert( Qnrg <= 30 ); - - /* Make sure that Qnrg is an even number */ - if( Qnrg & 1 ) { - Qnrg -= 1; - nrg >>= 1; - } - - tmp32 = silk_SQRT_APPROX( nrg ); - Qnrg >>= 1; /* range: -6...15*/ - - psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \ - silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) )); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Adjust gain for warping */ - gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; - } else { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - } - } - - /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); - - /* Compute noise shaping filter coefficients */ - silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); - - /* Bandwidth expansion for analysis filter shaping */ - silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); - silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); - - /* Ratio of prediction gains, in energy domain */ - pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder, arch ); - nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder, arch ); - - /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f 
+ 0.7f * pre_nrg / nrg;*/ - pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); - psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); - - /* Convert to monic warped prediction coefficients and limit absolute values */ - limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); - - /* Convert from Q24 to Q13 and store in int16 */ - for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { - psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); - psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); - } - } - - /*****************/ - /* Gain tweaking */ - /*****************/ - /* Increase gains during low speech activity and put lower limit on gains */ - gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) ); - gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) ); - silk_assert( gain_mult_Q16 > 0 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); - } - - gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), - psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); - } - - /************************************************/ - /* Control low-frequency shaping and noise tilt */ - 
/************************************************/ - /* Less low frequency shaping for noisy inputs */ - strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ), - SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) ); - strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ - /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ - opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] ); - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); - psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - } - silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/ - Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) - - silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ), - silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) ); - } else { - b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/ - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - - silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 ); - psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { - 
psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ]; - } - Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 ); - } - - /****************************/ - /* HARMONIC SHAPING CONTROL */ - /****************************/ - /* Control boosting of harmonic frequencies */ - HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), - psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); - - /* More harmonic boost for noisy input signals */ - HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, - SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); - - if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* More harmonic noise shaping for high bitrates or noisy input */ - HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), - SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ), - psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) ); - - /* Less harmonic noise shaping for less periodic signals */ - HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ), - silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) ); - } else { - HarmShapeGain_Q16 = 0; - } - - /*************************/ - /* Smooth over subframes */ - /*************************/ - for( k = 0; k < MAX_NB_SUBFR; k++ ) { - psShapeSt->HarmBoost_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->HarmShapeGain_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->Tilt_smth_Q16 = - silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( 
SUBFR_SMTH_COEF, 16 ) ); - - psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); - psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); - psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); - } - RESTORE_STACK; -} diff --git a/silk/fixed/mips/prefilter_FIX_mipsr1.h b/silk/fixed/mips/prefilter_FIX_mipsr1.h deleted file mode 100644 index e50df1685..000000000 --- a/silk/fixed/mips/prefilter_FIX_mipsr1.h +++ /dev/null @@ -1,184 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifndef __PREFILTER_FIX_MIPSR1_H__ -#define __PREFILTER_FIX_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX -void silk_warped_LPC_analysis_filter_FIX( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order, /* I Filter order (even) */ - int arch -) -{ - opus_int n, i; - opus_int32 acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4; - opus_int32 state_cur, state_next; - - (void)arch; - - /* Order must be even */ - /* Length must be even */ - - silk_assert( ( order & 1 ) == 0 ); - silk_assert( ( length & 1 ) == 0 ); - - for( n = 0; n < length; n+=2 ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state_cur = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state_next = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); - - - /* Output of lowpass section */ - tmp4 
= silk_SMLAWB( state_cur, state_next, lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 ); - /* Output of allpass section */ - tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); - state[ 1 ] = tmp4; - acc_Q22 = silk_RSHIFT( order, 1 ); - acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] ); - - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state_cur = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state_next = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - - - /* Output of allpass section */ - tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 ); - state[ i ] = tmp3; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); - state[ i + 1 ] = tmp4; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] ); - } - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - - state[ order ] = tmp3; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] ); - res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 ); - } -} - - - -/* Prefilter for finding Quantizer input signal */ -#define OVERRIDE_silk_prefilt_FIX -static inline void silk_prefilt_FIX( - silk_prefilter_state_FIX *P, /* I/O state */ - opus_int32 st_res_Q12[], /* I short term residual signal */ - opus_int32 xw_Q3[], /* O prefiltered signal */ - opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coefficients */ - opus_int Tilt_Q14, /* I Tilt shaping coefficients */ - opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coefficients */ - opus_int lag, /* I Lag for harmonic 
shaping */ - opus_int length /* I Length of signals */ -) -{ - opus_int i, idx, LTP_shp_buf_idx; - opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; - opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; - opus_int16 *LTP_shp_buf; - - /* To speed up use temp variables instead of using the struct */ - LTP_shp_buf = P->sLTP_shp; - LTP_shp_buf_idx = P->sLTP_shp_buf_idx; - sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; - sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; - - if( lag > 0 ) { - for( i = 0; i < length; i++ ) { - /* unrolled loop */ - silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); - idx = lag + LTP_shp_buf_idx; - n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); - } - } - else - { - for( i = 0; i < length; i++ ) { - - n_LTP_Q12 = 0; - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = 
(opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 ); - } - } - - /* Copy temp variable back to state */ - P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; - P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; - P->sLTP_shp_buf_idx = LTP_shp_buf_idx; -} - -#endif /* __PREFILTER_FIX_MIPSR1_H__ */ diff --git a/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h index 66eb2ed26..9d9cb0551 100644 --- a/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h +++ b/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h @@ -34,14 +34,36 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" +#if defined (__mips_dsp) && __mips == 32 + +#define MIPS_MADD __builtin_mips_madd +#define MIPS_SHILO __builtin_mips_shilo + +#define OVERRIDE_silk_warped_autocorrelation_FIX_c + +#elif defined (__mips_isa_rev) && __mips == 32 + +static inline long long MIPS_MADD(long long acc, int a, int b) { + return acc + (long long)a * b; +} + +static inline long long MIPS_SHILO(long long acc, int sh) { + return (sh >= 0) ? 
(acc >> sh) : (acc << -sh); +} + +#define OVERRIDE_silk_warped_autocorrelation_FIX_c + +#endif + +/* Autocorrelations for a warped frequency axis */ +#if defined (OVERRIDE_silk_warped_autocorrelation_FIX_c) + #undef QC #define QC 10 #undef QS #define QS 14 -/* Autocorrelations for a warped frequency axis */ -#define OVERRIDE_silk_warped_autocorrelation_FIX_c void silk_warped_autocorrelation_FIX_c( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ @@ -79,39 +101,39 @@ void silk_warped_autocorrelation_FIX_c( for( i = 0; i < order; i += 2 ) { /* Output of allpass section */ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, start_1); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp1_QS, start_1); tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp3_QS, start_2); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp3_QS, start_2); tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp5_QS, start_3); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp5_QS, start_3); tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 ); state_QS[ i ] = tmp7_QS; - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp7_QS, state_QS[0]); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp7_QS, state_QS[0]); /* Output of allpass section */ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, start_1); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp2_QS, start_1); tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp4_QS, start_2); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp4_QS, start_2); tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 
); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp6_QS, start_3); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp6_QS, start_3); tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 ); state_QS[ i + 1 ] = tmp8_QS; - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]); } state_QS[ order ] = tmp7_QS; - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, start_1); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp3_QS, start_2); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp5_QS, start_3); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp1_QS, start_1); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp3_QS, start_2); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp5_QS, start_3); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]); } for(;n< length; n++ ) { @@ -124,19 +146,19 @@ void silk_warped_autocorrelation_FIX_c( /* Output of allpass section */ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); state_QS[ i ] = tmp1_QS; - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]); + corr_QC[ i ] = MIPS_MADD( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]); /* Output of allpass section */ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); state_QS[ i + 1 ] = tmp2_QS; - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]); + corr_QC[ i+1 ] = MIPS_MADD( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]); } state_QS[ order ] = tmp1_QS; - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]); + corr_QC[ order ] = MIPS_MADD( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]); } temp64 = corr_QC[ 0 ]; - temp64 = __builtin_mips_shilo(temp64, val); + temp64 = MIPS_SHILO(temp64, val); lsh = 
silk_CLZ64( temp64 ) - 35; lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); @@ -145,21 +167,23 @@ void silk_warped_autocorrelation_FIX_c( if( lsh >= 0 ) { for( i = 0; i < order + 1; i++ ) { temp64 = corr_QC[ i ]; - //temp64 = __builtin_mips_shilo(temp64, val); + //temp64 = MIPS_SHILO(temp64, val); temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); - corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); + corr[ i ] = (opus_int32)silk_CHECK_FIT32( MIPS_SHILO( temp64, -lsh ) ); } } else { for( i = 0; i < order + 1; i++ ) { temp64 = corr_QC[ i ]; - //temp64 = __builtin_mips_shilo(temp64, val); + //temp64 = MIPS_SHILO(temp64, val); temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); - corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); + corr[ i ] = (opus_int32)silk_CHECK_FIT32( MIPS_SHILO( temp64, -lsh ) ); } } - corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val); + corr_QC[ 0 ] = MIPS_SHILO(corr_QC[ 0 ], val); silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ } +#endif /* OVERRIDE_silk_warped_autocorrelation_FIX_c */ + #endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */ diff --git a/silk/fixed/noise_shape_analysis_FIX.c b/silk/fixed/noise_shape_analysis_FIX.c index d90d7ca58..49ea59e99 100644 --- a/silk/fixed/noise_shape_analysis_FIX.c +++ b/silk/fixed/noise_shape_analysis_FIX.c @@ -128,15 +128,9 @@ static OPUS_INLINE void limit_warped_coefs( silk_assert( 0 ); } -/* Disable MIPS DSP version until it's updated. 
*/ -#if 0 && defined(__mips_dsp) && __mips == 32 -#include "mips/noise_shape_analysis_FIX_mipsr1.h" -#endif - /**************************************************************/ /* Compute noise shaping coefficients and initial gain values */ /**************************************************************/ -#ifndef OVERRIDE_silk_noise_shape_analysis_FIX void silk_noise_shape_analysis_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ @@ -404,4 +398,3 @@ void silk_noise_shape_analysis_FIX( } RESTORE_STACK; } -#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */ diff --git a/silk/fixed/warped_autocorrelation_FIX.c b/silk/fixed/warped_autocorrelation_FIX.c index e185df59c..00bd9ff64 100644 --- a/silk/fixed/warped_autocorrelation_FIX.c +++ b/silk/fixed/warped_autocorrelation_FIX.c @@ -31,7 +31,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" -#if defined(__mips_dsp) && __mips == 32 +#if defined(__mips) #include "mips/warped_autocorrelation_FIX_mipsr1.h" #endif diff --git a/silk/macros.h b/silk/macros.h index 099f84e51..26cfeb61a 100644 --- a/silk/macros.h +++ b/silk/macros.h @@ -104,7 +104,7 @@ POSSIBILITY OF SUCH DAMAGE. (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) -#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32 +#if defined(FIXED_POINT) && defined(__mips) #include "mips/macros_mipsr1.h" #endif diff --git a/silk/mips/NSQ_del_dec_mipsr1.h b/silk/mips/NSQ_del_dec_mipsr1.h deleted file mode 100644 index 26e0df672..000000000 --- a/silk/mips/NSQ_del_dec_mipsr1.h +++ /dev/null @@ -1,410 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef NSQ_DEL_DEC_MIPSR1_H__ -#define NSQ_DEL_DEC_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -#define OVERRIDE_silk_noise_shape_quantizer_del_dec -static inline void silk_noise_shape_quantizer_del_dec( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */ - opus_int decisionDelay, /* I */ - int arch /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; - opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; - opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; 
- NSQ_sample_struct psSampleState[ MAX_DEL_DEC_STATES ][ 2 ]; - NSQ_del_dec_struct *psDD; - NSQ_sample_struct *psSS; - opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4; - opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6; - opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13; - opus_int16 a_Q12_14, a_Q12_15; - - opus_int32 cur, prev, next; - - /*Unused.*/ - (void)arch; - - //Initialize b_Q14 variables - b_Q14_0 = b_Q14[ 0 ]; - b_Q14_1 = b_Q14[ 1 ]; - b_Q14_2 = b_Q14[ 2 ]; - b_Q14_3 = b_Q14[ 3 ]; - b_Q14_4 = b_Q14[ 4 ]; - - //Initialize a_Q12 variables - a_Q12_0 = a_Q12[0]; - a_Q12_1 = a_Q12[1]; - a_Q12_2 = a_Q12[2]; - a_Q12_3 = a_Q12[3]; - a_Q12_4 = a_Q12[4]; - a_Q12_5 = a_Q12[5]; - a_Q12_6 = a_Q12[6]; - a_Q12_7 = a_Q12[7]; - a_Q12_8 = a_Q12[8]; - a_Q12_9 = a_Q12[9]; - a_Q12_10 = a_Q12[10]; - a_Q12_11 = a_Q12[11]; - a_Q12_12 = a_Q12[12]; - a_Q12_13 = a_Q12[13]; - a_Q12_14 = a_Q12[14]; - a_Q12_15 = a_Q12[15]; - - long long temp64; - - silk_assert( nStatesDelayedDecision > 0 ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - for( i = 0; i < length; i++ ) { - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - temp64 = __builtin_mips_mult(pred_lag_ptr[ 0 ], b_Q14_0 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -1 ], b_Q14_1 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -2 ], b_Q14_2 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -3 ], b_Q14_3 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -4 ], b_Q14_4 ); - temp64 += 32768; - LTP_pred_Q14 = __builtin_mips_extr_w(temp64, 16); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ - 
pred_lag_ptr++; - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - /* Delayed decision state */ - psDD = &psDelDec[ k ]; - - /* Sample state */ - psSS = psSampleState[ k ]; - - /* Generate dither */ - psDD->Seed = silk_RAND( psDD->Seed ); - - /* Pointer used in short term prediction and shaping */ - psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; - /* Short-term prediction */ - silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); - temp64 = __builtin_mips_mult(psLPC_Q14[ 0 ], a_Q12_0 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -1 ], a_Q12_1 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -2 ], a_Q12_2 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -3 ], a_Q12_3 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -4 ], a_Q12_4 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -5 ], a_Q12_5 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -6 ], a_Q12_6 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -7 ], a_Q12_7 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -8 ], a_Q12_8 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -9 ], a_Q12_9 ); - if( predictLPCOrder == 16 ) { - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -10 ], a_Q12_10 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -11 ], a_Q12_11 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -12 ], a_Q12_12 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -13 ], a_Q12_13 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -14 ], a_Q12_14 ); - temp64 = __builtin_mips_madd( temp64, 
psLPC_Q14[ -15 ], a_Q12_15 ); - } - temp64 += 32768; - LPC_pred_Q14 = __builtin_mips_extr_w(temp64, 16); - - LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ 0 ] = tmp2; - - temp64 = __builtin_mips_mult(tmp2, AR_shp_Q13[ 0 ] ); - - prev = psDD->sAR2_Q14[ 1 ]; - - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - cur = psDD->sAR2_Q14[ j ]; - next = psDD->sAR2_Q14[ j+1 ]; - /* Output of allpass section */ - tmp2 = silk_SMLAWB( prev, cur - tmp1, warping_Q16 ); - psDD->sAR2_Q14[ j - 1 ] = tmp1; - temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ j - 1 ] ); - temp64 = __builtin_mips_madd( temp64, tmp2, AR_shp_Q13[ j ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( cur, next - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ j + 0 ] = tmp2; - prev = next; - } - psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); - temp64 += 32768; - n_AR_Q14 = __builtin_mips_extr_w(temp64, 16); - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ - - n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ - tmp2 = silk_ADD32( n_LTP_Q14, 
LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ - - r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( psDD->Seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); - - if( rd1_Q10 < rd2_Q10 ) { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 0 ].Q_Q10 = q1_Q10; - psSS[ 1 ].Q_Q10 = q2_Q10; - } else { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 1 ].RD_Q10 = 
silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 0 ].Q_Q10 = q2_Q10; - psSS[ 1 ].Q_Q10 = q1_Q10; - } - - /* Update states for best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 0 ].xq_Q14 = xq_Q14; - - /* Update states for second best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 1 ].xq_Q14 = xq_Q14; - } - - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY; - if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY; - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY; - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; - for( k = 0; k < nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { - psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 
].RD_Q10, silk_int32_MAX >> 4 ); - psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); - silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); - } - } - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; - RDmin_ind = k; - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, - ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); - silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); - } - - /* Write samples from winner to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Update states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psSS = &psSampleState[ k ][ 0 ]; - psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = 
psSS->xq_Q14; - psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; - psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; - psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); - psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; - psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); - psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; - psDD->RD_Q10 = psSS->RD_Q10; - } - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - } -} - -#endif /* NSQ_DEL_DEC_MIPSR1_H__ */ diff --git a/silk/mips/NSQ_mips.h b/silk/mips/NSQ_mips.h new file mode 100644 index 000000000..fd691ee33 --- /dev/null +++ b/silk/mips/NSQ_mips.h @@ -0,0 +1,137 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef NSQ_MIPS_H__ +#define NSQ_MIPS_H__ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "main.h" +#include "macros.h" + +#if defined (__mips_dsp) && __mips == 32 + +#define MIPS_MULT __builtin_mips_mult +#define MIPS_MADD __builtin_mips_madd +#define MIPS_EXTR_R __builtin_mips_extr_r_w + +#define OVERRIDE_silk_noise_shape_quantizer_short_prediction +/* suddenly performance is worse */ +#define dont_OVERRIDE_silk_NSQ_noise_shape_feedback_loop + +/* gets worst performance result */ +#elif defined(__mips_isa_rev) && __mips == 32 + +static inline long long MIPS_MULT(int a, int b) { + return (long long)a * b; +} + +static inline long long MIPS_MADD(long long acc, int a, int b) { + return acc + (long long)a * b; +} + +static inline opus_val32 MIPS_EXTR_R(long long acc, int shift) { + return (opus_val32)((acc + (1 << shift) / 2) >> shift); +} + +#define OVERRIDE_silk_noise_shape_quantizer_short_prediction +#define OVERRIDE_silk_NSQ_noise_shape_feedback_loop + +#endif + +#if defined(OVERRIDE_silk_noise_shape_quantizer_short_prediction) + +static OPUS_INLINE opus_int32 silk_noise_shape_quantizer_short_prediction_mips(const opus_int32 *buf32, const opus_int16 *coef16, opus_int order) +{ + opus_int64 out; + silk_assert( order == 10 || order == 16 ); + + out = MIPS_MULT( buf32[ 0 ], coef16[ 0 ] ); + out = MIPS_MADD( out, buf32[ -1 ], coef16[ 1 ] ); + out = MIPS_MADD( 
out, buf32[ -2 ], coef16[ 2 ] ); + out = MIPS_MADD( out, buf32[ -3 ], coef16[ 3 ] ); + out = MIPS_MADD( out, buf32[ -4 ], coef16[ 4 ] ); + out = MIPS_MADD( out, buf32[ -5 ], coef16[ 5 ] ); + out = MIPS_MADD( out, buf32[ -6 ], coef16[ 6 ] ); + out = MIPS_MADD( out, buf32[ -7 ], coef16[ 7 ] ); + out = MIPS_MADD( out, buf32[ -8 ], coef16[ 8 ] ); + out = MIPS_MADD( out, buf32[ -9 ], coef16[ 9 ] ); + + if( order == 16 ) + { + out = MIPS_MADD( out, buf32[ -10 ], coef16[ 10 ] ); + out = MIPS_MADD( out, buf32[ -11 ], coef16[ 11 ] ); + out = MIPS_MADD( out, buf32[ -12 ], coef16[ 12 ] ); + out = MIPS_MADD( out, buf32[ -13 ], coef16[ 13 ] ); + out = MIPS_MADD( out, buf32[ -14 ], coef16[ 14 ] ); + out = MIPS_MADD( out, buf32[ -15 ], coef16[ 15 ] ); + } + return MIPS_EXTR_R(out, 16); +} + +#undef silk_noise_shape_quantizer_short_prediction +#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) ((void)arch,silk_noise_shape_quantizer_short_prediction_mips(in, coef, order)) + +#endif /* OVERRIDE_silk_noise_shape_quantizer_short_prediction */ + + +#if defined(OVERRIDE_silk_NSQ_noise_shape_feedback_loop) + +static OPUS_INLINE opus_int32 silk_NSQ_noise_shape_feedback_loop_mips(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order) +{ + opus_int32 out; + opus_int32 tmp1, tmp2; + opus_int j; + + tmp2 = data0[0]; + tmp1 = data1[0]; + data1[0] = tmp2; + + out = MIPS_MULT(tmp2, coef[0]); + + for (j = 2; j < order; j += 2) { + tmp2 = data1[j - 1]; + data1[j - 1] = tmp1; + out = MIPS_MADD(out, tmp1, coef[j - 1]); + tmp1 = data1[j + 0]; + data1[j + 0] = tmp2; + out = MIPS_MADD(out, tmp2, coef[j]); + } + data1[order - 1] = tmp1; + out = MIPS_MADD(out, tmp1, coef[order - 1]); + /* silk_SMLAWB: shift right by 16 && Q11 -> Q12: shift left by 1 */ + return MIPS_EXTR_R( out, (16 - 1) ); +} + +#undef silk_NSQ_noise_shape_feedback_loop +#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) 
((void)arch,silk_NSQ_noise_shape_feedback_loop_mips(data0, data1, coef, order)) + +#endif /* OVERRIDE_silk_NSQ_noise_shape_feedback_loop */ + +#endif /* NSQ_MIPS_H__ */ diff --git a/silk/mips/macros_mipsr1.h b/silk/mips/macros_mipsr1.h index 0393a33fa..c9c01f3df 100644 --- a/silk/mips/macros_mipsr1.h +++ b/silk/mips/macros_mipsr1.h @@ -29,10 +29,8 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef SILK_MACROS_MIPSR1_H__ #define SILK_MACROS_MIPSR1_H__ -static inline int mips_clz(opus_uint32 x) -{ - return x ? __builtin_clz(x) : 32; -} + +#if defined (__mips_dsp) && __mips == 32 #undef silk_SMULWB static inline int silk_SMULWB(int a, int b) @@ -74,6 +72,46 @@ static inline int silk_SMLAWW(int a, int b, int c) return res; } +#undef silk_ADD_SAT32 +static inline int silk_ADD_SAT32(int a, int b) +{ + return __builtin_mips_addq_s_w(a, b); +} + +#undef silk_SUB_SAT32 +static inline int silk_SUB_SAT32(int a, int b) +{ + return __builtin_mips_subq_s_w(a, b); +} + +#elif defined (__mips_isa_rev) && __mips == 32 + +#undef silk_SMULWB +static inline int silk_SMULWB(int a, int b) +{ + long long ac = (long long)a * (int)(b << 16); + + return ac >> 32; +} + +/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ +#undef silk_SMLAWB +static inline int silk_SMLAWB(int a, int b, int c) +{ + long long ac = (long long)b * (int)(c << 16); + + return a + (ac >> 32); +} + +#endif + +#if defined (__mips_isa_rev) /* MIPS32r1+ */ + +static inline int mips_clz(opus_uint32 x) +{ + return x ?
__builtin_clz(x) : 32; +} + #define OVERRIDE_silk_CLZ16 static inline opus_int32 silk_CLZ16(opus_int16 in16) { @@ -92,4 +130,6 @@ static inline opus_int32 silk_CLZ32(opus_int32 in32) return re32; } +#endif /* __mips_isa_rev */ + #endif /* SILK_MACROS_MIPSR1_H__ */ diff --git a/silk/mips/sigproc_fix_mipsr1.h b/silk/mips/sigproc_fix_mipsr1.h index 51520c0a6..2d7615aa8 100644 --- a/silk/mips/sigproc_fix_mipsr1.h +++ b/silk/mips/sigproc_fix_mipsr1.h @@ -57,4 +57,10 @@ static inline int silk_RSHIFT_ROUND(int a, int shift) return r; } +#undef silk_ADD_POS_SAT32 +static inline int silk_ADD_POS_SAT32(int a, int b) +{ + return __builtin_mips_addq_s_w(a, b); +} + #endif /* SILK_SIGPROC_FIX_MIPSR1_H */ diff --git a/silk_headers.mk b/silk_headers.mk index 2588067c7..0586ea1ce 100644 --- a/silk_headers.mk +++ b/silk_headers.mk @@ -34,11 +34,10 @@ silk/arm/NSQ_neon.h \ silk/fixed/main_FIX.h \ silk/fixed/structs_FIX.h \ silk/fixed/arm/warped_autocorrelation_FIX_arm.h \ -silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h \ silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h \ silk/float/main_FLP.h \ silk/float/structs_FLP.h \ silk/float/SigProc_FLP.h \ silk/mips/macros_mipsr1.h \ -silk/mips/NSQ_del_dec_mipsr1.h \ +silk/mips/NSQ_mips.h \ silk/mips/sigproc_fix_mipsr1.h