diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c
index a4afb24..9b54285 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -415,6 +415,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
 }
 #endif
 
+static opus_val32 s_x[1080] IBSS_ATTR; /* scratch for compute_inv_mdcts(), used in place of its per-call ALLOC(x, N+overlap); shared by all calls; assumes N+overlap <= 1080 -- TODO confirm */
 /** Compute the IMDCT and apply window for all sub-frames and
     all channels in a frame */
 static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
@@ -424,10 +425,11 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
    int c;
    const int N = mode->shortMdctSize<<LM;
    const int overlap = OVERLAP(mode);
-   VARDECL(opus_val32, x);
+//AB   VARDECL(opus_val32, x);
+   opus_val32 *x = s_x;
    SAVE_STACK;
 
-   ALLOC(x, N+overlap, opus_val32);
+//AB   ALLOC(x, N+overlap, opus_val32);
    c=0; do {
       int j;
       int b;
@@ -448,12 +450,16 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
          clt_mdct_backward(&mode->mdct, &X[b+c*N2*B], x+N2*b, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM, B);
       }
 
-      for (j=0;j<overlap;j++)
-         out_mem[c][j] = x[j] + overlap_mem[c][j];
-      for (;j<N;j++)
-         out_mem[c][j] = x[j];
-      for (j=0;j<overlap;j++)
-         overlap_mem[c][j] = x[N+j];
+    /* overlap can be divided by 4 */
+      for (j=0;j<overlap;j+=4)
+    {
+    out_mem[c][j  ] = x[j  ] + overlap_mem[c][j  ];
+    out_mem[c][j+1] = x[j+1] + overlap_mem[c][j+1];
+    out_mem[c][j+2] = x[j+2] + overlap_mem[c][j+2];
+    out_mem[c][j+3] = x[j+3] + overlap_mem[c][j+3];
+    }
+    memcpy(out_mem[c]+overlap, x+overlap, (N-overlap)*sizeof(celt_sig));
+    memcpy(overlap_mem[c]    , x+N      , (  overlap)*sizeof(celt_sig));
    } while (++c<C);
    RESTORE_STACK;
 }
@@ -496,7 +502,7 @@ static void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
 {
-   int i;
+   int i, idx0, idx1; /* idx0/idx1 are subscripts into x[], so they are plain ints, not the opus_val16 sample type */
    /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
    opus_val16 g00, g01, g02, g10, g11, g12;
    static const opus_val16 gains[3][3] = {
          {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
          {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
@@ -507,30 +513,29 @@ static void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
    g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
    g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
    g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
-   for (i=0;i<overlap;i++)
+   idx0 = -T0;
+   idx1 = -T1;
+   for (i=0;i<overlap;i++,idx0++,idx1++)
    {
-      opus_val16 f;
-      f = MULT16_16_Q15(window[i],window[i]);
-      y[i] = x[i]
-               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
-               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0-1])
-               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0+1])
-               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0-2])
-               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0+2])
-               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x[i-T1])
-               + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1-1])
-               + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1+1])
-               + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1-2])
-               + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1+2]);
-
+      opus_val16 f0, f1;
+      f1 = MULT16_16_Q15(window[i],window[i]);
+    f0 = Q15ONE - f1;
+    y[i] = x[i]
+               + MULT16_32_Q15(MULT16_16_Q15(f0,g00), x[idx0  ])
+               + MULT16_32_Q15(MULT16_16_Q15(f0,g01),(x[idx0-1]+x[idx0+1]))
+               + MULT16_32_Q15(MULT16_16_Q15(f0,g02),(x[idx0-2]+x[idx0+2]))
+               + MULT16_32_Q15(MULT16_16_Q15(f1,g10), x[idx1  ])
+               + MULT16_32_Q15(MULT16_16_Q15(f1,g11),(x[idx1-1]+x[idx1+1]))
+               + MULT16_32_Q15(MULT16_16_Q15(f1,g12),(x[idx1-2]+x[idx1+2]));
+   }
+   idx1 = overlap-T1;
+   for (i=overlap;i<N;i++,idx1++)
+   {
+    y[i] = x[i]
+         + MULT16_32_Q15(g10, x[idx1  ])
+         + MULT16_32_Q15(g11,(x[idx1-1]+x[idx1+1]))
+         + MULT16_32_Q15(g12,(x[idx1-2]+x[idx1+2]));
    }
-   for (i=overlap;i<N;i++)
-      y[i] = x[i]
-               + MULT16_32_Q15(g10,x[i-T1])
-               + MULT16_32_Q15(g11,x[i-T1-1])
-               + MULT16_32_Q15(g11,x[i-T1+1])
-               + MULT16_32_Q15(g12,x[i-T1-2])
-               + MULT16_32_Q15(g12,x[i-T1+2]);
 }
 
 static const signed char tf_select_table[4][8] = {
@@ -2293,14 +2298,16 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
    RESTORE_STACK;
 }
 
+static celt_sig freq[1920] IBSS_ATTR; /* interleaved signal MDCTs for celt_decode_with_ec(); static stand-in for ALLOC(freq, IMAX(CC,C)*N), assumes that product is <= 1920 -- TODO confirm */
+static celt_norm X[1920] IBSS_ATTR; /* interleaved normalised MDCTs; static stand-in for ALLOC(X, C*N), assumes C*N <= 1920 -- TODO confirm */
 int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
 {
    int c, i, N;
    int spread_decision;
    opus_int32 bits;
    ec_dec _dec;
-   VARDECL(celt_sig, freq);
-   VARDECL(celt_norm, X);
+//AB   VARDECL(celt_sig, freq);
+//AB   VARDECL(celt_norm, X);
    VARDECL(celt_ener, bandE);
    VARDECL(int, fine_quant);
    VARDECL(int, pulses);
@@ -2395,8 +2402,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    if (effEnd > st->mode->effEBands)
       effEnd = st->mode->effEBands;
 
-   ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
-   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+//AB   ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
+//AB   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
    ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
    c=0; do
       for (i=0;i<M*st->mode->eBands[st->start];i++)
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
index bc6820f..84c786c 100644
--- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
@@ -71,8 +71,24 @@ static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
 #endif
 
+#if defined(CPU_ARM)
+/** 32x32 multiplication, followed by a 31-bit shift right (ARM version).
+    Works on the full 64-bit SMULL product, so the low-word x low-word term that
+    the generic macro below leaves out is included; low bits may differ from it. */
+static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
+{
+  int32_t lo, hi;
+  asm("smull %[lo], %[hi], %[a], %[b] \n\t"      /* hi:lo = a * b (signed 64-bit) */
+      "mov %[lo], %[lo], lsr #31 \n\t"           /* lo = bit 31 of the product */
+      "orr %[hi], %[lo], %[hi], lsl #1 \n\t"     /* hi = product bits 62..31 */
+      : [lo] "=&r" (lo), [hi] "=&r" (hi)
+      : [a] "r" (a), [b] "r" (b) );
+  return(hi);
+}
+#else
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#endif
 
 /** Compile-time conversion of float constant to 16-bit value */
 #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
index 01049d5..15dd76a 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
@@ -398,13 +398,12 @@ static void ki_bfly5(
    int i, u;
    kiss_fft_cpx scratch[13];
    const kiss_twiddle_cpx * twiddles = st->twiddles;
-   const kiss_twiddle_cpx *tw;
+   const kiss_twiddle_cpx *tw1,*tw2, *tw3, *tw4;
    kiss_twiddle_cpx ya,yb;
    kiss_fft_cpx * Fout_beg = Fout;
 
    ya = twiddles[fstride*m];
    yb = twiddles[fstride*2*m];
-   tw=st->twiddles;
 
    for (i=0;i<N;i++)
    {
@@ -414,14 +413,15 @@ static void ki_bfly5(
       Fout2=Fout0+2*m;
       Fout3=Fout0+3*m;
       Fout4=Fout0+4*m;
-
-      for ( u=0; u<m; ++u ) {
+    tw1 = tw2 = tw3 = tw4 = st->twiddles;
+    
+      for ( u=0; u<m; ++u) {
          scratch[0] = *Fout0;
-
-         C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
-         C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
-         C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
-         C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
+   
+     C_MULC(scratch[1] ,*Fout1, *tw1);
+     C_MULC(scratch[2] ,*Fout2, *tw2);
+     C_MULC(scratch[3] ,*Fout3, *tw3);
+     C_MULC(scratch[4] ,*Fout4, *tw4);
 
          C_ADD( scratch[7],scratch[1],scratch[4]);
          C_SUB( scratch[10],scratch[1],scratch[4]);
@@ -449,6 +449,10 @@ static void ki_bfly5(
          C_SUB(*Fout3,scratch[11],scratch[12]);
 
          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+     tw1 +=   fstride;
+     tw2 += 2*fstride;
+     tw3 += 3*fstride;
+     tw4 += 4*fstride;
       }
    }
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c
index 15d2393..0422fef 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.c
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -208,6 +208,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
 }
 #endif
 
+static kiss_fft_scalar f[1920>>1] IBSS_ATTR; /* clt_mdct_backward() work buffer, used in place of the on-stack f[N2]; sized for N = 1920 (static modes) */
+static kiss_fft_scalar f2[1920>>1] IBSS_ATTR; /* second work buffer, used in place of ALLOC(f2, N2); like f it is shared by every call */
 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
       const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
 {
@@ -215,15 +217,15 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
    int N, N2, N4;
    kiss_twiddle_scalar sine;
 /*   VARDECL(kiss_fft_scalar, f); */
-   VARDECL(kiss_fft_scalar, f2);
+//AB   VARDECL(kiss_fft_scalar, f2);
    SAVE_STACK;
    N = l->n; /* static modes => N = 1920 */
    N >>= shift;
    N2 = N>>1;
    N4 = N>>2;
 /*   ALLOC(f, N2, kiss_fft_scalar); */
-   kiss_fft_scalar f[N2]; /* worst case 3840b */
-   ALLOC(f2, N2, kiss_fft_scalar);
+//AB   kiss_fft_scalar f[N2]; /* worst case 3840b */
+//AB   ALLOC(f2, N2, kiss_fft_scalar);
    /* sin(x) ~= x here */
 #ifdef FIXED_POINT
    sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -241,8 +243,11 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
       for(i=0;i<N4;i++)
       {
          kiss_fft_scalar yr, yi;
-         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
-         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
+     int idx0, idx1;
+     idx0 = i<<shift;
+     idx1 = (N4-i)<<shift;
+         yr = -S_MUL(*xp2, t[idx0]) + S_MUL(*xp1,t[idx1]);
+         yi = -S_MUL(*xp2, t[idx1]) - S_MUL(*xp1,t[idx0]);
          /* works because the cos is nearly one */
          *yp++ = yr - S_MUL(yi,sine);
          *yp++ = yi + S_MUL(yr,sine);
@@ -262,11 +267,14 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
       for(i=0;i<N4;i++)
       {
          kiss_fft_scalar re, im, yr, yi;
+     int idx0, idx1;
+     idx0 = i<<shift;
+     idx1 = (N4-i)<<shift;
          re = fp[0];
          im = fp[1];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
-         yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
+         yr = S_MUL(re,t[idx0]) - S_MUL(im,t[idx1]);
+         yi = S_MUL(im,t[idx0]) + S_MUL(re,t[idx1]);
          /* works because the cos is nearly one */
          *fp++ = yr - S_MUL(yi,sine);
          *fp++ = yi + S_MUL(yr,sine);
diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
index 92e5fe5..155f697 100644
--- a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
+++ b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
@@ -340,7 +340,7 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] ICONST_ATTR = {
 };
 #ifndef FFT_BITREV480
 #define FFT_BITREV480
-static const opus_int16 fft_bitrev480[480] = {
+static const opus_int16 fft_bitrev480[480] ICONST_ATTR = {
 0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
 450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
 345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
@@ -378,7 +378,7 @@ static const opus_int16 fft_bitrev480[480] = {
 
 #ifndef FFT_BITREV240
 #define FFT_BITREV240
-static const opus_int16 fft_bitrev240[240] = {
+static const opus_int16 fft_bitrev240[240] ICONST_ATTR = {
 0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
 225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
 170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
@@ -400,7 +400,7 @@ static const opus_int16 fft_bitrev240[240] = {
 
 #ifndef FFT_BITREV120
 #define FFT_BITREV120
-static const opus_int16 fft_bitrev120[120] = {
+static const opus_int16 fft_bitrev120[120] ICONST_ATTR = {
 0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
 110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
 76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
@@ -414,7 +414,7 @@ static const opus_int16 fft_bitrev120[120] = {
 
 #ifndef FFT_BITREV60
 #define FFT_BITREV60
-static const opus_int16 fft_bitrev60[60] = {
+static const opus_int16 fft_bitrev60[60] ICONST_ATTR = {
 0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
 46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
 37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
diff --git a/lib/rbcodec/codecs/libopus/opus_decoder.c b/lib/rbcodec/codecs/libopus/opus_decoder.c
index 7103b18..d7e3a66 100644
--- a/lib/rbcodec/codecs/libopus/opus_decoder.c
+++ b/lib/rbcodec/codecs/libopus/opus_decoder.c
@@ -131,6 +131,10 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
    return OPUS_OK;
 }
 
+/* Statically allocated decoder state, handed out by opus_decoder_create() in
+   place of opus_alloc().  Every call returns this same buffer.  Explicitly
+   aligned so the cast to OpusDecoder* does not depend on default char placement. */
+static char sDec[26468] IBSS_ATTR __attribute__((aligned(8)));
 OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error)
 {
    int ret;
@@ -142,7 +146,9 @@ OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error)
          *error = OPUS_BAD_ARG;
       return NULL;
    }
-   st = (OpusDecoder *)opus_alloc(opus_decoder_get_size(channels));
+   /* Use the static buffer only if the decoder state fits in it; otherwise
+      leave st NULL so the existing st == NULL error path below is taken. */
+   st = (opus_decoder_get_size(channels) <= (int)sizeof(sDec)) ? (OpusDecoder *)sDec : NULL;
    if (st == NULL)
    {
       if (error)