Thank you to everyone who has already donated; your generous donations helped make three months of treatment possible.
My brother Nate continues to fight stage IV Hodgkin's lymphoma. He's just 31, with a wife and a baby girl. They have no active income (since he's been unable to return to work) and no insurance, and they cannot afford the treatment he needs. Nate and his family need your help. Please consider a donation; every dollar helps. Thanks.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
diff --git a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h index b1fe8fb..f387b40 100644 --- a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h +++ b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h @@ -57,6 +57,23 @@ # define S_MUL(a,b) MULT16_32_Q15(b, a) +#if defined(CPU_ARM) +static inline int32_t S_MULADD(int32_t a, int32_t b, int32_t c, int32_t d) +{ + int32_t lo, hi; + asm volatile("smull %[lo], %[hi], %[a], %[b] \n\t" + "smlal %[lo], %[hi], %[c], %[d] \n\t" + "mov %[lo], %[lo], lsr #15 \n\t" + "orr %[hi], %[lo], %[hi], lsl #17 \n\t" + : [lo] "=&r" (lo), [hi] "=&r" (hi) + : [a] "r" (a), [b] "r" (b), [c] "r" (c), [d] "r" (d) ); + return(hi); +} +#else +# define S_MULADD(a,b,c,d) \ + ADD32(S_MUL((a),(b)) , S_MUL((c),(d))); +#endif + # define C_MUL(m,a,b) \ do{(m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) @@ -132,11 +149,10 @@ #define C_ADDTO( res , a)\ do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ }while(0) - #define C_SUBFROM( res , a)\ do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ }while(0) - + #else /* not FIXED_POINT*/ # define S_MUL(a,b) ( (a)*(b) ) diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c index 01049d5..a0d7ab9 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c @@ -396,7 +396,7 @@ static void ki_bfly5( { kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; int i, u; - kiss_fft_cpx scratch[13]; + kiss_fft_cpx scratch[12]; const kiss_twiddle_cpx * twiddles = st->twiddles; const kiss_twiddle_cpx *tw; kiss_twiddle_cpx ya,yb; @@ -416,37 +416,33 @@ static void ki_bfly5( Fout4=Fout0+4*m; for ( u=0; u<m; ++u ) { - scratch[0] = *Fout0; - - C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); + C_MULC(scratch[1] ,*Fout1, tw[ u*fstride]); C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); 
C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); - C_ADD( scratch[7],scratch[1],scratch[4]); + C_ADD( scratch[ 7],scratch[1],scratch[4]); C_SUB( scratch[10],scratch[1],scratch[4]); - C_ADD( scratch[8],scratch[2],scratch[3]); - C_SUB( scratch[9],scratch[2],scratch[3]); - + C_ADD( scratch[ 8],scratch[2],scratch[3]); + C_SUB( scratch[ 9],scratch[2],scratch[3]); + + scratch[ 5].r = Fout0->r + S_MULADD(scratch[7].r,ya.r,scratch[8].r,yb.r); + scratch[ 5].i = Fout0->i + S_MULADD(scratch[7].i,ya.r,scratch[8].i,yb.r); + scratch[11].r = Fout0->r + S_MULADD(scratch[7].r,yb.r,scratch[8].r,ya.r); + scratch[11].i = Fout0->i + S_MULADD(scratch[7].i,yb.r,scratch[8].i,ya.r); + Fout0->r += scratch[7].r + scratch[8].r; Fout0->i += scratch[7].i + scratch[8].i; - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); + scratch[6].r = -S_MULADD( scratch[10].i, ya.i, scratch[9].i, yb.i); + scratch[6].i = S_MULADD( scratch[10].r, ya.i, scratch[9].r, yb.i); + scratch[0].r = S_MULADD( scratch[10].i, yb.i,-scratch[9].i, ya.i); + scratch[0].i = S_MULADD(-scratch[10].r, yb.i, scratch[9].r, ya.i); + + C_ADD(*Fout4,scratch[ 5],scratch[6]); + C_SUB(*Fout1,scratch[ 5],scratch[6]); + C_ADD(*Fout2,scratch[11],scratch[0]); + 
C_SUB(*Fout3,scratch[11],scratch[0]); ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; } |