Thank you to anyone who has already donated - your generous donations helped make three months of treatment possible.
My brother Nate continues to fight stage IV Hodgkin's lymphoma. He's just 31, with a wife and baby girl. They have no active income (since he's been unable to return to work), no insurance, and cannot afford the treatment he needs. Nate and his family need your help. Please consider a donation; every dollar helps. Thank you.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
diff --git a/asm_arm.h b/asm_arm.h index 5ea2bf4..d0e249e 100644 #ifdef _ARM_ASSEM_ +#ifdef __thumb__ +# define THUMB(a) a +#else +# define THUMB(a) +#endif + /* * This should be used as a memory barrier, forcing all cached values in * registers to wr writen back to memory. Might or might not be beneficial void vect_mult_bw(ogg_int32_t *data, ogg_int32_t *window, int n) { while (n>=4) { asm volatile ("ldmia %[d], {r0, r1, r2, r3};" +#ifdef __thumb__ + "ldmda %[w], {r4, r5, r6, r7};" + "add %[w], #16;" +#else "ldmda %[w]!, {r4, r5, r6, r7};" +#endif "smull r8, r9, r0, r7;" "mov r0, r9, lsl #1;" "smull r8, r9, r1, r6;" void vect_mult_bw(ogg_int32_t *data, ogg_int32_t *window, int n) static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) { int tmp; asm volatile("subs %1, %0, #32768\n\t" +THUMB( "itt pl\n\t") "movpl %0, #0x7f00\n\t" "orrpl %0, %0, #0xff\n" "adds %1, %0, #32768\n\t" +THUMB( "it mi\n\t") "movmi %0, #0x8000" : "+r"(x),"=r"(tmp) : static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, ogg_uint32_t qi=*qip,pi=*pip; ogg_int32_t qexp=*qexpp; - asm("mov r0,%3;" + asm("mov r12,%3;" "movs r1,%5,asr#1;" - "add r0,r0,r1,lsl#3;" + "add r12,r12,r1,lsl#3;" "beq 2f;\n" "1:" - "ldmdb r0!,{r1,r3};" + "ldmdb r12!,{r1,r3};" "subs r1,r1,%4;" //ilsp[j]-wi +THUMB("it mi;") "rsbmi r1,r1,#0;" //labs(ilsp[j]-wi) "umull %0,r2,r1,%0;" //qi*=labs(ilsp[j]-wi) "subs r1,r3,%4;" //ilsp[j+1]-wi +THUMB("it mi;") "rsbmi r1,r1,#0;" //labs(ilsp[j+1]-wi) "umull %1,r3,r1,%1;" //pi*=labs(ilsp[j+1]-wi) static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, "mov %1,%1,lsr #16;" "orr %1,%1,r3,lsl #16;" "0:" - "cmp r0,%3;\n" + "cmp r12,%3;\n" "bhi 1b;\n" "2:" // odd filter assymetry - "ands r0,%5,#1;\n" + "ands r12,%5,#1;\n" "beq 3f;\n" - "add r0,%3,%5,lsl#2;\n" + "add r12,%3,%5,lsl#2;\n" - "ldr r1,[r0,#-4];\n" - "mov r0,#0x4000;\n" + "ldr r1,[r12,#-4];\n" + "mov r12,#0x4000;\n" "subs r1,r1,%4;\n" //ilsp[j]-wi +THUMB("it mi;") "rsbmi r1,r1,#0;\n" //labs(ilsp[j]-wi) "umull 
%0,r2,r1,%0;\n" //qi*=labs(ilsp[j]-wi) - "umull %1,r3,r0,%1;\n" //pi*=labs(ilsp[j+1]-wi) + "umull %1,r3,r12,%1;\n" //pi*=labs(ilsp[j+1]-wi) "cmn r2,r3;\n" // shift down 16? "beq 3f;\n" static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, "mov r2,#0;" "orr r1,%0,%1;" "tst r1,#0xff000000;" +THUMB("itt ne;") "addne r2,r2,#8;" "movne r1,r1,lsr #8;" "tst r1,#0x00f00000;" +THUMB("itt ne;") "addne r2,r2,#4;" "movne r1,r1,lsr #4;" "tst r1,#0x000c0000;" +THUMB("itt ne;") "addne r2,r2,#2;" "movne r1,r1,lsr #2;" "tst r1,#0x00020000;" +THUMB("itt ne;") "addne r2,r2,#1;" "movne r1,r1,lsr #1;" "tst r1,#0x00010000;" +THUMB("it ne;") "addne r2,r2,#1;" "mov %0,%0,lsr r2;" "mov %1,%1,lsr r2;" static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, : "+r"(qi),"+r"(pi),"+r"(qexp) : "r"(ilsp),"r"(wi),"r"(m) - : "r0","r1","r2","r3","cc"); + : "r1","r2","r3","r12","cc"); *qip=qi; *pip=pi; static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){ ogg_int32_t qexp=*qexpp; asm("tst %0,#0x0000ff00;" +THUMB("itt eq;") "moveq %0,%0,lsl #8;" "subeq %1,%1,#8;" "tst %0,#0x0000f000;" +THUMB("itt eq;") "moveq %0,%0,lsl #4;" "subeq %1,%1,#4;" "tst %0,#0x0000c000;" +THUMB("itt eq;") "moveq %0,%0,lsl #2;" "subeq %1,%1,#2;" "tst %0,#0x00008000;" +THUMB("itt eq;") "moveq %0,%0,lsl #1;" "subeq %1,%1,#1;" : "+r"(qi),"+r"(qexp) |