1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
Index: apps/codecs/libmusepack/synth_filter.c
===================================================================
--- apps/codecs/libmusepack/synth_filter.c  (revision 28328)
+++ apps/codecs/libmusepack/synth_filter.c  (working copy)
@@ -191,6 +191,9 @@
 #define costab30 (0x0c8bd35e)  /* 0.098017140 */
 #define costab31 (0x0647d97c)  /* 0.049067674 */

+#define VMUL (102)       /* multiplier on every v[] output index below; same 102 as the per-k V step in the windowing loop of this patch */
+#define VDIF (32*102-1)  /* subtracted for outputs 32..63: v[VMUL*(32+j)-VDIF] == v[VMUL*j+1], the slot right after v[VMUL*j] */
+
   t0   = in[ 0] + in[31];  t16  = MPC_DCT32_MUL(in[ 0] - in[31], costab01);
   t1   = in[15] + in[16];  t17  = MPC_DCT32_MUL(in[15] - in[16], costab31);

@@ -277,22 +280,22 @@
   t113 = t69  + t70;
   t114 = t71  + t72;

-  /*  0 */ v[48] = -MPC_DCT32_SHIFT(t113 + t114);
-  /* 16 */ v[32] = -(v[ 0] = MPC_DCT32_SHIFT(MPC_DCT32_MUL(t113 - t114, costab16)));
+  /*  0 */ v[VMUL*48-VDIF] = -MPC_DCT32_SHIFT(t113 + t114);
+  /* 16 */ v[VMUL*32-VDIF] = -(v[VMUL* 0] = MPC_DCT32_SHIFT(MPC_DCT32_MUL(t113 - t114, costab16)));

   t115 = t73  + t74;
   t116 = t75  + t76;

   t32  = t115 + t116;

-  /*  1 */ v[49] = v[47] = -MPC_DCT32_SHIFT(t32);
+  /*  1 */ v[VMUL*49-VDIF] = v[VMUL*47-VDIF] = -MPC_DCT32_SHIFT(t32);

   t118 = t78  + t79;
   t119 = t80  + t81;

   t58  = t118 + t119;

-  /*  2 */ v[50] = v[46] = -MPC_DCT32_SHIFT(t58);
+  /*  2 */ v[VMUL*50-VDIF] = v[VMUL*46-VDIF] = -MPC_DCT32_SHIFT(t58);

   t121 = t83  + t84;
   t122 = t85  + t86;
@@ -301,14 +304,14 @@

   t49  = (t67 * 2) - t32;

-  /*  3 */ v[51] = v[45] = -MPC_DCT32_SHIFT(t49);
+  /*  3 */ v[VMUL*51-VDIF] = v[VMUL*45-VDIF] = -MPC_DCT32_SHIFT(t49);

   t125 = t89  + t90;
   t126 = t91  + t92;

   t93  = t125 + t126;

-  /*  4 */ v[52] = v[44] = -MPC_DCT32_SHIFT(t93);
+  /*  4 */ v[VMUL*52-VDIF] = v[VMUL*44-VDIF] = -MPC_DCT32_SHIFT(t93);

   t128 = t94  + t95;
   t129 = t96  + t97;
@@ -317,7 +320,7 @@

   t68  = (t98 * 2) - t49;

-  /*  5 */ v[53] = v[43] = -MPC_DCT32_SHIFT(t68);
+  /*  5 */ v[VMUL*53-VDIF] = v[VMUL*43-VDIF] = -MPC_DCT32_SHIFT(t68);

   t132 = t100 + t101;
   t133 = t102 + t103;
@@ -326,7 +329,7 @@

   t82  = (t104 * 2) - t58;

-  /*  6 */ v[54] = v[42] = -MPC_DCT32_SHIFT(t82);
+  /*  6 */ v[VMUL*54-VDIF] = v[VMUL*42-VDIF] = -MPC_DCT32_SHIFT(t82);

   t136 = t106 + t107;
   t137 = t108 + t109;
@@ -337,14 +340,14 @@

   t77  = (t87 * 2) - t68;

-  /*  7 */ v[55] = v[41] = -MPC_DCT32_SHIFT(t77);
+  /*  7 */ v[VMUL*55-VDIF] = v[VMUL*41-VDIF] = -MPC_DCT32_SHIFT(t77);

   t141 = MPC_DCT32_MUL(t69 - t70, costab08);
   t142 = MPC_DCT32_MUL(t71 - t72, costab24);
   t143 = t141 + t142;

-  /*  8 */ v[56] = v[40] = -MPC_DCT32_SHIFT(t143);
-  /* 24 */ v[24] = -(v[ 8] = MPC_DCT32_SHIFT((MPC_DCT32_MUL(t141 - t142, costab16) * 2) - t143));
+  /*  8 */ v[VMUL*56-VDIF] = v[VMUL*40-VDIF] = -MPC_DCT32_SHIFT(t143);
+  /* 24 */ v[VMUL*24] = -(v[VMUL* 8] = MPC_DCT32_SHIFT((MPC_DCT32_MUL(t141 - t142, costab16) * 2) - t143));

   t144 = MPC_DCT32_MUL(t73 - t74, costab08);
   t145 = MPC_DCT32_MUL(t75 - t76, costab24);
@@ -352,7 +355,7 @@

   t88  = (t146 * 2) - t77;

-  /*  9 */ v[57] = v[39] = -MPC_DCT32_SHIFT(t88);
+  /*  9 */ v[VMUL*57-VDIF] = v[VMUL*39-VDIF] = -MPC_DCT32_SHIFT(t88);

   t148 = MPC_DCT32_MUL(t78 - t79, costab08);
   t149 = MPC_DCT32_MUL(t80 - t81, costab24);
@@ -360,7 +363,7 @@

   t105 = (t150 * 2) - t82;

-  /* 10 */ v[58] = v[38] = -MPC_DCT32_SHIFT(t105);
+  /* 10 */ v[VMUL*58-VDIF] = v[VMUL*38-VDIF] = -MPC_DCT32_SHIFT(t105);

   t152 = MPC_DCT32_MUL(t83 - t84, costab08);
   t153 = MPC_DCT32_MUL(t85 - t86, costab24);
@@ -370,7 +373,7 @@

   t99  = (t111 * 2) - t88;

-  /* 11 */ v[59] = v[37] = -MPC_DCT32_SHIFT(t99);
+  /* 11 */ v[VMUL*59-VDIF] = v[VMUL*37-VDIF] = -MPC_DCT32_SHIFT(t99);

   t157 = MPC_DCT32_MUL(t89 - t90, costab08);
   t158 = MPC_DCT32_MUL(t91 - t92, costab24);
@@ -378,12 +381,12 @@

   t127 = (t159 * 2) - t93;

-  /* 12 */ v[60] = v[36] = -MPC_DCT32_SHIFT(t127);
+  /* 12 */ v[VMUL*60-VDIF] = v[VMUL*36-VDIF] = -MPC_DCT32_SHIFT(t127);

   t160 = (MPC_DCT32_MUL(t125 - t126, costab16) * 2) - t127;

-  /* 20 */ v[28] = -(v[ 4] = MPC_DCT32_SHIFT(t160));
-  /* 28 */ v[20] = -(v[12] = MPC_DCT32_SHIFT((((MPC_DCT32_MUL(t157 - t158, costab16) * 2) - t159) * 2) - t160));
+  /* 20 */ v[VMUL*28] = -(v[VMUL* 4] = MPC_DCT32_SHIFT(t160));
+  /* 28 */ v[VMUL*20] = -(v[VMUL*12] = MPC_DCT32_SHIFT((((MPC_DCT32_MUL(t157 - t158, costab16) * 2) - t159) * 2) - t160));

   t161 = MPC_DCT32_MUL(t94 - t95, costab08);
   t162 = MPC_DCT32_MUL(t96 - t97, costab24);
@@ -393,7 +396,7 @@

   t112 = (t130 * 2) - t99;

-  /* 13 */ v[61] = v[35] = -MPC_DCT32_SHIFT(t112);
+  /* 13 */ v[VMUL*61-VDIF] = v[VMUL*35-VDIF] = -MPC_DCT32_SHIFT(t112);

   t164 = (MPC_DCT32_MUL(t128 - t129, costab16) * 2) - t130;

@@ -405,22 +408,22 @@

   t120 = (t134 * 2) - t105;

-  /* 14 */ v[62] = v[34] = -MPC_DCT32_SHIFT(t120);
+  /* 14 */ v[VMUL*62-VDIF] = v[VMUL*34-VDIF] = -MPC_DCT32_SHIFT(t120);

   t135 = (MPC_DCT32_MUL(t118 - t119, costab16) * 2) - t120;

-  /* 18 */ v[30] = -(v[ 2] = MPC_DCT32_SHIFT(t135));
+  /* 18 */ v[VMUL*30] = -(v[VMUL* 2] = MPC_DCT32_SHIFT(t135));

   t169 = (MPC_DCT32_MUL(t132 - t133, costab16) * 2) - t134;

   t151 = (t169 * 2) - t135;

-  /* 22 */ v[26] = -(v[ 6] = MPC_DCT32_SHIFT(t151));
+  /* 22 */ v[VMUL*26] = -(v[VMUL* 6] = MPC_DCT32_SHIFT(t151));

   t170 = (((MPC_DCT32_MUL(t148 - t149, costab16) * 2) - t150) * 2) - t151;

-  /* 26 */ v[22] = -(v[10] = MPC_DCT32_SHIFT(t170));
-  /* 30 */ v[18] = -(v[14] = MPC_DCT32_SHIFT((((((MPC_DCT32_MUL(t166 - t167, costab16) * 2) - t168) * 2) - t169) * 2) - t170));
+  /* 26 */ v[VMUL*22] = -(v[VMUL*10] = MPC_DCT32_SHIFT(t170));
+  /* 30 */ v[VMUL*18] = -(v[VMUL*14] = MPC_DCT32_SHIFT((((((MPC_DCT32_MUL(t166 - t167, costab16) * 2) - t168) * 2) - t169) * 2) - t170));

   t171 = MPC_DCT32_MUL(t106 - t107, costab08);
   t172 = MPC_DCT32_MUL(t108 - t109, costab24);
@@ -434,19 +437,19 @@

   t117 = (t123 * 2) - t112;

-  /* 15 */ v[63] = v[33] =-MPC_DCT32_SHIFT(t117);
+  /* 15 */ v[VMUL*63-VDIF] = v[VMUL*33-VDIF] =-MPC_DCT32_SHIFT(t117);

   t124 = (MPC_DCT32_MUL(t115 - t116, costab16) * 2) - t117;

-  /* 17 */ v[31] = -(v[ 1] = MPC_DCT32_SHIFT(t124));
+  /* 17 */ v[VMUL*31] = -(v[VMUL* 1] = MPC_DCT32_SHIFT(t124));

   t131 = (t139 * 2) - t124;

-  /* 19 */ v[29] = -(v[ 3] = MPC_DCT32_SHIFT(t131));
+  /* 19 */ v[VMUL*29] = -(v[VMUL* 3] = MPC_DCT32_SHIFT(t131));

   t140 = (t164 * 2) - t131;

-  /* 21 */ v[27] = -(v[ 5] = MPC_DCT32_SHIFT(t140));
+  /* 21 */ v[VMUL*27] = -(v[VMUL* 5] = MPC_DCT32_SHIFT(t140));

   t174 = (MPC_DCT32_MUL(t136 - t137, costab16) * 2) - t138;

@@ -454,22 +457,22 @@

   t147 = (t155 * 2) - t140;

-  /* 23 */ v[25] = -(v[ 7] = MPC_DCT32_SHIFT(t147));
+  /* 23 */ v[VMUL*25] = -(v[VMUL* 7] = MPC_DCT32_SHIFT(t147));

   t156 = (((MPC_DCT32_MUL(t144 - t145, costab16) * 2) - t146) * 2) - t147;

-  /* 25 */ v[23] = -(v[ 9] = MPC_DCT32_SHIFT(t156));
+  /* 25 */ v[VMUL*23] = -(v[VMUL* 9] = MPC_DCT32_SHIFT(t156));

   t175 = (((MPC_DCT32_MUL(t152 - t153, costab16) * 2) - t154) * 2) - t155;

   t165 = (t175 * 2) - t156;

-  /* 27 */ v[21] = -(v[11] = MPC_DCT32_SHIFT(t165));
+  /* 27 */ v[VMUL*21] = -(v[VMUL*11] = MPC_DCT32_SHIFT(t165));

   t176 = (((((MPC_DCT32_MUL(t161 - t162, costab16) * 2) - t163) * 2) - t164) * 2) - t165;

-  /* 29 */ v[19] = -(v[13] = MPC_DCT32_SHIFT(t176));
-  /* 31 */ v[17] = -(v[15] = MPC_DCT32_SHIFT((((((((MPC_DCT32_MUL(t171 - t172, costab16) * 2) - t173) * 2) - t174) * 2) - t175) * 2) - t176));
+  /* 29 */ v[VMUL*19] = -(v[VMUL*13] = MPC_DCT32_SHIFT(t176));
+  /* 31 */ v[VMUL*17] = -(v[VMUL*15] = MPC_DCT32_SHIFT((((((((MPC_DCT32_MUL(t171 - t172, costab16) * 2) - t173) * 2) - t174) * 2) - t175) * 2) - t176));
 }

 #if defined(CPU_ARM) || defined(CPU_COLDFIRE)
@@ -486,16 +489,16 @@
     mpc_int32_t k;

     // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
-    for ( k = 0; k < 32; k++, D += 16, V++ )
+    for ( k = 0; k < 32; k++, D += 16, V += 102 )
     {
-        *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30)
-              + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30)
-              + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30)
-              + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30)
-              + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30)
-              + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30)
-              + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30)
-              + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30);
+        *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 3],D[ 1],30)
+              + MPC_MULTIPLY_EX(V[ 4],D[ 2],30) + MPC_MULTIPLY_EX(V[ 7],D[ 3],30)
+              + MPC_MULTIPLY_EX(V[ 8],D[ 4],30) + MPC_MULTIPLY_EX(V[11],D[ 5],30)
+              + MPC_MULTIPLY_EX(V[12],D[ 6],30) + MPC_MULTIPLY_EX(V[15],D[ 7],30)
+              + MPC_MULTIPLY_EX(V[16],D[ 8],30) + MPC_MULTIPLY_EX(V[19],D[ 9],30)
+              + MPC_MULTIPLY_EX(V[20],D[10],30) + MPC_MULTIPLY_EX(V[23],D[11],30)
+              + MPC_MULTIPLY_EX(V[24],D[12],30) + MPC_MULTIPLY_EX(V[27],D[13],30)
+              + MPC_MULTIPLY_EX(V[28],D[14],30) + MPC_MULTIPLY_EX(V[31],D[15],30);
         Data += 1;
         // total: 16 muls, 15 adds, 16 shifts
     }
@@ -511,7 +514,7 @@
     {    
         for ( n = 0; n < 36; n++, Y += 32, OutData += 32 ) 
         {
-            V -= 64;
+            V -= 2;
             mpc_dct32(Y, V);
             mpc_decoder_windowing_D( OutData, V, Di_opt );
         }
@@ -525,15 +528,15 @@
     (void)num_channels;

     /********* left channel ********/
-    memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );
+    memmove(d->V_L + 72, d->V_L, 3192 * sizeof(MPC_SAMPLE_FORMAT) );
     mpc_full_synthesis_filter(OutData,
-                              (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
+                              (MPC_SAMPLE_FORMAT *)(d->V_L + 72),
                               (MPC_SAMPLE_FORMAT *)(d->Y_L));

     /******** right channel ********/
-    memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );
+    memmove(d->V_R + 72, d->V_R, 3192 * sizeof(MPC_SAMPLE_FORMAT) );
     mpc_full_synthesis_filter((OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
-                              (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
+                              (MPC_SAMPLE_FORMAT *)(d->V_R + 72),
                               (MPC_SAMPLE_FORMAT *)(d->Y_R));
 }