/*
 * Calculate the 32-bit product (modulo 2^32) of unsigned 16-bit op16 and
 * unsigned 32-bit op32 (SH inline assembly).
 *
 * op16: multiplier; only its low 16 bits take part, because mulu is a
 *       16x16-bit unsigned multiply.
 * op32: multiplicand, processed as two 16-bit halves.
 *
 * Returns op16.lo * op32.lo + ((op16.lo * op32.hi) << 16).
 */
static inline uint32_t mul_u16_u32(uint32_t op16, uint32_t op32)
{
    unsigned t1, r;
    asm (
        "swap.w %[b], %[t1]\n\t"    /* t1 = op32 with its 16-bit halves swapped */
        "mulu %[a], %[t1]\n\t"      /* macl = op16.lo * op32.hi */
        "sts macl, %[t1]\n\t"       /* t1 = high partial product */
        "mulu %[a], %[b]\n\t"       /* macl = op16.lo * op32.lo */
        "sts macl, %[r]\n\t"        /* r = low partial product */
        "shll16 %[t1]\n\t"          /* move high partial product into place */
        "add %[t1], %[r]\n\t"       /* r = low + (high << 16) */
        /* r is first written after the last read of the inputs, so it may
         * share a register with one of them; t1 is written first and must
         * therefore be early-clobber. */
        : [r] "=r" (r),
          [t1] "=&r" (t1)
        : [a] "r" (op16),
          [b] "r" (op32)
        /* mulu overwrites MACL; tell the compiler so that it never assumes
         * a value it keeps in MACL survives this statement. */
        : "macl"
    );
    return r;
}

/*
 * Calculate the 32-bit product (modulo 2^32) of signed op16 and unsigned
 * 32-bit op32, where op16 is known to have only 16 significant bits, i.e.
 * it is a sign-extended 16-bit value (SH inline assembly).
 *
 * op16: signed multiplier; bits 16..31 are expected to be copies of the
 *       sign bit.
 * op32: multiplicand. It is declared int32_t, but only mulu (unsigned
 *       multiply) and bitwise operations touch it, so its bit pattern is
 *       treated as unsigned.
 *
 * The unsigned product op16.lo * op32 is too large by (op32 << 16) when op16
 * is negative. The correction term is obtained without a branch: the upper
 * half of op16 is all ones exactly when op16 is negative, so
 * (swap(op32) & op16) >> 16 yields op32.lo in that case and 0 otherwise.
 */
static inline uint32_t mul_s16_u32(int32_t op16, int32_t op32)
{
    unsigned t1, t2, r;
    asm (
        "swap.w %[b], %[t1]\n\t"    /* t1 = op32 with its 16-bit halves swapped */
        "mulu %[a], %[t1]\n\t"      /* macl = op16.lo * op32.hi */
        "and %[a], %[t1]\n\t"       /* upper half: op32.lo & sign mask of op16 */
        "mov %[t1], %[t2]\n\t"      /* keep the masked value for the correction */
        "sts macl, %[t1]\n\t"       /* t1 = high partial product */
        "mulu %[a], %[b]\n\t"       /* macl = op16.lo * op32.lo */
        "shlr16 %[t2]\n\t"          /* t2 = (op16 < 0) ? op32.lo : 0 */
        "sts macl, %[r]\n\t"        /* r = low partial product */
        "sub %[t2], %[t1]\n\t"      /* apply the sign correction */
        "shll16 %[t1]\n\t"          /* move corrected high part into place */
        "add %[t1], %[r]\n\t"       /* r = low + (corrected high << 16) */
        /* t1 and t2 are both written before the final read of the inputs
         * (the second mulu), so both must be early-clobber to keep them out
         * of the input registers. r is first written after that read and
         * may share a register with an input. */
        : [r] "=r" (r),
          [t1] "=&r" (t1),
          [t2] "=&r" (t2)
        : [a] "r" (op16),
          [b] "r" (op32)
        /* mulu overwrites MACL; tell the compiler so that it never assumes
         * a value it keeps in MACL survives this statement. */
        : "macl"
    );
    return r;
}