1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S
index c4aea14..e16c623 100644
--- a/apps/codecs/lib/udiv32_armv4.S
+++ b/apps/codecs/lib/udiv32_armv4.S
@@ -85,15 +85,41 @@
     .global udiv32_arm
     .type   udiv32_arm,%function

+.set recip_max, 4096
+
 udiv32_arm:
+#ifdef USE_IRAM
+    cmp     r1, #recip_max      /* divisor above recip_max: no table entry */
+    bhi     .L_udiv
+    subs    r2, r1, #3          /* r2 = table index; table starts at divisor 3 */
+    bcc     .L_udiv_tiny        /* divisor is 0, 1 or 2 */
+    adr     r3, .L_udiv_recip_table
+    ldr     r2, [r3, r2, lsl #2] /* r2 = 32-bit table word for this divisor */
+    umull   ip, r3, r2, r0      /* r3 = high word of r2 * r0: quotient estimate */
+    mul     r2, r3, r1
+    sub     r0, r0, r2          /* r0 = numerator - estimate * divisor */
+    cmp     r0, r1
+    movcc   r0, r3              /* r0 < divisor: return the estimate */
+    bxcc    lr
+    addcs   r0, r3, #1          /* otherwise return estimate + 1 */
+    bx      lr
+.L_udiv_tiny:
+    cmp     r1, #1
+    bcc     .L_div0             /* divisor == 0 */
+    movne   r0, r0, lsr #1      /* divisor == 2; divisor == 1 returns r0 unchanged */
+    bx      lr
+#endif
+.L_udiv:
     /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
        and add the next bit of the result. The correction code at .L_udiv32
        does not need the divisor inverted, but can be modified to work with it,
        and this allows the zero divisor test to be done early and without an
        explicit comparison. */
     rsbs    r1, r1, #0
-    beq     .L_div0
+#ifndef USE_IRAM
+    beq     .L_div0             /* Z is from rsbs (divisor == 0); test it before tst overwrites the flags */
+#endif
     tst     r0, r0
     /* High bit must be unset, otherwise shift numerator right, calculate,
        and correct results. As this case is very uncommon we want to avoid
        any other delays on the main path in handling it, so the long divide
@@ -125,10 +151,32 @@ udiv32_arm:
 .L_div0:
     /* __div0 expects the calling address on the top of the stack */
     stmdb sp!, { lr }
+    mov     r0, #0              /* NOTE(review): presumably a defined result/argument for __div0 - confirm */
 #if defined(__ARM_EABI__) || !defined(USE_IRAM)
     bl      __div0
 #else
-    mov     lr, pc
-    bx      r3
+    ldr     pc, [pc, #-4]       /* pc reads as this instruction + 8, so this loads the .word below; lr is not changed */
+    .word   __div0              /* absolute address of __div0 for the load above */
+#endif
+#ifdef USE_IRAM
+.L_udiv_recip_table:            /* one word per divisor 3..recip_max: floor(2^32 / div) */
+    .set div, 3
+    .rept recip_max - 2
+    .set q, 0x40000000 / div    /* q = floor(2^30 / div) */
+    .set r, (0x40000000 - (q * div))<<1 /* remainder of that division, doubled for the next bit */
+    .set q, q << 1
+    .if r >= div                /* long-division step: q becomes floor(2^31 / div) */
+    .set q, q + 1
+    .set r, r - div
+    .endif
+    .set r, r << 1
+    .set q, q << 1
+    .if r >= div                /* second step: q becomes floor(2^32 / div) */
+    .set q, q + 1
+    .set r, r - div
+    .endif
+    .word q
+    .set div, div+1
+    .endr
 #endif
     .size udiv32_arm, . - udiv32_arm