1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
   for dividing a 30-bit value by a 15-bit value, with two operations per
   iteration by storing quotient and remainder together and adding the previous
   quotient bit during trial subtraction. Modified to work with any dividend
   and divisor both less than 1 << 30, and skipping trials by calculating bits
   in output.

   Macro arguments (all registers):
     dividend   in:  numerator
     divisor    in:  divisor; destroyed (left-aligned, then negated)
     result     tmp: packed remainder:quotient working value
     bits       tmp: number of quotient bits being produced
     curbit     tmp: number of unrolled trial steps to skip
     quotient   out: dividend / divisor
     remainder  out: dividend % divisor
   quotient may be the same register as dividend, and remainder the same
   register as divisor. Flags are clobbered. A zero divisor branches to a
   local label 20 that must be defined after the point of use.
*/
.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
    mov     \bits, #1
    /* Shift the divisor left in steps of 16/8/4/2/1 for as long as it still
       fits under the dividend, counting one extra quotient bit per position
       shifted. */
    cmp     \divisor, \dividend, lsr #16
    movls   \divisor, \divisor, lsl #16
    addls   \bits, \bits, #16
    cmp     \divisor, \dividend, lsr #8
    movls   \divisor, \divisor, lsl #8
    addls   \bits, \bits, #8
    cmp     \divisor, \dividend, lsr #4
    movls   \divisor, \divisor, lsl #4
    addls   \bits, \bits, #4
    cmp     \divisor, \dividend, lsr #2
    movls   \divisor, \divisor, lsl #2
    addls   \bits, \bits, #2
    cmp     \divisor, \dividend, lsr #1
    movls   \divisor, \divisor, lsl #1
    addls   \bits, \bits, #1
    /* Negate the divisor so each trial subtraction becomes an add whose
       carry-out is the quotient bit. A zero result means the divisor was
       zero. */
    rsbs    \divisor, \divisor, #0
    beq     20f
    /* First trial: subtract, and undo it if there was no carry. */
    adds    \result, \dividend, \divisor
    subcc   \result, \result, \divisor
    /* Jump into the unrolled trials so that only (bits - 1) of the 30 steps
       run. In ARM state pc reads as this instruction + 8, which is the first
       trial after the nop, and every trial is two instructions (8 bytes). */
    rsb     \curbit, \bits, #31
    add     pc, pc, \curbit, lsl #3
    nop
    .rept 30
    /* Shift in the previous quotient bit and trial-subtract; restore the
       remainder part when the subtraction did not fit. */
    adcs    \result, \divisor, \result, lsl #1
    subcc   \result, \result, \divisor
    .endr
    /* shift remainder/quotient left one, add final quotient bit */
    adc     \result, \result, \result
    /* result now holds the remainder above the low \bits quotient bits. */
    mov     \remainder, \result, lsr \bits
    /* Strip the remainder bits to leave the quotient. The mask has to be
       built from \remainder: \dividend may alias \quotient and holds the
       original numerator, not the remainder. */
    eor     \quotient, \result, \remainder, lsl \bits
.endm
#ifdef USE_IRAM
    .section .icode,"ax",%progbits
#else
    .text
#endif
    .align
    .global udiv32_arm
    .type udiv32_arm,%function

/* udiv32_arm: unsigned 32-bit division with remainder.
     In:       r0 = numerator, r1 = divisor
     Out:      r0 = quotient, r1 = remainder
     Clobbers: r2, r3, ip, flags
   A zero divisor calls __div0; if that returns, r0 is 0 and r1 is not
   defined. */
udiv32_arm:
    tst     r0, r0
    /* High bit must be unset, otherwise shift numerator right, calculate,
       and correct results. As this case is very uncommon we want to avoid
       any other delays on the main path in handling it, so the long divide
       calls the short divide as a function. */
    bmi     10f
udiv31_arm:
    ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
    bx      lr
10:
    /* store original numerator and divisor, we'll need them to correct the
       result. */
    /* NOTE(review): these are stored below sp without writeback, so they
       are only safe if nothing else writes below sp on this stack before
       the ldmdb - confirm for the target. */
    stmdb   sp, { r0, r1, lr }
    /* Call __div0 here if divisor is zero, otherwise it would report the wrong
       address.
    */
    cmp     r1, #0
    beq     20f
    /* Divide (numerator >> 1) so the value fits the 31-bit routine. */
    mov     r0, r0, lsr #1
    bl      udiv31_arm
    ldmdb   sp, { r2, r3, lr }
    /* r0 = q', r1 = r' for numerator >> 1; r2 = numerator, r3 = divisor.
       Carry <- the numerator bit that was shifted out. */
    movs    r2, r2, lsr #1
    /* Candidate remainder = 2 * r' + dropped bit. */
    adc     r1, r1, r1
    /* One more trial subtraction; carry set means the divisor fitted. */
    subs    r1, r1, r3
    /* Quotient = 2 * q' + final quotient bit. */
    adc     r0, r0, r0
    /* The divisor did not fit: undo the subtraction to restore the remainder. */
    addcc   r1, r1, r3
    bx      lr
20:
    /* Divide by zero: lr still holds the caller's return address here. */
    stmdb   sp!, { lr }
    bl      __div0
    /* Do not run off the end of the function if __div0 returns. */
    mov     r0, #0
    ldmia   sp!, { lr }
    bx      lr
    .size udiv32_arm, . - udiv32_arm
