1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
   for dividing a 30-bit value by a 15-bit value, with two operations per
   iteration by storing quotient and remainder together and adding the previous
   quotient bit during trial subtraction. Modified to work with any dividend
   and divisor both less than 1 << 30, and skipping trials by calculating bits
   in output.

   NOTE(review): udiv32_arm only rejects dividends with bit 31 set before
   expanding this body, so dividends up to (1 << 31) - 1 reach it; the stated
   1 << 30 bound looks conservative - confirm.

   Parameters (all are registers):
     dividend  - in:  numerator. Only read, so it may be the same register as
                      quotient.
     divisor   - in:  denominator. Destroyed: shifted left to line up with the
                      dividend and then negated. May be the same register as
                      remainder.
     result    - tmp: packed (remainder << bits) | quotient while dividing.
     bits      - tmp: number of quotient bits that will be produced (>= 1).
     curbit    - tmp: number of unrolled steps to skip (31 - bits).
     quotient  - out: dividend / divisor.
     remainder - out: dividend % divisor. Written before the quotient is
                      extracted and read again by that extraction, so it must
                      not be the same register as result or bits.

   A zero divisor branches forward to numeric label 20, which must exist after
   the point of expansion. Condition flags are clobbered. The computed jump
   below assumes 4-byte (ARM state) instructions.
*/
.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder

    mov     \bits, #1
    /* Shift the divisor left (by 16, 8, 4, 2, 1) for as long as it stays no
       larger than the correspondingly right-shifted dividend, counting the
       total shift in bits. A zero divisor passes every comparison and stays
       zero. */
    cmp     \divisor, \dividend, lsr #16
    movls   \divisor, \divisor, lsl #16
    addls   \bits, \bits, #16
    cmp     \divisor, \dividend, lsr #8
    movls   \divisor, \divisor, lsl #8
    addls   \bits, \bits, #8
    cmp     \divisor, \dividend, lsr #4
    movls   \divisor, \divisor, lsl #4
    addls   \bits, \bits, #4
    cmp     \divisor, \dividend, lsr #2
    movls   \divisor, \divisor, lsl #2
    addls   \bits, \bits, #2
    cmp     \divisor, \dividend, lsr #1
    movls   \divisor, \divisor, lsl #1
    addls   \bits, \bits, #1
    /* Negate the aligned divisor so that each trial subtraction is an add
       whose carry-out is the quotient bit. Z is set only for a zero divisor. */
    rsbs    \divisor, \divisor, #0
    beq     20f
    /* First trial subtraction; undo it if it borrowed (carry clear). */
    adds    \result, \dividend, \divisor
    subcc   \result, \result, \divisor
    /* Skip the unrolled steps that are not needed so that exactly bits - 1 of
       them run. pc reads as this instruction + 8 and every step is two
       instructions (8 bytes), hence the lsl #3; the nop fills the slot at + 4
       so that an offset of zero lands on the first step. */
    rsb     \curbit, \bits, #31
    add     pc, pc, \curbit, lsl #3
    nop
    .rept   30
    adcs    \result, \divisor, \result, lsl #1
    subcc   \result, \result, \divisor
    .endr
    /* shift remainder/quotient left one, add final quotient bit */
    adc     \result, \result, \result
    /* result is now (remainder << bits) | quotient: the high part is the
       remainder, and cancelling that part leaves the low bits, the quotient. */
    mov     \remainder, \result, lsr \bits
    eor     \quotient, \result, \remainder, lsl \bits
.endm

#ifdef USE_IRAM
    .section    .icode,"ax",%progbits
#else
    .text
#endif
    .align
    .global udiv32_arm
    .type   udiv32_arm,%function

/* udiv32_arm: unsigned 32-bit division.
     In:       r0 = numerator, r1 = divisor
     Out:      r0 = quotient output and r1 = remainder output of
               ARM_DIV_31_BODY, corrected on the slow path when the numerator
               has bit 31 set
     Clobbers: r2, r3, ip, flags
     Stack:    the slow path parks r0, r1 and lr in the three words just below
               sp without moving sp.
               NOTE(review): this relies on nothing else writing below sp while
               udiv31_arm runs - confirm for the target's interrupt model.
     A zero divisor ends up at label 20, which calls __div0.
*/
udiv32_arm:
    tst     r0, r0
    /* High bit must be unset, otherwise shift numerator right, calculate,
       and correct results. As this case is very uncommon we want to avoid
       any other delays on the main path in handling it, so the long divide
       calls the short divide as a function. */
    bmi     10f
udiv31_arm:
    ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
    bx      lr

10:
    /* store original numerator and divisor, we'll need them to correct the
       result, */
    stmdb   sp, { r0, r1, lr }
    /* Call __div0 here if divisor is zero, otherwise it would report the wrong
       address. The divisor is r1; r0 always has bit 31 set on this path.
    */
    cmp     r1, #0
    beq     20f
    /* Divide numerator / 2 so that the 31-bit routine can handle it. */
    mov     r0, r0, lsr #1
    bl      udiv31_arm
    /* r2 = original numerator, r3 = original divisor, lr = our return address
       (udiv31_arm overwrote r1 and the bl overwrote lr). */
    ldmdb   sp, { r2, r3, lr }
    /* Move the low bit of the original numerator into carry. */
    movs    r2, r2, lsr #1
    /* remainder = remainder * 2 + that low bit */
    adc     r1, r1, r1
    /* Trial-subtract the divisor; carry is set if it did not borrow. */
    subs    r1, r1, r3
    /* quotient = quotient * 2 + carry (adc leaves the flags untouched) */
    adc     r0, r0, r0
    /* The trial subtraction borrowed: put the divisor back. */
    addcc   r1, r1, r3
    bx      lr
20:
    /* Division by zero. lr is pushed before the call; presumably __div0 takes
       the address to report from the top of the stack - confirm against
       __div0. Nothing follows the bl, so __div0 is assumed not to return. */
    stmdb sp!, { lr }
    bl      __div0
    .size udiv32_arm, . - udiv32_arm