u-boot/arch/arm/lib/lib1funcs.S

430 lines
8.6 KiB
ArmAsm
Raw Normal View History

/*
* linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
*
* Author: Nicolas Pitre <nico@fluxnic.net>
* - contributed to gcc-3.4 on Sep 30, 2003
* - adapted for the Linux kernel on Oct 2, 2003
*/
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
* SPDX-License-Identifier: GPL-2.0+
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
* U-Boot compatibility bit, define empty UNWIND() macro as, since we
* do not support stack unwinding and define CONFIG_AEABI to make all
* of the functions available without diverging from Linux code.
*/
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif
.macro ARM_DIV_BODY dividend, divisor, result, curbit
#if __LINUX_ARM_ARCH__ >= 5
clz \curbit, \divisor
clz \result, \dividend
sub \result, \curbit, \result
mov \curbit, #1
mov \divisor, \divisor, lsl \result
mov \curbit, \curbit, lsl \result
mov \result, #0
#else
@ Initially shift the divisor left 3 bits if possible,
@ set curbit accordingly. This allows for curbit to be located
@ at the left end of each 4 bit nibbles in the division loop
@ to save one loop in most cases.
tst \divisor, #0xe0000000
moveq \divisor, \divisor, lsl #3
moveq \curbit, #8
movne \curbit, #1
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
movlo \curbit, \curbit, lsl #4
blo 1b
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
movlo \curbit, \curbit, lsl #1
blo 1b
mov \result, #0
#endif
@ Division loop
1: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
orrhs \result, \result, \curbit
cmp \dividend, \divisor, lsr #1
subhs \dividend, \dividend, \divisor, lsr #1
orrhs \result, \result, \curbit, lsr #1
cmp \dividend, \divisor, lsr #2
subhs \dividend, \dividend, \divisor, lsr #2
orrhs \result, \result, \curbit, lsr #2
cmp \dividend, \divisor, lsr #3
subhs \dividend, \dividend, \divisor, lsr #3
orrhs \result, \result, \curbit, lsr #3
cmp \dividend, #0 @ Early termination?
movsne \curbit, \curbit, lsr #4 @ No, any more bits to do?
movne \divisor, \divisor, lsr #4
bne 1b
.endm
.macro ARM_DIV2_ORDER divisor, order
#if __LINUX_ARM_ARCH__ >= 5
clz \order, \divisor
rsb \order, \order, #31
#else
cmp \divisor, #(1 << 16)
movhs \divisor, \divisor, lsr #16
movhs \order, #16
movlo \order, #0
cmp \divisor, #(1 << 8)
movhs \divisor, \divisor, lsr #8
addhs \order, \order, #8
cmp \divisor, #(1 << 4)
movhs \divisor, \divisor, lsr #4
addhs \order, \order, #4
cmp \divisor, #(1 << 2)
addhi \order, \order, #3
addls \order, \order, \divisor, lsr #1
#endif
.endm
.macro ARM_MOD_BODY dividend, divisor, order, spare
#if __LINUX_ARM_ARCH__ >= 5
clz \order, \divisor
clz \spare, \dividend
sub \order, \order, \spare
mov \divisor, \divisor, lsl \order
#else
mov \order, #0
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
addlo \order, \order, #4
blo 1b
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
addlo \order, \order, #1
blo 1b
#endif
@ Perform all needed subtractions to keep only the reminder.
@ Do comparisons in batch of 4 first.
subs \order, \order, #3 @ yes, 3 is intended here
blt 2f
1: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
cmp \dividend, \divisor, lsr #1
subhs \dividend, \dividend, \divisor, lsr #1
cmp \dividend, \divisor, lsr #2
subhs \dividend, \dividend, \divisor, lsr #2
cmp \dividend, \divisor, lsr #3
subhs \dividend, \dividend, \divisor, lsr #3
cmp \dividend, #1
mov \divisor, \divisor, lsr #4
subsge \order, \order, #4
bge 1b
tst \order, #3
teqne \dividend, #0
beq 5f
@ Either 1, 2 or 3 comparison/subtractions are left.
2: cmn \order, #2
blt 4f
beq 3f
cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
3: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
4: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
5:
.endm
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)
subs r2, r1, #1
reteq lr
bcc Ldiv0
cmp r0, r1
bls 11f
tst r1, r2
beq 12f
ARM_DIV_BODY r0, r1, r2, r3
mov r0, r2
ret lr
11: moveq r0, #1
movne r0, #0
ret lr
12: ARM_DIV2_ORDER r1, r2
mov r0, r0, lsr r2
ret lr
UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)
subs r2, r1, #1 @ compare divisor with 1
bcc Ldiv0
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
retls lr
ARM_MOD_BODY r0, r1, r2, r3
ret lr
UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)
cmp r1, #0
eor ip, r0, r1 @ save the sign of the result.
beq Ldiv0
rsbmi r1, r1, #0 @ loops below use unsigned.
subs r2, r1, #1 @ division by 1 or -1 ?
beq 10f
movs r3, r0
rsbmi r3, r0, #0 @ positive dividend value
cmp r3, r1
bls 11f
tst r1, r2 @ divisor is power of 2 ?
beq 12f
ARM_DIV_BODY r3, r1, r0, r2
cmp ip, #0
rsbmi r0, r0, #0
ret lr
10: teq ip, r0 @ same sign ?
rsbmi r0, r0, #0
ret lr
11: movlo r0, #0
moveq r0, ip, asr #31
orreq r0, r0, #1
ret lr
12: ARM_DIV2_ORDER r1, r2
cmp ip, #0
mov r0, r3, lsr r2
rsbmi r0, r0, #0
ret lr
UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)
cmp r1, #0
beq Ldiv0
rsbmi r1, r1, #0 @ loops below use unsigned.
movs ip, r0 @ preserve sign of dividend
rsbmi r0, r0, #0 @ if negative make positive
subs r2, r1, #1 @ compare divisor with 1
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
bls 10f
ARM_MOD_BODY r0, r1, r2, r3
10: cmp ip, #0
rsbmi r0, r0, #0
ret lr
UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection
#ifdef CONFIG_AEABI
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr} )
stmfd sp!, {r0, r1, ip, lr}
bl __aeabi_uidiv
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr} )
stmfd sp!, {r0, r1, ip, lr}
bl __aeabi_idiv
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection
#endif
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
str lr, [sp, #-8]!
bl __div0
mov r0, #0 @ About as wrong as it could be.
ldr pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection
/* Thumb-1 specialities */
#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
push {r1}
mov r1, lr
lsrs r1, r1, #1
lsls r1, r1, #1
ldrsb r1, [r1, r0]
lsls r1, r1, #1
add lr, lr, r1
pop {r1}
bx lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
push {r1}
mov r1, lr
lsrs r1, r1, #1
lsls r1, r1, #1
ldrb r1, [r1, r0]
lsls r1, r1, #1
add lr, lr, r1
pop {r1}
bx lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
push {r0, r1}
mov r1, lr
lsrs r1, r1, #1
lsls r0, r0, #1
lsls r1, r1, #1
ldrsh r1, [r1, r0]
lsls r1, r1, #1
add lr, lr, r1
pop {r0, r1}
bx lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
push {r0, r1}
mov r1, lr
lsrs r1, r1, #1
lsls r0, r0, #1
lsls r1, r1, #1
ldrh r1, [r1, r0]
lsls r1, r1, #1
add lr, lr, r1
pop {r0, r1}
bx lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
#endif