/*
 * linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 * Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
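
/*
 * This file is meant to be #included by a wrapper (in the kernel tree,
 * csumpartialcopy.S and csumpartialcopyuser.S), which supplies the
 * FN_ENTRY/FN_EXIT, save_regs/load_regs and load1b/load2b/load1l/
 * load2l/load4l macros for its particular source access method.
 * get_byte_N, put_byte_N, pull and push are the endian-neutral byte
 * extraction and shift helpers from <asm/assembler.h>.
 */
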
src             .req    r0
dst             .req    r1
len             .req    r2
sum             .req    r3
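
                /* Zero bytes to copy: just return the checksum accumulated so far. */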
.Lzero:         mov     r0, sum
                load_regs

                /*
                 * Align an unaligned destination pointer.  We know that
                 * we have >= 8 bytes here, so we don't need to check
                 * the length.  Note that the source pointer hasn't been
                 * aligned yet.
                 */
.Ldst_unaligned:
                tst     dst, #1
                beq     .Ldst_16bit

                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
                moveq   pc, lr                          @ dst is now 32bit aligned
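
                /*
                 * dst is 16-bit but not 32-bit aligned: copy one halfword
                 * byte by byte, folding both bytes into the checksum.
                 */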
.Ldst_16bit:    load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
                mov     pc, lr                          @ dst is now 32bit aligned

                /*
                 * Handle 0 to 7 bytes, with any alignment of source and
                 * destination pointers.  Note that when we get here, C = 0
                 */
.Lless8:        teq     len, #0                         @ check for zero count
                beq     .Lzero

                /* we must have at least one byte. */
                tst     dst, #1                         @ dst 16-bit aligned
                beq     .Lless8_aligned

                /* Align dst */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     len, #6
                beq     .Lless8_byteonly
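
                /* Copy two bytes per iteration until fewer than two remain. */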
1:              load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
.Lless8_aligned:
                tst     len, #6
                bne     1b
.Lless8_byteonly:
                tst     len, #1
                beq     .Ldone
                load1b  r8
                adcs    sum, sum, r8, put_byte_0        @ update checksum
                strb    r8, [dst], #1
                b       .Ldone
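
/*
 * Main entry point.  FN_ENTRY is supplied by the including wrapper and
 * expands to the ENTRY() of the function actually being built (for
 * example csum_partial_copy_nocheck, or the user-space copying variant).
 */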
FN_ENTRY
                save_regs

                cmp     len, #8                         @ Ensure that we have at least
                blo     .Lless8                         @ 8 bytes to copy.

                adds    sum, sum, #0                    @ C = 0
                tst     dst, #3                         @ Test destination alignment
                blne    .Ldst_unaligned                 @ align destination, return here

                /*
                 * Ok, the dst pointer is now 32bit aligned, and we know
                 * that we must have more than 4 bytes to copy.  Note
                 * that C contains the carry from the dst alignment above.
                 */
                tst     src, #3                         @ Test source alignment
                bne     .Lsrc_not_aligned

                /* Routine for src & dst aligned */

                bics    ip, len, #15
                beq     2f
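
                /*
                 * Main loop: copy and checksum 16 bytes per iteration.
                 * adcs folds the carry of each addition back into the
                 * ones'-complement sum; tst/teq leave C untouched.
                 */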
1:              load4l  r4, r5, r6, r7
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
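
                /* Copy the remaining whole words (0, 4, 8 or 12 bytes). */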
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r4, r5
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                tst     ip, #4
                beq     4f

3:              load1l  r4
                str     r4, [dst], #4
                adcs    sum, sum, r4
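
                /*
                 * Fewer than four bytes remain: fetch one last word and
                 * store only the 1-3 valid bytes of it.
                 */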
4:              ands    len, len, #3
                beq     .Ldone
                load1l  r4
                tst     len, #2
                mov     r5, r4, get_byte_0
                beq     .Lexit
                adcs    sum, sum, r4, push #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
.Lexit:         tst     len, #1
                strneb  r5, [dst], #1
                andne   r5, r5, #255
                adcnes  sum, sum, r5, put_byte_0

                /*
                 * If the dst pointer was not 16-bit aligned, we
                 * need to rotate the checksum here to get around
                 * the inefficient byte manipulations in the
                 * architecture independent code.
                 */
.Ldone:         adc     r0, sum, #0
                ldr     sum, [sp, #0]                   @ dst
                tst     sum, #1
                movne   r0, r0, ror #8
                load_regs
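
                /*
                 * src is not 32-bit aligned.  Round src down to a word
                 * boundary and read whole words; each destination word is
                 * then assembled from the tail of one source word and the
                 * head of the next using the pull/push shifts.  Three
                 * copies of the code follow, one per misalignment offset.
                 */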
.Lsrc_not_aligned:
                adc     sum, sum, #0                    @ include C from dst alignment
                and     ip, src, #3
                bic     src, src, #3
                load1l  r5
                cmp     ip, #2
                beq     .Lsrc2_aligned
                bhi     .Lsrc3_aligned

                mov     r4, r5, pull #8                 @ C = 0
                bics    ip, len, #15
                beq     2f
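
                /*
                 * src offset 1: each output word takes three bytes from
                 * one source word and one byte from the next
                 * (pull #8 / push #24).
                 */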
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #24
                mov     r5, r5, pull #8
                orr     r5, r5, r6, push #24
                mov     r6, r6, pull #8
                orr     r6, r6, r7, push #24
                mov     r7, r7, pull #8
                orr     r7, r7, r8, push #24
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #8
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b

2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #24
                mov     r5, r5, pull #8
                orr     r5, r5, r6, push #24
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #8
                tst     ip, #4
                beq     4f

3:              load1l  r5
                orr     r4, r4, r5, push #24
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #8

4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4, push #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
                b       .Lexit
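
                /*
                 * src offset 2: same structure, two bytes from each of
                 * two adjacent source words (pull #16 / push #16).
                 */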
.Lsrc2_aligned: mov     r4, r5, pull #16
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f

1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #16
                mov     r5, r5, pull #16
                orr     r5, r5, r6, push #16
                mov     r6, r6, pull #16
                orr     r6, r6, r7, push #16
                mov     r7, r7, pull #16
                orr     r7, r7, r8, push #16
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #16
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b

2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #16
                mov     r5, r5, pull #16
                orr     r5, r5, r6, push #16
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #16
                tst     ip, #4
                beq     4f

3:              load1l  r5
                orr     r4, r4, r5, push #16
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #16

4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                tst     len, #1
                beq     .Ldone
                load1b  r5
                b       .Lexit
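
                /*
                 * src offset 3: one byte from the current source word and
                 * three from the next (pull #24 / push #8).
                 */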
.Lsrc3_aligned: mov     r4, r5, pull #24
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f

1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #8
                mov     r5, r5, pull #24
                orr     r5, r5, r6, push #8
                mov     r6, r6, pull #24
                orr     r6, r6, r7, push #8
                mov     r7, r7, pull #24
                orr     r7, r7, r8, push #8
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #24
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b

2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #8
                mov     r5, r5, pull #24
                orr     r5, r5, r6, push #8
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #24
                tst     ip, #4
                beq     4f

3:              load1l  r5
                orr     r4, r4, r5, push #8
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #24
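
                /*
                 * Only one leftover byte is held in r4 at this point, so
                 * a two or three byte tail needs one more word from src.
                 */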
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                strb    r5, [dst], #1
                adcs    sum, sum, r4
                load1l  r4
                mov     r5, r4, get_byte_0
                strb    r5, [dst], #1
                adcs    sum, sum, r4, push #24
                mov     r5, r4, get_byte_1
                b       .Lexit

FN_EXIT