123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418 |
- /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
- *
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * IP/TCP/UDP checksumming routines
- *
- * Authors: Jorge Cwik, <jorge@laser.satlink.net>
- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- * Tom May, <ftom@netcom.com>
- * Pentium Pro/II routines:
- * Alexander Kjeldaas <astor@guardian.no>
- * Finn Arne Gangstad <finnag@guardian.no>
- * Lots of code moved from tcp.c and ip.c; see those files
- * for more names.
- *
- * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
- * handling.
- * Andi Kleen, add zeroing on error
- * converted to pure assembler
- *
- * SuperH version: Copyright (C) 1999 Niibe Yutaka
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
- #include <asm/errno.h>
- #include <linux/linkage.h>
- /*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
- /*
- * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
- */
- .text
- ENTRY(csum_partial)
- /*
- * In:  r4 = buf, r5 = len (bytes), r6 = sum (incoming partial checksum)
- * Out: r0 = updated 32-bit partial checksum
- * Scratch: r0-r3, r7 and the T bit; r4, r5 and r6 are modified.
- *
- * buf may have any alignment. It is first advanced to a 4-byte
- * boundary (one byte if the address is odd, then one halfword if
- * still needed), then summed 32 bytes per iteration, then 4 bytes per
- * iteration, and finally the last 0-3 bytes are added.
- *
- * Carries out of bit 31 are added back into the sum (end-around carry).
- * Inside the loops the carry lives in T; because dt overwrites T, it is
- * parked in r0 with movt and restored by cmp/eq #1, r0 in the delay slot.
- *
- * When buf is odd, the sum is rotated left by 8 bits after the leading
- * byte has been added and rotated left by 8 bits again just before
- * returning, so the bytes end up in the lanes they would occupy had the
- * data been summed from its true start.
- */
- mov r4, r0
- tst #3, r0 ! Check alignment.
- bt/s 2f ! Jump if alignment is ok.
- mov r4, r7 ! (delay slot) keep the original address for the check at 9:
- !
- tst #1, r0 ! Check alignment.
- bt 21f ! Jump if alignment is boundary of 2bytes.
- ! buf is odd
- tst r5, r5
- add #-1, r5
- bt 10f ! len == 0: nothing was summed, so return sum unrotated
- mov.b @r4+, r0
- extu.b r0, r0
- addc r0, r6 ! t=0 from previous tst
- mov #0, r0
- addc r0, r6 ! fold the carry of the byte add back in before rotating
- mov r6, r0 ! r6 = (r6 << 8) | (r6 >> 24): rotate left by 8
- shll8 r6
- shlr16 r0
- shlr8 r0
- or r0, r6
- mov r4, r0
- tst #2, r0
- bt 2f ! now 4-byte aligned
- 21:
- ! buf is 2 byte aligned (len could be 0)
- add #-2, r5 ! Alignment uses up two bytes.
- cmp/pz r5 !
- bt/s 1f ! Jump if we had at least two bytes.
- clrt ! (delay slot) start the next addc with no carry
- bra 6f
- add #2, r5 ! r5 was < 2. Deal with it.
- 1:
- mov.w @r4+, r0
- extu.w r0, r0
- addc r0, r6
- bf 2f
- add #1, r6 ! carry out: add it back
- 2:
- ! buf is 4 byte aligned (len could be 0)
- mov r5, r1
- mov #-5, r0
- shld r0, r1 ! r1 = len >> 5 = number of 32-byte chunks
- tst r1, r1
- bt/s 4f ! if it's =0, go to 4f
- clrt
- .align 2
- 3:
- ! 32 bytes per iteration; loads are interleaved with the adds
- mov.l @r4+, r0
- mov.l @r4+, r2
- mov.l @r4+, r3
- addc r0, r6
- mov.l @r4+, r0
- addc r2, r6
- mov.l @r4+, r2
- addc r3, r6
- mov.l @r4+, r3
- addc r0, r6
- mov.l @r4+, r0
- addc r2, r6
- mov.l @r4+, r2
- addc r3, r6
- addc r0, r6
- addc r2, r6
- movt r0 ! park the carry in r0, dt overwrites T
- dt r1
- bf/s 3b
- cmp/eq #1, r0 ! (delay slot) T = parked carry
- ! here, we know r1==0
- addc r1, r6 ! add carry to r6
- 4:
- mov r5, r0
- and #0x1c, r0 ! bytes left that form whole 32-bit words (0..28)
- tst r0, r0
- bt 6f
- ! 4 bytes or more remaining
- mov r0, r1
- shlr2 r1 ! r1 = number of words
- mov #0, r2
- 5:
- ! each pass adds the word loaded by the previous pass (0 the first time)
- addc r2, r6
- mov.l @r4+, r2
- movt r0
- dt r1
- bf/s 5b
- cmp/eq #1, r0
- addc r2, r6 ! add the last word loaded
- addc r1, r6 ! r1==0 here, so it means add carry-bit
- 6:
- ! 3 bytes or less remaining
- mov #3, r0
- and r0, r5
- tst r5, r5
- bt 9f ! if it's =0 go to 9f
- mov #2, r1
- cmp/hs r1, r5
- bf 7f ! exactly one byte left
- mov.w @r4+, r0
- extu.w r0, r0
- cmp/eq r1, r5
- bt/s 8f ! exactly two bytes left
- clrt
- shll16 r0 ! three bytes left: halfword goes to the upper half
- addc r0, r6
- 7:
- mov.b @r4+, r0
- extu.b r0, r0
- #ifndef __LITTLE_ENDIAN__
- shll8 r0
- #endif
- 8:
- addc r0, r6
- mov #0, r0
- addc r0, r6 ! add the final carry
- 9:
- ! Check if the buffer was misaligned, if so realign sum
- mov r7, r0
- tst #1, r0
- bt 10f
- mov r6, r0 ! rotate left by 8 again, see the header comment
- shll8 r6
- shlr16 r0
- shlr8 r0
- or r0, r6
- 10:
- rts
- mov r6, r0 ! (delay slot) return value
- /*
- unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
- int sum, int *src_err_ptr, int *dst_err_ptr)
- */
- /*
- * Copy from src to dst while checksumming, otherwise like csum_partial
- *
- * The macros SRC and DST specify the type of access for the instruction.
- * thus we can call a custom exception handler for all access types.
- *
- * FIXME: could someone double-check whether I haven't mixed up some SRC and
- * DST definitions? It's damn hard to trigger all cases. I hope I got
- * them all but there's no guarantee.
- */
- #define SRC(...) /* emit one source-side access, labelled 9999, plus an __ex_table entry pairing it with fixup label 6001 */ \
- 9999: __VA_ARGS__ ; \
- .section __ex_table, "a"; \
- .long 9999b, 6001f ; \
- .previous
- #define DST(...) /* emit one destination-side access, labelled 9999, plus an __ex_table entry pairing it with fixup label 6002 */ \
- 9999: __VA_ARGS__ ; \
- .section __ex_table, "a"; \
- .long 9999b, 6002f ; \
- .previous
- !
- ! r4: const char *SRC
- ! r5: char *DST
- ! r6: int LEN
- ! r7: int SUM
- !
- ! on stack:
- ! int *SRC_ERR_PTR
- ! int *DST_ERR_PTR
- !
- ENTRY(csum_partial_copy_generic) /* copies len (r6) bytes from src (r4) to dst (r5) while adding them into sum (r7); the sum is returned in r0 */
- mov.l r5,@-r15 /* save dst for the fixup code; it ends up at @(4,r15) */
- mov.l r6,@-r15 /* save len for the fixup code; it ends up at @r15 */
- mov #3,r0 ! Check src and dest are equally aligned
- mov r4,r1
- and r0,r1
- and r5,r0
- cmp/eq r1,r0
- bf 3f ! Different alignments, use slow version
- tst #1,r0 ! Check dest word aligned
- bf 3f ! If not, do it the slow way
- mov #2,r0
- tst r0,r5 ! Check dest alignment.
- bt 2f ! Jump if alignment is ok.
- add #-2,r6 ! Alignment uses up two bytes.
- cmp/pz r6 ! Jump if we had at least two bytes.
- bt/s 1f
- clrt /* delay slot: the addc at 1: starts with no carry */
- add #2,r6 ! r6 was < 2. Deal with it.
- bra 4f
- mov r6,r2 /* delay slot: r2 = len, which the tail code at 4: reads */
- 3: ! Handle different src and dest alignments.
- ! This is not common, so simple byte by byte copy will do.
- mov r6,r2 /* r2 = len, kept for the odd-length test after the loop */
- shlr r6 /* r6 = len / 2 = number of byte pairs */
- tst r6,r6
- bt 4f /* no complete pair: go straight to the tail */
- clrt
- .align 2
- 5: /* slow loop: copy two bytes per pass and add them as one 16-bit value */
- SRC( mov.b @r4+,r1 )
- SRC( mov.b @r4+,r0 )
- extu.b r1,r1
- DST( mov.b r1,@r5 )
- DST( mov.b r0,@(1,r5) )
- extu.b r0,r0
- add #2,r5
- #ifdef __LITTLE_ENDIAN__
- shll8 r0
- #else
- shll8 r1
- #endif
- or r1,r0 /* r0 = the two bytes combined in memory order for this endianness */
- addc r0,r7
- movt r0 /* park the carry in r0; dt below overwrites T */
- dt r6
- bf/s 5b
- cmp/eq #1,r0 /* delay slot: T = parked carry */
- mov #0,r0
- addc r0, r7 /* add the last carry into the sum */
- mov r2, r0
- tst #1, r0
- bt 7f /* even len: done */
- bra 5f /* odd len: the next 5: label copies the final byte */
- clrt /* delay slot: no carry pending */
- ! src and dest equally aligned, but to a two byte boundary.
- ! Handle first two bytes as a special case
- .align 2
- 1:
- SRC( mov.w @r4+,r0 )
- DST( mov.w r0,@r5 )
- add #2,r5
- extu.w r0,r0
- addc r0,r7
- mov #0,r0
- addc r0,r7 /* add the carry of the halfword add */
- 2: /* src and dst are both 4-byte aligned from here on */
- mov r6,r2 /* r2 = bytes still to copy; r6 is reused as a loop counter */
- mov #-5,r0
- shld r0,r6 /* r6 = r6 >> 5 = number of 32-byte chunks */
- tst r6,r6
- bt/s 2f /* no full chunk: skip the unrolled loop */
- clrt
- .align 2
- 1: /* unrolled loop: 32 bytes per pass, carry chained through T by addc */
- SRC( mov.l @r4+,r0 )
- SRC( mov.l @r4+,r1 )
- addc r0,r7
- DST( mov.l r0,@r5 )
- DST( mov.l r1,@(4,r5) )
- addc r1,r7
- SRC( mov.l @r4+,r0 )
- SRC( mov.l @r4+,r1 )
- addc r0,r7
- DST( mov.l r0,@(8,r5) )
- DST( mov.l r1,@(12,r5) )
- addc r1,r7
- SRC( mov.l @r4+,r0 )
- SRC( mov.l @r4+,r1 )
- addc r0,r7
- DST( mov.l r0,@(16,r5) )
- DST( mov.l r1,@(20,r5) )
- addc r1,r7
- SRC( mov.l @r4+,r0 )
- SRC( mov.l @r4+,r1 )
- addc r0,r7
- DST( mov.l r0,@(24,r5) )
- DST( mov.l r1,@(28,r5) )
- addc r1,r7
- add #32,r5
- movt r0 /* park the carry across dt */
- dt r6
- bf/s 1b
- cmp/eq #1,r0 /* delay slot: T = parked carry */
- mov #0,r0
- addc r0,r7 /* add the last carry into the sum */
- 2: mov r2,r6
- mov #0x1c,r0
- and r0,r6 /* r6 = remaining bytes that form whole 32-bit words */
- cmp/pl r6
- bf/s 4f /* none: go to the 0-3 byte tail */
- clrt
- shlr2 r6 /* r6 = number of words */
- 3: /* one 32-bit word per pass */
- SRC( mov.l @r4+,r0 )
- addc r0,r7
- DST( mov.l r0,@r5 )
- add #4,r5
- movt r0
- dt r6
- bf/s 3b
- cmp/eq #1,r0
- mov #0,r0
- addc r0,r7
- 4: mov r2,r6
- mov #3,r0
- and r0,r6 /* r6 = trailing byte count (0-3) */
- cmp/pl r6
- bf 7f /* nothing left */
- mov #2,r1
- cmp/hs r1,r6
- bf 5f /* exactly one byte left */
- SRC( mov.w @r4+,r0 )
- DST( mov.w r0,@r5 )
- extu.w r0,r0
- add #2,r5
- cmp/eq r1,r6
- bt/s 6f /* exactly two bytes left */
- clrt
- shll16 r0 /* three bytes left: the halfword is added in the upper half */
- addc r0,r7
- 5:
- SRC( mov.b @r4+,r0 )
- DST( mov.b r0,@r5 )
- extu.b r0,r0
- #ifndef __LITTLE_ENDIAN__
- shll8 r0
- #endif
- 6: addc r0,r7
- mov #0,r0
- addc r0,r7 /* add the final carry */
- 7:
- 5000: /* common exit; both fixup paths jump back here through the 8000/8001 literals */
- # Exception handler:
- .section .fixup, "ax"
- 6001: /* fixup label named by the SRC() table entries */
- mov.l @(8,r15),r0 ! src_err_ptr
- mov #-EFAULT,r1
- mov.l r1,@r0
- ! zero the complete destination - computing the rest
- ! is too much work
- mov.l @(4,r15),r5 ! dst
- mov.l @r15,r6 ! len
- mov #0,r7 /* r7 is both the zero byte stored below and the returned sum */
- 1: mov.b r7,@r5
- dt r6
- bf/s 1b
- add #1,r5 /* delay slot: advance dst */
- mov.l 8000f,r0
- jmp @r0
- nop
- .align 2
- 8000: .long 5000b
- 6002: /* fixup label named by the DST() table entries; r7 is left as is */
- mov.l @(12,r15),r0 ! dst_err_ptr
- mov #-EFAULT,r1
- mov.l r1,@r0
- mov.l 8001f,r0
- jmp @r0
- nop
- .align 2
- 8001: .long 5000b
- .previous
- add #8,r15 /* drop the saved dst and len */
- rts
- mov r7,r0 /* delay slot: return the sum */
|