/* File: strlen.S */

/*
 * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
 *
 * Finds length of a 0-terminated string.  Optimized for the
 * Alpha architecture:
 *
 *	- memory accessed as aligned quadwords only
 *	- uses cmpbge to compare 8 bytes in parallel
 *	- does binary search to find 0 byte in last
 *	  quadword (HAKMEM needed 12 instructions to
 *	  do this instead of the 9 instructions that
 *	  binary search needs).
 */
  14. .set noreorder
  15. .set noat
  16. .align 3
  17. .globl strlen
  18. .ent strlen
  19. strlen:
  20. ldq_u $1, 0($16) # load first quadword ($16 may be misaligned)
  21. lda $2, -1($31)
  22. insqh $2, $16, $2
  23. andnot $16, 7, $0
  24. or $2, $1, $1
  25. cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0
  26. bne $2, found
  27. loop: ldq $1, 8($0)
  28. addq $0, 8, $0 # addr += 8
  29. nop # helps dual issue last two insns
  30. cmpbge $31, $1, $2
  31. beq $2, loop
  32. found: blbs $2, done # make aligned case fast
  33. negq $2, $3
  34. and $2, $3, $2
  35. and $2, 0x0f, $1
  36. addq $0, 4, $3
  37. cmoveq $1, $3, $0
  38. and $2, 0x33, $1
  39. addq $0, 2, $3
  40. cmoveq $1, $3, $0
  41. and $2, 0x55, $1
  42. addq $0, 1, $3
  43. cmoveq $1, $3, $0
  44. done: subq $0, $16, $0
  45. ret $31, ($26)
  46. .end strlen