x86.S 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. /*
  2. * Written by Solar Designer <solar at openwall.com> in 1998-2010.
  3. * No copyright is claimed, and the software is hereby placed in the public
  4. * domain. In case this attempt to disclaim copyright and place the software
  5. * in the public domain is deemed null and void, then the software is
  6. * Copyright (c) 1998-2010 Solar Designer and it is hereby released to the
  7. * general public under the following terms:
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted.
  11. *
  12. * There's ABSOLUTELY NO WARRANTY, express or implied.
  13. *
  14. * See crypt_blowfish.c for more information.
  15. */
  16. #ifdef __i386__
  /*
   * Per-platform assembler conventions.
   *
   *   UNDERSCORES - the exported symbol gets one extra leading underscore
   *                 (_BF_body_r is renamed to __BF_body_r below).
   *   ALIGN_LOG   - DO_ALIGN() passes its argument to .align unchanged
   *                 instead of turning it into a byte count; presumably
   *                 these assemblers read .align's operand as a power of
   *                 two.
   *
   * Non-ELF OpenBSD and DJGPP get both; Cygwin and MinGW get UNDERSCORES
   * only.
   */
#if (defined(__OpenBSD__) && !defined(__ELF__)) || defined(__DJGPP__)
#define UNDERSCORES
#define ALIGN_LOG
#elif defined(__CYGWIN32__) || defined(__MINGW32__)
#define UNDERSCORES
#endif

#ifdef UNDERSCORES
#define _BF_body_r			__BF_body_r
#endif
  /*
   * DO_ALIGN(log): emit an .align directive for a 2^log boundary.
   *
   *   ALIGN_LOG defined - the exponent is passed through as-is.
   *   DUMBAS defined    - the byte count is written as "1 << log" with no
   *                       parentheses at all (presumably for assemblers with
   *                       a limited expression parser).
   *   otherwise         - the byte count is written fully parenthesised.
   */
#if defined(ALIGN_LOG)
#define DO_ALIGN(log)			.align (log)
#elif defined(DUMBAS)
#define DO_ALIGN(log)			.align 1 << log
#else
#define DO_ALIGN(log)			.align (1 << (log))
#endif
  /*
   * Addressing macros.  Everything is addressed relative to `ctx`, which is
   * the stack pointer itself.
   *
   *   BF_FRAME  - byte offset from ctx to the start of the table data.
   *   BF_ptr    - the dword stored at (ctx).
   *   S(N, r)   - memory operand at ctx + BF_FRAME + N + 4*r, i.e. 32-bit
   *               entry r of the table that starts N bytes into the data.
   *   P(N)      - memory operand at ctx + BF_FRAME + 0x1000 + 4*N, i.e.
   *               32-bit entry N of the array that follows 0x1000 bytes of
   *               tables.
   *
   * N is pasted into the expression without parentheses, so pass plain
   * numeric literals only.  The DUMBAS variant of P() spells 4*N as
   * N+N+N+N and therefore avoids the `*` operator.
   */
#define BF_FRAME			0x200
#define ctx				%esp

#define BF_ptr				(ctx)

#define S(N, r)				N+BF_FRAME(ctx,r,4)
#ifndef DUMBAS
#define P(N)				0x1000+4*N+BF_FRAME(ctx)
#else
#define P(N)				0x1000+N+N+N+N+BF_FRAME(ctx)
#endif
  /*
   * This version of the assembly code is optimized primarily for the original
   * Intel Pentium but is also careful to avoid partial register stalls on the
   * Pentium Pro family of processors (tested up to Pentium III Coppermine).
   *
   * It is possible to run 15% faster on the Pentium Pro family and probably on
   * many non-Intel x86 processors, but, unfortunately, that would make things
   * twice as slow for the original Pentium.
   *
   * An additional 2% speedup may be achieved with non-reentrant code.
   */
  /*
   * Register allocation.
   *
   *   L, R          - the two 32-bit halves of the block being encrypted.
   *   tmp1 .. tmp5  - scratch registers used by BF_ROUND.
   *   *_lo / *_hi   - byte views of a scratch register: bits 7-0 and
   *                   bits 15-8 respectively.
   *
   * Together with ctx (%esp) this uses every general-purpose register.
   */
#define L				%esi
#define R				%edi

#define tmp1				%eax
#define tmp1_lo				%al

#define tmp2				%ecx
#define tmp2_hi				%ch

#define tmp3				%edx
#define tmp3_lo				%dl

#define tmp4				%ebx
#define tmp4_hi				%bh

#define tmp5				%ebp

	.text
  /*
   * BF_ROUND(L, R, N): one Blowfish round.
   *
   *   L ^= P[N];  R ^= ((S0[a] + S1[b]) ^ S2[c]) + S3[d]
   *
   * where a, b, c, d are the bytes of the updated L from most to least
   * significant, and S0..S3 are the tables at S(0), S(0x400), S(0x800) and
   * S(0xC00).
   *
   * On entry:  tmp2 holds P[N]; the upper 24 bits of tmp3 are zero (only
   *            its low byte is written here, then tmp3 is used as an index).
   * On exit:   tmp2 holds P[N+1], loaded from 4+P(N), ready for the next
   *            round.
   * Clobbers:  tmp1, tmp4, tmp5, the low byte of tmp3, and the flags.
   *
   * The L and R parameters shadow the file-level L/R register aliases, so
   * each invocation decides which register plays which role.
   *
   * tmp1 is zeroed before its low byte is written so that the full register
   * can be used as a table index.  The instruction order is unchanged from
   * the original, which is presumably hand-scheduled (see the optimisation
   * notes in this file); do not reorder without measuring.
   */
#define BF_ROUND(L, R, N) \
	xorl	L,tmp2; \
	xorl	tmp1,tmp1; \
	movl	tmp2,L; \
	shrl	$16,tmp2; \
	movl	L,tmp4; \
	movb	tmp2_hi,tmp1_lo; \
	andl	$0xFF,tmp2; \
	movb	tmp4_hi,tmp3_lo; \
	andl	$0xFF,tmp4; \
	movl	S(0,tmp1),tmp1; \
	movl	S(0x400,tmp2),tmp5; \
	addl	tmp5,tmp1; \
	movl	S(0x800,tmp3),tmp5; \
	xorl	tmp5,tmp1; \
	movl	S(0xC00,tmp4),tmp5; \
	addl	tmp1,tmp5; \
	movl	4+P(N),tmp2; \
	xorl	tmp5,R
  /*
   * BF_ENCRYPT_START / BF_ENCRYPT_END: encrypt the block held in L:R.
   *
   * The encryption is split in two so that a caller can place an
   * instruction of its own between the halves.
   *
   * BF_ENCRYPT_START
   *   On entry:  tmp2 holds P[0]; the upper 24 bits of tmp3 are zero.
   *   Runs rounds 0..15, swapping the roles of L and R on every round.
   *   After round 15 tmp2 holds P[16]; then:
   *     tmp5 = BF_ptr   (the dword at (ctx))
   *     tmp2 ^= L
   *     L    = P[17]
   *
   * BF_ENCRYPT_END
   *     L ^= R          (L = P[17] ^ R)
   *     R  = tmp2       (R = P[16] ^ previous L)
   *
   * After BF_ENCRYPT_END, L and R hold the two output words and tmp5 holds
   * whatever the caller did with the BF_ptr value in between.
   */
#define BF_ENCRYPT_START \
	BF_ROUND(L, R, 0); \
	BF_ROUND(R, L, 1); \
	BF_ROUND(L, R, 2); \
	BF_ROUND(R, L, 3); \
	BF_ROUND(L, R, 4); \
	BF_ROUND(R, L, 5); \
	BF_ROUND(L, R, 6); \
	BF_ROUND(R, L, 7); \
	BF_ROUND(L, R, 8); \
	BF_ROUND(R, L, 9); \
	BF_ROUND(L, R, 10); \
	BF_ROUND(R, L, 11); \
	BF_ROUND(L, R, 12); \
	BF_ROUND(R, L, 13); \
	BF_ROUND(L, R, 14); \
	BF_ROUND(R, L, 15); \
	movl	BF_ptr,tmp5; \
	xorl	L,tmp2; \
	movl	P(17),L

#define BF_ENCRYPT_END \
	xorl	R,L; \
	movl	tmp2,R
  /*
   * _BF_body_r -- overwrite the P-array and the S-box tables of a Blowfish
   * context with the output of repeated encryptions.
   *
   * Calling convention: i386, one pointer argument on the stack at 4(%esp).
   * The C prototype lives outside this file (see crypt_blowfish.c);
   * presumably the argument is the context prepared there.
   *   Saved/restored: %ebp, %ebx, %esi, %edi
   *   Clobbered:      %eax, %ecx, %edx, flags
   *   Return value:   none is set
   *
   * Data layout implied by S()/P(), relative to the argument:
   *   +0x0000 .. +0x0FFF   four 0x400-byte S-box tables
   *   +0x1000 .. +0x1047   P-array, 18 dwords
   *
   * Stack handling: %esp is repointed to (argument - BF_FRAME) so that it
   * doubles as the base register `ctx` for S()/P().  Two dwords sit there:
   *   0(ctx) = BF_ptr, the current output pointer
   *   4(ctx) = the %esp value to restore before returning
   * The function refuses to run (BF_die) if the relocated stack pointer
   * would end up above the current one, i.e. if the argument lies more than
   * BF_FRAME-8 bytes above %esp after the four register pushes.
   *
   * Algorithm: L:R starts at zero.  Each encryption's output is stored at
   * BF_ptr and is also the input of the next encryption.  BF_loop_P fills
   * the P-array 8 bytes at a time; BF_loop_S then fills the S-boxes,
   * unrolled four encryptions (32 bytes) per iteration.
   *
   * On ELF targets the symbol is marked as a function and given a size so
   * that linkers, debuggers and profilers see a properly typed symbol.
   */
	DO_ALIGN(5)
	.globl	_BF_body_r
#ifdef __ELF__
	.type	_BF_body_r,@function
#endif
_BF_body_r:
	movl	4(%esp),%eax		/* %eax = argument */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$BF_FRAME-8,%eax	/* candidate new stack pointer */
	xorl	L,L
	cmpl	%esp,%eax
	ja	BF_die			/* unsigned: new %esp above current one */
	xchgl	%eax,%esp		/* switch stacks; %eax = old %esp */
	xorl	R,R
	pushl	%eax			/* becomes 4(ctx): %esp to restore */
	/* ctx is still 4 bytes short of its final value, hence the -4. */
	leal	0x1000+BF_FRAME-4(ctx),%eax
	movl	0x1000+BF_FRAME-4(ctx),tmp2	/* tmp2 = P[0] */
	pushl	%eax			/* becomes BF_ptr = &P[0] */
	xorl	tmp3,tmp3		/* BF_ROUND needs tmp3's upper bits clear */

BF_loop_P:
	BF_ENCRYPT_START
	addl	$8,tmp5			/* advance the output pointer */
	BF_ENCRYPT_END
	leal	0x1000+18*4+BF_FRAME(ctx),tmp1	/* end of the P-array */
	movl	tmp5,BF_ptr
	cmpl	tmp5,tmp1		/* flags survive the movl's below */
	movl	L,-8(tmp5)
	movl	R,-4(tmp5)
	/* Reload P[0] after the stores: the first pass overwrites it. */
	movl	P(0),tmp2
	ja	BF_loop_P		/* loop while the end is above the pointer */

	leal	BF_FRAME(ctx),tmp5	/* start of the S-box tables */
	xorl	tmp3,tmp3
	movl	tmp5,BF_ptr

BF_loop_S:
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl	P(0),tmp2
	movl	L,(tmp5)
	movl	R,4(tmp5)
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl	P(0),tmp2
	movl	L,8(tmp5)
	movl	R,12(tmp5)
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl	P(0),tmp2
	movl	L,16(tmp5)
	movl	R,20(tmp5)
	BF_ENCRYPT_START
	addl	$32,tmp5		/* four results per iteration */
	BF_ENCRYPT_END
	leal	0x1000+BF_FRAME(ctx),tmp1	/* end of the S-box tables */
	movl	tmp5,BF_ptr
	cmpl	tmp5,tmp1		/* flags survive the movl's below */
	movl	P(0),tmp2
	movl	L,-8(tmp5)
	movl	R,-4(tmp5)
	ja	BF_loop_S

	movl	4(%esp),%esp		/* back to the caller's stack */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret

BF_die:
/* Oops, need to re-compile with a larger BF_FRAME. */
/*
 * hlt is a privileged instruction, so in user mode this is expected to
 * fault rather than halt; the jmp retries if execution ever continues.
 */
	hlt
	jmp	BF_die
#ifdef __ELF__
	.size	_BF_body_r,.-_BF_body_r
#endif
  181. #endif
  /*
   * On Linux/ELF, emit an empty .note.GNU-stack section: the GNU toolchain
   * convention for declaring that this object does not need an executable
   * stack.  This sits after the __i386__ guard has been closed, so it is
   * emitted regardless of the target architecture.
   */
#ifdef __ELF__
#ifdef __linux__
	.section	.note.GNU-stack,"",@progbits
#endif
#endif