mpih-mul3.S 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. /* AMD64 submul_1 -- Multiply a limb vector with a limb and subtract
  2. * the result from a second limb vector.
  3. *
  4. * Copyright (C) 1992, 1994, 1998,
  5. * 2001, 2002, 2006 Free Software Foundation, Inc.
  6. *
  7. * This file is part of Libgcrypt.
  8. *
  9. * Libgcrypt is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as
  11. * published by the Free Software Foundation; either version 2.1 of
  12. * the License, or (at your option) any later version.
  13. *
  14. * Libgcrypt is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this program; if not, write to the Free Software
  21. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  22. *
  23. * Note: This code is heavily based on the GNU MP Library.
  24. * Actually it's the same code with only minor changes in the
  25. * way the data is stored; this is to support the abstraction
  26. * of an optional secure memory allocation which may be used
  27. * to avoid revealing of sensitive data due to paging etc.
  28. */
  29. #include "sysdep.h"
  30. #include "asm-syntax.h"
  31. /*******************
  32. * mpi_limb_t
  33. * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (rdi)
  34. * mpi_ptr_t s1_ptr, (rsi)
  35. * mpi_size_t s1_size, (rdx)
  36. * mpi_limb_t s2_limb) (rcx)
  *
  * For each limb index i in 0 .. s1_size-1 this stores
  *   res_ptr[i] = res_ptr[i] - carry - low64(s1_ptr[i] * s2_limb)
  * where carry starts at 0 and, after each limb, becomes
  *   high64(s1_ptr[i] * s2_limb) + (borrows of the two subtractions).
  * The carry left after the last limb is returned in rax.
  *
  * Registers written: rax, rdx, r8, r10, r11; rsi and rdi are advanced
  * to one past the end of their vectors.  No stack is used.
  *
  * The loop checks its counter only after the first iteration, so
  * s1_size == 0 is not handled here.
  37. */
  38. TEXT
  39. GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1)
  40. C_SYMBOL_NAME(_gcry_mpih_submul_1:)
  41. movq %rdx, %r11 /* r11 = s1_size; rdx is overwritten by mulq below */
  42. leaq (%rsi,%r11,8), %rsi /* rsi = s1_ptr + s1_size limbs (one past the end) */
  43. leaq (%rdi,%r11,8), %rdi /* rdi = res_ptr + s1_size limbs (one past the end) */
  44. negq %r11 /* r11 = -s1_size: negative index that counts up to 0 */
  45. xorl %r8d, %r8d /* r8 = 0: carry limb into the first iteration */
  46. ALIGN(3) /* minimal alignment for claimed speed */
  47. .Loop: movq (%rsi,%r11,8), %rax /* rax = current s1 limb */
  48. movq (%rdi,%r11,8), %r10 /* r10 = current res limb */
  49. mulq %rcx /* rdx:rax = s1 limb * s2_limb */
  50. subq %r8, %r10 /* res limb -= carry from previous limb; CF = borrow */
  51. movl $0, %r8d /* r8 = 0 via mov (not xor) so CF is left intact */
  52. adcl %r8d, %r8d /* r8 = CF, i.e. the borrow of the subtraction above */
  53. subq %rax, %r10 /* res limb -= low half of product; CF = borrow */
  54. adcq %rdx, %r8 /* r8 = first borrow + high half of product + CF */
  55. movq %r10, (%rdi,%r11,8) /* store the updated res limb */
  56. incq %r11 /* next limb; sets ZF when the index reaches 0 */
  57. jne .Loop /* repeat until all s1_size limbs are processed */
  58. movq %r8, %rax /* return value = final carry limb */
  59. ret