// poly1305.hpp

  1. #pragma once
  2. #include <nall/arithmetic.hpp>
  3. namespace nall::MAC {
  4. struct Poly1305 {
  5. auto authenticate(array_view<uint8_t> memory, uint256_t nonce) -> uint128_t {
  6. initialize(nonce);
  7. process(memory.data(), memory.size());
  8. return finish();
  9. }
  10. auto initialize(uint256_t key) -> void {
  11. uint64_t t0 = key >> 0;
  12. uint64_t t1 = key >> 64;
  13. pad[0] = key >> 128;
  14. pad[1] = key >> 192;
  15. r[0] = (t0 ) & 0xffc0fffffff;
  16. r[1] = (t0 >> 44 | t1 << 20) & 0xfffffc0ffff;
  17. r[2] = ( t1 >> 24) & 0x00ffffffc0f;
  18. h[0] = 0, h[1] = 0, h[2] = 0;
  19. offset = 0;
  20. }
  21. auto process(const uint8_t* data, uint64_t size) -> void {
  22. while(size--) {
  23. buffer[offset++] = *data++;
  24. if(offset >= 16) {
  25. block();
  26. offset = 0;
  27. }
  28. }
  29. }
  30. auto finish() -> uint128_t {
  31. if(offset) {
  32. buffer[offset++] = 1;
  33. while(offset < 16) buffer[offset++] = 0;
  34. block(true);
  35. }
  36. uint64_t h0 = h[0], h1 = h[1], h2 = h[2];
  37. uint64_t c = h1 >> 44; h1 &= 0xfffffffffff;
  38. h2 += c; c = h2 >> 42; h2 &= 0x3ffffffffff;
  39. h0 += c * 5; c = h0 >> 44; h0 &= 0xfffffffffff;
  40. h1 += c; c = h1 >> 44; h1 &= 0xfffffffffff;
  41. h2 += c; c = h2 >> 42; h2 &= 0x3ffffffffff;
  42. h0 += c * 5; c = h0 >> 44; h0 &= 0xfffffffffff;
  43. h1 += c;
  44. uint64_t g0 = h0 + 5; c = g0 >> 44; g0 &= 0xfffffffffff;
  45. uint64_t g1 = h1 + c; c = g1 >> 44; g1 &= 0xfffffffffff;
  46. uint64_t g2 = h2 + c - (1ull << 42);
  47. c = (g2 >> 63) - 1;
  48. g0 &= c, g1 &= c, g2 &= c;
  49. c = ~c;
  50. h0 = (h0 & c) | g0;
  51. h1 = (h1 & c) | g1;
  52. h2 = (h2 & c) | g2;
  53. uint64_t t0 = pad[0], t1 = pad[1];
  54. h0 += ((t0 ) & 0xfffffffffff) ; c = h0 >> 44; h0 &= 0xfffffffffff;
  55. h1 += ((t0 >> 44 | t1 << 20) & 0xfffffffffff) + c; c = h1 >> 44; h1 &= 0xfffffffffff;
  56. h2 += (( t1 >> 24) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff;
  57. h0 = (h0 >> 0 | h1 << 44);
  58. h1 = (h1 >> 20 | h2 << 24);
  59. r[0] = 0, r[1] = 0, r[2] = 0;
  60. h[0] = 0, h[1] = 0, h[2] = 0;
  61. pad[0] = 0, pad[1] = 0;
  62. memory::fill(buffer, sizeof(buffer));
  63. offset = 0;
  64. return uint128_t(h1) << 64 | h0;
  65. }
  66. private:
  67. auto block(bool last = false) -> void {
  68. uint64_t r0 = r[0], r1 = r[1], r2 = r[2];
  69. uint64_t h0 = h[0], h1 = h[1], h2 = h[2];
  70. uint64_t s1 = r1 * 20;
  71. uint64_t s2 = r2 * 20;
  72. uint64_t t0 = memory::readl<8>(buffer + 0);
  73. uint64_t t1 = memory::readl<8>(buffer + 8);
  74. h0 += ((t0 ) & 0xfffffffffff);
  75. h1 += ((t0 >> 44 | t1 << 20) & 0xfffffffffff);
  76. h2 += (( t1 >> 24) & 0x3ffffffffff) | (last ? 0 : 1ull << 40);
  77. uint128_t d, d0, d1, d2;
  78. d0 = (uint128_t)h0 * r0; d = (uint128_t)h1 * s2; d0 += d; d = (uint128_t)h2 * s1; d0 += d;
  79. d1 = (uint128_t)h0 * r1; d = (uint128_t)h1 * r0; d1 += d; d = (uint128_t)h2 * s2; d1 += d;
  80. d2 = (uint128_t)h0 * r2; d = (uint128_t)h1 * r1; d2 += d; d = (uint128_t)h2 * r0; d2 += d;
  81. uint64_t c = (uint64_t)(d0 >> 44); h0 = (uint64_t)d0 & 0xfffffffffff;
  82. d1 += c; c = (uint64_t)(d1 >> 44); h1 = (uint64_t)d1 & 0xfffffffffff;
  83. d2 += c; c = (uint64_t)(d2 >> 42); h2 = (uint64_t)d2 & 0x3ffffffffff;
  84. h0 += c * 5; c = h0 >> 44; h0 &= 0xfffffffffff;
  85. h1 += c;
  86. h[0] = h0, h[1] = h1, h[2] = h2;
  87. }
  88. uint64_t r[3];
  89. uint64_t h[3];
  90. uint64_t pad[2];
  91. uint8_t buffer[16];
  92. uint offset;
  93. };
  94. }