main.cu 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. #include <cstdint>
  2. #include <memory.h>
  3. #include <cstdio>
  4. #include <ctime>
  5. #include <thread>
  6. #include <vector>
  7. #include <mutex>
  8. #include <chrono>
  9. #include <string>
  10. #define RANDOM_MULTIPLIER 0x5DEECE66DULL
  11. #define RANDOM_ADDEND 0xBULL
  12. #define RANDOM_MASK ((1ULL << 48ULL) - 1ULL)
  13. #define CHUNK_SEED_BOTTOM_4 (CHUNK_SEED & 0xFULL)
  14. #define CHUNK_SEED_BIT_5 ((CHUNK_SEED >> 4ULL) & 1ULL)
  15. #ifndef FLOOR_LEVEL
  16. #define FLOOR_LEVEL 63LL
  17. #endif
  18. #ifndef WANTED_CACTUS_HEIGHT
  19. #define WANTED_CACTUS_HEIGHT 20LL
  20. #endif
  21. #ifndef WORK_UNIT_SIZE
  22. #define WORK_UNIT_SIZE (1ULL << 23ULL)
  23. #endif
  24. #ifndef BLOCK_SIZE
  25. #define BLOCK_SIZE 256ULL
  26. #endif
  27. #ifndef GPU_COUNT
  28. #define GPU_COUNT 1ULL
  29. #endif
  30. #ifndef OFFSET
  31. #define OFFSET 0
  32. #endif
  33. #ifndef END
  34. #define END (1ULL << 48ULL)
  35. #endif
  36. #ifndef CHUNK_SEED
  37. #define CHUNK_SEED 9567961692053ULL
  38. #endif
  39. #ifndef NEIGHBOR1
  40. #define NEIGHBOR1 856ULL
  41. #endif
  42. #ifndef NEIGHBOR2
  43. #define NEIGHBOR2 344ULL
  44. #endif
  45. #ifndef NEIGHBOR3
  46. #define NEIGHBOR3 840ULL
  47. #endif
  48. #ifndef DIAGONAL_INDEX
  49. #define DIAGONAL_INDEX 0ULL
  50. #endif
  51. #ifndef CACTUS_HEIGHT
  52. #define CACTUS_HEIGHT 12ULL
  53. #endif
  54. namespace java_random {
  55. // Random::next(bits)
  56. __device__ inline uint32_t next(uint64_t *random, int32_t bits) {
  57. *random = (*random * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
  58. return (uint32_t) (*random >> (48ULL - bits));
  59. }
  60. __device__ inline int32_t next_int_unknown(uint64_t *seed, int16_t bound) {
  61. if ((bound & -bound) == bound) {
  62. *seed = (*seed * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
  63. return (int32_t) ((bound * (*seed >> 17ULL)) >> 31ULL);
  64. }
  65. int32_t bits, value;
  66. do {
  67. *seed = (*seed * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
  68. bits = *seed >> 17ULL;
  69. value = bits % bound;
  70. } while (bits - value + (bound - 1) < 0);
  71. return value;
  72. }
  73. // Random::nextInt(bound)
  74. __device__ inline uint32_t next_int(uint64_t *random) {
  75. return java_random::next(random, 31) % 3;
  76. }
  77. }
  78. __global__ __launch_bounds__(256, 2) void crack(uint64_t seed_offset, int32_t *num_seeds, uint64_t *seeds) {
  79. uint64_t originalSeed = ((blockIdx.x * blockDim.x + threadIdx.x + seed_offset) << 4ULL) | CHUNK_SEED_BOTTOM_4;
  80. uint64_t seed = originalSeed;
  81. int8_t heightMap[1024];
  82. #pragma unroll
  83. for (int i = 0; i < 1024; i++) {
  84. heightMap[i] = 0;
  85. }
  86. int32_t currentHighestPos = 0, posMap;
  87. int16_t initialPosX, initialPosY, initialPosZ, initialPos;
  88. int16_t posX, posY, posZ;
  89. int16_t i, a, j;
  90. int8_t position = -1;
  91. for (i = -90; i < 0; i += 9) {
  92. // Keep, most threads finish early this way
  93. if (heightMap[currentHighestPos] - WANTED_CACTUS_HEIGHT < i)
  94. return;
  95. initialPosX = java_random::next(&seed, 4) + 8;
  96. initialPosZ = java_random::next(&seed, 4) + 8;
  97. initialPos = initialPosX + initialPosZ * 32;
  98. if (position == -1) {
  99. if (initialPos == NEIGHBOR1) {
  100. position = 0;
  101. } else if (initialPos == NEIGHBOR2) {
  102. position = 1;
  103. } else if (initialPos == NEIGHBOR3) {
  104. position = 2;
  105. }
  106. if (position != -1) {
  107. uint64_t bit = (originalSeed >> 4ULL) & 1ULL;
  108. if (position != DIAGONAL_INDEX) {
  109. if (bit == CHUNK_SEED_BIT_5) return;
  110. } else {
  111. if (bit != CHUNK_SEED_BIT_5) return;
  112. }
  113. heightMap[initialPos] += CACTUS_HEIGHT;
  114. if (heightMap[currentHighestPos] < heightMap[initialPos]) {
  115. currentHighestPos = initialPos;
  116. }
  117. }
  118. }
  119. initialPosY = java_random::next_int_unknown(&seed, ((heightMap[initialPosX + initialPosZ * 32] + 1) + FLOOR_LEVEL) * 2);
  120. for (a = 0; a < 10; a++) {
  121. posX = initialPosX + java_random::next(&seed, 3) - java_random::next(&seed, 3);
  122. posY = initialPosY + java_random::next(&seed, 2) - java_random::next(&seed, 2);
  123. posZ = initialPosZ + java_random::next(&seed, 3) - java_random::next(&seed, 3);
  124. posMap = posX + posZ * 32;
  125. if (position == -1) {
  126. if (posMap == NEIGHBOR1) {
  127. position = 0;
  128. } else if (posMap == NEIGHBOR2) {
  129. position = 1;
  130. } else if (posMap == NEIGHBOR3) {
  131. position = 2;
  132. }
  133. if (position != -1) {
  134. uint64_t bit = (originalSeed >> 4ULL) & 1ULL;
  135. if (position != DIAGONAL_INDEX) {
  136. if (bit == CHUNK_SEED_BIT_5) return;
  137. } else {
  138. if (bit != CHUNK_SEED_BIT_5) return;
  139. }
  140. heightMap[posMap] += CACTUS_HEIGHT;
  141. if (heightMap[currentHighestPos] < heightMap[posMap]) {
  142. currentHighestPos = posMap;
  143. }
  144. }
  145. }
  146. // Keep
  147. if (posY <= heightMap[posMap] + FLOOR_LEVEL)
  148. continue;
  149. for (j = 0; j < 1 + java_random::next_int_unknown(&seed, java_random::next_int(&seed) + 1); j++) {
  150. if ((posY + j - 1) > heightMap[posMap] + FLOOR_LEVEL || posY < 0) continue;
  151. if ((posY + j) <= heightMap[(posX + 1) + posZ * 32] + FLOOR_LEVEL) continue;
  152. if ((posY + j) <= heightMap[posX + (posZ - 1) * 32] + FLOOR_LEVEL) continue;
  153. if ((posY + j) <= heightMap[(posX - 1) + posZ * 32] + FLOOR_LEVEL) continue;
  154. if ((posY + j) <= heightMap[posX + (posZ + 1) * 32] + FLOOR_LEVEL) continue;
  155. heightMap[posMap]++;
  156. if (heightMap[currentHighestPos] < heightMap[posMap]) {
  157. currentHighestPos = posMap;
  158. }
  159. }
  160. }
  161. if (heightMap[currentHighestPos] >= WANTED_CACTUS_HEIGHT) {
  162. uint64_t addend = 0;
  163. if (position == 0)
  164. addend = NEIGHBOR1;
  165. if (position == 1)
  166. addend = NEIGHBOR2;
  167. if (position == 2)
  168. addend = NEIGHBOR3;
  169. seeds[atomicAdd(num_seeds, 1)] = (addend << 48ULL) | originalSeed;
  170. return;
  171. }
  172. }
  173. }
  174. struct GPU_Node {
  175. int* num_seeds;
  176. uint64_t* seeds;
  177. };
  178. void setup_gpu_node(GPU_Node* node, int32_t gpu) {
  179. cudaSetDevice(gpu);
  180. cudaMallocManaged(&node->num_seeds, sizeof(*node->num_seeds));
  181. cudaMallocManaged(&node->seeds, 1ULL << 10ULL); // approx 1kb
  182. }
  183. GPU_Node nodes[GPU_COUNT];
  184. uint64_t offset = OFFSET;
  185. uint64_t count = 0;
  186. std::mutex info_lock;
  187. void gpu_manager(int32_t gpu_index) {
  188. std::string fileName = "kaktoos_seeds" + std::to_string(gpu_index) + ".txt";
  189. FILE *out_file = fopen(fileName.c_str(), "a");
  190. cudaSetDevice(gpu_index);
  191. while (offset < END) {
  192. *nodes[gpu_index].num_seeds = 0;
  193. crack<<<WORK_UNIT_SIZE / BLOCK_SIZE, BLOCK_SIZE, 0>>> (offset, nodes[gpu_index].num_seeds, nodes[gpu_index].seeds);
  194. info_lock.lock();
  195. offset += WORK_UNIT_SIZE;
  196. info_lock.unlock();
  197. cudaDeviceSynchronize();
  198. for (int32_t i = 0, e = *nodes[gpu_index].num_seeds; i < e; i++) {
  199. fprintf(out_file, "%lld\n", (long long int)nodes[gpu_index].seeds[i]);
  200. printf("Found seed: %lld\n", (long long int)nodes[gpu_index].seeds[i]);
  201. }
  202. fflush(out_file);
  203. info_lock.lock();
  204. count += *nodes[gpu_index].num_seeds;
  205. info_lock.unlock();
  206. }
  207. fclose(out_file);
  208. }
  209. int main() {
  210. printf("Searching %ld total seeds...\n", END - OFFSET);
  211. std::thread threads[GPU_COUNT];
  212. time_t startTime = time(nullptr), currentTime;
  213. for(int32_t i = 0; i < GPU_COUNT; i++) {
  214. setup_gpu_node(&nodes[i], i);
  215. threads[i] = std::thread(gpu_manager, i);
  216. }
  217. using namespace std::chrono_literals;
  218. while (offset < END) {
  219. time(&currentTime);
  220. int timeElapsed = (int)(currentTime - startTime);
  221. double speed = (double)(offset - OFFSET) / (double)timeElapsed / 1000000.0;
  222. printf("Searched %lld seeds, offset: %lld found %lld matches. Time elapsed: %ds. Speed: %.2fm seeds/s. %f%%\n",
  223. (long long int)(offset - OFFSET),
  224. (long long int)offset,
  225. (long long int)count,
  226. timeElapsed,
  227. speed,
  228. (double)(offset - OFFSET) / (END - OFFSET) * 100);
  229. std::this_thread::sleep_for(0.5s);
  230. }
  231. for (auto &thread : threads) {
  232. thread.join();
  233. }
  234. printf("Done!\n");
  235. printf("But, verily, it be the nature of dreams to end.\n");
  236. }