main.cu 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. #include <cstdint>
  2. #include <memory.h>
  3. #include <cstdio>
  4. #include <ctime>
  5. #include <thread>
  6. #include <vector>
  7. #include <mutex>
  8. #include <chrono>
  9. #include <string>
  10. #define RANDOM_MULTIPLIER 0x5DEECE66DULL
  11. #define RANDOM_ADDEND 0xBULL
  12. #define RANDOM_MASK ((1ULL << 48ULL) - 1ULL)
  13. #define CHUNK_SEED_BOTTOM_4 (CHUNK_SEED & 0xFULL)
  14. #define CHUNK_SEED_BIT_5 ((CHUNK_SEED >> 4ULL) & 1ULL)
  15. #ifndef FLOOR_LEVEL
  16. #define FLOOR_LEVEL 63LL
  17. #endif
  18. #ifndef WANTED_CACTUS_HEIGHT
  19. #define WANTED_CACTUS_HEIGHT 18LL
  20. #endif
  21. #ifndef WORK_UNIT_SIZE
  22. #define WORK_UNIT_SIZE (1ULL << 23ULL)
  23. #endif
  24. #ifndef BLOCK_SIZE
  25. #define BLOCK_SIZE 256ULL
  26. #endif
  27. #ifndef GPU_COUNT
  28. #define GPU_COUNT 1ULL
  29. #endif
  30. #ifndef OFFSET
  31. #define OFFSET 0
  32. #endif
  33. #ifndef END
  34. #define END (1ULL << 44ULL)
  35. #endif
  36. #ifndef CHUNK_SEED
  37. #define CHUNK_SEED 9567961692053ULL
  38. #endif
  39. #ifndef NEIGHBOR1
  40. #define NEIGHBOR1 856ULL
  41. #endif
  42. #ifndef NEIGHBOR2
  43. #define NEIGHBOR2 344ULL
  44. #endif
  45. #ifndef NEIGHBOR3
  46. #define NEIGHBOR3 840ULL
  47. #endif
  48. #ifndef DIAGONAL_INDEX
  49. #define DIAGONAL_INDEX 0ULL
  50. #endif
  51. #ifndef CACTUS_HEIGHT
  52. #define CACTUS_HEIGHT 12ULL
  53. #endif
  54. #ifdef DEBUG
  55. // Magic values, don't touch
  56. __device__ int64_t DEBUG_ARR[]{
  57. 0, 18, 90, 20, 9, 308, -1, 79, 16, 78, 11, 368, -1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 76,
  58. 10, 344, 1, 0, 12, 344, 12, 3, 12, 0, 0, 0, 0, 13, 13, 13, 13, 0, 0, 0, 0, 14, 14, 14, 14, 0, 0, 0, 0, 15, 15,
  59. 15, 17, 80, 11, 369, 0, 1, 0, 0, 0, 0, 0, 16, 76, 3, 112, 0, 1, 0, 0, 0, 0, 0, 21, 78, 11, 373, 0, 2, 0, 0, 0,
  60. 0, 0, 0, 0, 0, 0, 0, 20, 82, 10, 340, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 79, 10, 344, 15, 3, 15, 0, 0, 0,
  61. 0, 16, 16, 16, 16, 0, 0, 0, 0, 17, 17, 17, 17, 0, 0, 0, 0, 18, 18, 18, 17, 80, 6, 209, 0, 1, 0, 0, 0, 0, 0, 22,
  62. 81, 10, 342, 0, 1, 0, 0, 0, 0, 0, 22, 77, 8, 278, 0, 1, 0, 0, 0, 0, 0, 18, 344, 96827469838241317
  63. };
  64. #ifdef EXTRACT
  65. #define ASSERT(k, val) printf("%d %lld\n", k++, (long long)val)
  66. #else
  67. #define ASSERT(k, val) if (DEBUG_ARR[k++] != val) printf("Error at %d, expected %lld, got %lld\n", __LINE__, (long long)DEBUG_ARR[k - 1], (long long)val)
  68. #endif
  69. #else
  70. #define ASSERT(k, val)
  71. #endif
  72. /*inline __device__ int8_t extract(int8_t heightMap[], int32_t id) {
  73. return (*((int16_t*)(heightMap + ((id * 6U) >> 3U))) >> ((id * 6U) & 0b111U)) & 0b111111U;
  74. }
  75. inline __device__ void increase(int8_t heightMap[], int32_t id, int8_t val) {
  76. *((int16_t*)(heightMap + ((id * 6) >> 3U))) += val << ((id * 6) & 0b111U);
  77. }*/
  78. namespace java_random {
  79. // Random::next(bits)
  80. __device__ inline uint32_t next(uint64_t *random, int32_t bits) {
  81. *random = (*random * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
  82. return (uint32_t) (*random >> (48ULL - bits));
  83. }
  84. __device__ inline int32_t next_int_unknown(uint64_t *seed, int16_t bound) {
  85. if ((bound & -bound) == bound) {
  86. *seed = (*seed * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
  87. return (int32_t) ((bound * (*seed >> 17ULL)) >> 31ULL);
  88. }
  89. int32_t bits, value;
  90. do {
  91. *seed = (*seed * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
  92. bits = *seed >> 17ULL;
  93. value = bits % bound;
  94. } while (bits - value + (bound - 1) < 0);
  95. return value;
  96. }
  97. // Random::nextInt(bound)
  98. __device__ inline uint32_t next_int(uint64_t *random) {
  99. return java_random::next(random, 31) % 3;
  100. }
  101. }
  102. __global__ __launch_bounds__(BLOCK_SIZE, 2) void crack(uint64_t seed_offset, int32_t *num_seeds, uint64_t *seeds) {
  103. #ifdef DEBUG
  104. int32_t debug_index = 0;
  105. uint64_t originalSeed = 77849775653ULL;
  106. #else
  107. uint64_t originalSeed = ((blockIdx.x * blockDim.x + threadIdx.x + seed_offset) << 4ULL) | CHUNK_SEED_BOTTOM_4;
  108. #endif
  109. uint64_t seed = originalSeed;
  110. int8_t heightMap[1024];
  111. #pragma unroll
  112. for (int i = 0; i < 1024; i++) {
  113. heightMap[i] = 0;
  114. }
  115. int32_t currentHighestPos = 0, posMap;
  116. int16_t initialPosX, initialPosY, initialPosZ, initialPos;
  117. int16_t posX, posY, posZ;
  118. int8_t position = -1;
  119. for (int32_t i = 0; i < 10; i++) {
  120. ASSERT(debug_index, i);
  121. ASSERT(debug_index, WANTED_CACTUS_HEIGHT - heightMap[currentHighestPos]);
  122. ASSERT(debug_index, 9 * (10 - i));
  123. if (WANTED_CACTUS_HEIGHT - heightMap[currentHighestPos] > 9 * (10 - i))
  124. return;
  125. initialPosX = java_random::next(&seed, 4) + 8;
  126. initialPosZ = java_random::next(&seed, 4) + 8;
  127. initialPos = initialPosX + initialPosZ * 32;
  128. ASSERT(debug_index, initialPosX);
  129. ASSERT(debug_index, initialPosZ);
  130. ASSERT(debug_index, initialPos);
  131. if (position == -1) {
  132. if (initialPos == NEIGHBOR1) {
  133. position = 0;
  134. } else if (initialPos == NEIGHBOR2) {
  135. position = 1;
  136. } else if (initialPos == NEIGHBOR3) {
  137. position = 2;
  138. }
  139. ASSERT(debug_index, position);
  140. if (position != -1) {
  141. uint64_t bit = (originalSeed >> 4ULL) & 1ULL;
  142. ASSERT(debug_index, bit);
  143. if (position != DIAGONAL_INDEX) {
  144. if (bit == CHUNK_SEED_BIT_5) return;
  145. } else {
  146. if (bit != CHUNK_SEED_BIT_5) return;
  147. }
  148. heightMap[initialPos] += CACTUS_HEIGHT;
  149. ASSERT(debug_index, heightMap[initialPos]);
  150. if (heightMap[currentHighestPos] < heightMap[initialPos]) {
  151. currentHighestPos = initialPos;
  152. ASSERT(debug_index, currentHighestPos);
  153. }
  154. }
  155. }
  156. initialPosY = java_random::next_int_unknown(&seed, (heightMap[initialPos] + FLOOR_LEVEL + 1) * 2);
  157. ASSERT(debug_index, initialPosY);
  158. for (int32_t a = 0; a < 10; a++) {
  159. posX = initialPosX + java_random::next(&seed, 3) - java_random::next(&seed, 3);
  160. posY = initialPosY + java_random::next(&seed, 2) - java_random::next(&seed, 2);
  161. posZ = initialPosZ + java_random::next(&seed, 3) - java_random::next(&seed, 3);
  162. posMap = posX + posZ * 32;
  163. ASSERT(debug_index, posX);
  164. ASSERT(debug_index, posY);
  165. ASSERT(debug_index, posZ);
  166. ASSERT(debug_index, posMap);
  167. if (position == -1) {
  168. if (posMap == NEIGHBOR1) {
  169. position = 0;
  170. } else if (posMap == NEIGHBOR2) {
  171. position = 1;
  172. } else if (posMap == NEIGHBOR3) {
  173. position = 2;
  174. }
  175. ASSERT(debug_index, position);
  176. if (position != -1) {
  177. uint64_t bit = (originalSeed >> 4ULL) & 1ULL;
  178. ASSERT(debug_index, bit);
  179. if (position != DIAGONAL_INDEX) {
  180. if (bit == CHUNK_SEED_BIT_5) return;
  181. } else {
  182. if (bit != CHUNK_SEED_BIT_5) return;
  183. }
  184. heightMap[posMap] += CACTUS_HEIGHT;
  185. ASSERT(debug_index, heightMap[posMap]);
  186. if (heightMap[currentHighestPos] < heightMap[posMap]) {
  187. currentHighestPos = posMap;
  188. ASSERT(debug_index, currentHighestPos);
  189. }
  190. }
  191. }
  192. ASSERT(debug_index, heightMap[posMap]);
  193. if (posY <= heightMap[posMap] + FLOOR_LEVEL)
  194. continue;
  195. int32_t offset = 1 + java_random::next_int_unknown(&seed, java_random::next_int(&seed) + 1);
  196. ASSERT(debug_index, offset);
  197. for (int32_t j = 0; j < offset; j++) {
  198. ASSERT(debug_index, heightMap[posMap]);
  199. ASSERT(debug_index, heightMap[(posX + 1) + posZ * 32]);
  200. ASSERT(debug_index, heightMap[(posX - 1) + posZ * 32]);
  201. ASSERT(debug_index, heightMap[posX + (posZ + 1) * 32]);
  202. ASSERT(debug_index, heightMap[posX + (posZ - 1) * 32]);
  203. if ((posY + j - 1) > heightMap[posMap] + FLOOR_LEVEL || posY < 0) continue;
  204. if ((posY + j) <= heightMap[(posX + 1) + posZ * 32] + FLOOR_LEVEL) continue;
  205. if ((posY + j) <= heightMap[(posX - 1) + posZ * 32] + FLOOR_LEVEL) continue;
  206. if ((posY + j) <= heightMap[posX + (posZ + 1) * 32] + FLOOR_LEVEL) continue;
  207. if ((posY + j) <= heightMap[posX + (posZ - 1) * 32] + FLOOR_LEVEL) continue;
  208. heightMap[posMap]++;
  209. ASSERT(debug_index, heightMap[posMap]);
  210. ASSERT(debug_index, heightMap[currentHighestPos]);
  211. ASSERT(debug_index, heightMap[posMap]);
  212. if (heightMap[currentHighestPos] < heightMap[posMap]) {
  213. currentHighestPos = posMap;
  214. ASSERT(debug_index, currentHighestPos);
  215. }
  216. }
  217. }
  218. ASSERT(debug_index, heightMap[currentHighestPos]);
  219. if (heightMap[currentHighestPos] >= WANTED_CACTUS_HEIGHT) {
  220. uint64_t neighbor = 0;
  221. if (position == 0)
  222. neighbor = NEIGHBOR1;
  223. if (position == 1)
  224. neighbor = NEIGHBOR2;
  225. if (position == 2)
  226. neighbor = NEIGHBOR3;
  227. ASSERT(debug_index, neighbor);
  228. seeds[atomicAdd(num_seeds, 1)] = (neighbor << 48ULL) | originalSeed;
  229. ASSERT(debug_index, ((neighbor << 48ULL) | originalSeed));
  230. return;
  231. }
  232. }
  233. }
  234. struct GPU_Node {
  235. int *num_seeds;
  236. uint64_t *seeds;
  237. };
  238. void setup_gpu_node(GPU_Node *node, int32_t gpu) {
  239. cudaSetDevice(gpu);
  240. cudaMallocManaged(&node->num_seeds, sizeof(*node->num_seeds));
  241. cudaMallocManaged(&node->seeds, 1ULL << 10ULL); // approx 1kb
  242. }
  243. GPU_Node nodes[GPU_COUNT];
  244. uint64_t offset = OFFSET;
  245. uint64_t count = 0;
  246. std::mutex info_lock;
  247. void gpu_manager(int32_t gpu_index) {
  248. std::string fileName = "kaktoos_seeds" + std::to_string(gpu_index) + ".txt";
  249. FILE *out_file = fopen(fileName.c_str(), "w");
  250. cudaSetDevice(gpu_index);
  251. while (offset < END) {
  252. *nodes[gpu_index].num_seeds = 0;
  253. crack<<<WORK_UNIT_SIZE / BLOCK_SIZE, BLOCK_SIZE, 0>>>(offset, nodes[gpu_index].num_seeds,
  254. nodes[gpu_index].seeds);
  255. info_lock.lock();
  256. offset += WORK_UNIT_SIZE;
  257. info_lock.unlock();
  258. cudaDeviceSynchronize();
  259. for (int32_t i = 0, e = *nodes[gpu_index].num_seeds; i < e; i++) {
  260. #ifndef DEBUG
  261. fprintf(out_file, "%llu %llu\n", nodes[gpu_index].seeds[i] & RANDOM_MASK, (unsigned long long)nodes[gpu_index].seeds[i] >> 48ULL);
  262. printf("Found seed: %lld\n", (long long int)nodes[gpu_index].seeds[i]);
  263. #endif
  264. }
  265. fflush(out_file);
  266. info_lock.lock();
  267. count += *nodes[gpu_index].num_seeds;
  268. info_lock.unlock();
  269. }
  270. fclose(out_file);
  271. }
  272. int main() {
  273. printf("Searching %ld total seeds...\n", END - OFFSET);
  274. std::thread threads[GPU_COUNT];
  275. time_t startTime = time(nullptr), currentTime;
  276. for (int32_t i = 0; i < GPU_COUNT; i++) {
  277. setup_gpu_node(&nodes[i], i);
  278. threads[i] = std::thread(gpu_manager, i);
  279. }
  280. using namespace std::chrono_literals;
  281. while (offset < END) {
  282. time(&currentTime);
  283. int timeElapsed = (int) (currentTime - startTime);
  284. double speed = (double) (offset - OFFSET) / (double) timeElapsed / 1000000.0;
  285. printf("Searched %lld seeds, offset: %lld found %lld matches. Time elapsed: %ds. Speed: %.2fm seeds/s. %f%%\n",
  286. (long long int) (offset - OFFSET),
  287. (long long int) offset,
  288. (long long int) count,
  289. timeElapsed,
  290. speed,
  291. (double) (offset - OFFSET) / (END - OFFSET) * 100);
  292. std::this_thread::sleep_for(0.5s);
  293. }
  294. for (auto &thread : threads) {
  295. thread.join();
  296. }
  297. printf("Done!\n");
  298. printf("But, verily, it be the nature of dreams to end.\n");
  299. }