// sand.cu
  1. ///nvcc -o fil main.cu -O3 -m=64 -arch=compute_61 -code=sm_61 -Xptxas -allow-expensive-optimizations=true -Xptxas -v
  2. #include <iostream>
  3. #include <chrono>
  4. #include <fstream>
  5. #include <algorithm>
  6. #include <inttypes.h>
  7. #include <bitset>
  8. #include <iostream>
  9. #include <vector>
  10. #include <map>
  11. #include <iomanip>
  12. #include <fstream>
  13. #include <chrono>
  14. #include <mutex>
  15. #include <time.h>
  16. #include "lcg.h"
  17. #ifdef BOINC
  18. #include "boinc_api.h"
  19. #if defined _WIN32 || defined _WIN64
  20. #include "boinc_win.h"
  21. #endif
  22. #endif
  23. uint64_t millis() {return (std::chrono::duration_cast< std::chrono::milliseconds >(std::chrono::system_clock::now().time_since_epoch())).count();}
  24. #define GPU_ASSERT(code) gpuAssert((code), __FILE__, __LINE__)
  25. inline void gpuAssert(cudaError_t code, const char *file, int line) {
  26. if (code != cudaSuccess) {
  27. fprintf(stderr, "GPUassert: %s (code %d) %s %d\n", cudaGetErrorString(code), code, file, line);
  28. exit(code);
  29. }
  30. }
  31. // ===== LCG IMPLEMENTATION ===== //
  32. namespace java_lcg { //region Java LCG
  33. #define Random uint64_t
  34. #define RANDOM_MULTIPLIER 0x5DEECE66DULL
  35. #define RANDOM_ADDEND 0xBULL
  36. #define RANDOM_MASK ((1ULL << 48u) - 1)
  37. #define get_random(seed) ((Random)((seed ^ RANDOM_MULTIPLIER) & RANDOM_MASK))
  38. __host__ __device__ __forceinline__ static int32_t random_next(Random *random, int bits) {
  39. *random = (*random * RANDOM_MULTIPLIER + RANDOM_ADDEND) & RANDOM_MASK;
  40. return (int32_t) (*random >> (48u - bits));
  41. }
  42. __device__ __forceinline__ static int32_t random_next_int(Random *random, const uint16_t bound) {
  43. int32_t r = random_next(random, 31);
  44. const uint16_t m = bound - 1u;
  45. if ((bound & m) == 0) {
  46. r = (int32_t) ((bound * (uint64_t) r) >> 31u);
  47. } else {
  48. for (int32_t u = r;
  49. u - (r = u % bound) + m < 0;
  50. u = random_next(random, 31));
  51. }
  52. return r;
  53. }
  54. __device__ __host__ __forceinline__ static int32_t random_next_int_nonpow(Random *random, const uint16_t bound) {
  55. int32_t r = random_next(random, 31);
  56. const uint16_t m = bound - 1u;
  57. for (int32_t u = r;
  58. u - (r = u % bound) + m < 0;
  59. u = random_next(random, 31));
  60. return r;
  61. }
  62. __host__ __device__ __forceinline__ static double next_double(Random *random) {
  63. return (double) ((((uint64_t) ((uint32_t) random_next(random, 26)) << 27u)) + random_next(random, 27)) / (double)(1ULL << 53);
  64. }
  65. __host__ __device__ __forceinline__ static uint64_t random_next_long (Random *random) {
  66. return (((uint64_t)random_next(random, 32)) << 32u) + (int32_t)random_next(random, 32);
  67. }
  68. __host__ __device__ __forceinline__ static void advance2(Random *random) {
  69. *random = (*random * 0xBB20B4600A69LLU + 0x40942DE6BALLU) & RANDOM_MASK;
  70. }
  71. __host__ __device__ __forceinline__ static void advance3759(Random *random) {
  72. *random = (*random * 0x6FE85C031F25LLU + 0x8F50ECFF899LLU) & RANDOM_MASK;
  73. }
  74. }
  75. using namespace java_lcg;
  76. namespace device_intrinsics { //region DEVICE INTRINSICS
  77. #define DEVICE_STATIC_INTRINSIC_QUALIFIERS static __device__ __forceinline__
  78. #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)
  79. #define PXL_GLOBAL_PTR "l"
  80. #else
  81. #define PXL_GLOBAL_PTR "r"
  82. #endif
  83. DEVICE_STATIC_INTRINSIC_QUALIFIERS void __prefetch_local_l1(const void* const ptr)
  84. {
  85. asm("prefetch.local.L1 [%0];" : : PXL_GLOBAL_PTR(ptr));
  86. }
  87. DEVICE_STATIC_INTRINSIC_QUALIFIERS void __prefetch_global_uniform(const void* const ptr)
  88. {
  89. asm("prefetchu.L1 [%0];" : : PXL_GLOBAL_PTR(ptr));
  90. }
  91. DEVICE_STATIC_INTRINSIC_QUALIFIERS void __prefetch_local_l2(const void* const ptr)
  92. {
  93. asm("prefetch.local.L2 [%0];" : : PXL_GLOBAL_PTR(ptr));
  94. }
  95. #if __CUDA__ < 10
  96. #define __ldg(ptr) (*(ptr))
  97. #endif
  98. }
  99. using namespace device_intrinsics;
  100. #define BLOCK_SIZE (128)
  101. //#define BLOCK_SIZE (128)
  102. #define WORK_SIZE_BITS 16
  103. #define SEEDS_PER_CALL ((1ULL << (WORK_SIZE_BITS)) * (BLOCK_SIZE))
  104. //#define SEEDS_PER_CALL 8000000
// Observed surface pattern tables below: 1 = dirt/grass, 0 = sand, 15 = cell is ignored (never compared).
  106. // This will match the seed 76261196830436 (not pack.png ofc)
  107. // Double match: 76261206560653 (almost 100% confirmed, sans very last bit of sand in first match)
  108. // Triple match: 76273693341674 (100% match)
  109. #define CHUNK_X 6
  110. #define CHUNK_Z -1
  111. #define INNER_X_START 4
  112. #define INNER_Z_START 0
  113. #define INNER_X_END 13
  114. #define INNER_Z_END 2
  115. __constant__ uint8_t DIRT_HEIGHT_2D[INNER_Z_END - INNER_Z_START + 1][INNER_X_END - INNER_X_START + 1] = {{1,15,15,15,1,15,0,15,15,15},
  116. {15,1,15,15,15,1,15,1,15,15},
  117. {15,15,1,1,15,15,1,1,1,0}};
  118. __constant__ double LocalNoise2D[INNER_Z_END - INNER_Z_START + 1][INNER_X_END - INNER_X_START + 1];
  119. #define EARLY_RETURN (INNER_Z_END * 16 + INNER_X_END)
  120. #define CHUNK_X_2 6
  121. #define CHUNK_Z_2 -2
  122. #define INNER_X_START_2 0
  123. #define INNER_Z_START_2 6
  124. #define INNER_X_END_2 9
  125. #define INNER_Z_END_2 15
  126. __constant__ uint8_t DIRT_HEIGHT_2D_2[INNER_Z_END_2 - INNER_Z_START_2 + 1][INNER_X_END_2 - INNER_X_START_2 + 1] = {{0,15,15,15,15,15,15,15,15,15},
  127. {15,0,0,15,15,15,15,15,15,15},
  128. {0,15,15,0,15,15,15,15,15,15},
  129. {15,1,15,15,0,15,15,15,15,15},
  130. {15,15,0,15,15,0,15,15,15,15},
  131. {15,15,15,0,15,0,15,15,15,15},
  132. {15,15,15,15,0,15,0,15,15,15},
  133. {0,15,15,15,15,0,0,15,15,15},
  134. {0,0,15,15,15,15,0,0,0,15},
  135. {15,15,0,0,15,15,15,0,15,0}};
  136. __constant__ double LocalNoise2D_2[INNER_Z_END_2 - INNER_Z_START_2 + 1][INNER_X_END_2 - INNER_X_START_2 + 1];
  137. #define CHUNK_X_3 5
  138. #define CHUNK_Z_3 -1
  139. #define INNER_X_START_3 4
  140. #define INNER_Z_START_3 0
  141. #define INNER_X_END_3 15
  142. #define INNER_Z_END_3 10
  143. __constant__ uint8_t DIRT_HEIGHT_2D_3[INNER_Z_END_3 - INNER_Z_START_3 + 1][INNER_X_END_3 - INNER_X_START_3 + 1] = {{1,1,15,15,15,15,15,15,15,15,0,15},
  144. {15,15,15,15,15,15,15,15,15,15,0,15},
  145. {15,15,15,15,15,15,15,15,15,15,15,0},
  146. {15,15,15,0,15,15,15,15,15,15,15,0},
  147. {15,15,15,1,15,15,15,15,15,15,15,15},
  148. {15,15,15,0,15,15,15,15,15,15,15,0},
  149. {15,15,15,15,15,15,15,15,15,15,15,15},
  150. {15,15,0,15,15,15,15,15,15,15,15,15},
  151. {15,15,1,15,15,15,15,15,15,15,15,15},
  152. {15,15,15,1,15,15,15,15,15,15,15,15},
  153. {15,15,15,0,15,15,15,15,15,15,15,15}};
  154. __constant__ double LocalNoise2D_3[INNER_Z_END_3 - INNER_Z_START_3 + 1][INNER_X_END_3 - INNER_X_START_3 + 1];
  155. /*
  156. //Old test: matches 104703450999364
  157. #define CHUNK_X 2
  158. #define CHUNK_Z 11
  159. #define INNER_X_START 2
  160. #define INNER_Z_START 0
  161. #define INNER_X_END 11
  162. #define INNER_Z_END 0
  163. __constant__ uint8_t DIRT_HEIGHT_2D[INNER_Z_END - INNER_Z_START + 1][INNER_X_END - INNER_X_START + 1] = {{0,15,0,1,0,15,15,15,15,1}};
  164. __constant__ double LocalNoise2D[INNER_Z_END - INNER_Z_START + 1][INNER_X_END - INNER_X_START + 1];
  165. */
  166. //The generation of the simplex layers and noise
  167. namespace noise { //region Simplex layer gen
  168. /* End of constant for simplex noise*/
  169. struct Octave {
  170. double xo;
  171. double yo;
  172. double zo;
  173. uint8_t permutations[256];
  174. };
  175. __shared__ uint8_t permutations[256][BLOCK_SIZE];
  176. #define getValue(array, index) array[index][threadIdx.x]
  177. #define setValue(array, index, value) array[index][threadIdx.x] = value
  178. __device__ static inline void setupNoise(const uint8_t nbOctaves, Random *random, Octave resultArray[]) {
  179. for (int j = 0; j < nbOctaves; ++j) {
  180. __prefetch_local_l2(&resultArray[j]);
  181. resultArray[j].xo = next_double(random) * 256.0;
  182. resultArray[j].yo = next_double(random) * 256.0;
  183. resultArray[j].zo = next_double(random) * 256.0;
  184. #pragma unroll
  185. for(int w = 0; w<256; w++) {
  186. setValue(permutations, w, w);
  187. }
  188. for(int index = 0; index<256; index++) {
  189. uint32_t randomIndex = random_next_int(random, 256ull - index) + index;
  190. //if (randomIndex != index) {
  191. // swap
  192. uint8_t v1 = getValue(permutations,index);
  193. //uint8_t v2 = getValue(permutations,randomIndex);
  194. setValue(permutations,index, getValue(permutations,randomIndex));
  195. setValue(permutations, randomIndex, v1);
  196. //}
  197. }
  198. #pragma unroll
  199. for(int c = 0; c<256;c++) {
  200. __prefetch_local_l1(&(resultArray[j].permutations[c+1]));
  201. resultArray[j].permutations[c] = getValue(permutations,c);
  202. }
  203. //resultArray[j].xo = xo;
  204. //resultArray[j].yo = yo;
  205. //resultArray[j].zo = zo;
  206. }
  207. }
  208. __device__ static inline void SkipNoiseGen(const uint8_t nbOctaves, Random* random) {
  209. for (int j = 0; j < nbOctaves; ++j) {
  210. lcg::advance<2*3>(*random);
  211. for(int index = 0; index<256; index++) {
  212. random_next_int(random, 256ull - index);
  213. }
  214. }
  215. }
  216. __device__ static inline double lerp(double x, double a, double b) {
  217. return a + x * (b - a);
  218. }
  219. __device__ static inline double grad(uint8_t hash, double x, double y, double z) {
  220. switch (hash & 0xFu) {
  221. case 0x0:
  222. return x + y;
  223. case 0x1:
  224. return -x + y;
  225. case 0x2:
  226. return x - y;
  227. case 0x3:
  228. return -x - y;
  229. case 0x4:
  230. return x + z;
  231. case 0x5:
  232. return -x + z;
  233. case 0x6:
  234. return x - z;
  235. case 0x7:
  236. return -x - z;
  237. case 0x8:
  238. return y + z;
  239. case 0x9:
  240. return -y + z;
  241. case 0xA:
  242. return y - z;
  243. case 0xB:
  244. return -y - z;
  245. case 0xC:
  246. return y + x;
  247. case 0xD:
  248. return -y + z;
  249. case 0xE:
  250. return y - x;
  251. case 0xF:
  252. return -y - z;
  253. default:
  254. return 0; // never happens
  255. }
  256. }
  257. __device__ static inline void generateNormalPermutations(double *buffer, double x, double y, double z, int sizeX, int sizeY, int sizeZ, double noiseFactorX, double noiseFactorY, double noiseFactorZ, double octaveSize, Random* random) {
  258. double xo = lcg::next_double(*random) * 256.0;
  259. double yo = lcg::next_double(*random) * 256.0;
  260. double zo = lcg::next_double(*random) * 256.0;
  261. //Setup the permutation fresh xD
  262. #pragma unroll
  263. for(int w = 0; w<256; w++) {
  264. setValue(permutations, w, w);
  265. }
  266. for(int index = 0; index<256; index++) {
  267. uint32_t randomIndex = lcg::dynamic_next_int(*random, 256ull - index) + index;
  268. //if (randomIndex != index) {
  269. // swap
  270. uint8_t v1 = getValue(permutations,index);
  271. uint8_t v2 = getValue(permutations,randomIndex);
  272. setValue(permutations,index, v2);
  273. setValue(permutations, randomIndex, v1);
  274. //}
  275. }
  276. double octaveWidth = 1.0 / octaveSize;
  277. int32_t i2 = -1;
  278. double x1 = 0.0;
  279. double x2 = 0.0;
  280. double xx1 = 0.0;
  281. double xx2 = 0.0;
  282. double t;
  283. double w;
  284. int columnIndex = 0;
  285. for (int X = 0; X < sizeX; X++) {
  286. double xCoord = (x + (double) X) * noiseFactorX + xo;
  287. auto clampedXcoord = (int32_t) xCoord;
  288. if (xCoord < (double) clampedXcoord) {
  289. clampedXcoord--;
  290. }
  291. auto xBottoms = (uint8_t) ((uint32_t) clampedXcoord & 0xffu);
  292. xCoord -= clampedXcoord;
  293. t = xCoord * 6 - 15;
  294. w = (xCoord * t + 10);
  295. double fadeX = xCoord * xCoord * xCoord * w;
  296. for (int Z = 0; Z < sizeZ; Z++) {
  297. double zCoord = zo;
  298. auto clampedZCoord = (int32_t) zCoord;
  299. if (zCoord < (double) clampedZCoord) {
  300. clampedZCoord--;
  301. }
  302. auto zBottoms = (uint8_t) ((uint32_t) clampedZCoord & 0xffu);
  303. zCoord -= clampedZCoord;
  304. t = zCoord * 6 - 15;
  305. w = (zCoord * t + 10);
  306. double fadeZ = zCoord * zCoord * zCoord * w;
  307. for (int Y = 0; Y < sizeY; Y++) {
  308. double yCoords = (y + (double) Y) * noiseFactorY + yo;
  309. auto clampedYCoords = (int32_t) yCoords;
  310. if (yCoords < (double) clampedYCoords) {
  311. clampedYCoords--;
  312. }
  313. auto yBottoms = (uint8_t) ((uint32_t) clampedYCoords & 0xffu);
  314. yCoords -= clampedYCoords;
  315. t = yCoords * 6 - 15;
  316. w = yCoords * t + 10;
  317. double fadeY = yCoords * yCoords * yCoords * w;
  318. // ZCoord
  319. if (Y == 0 || yBottoms != i2) { // this is wrong on so many levels, same ybottoms doesnt mean x and z were the same...
  320. i2 = yBottoms;
  321. uint16_t k2 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)(xBottoms& 0xffu)) + yBottoms)& 0xffu)) + zBottoms;
  322. uint16_t l2 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)(xBottoms& 0xffu)) + yBottoms + 1u )& 0xffu)) + zBottoms;
  323. uint16_t k3 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)((xBottoms + 1u)& 0xffu)) + yBottoms )& 0xffu)) + zBottoms;
  324. uint16_t l3 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)((xBottoms + 1u)& 0xffu)) + yBottoms + 1u) & 0xffu)) + zBottoms;
  325. x1 = lerp(fadeX, grad(getValue(permutations,(uint8_t)(k2& 0xffu)), xCoord, yCoords, zCoord), grad(getValue(permutations,(uint8_t)(k3& 0xffu)), xCoord - 1.0, yCoords, zCoord));
  326. x2 = lerp(fadeX, grad(getValue(permutations,(uint8_t)(l2& 0xffu)), xCoord, yCoords - 1.0, zCoord), grad(getValue(permutations,(uint8_t)(l3& 0xffu)), xCoord - 1.0, yCoords - 1.0, zCoord));
  327. xx1 = lerp(fadeX, grad(getValue(permutations,(uint8_t)((k2+1u)& 0xffu)), xCoord, yCoords, zCoord - 1.0), grad(getValue(permutations,(uint8_t)((k3+1u)& 0xffu)), xCoord - 1.0, yCoords, zCoord - 1.0));
  328. xx2 = lerp(fadeX, grad(getValue(permutations,(uint8_t)((l2+1u)& 0xffu)), xCoord, yCoords - 1.0, zCoord - 1.0), grad(getValue(permutations,(uint8_t)((l3+1u)& 0xffu)), xCoord - 1.0, yCoords - 1.0, zCoord - 1.0));
  329. }
  330. if (columnIndex%16 >= INNER_X_START && columnIndex%16 <= INNER_X_END &&
  331. DIRT_HEIGHT_2D[columnIndex/16 - INNER_Z_START][columnIndex%16 - INNER_X_START] != 15){
  332. double y1 = lerp(fadeY, x1, x2);
  333. double y2 = lerp(fadeY, xx1, xx2);
  334. (buffer)[columnIndex] = (buffer)[columnIndex] + lerp(fadeZ, y1, y2) * octaveWidth;
  335. }
  336. if (columnIndex == EARLY_RETURN) return;
  337. columnIndex++;
  338. }
  339. }
  340. }
  341. }
  342. __device__ static inline void generateNormalPermutations_2(double *buffer, double x, double y, double z, int sizeX, int sizeY, int sizeZ, double noiseFactorX, double noiseFactorY, double noiseFactorZ, double octaveSize, Random* random) {
  343. double xo = lcg::next_double(*random) * 256.0;
  344. double yo = lcg::next_double(*random) * 256.0;
  345. double zo = lcg::next_double(*random) * 256.0;
  346. //Setup the permutation fresh xD
  347. #pragma unroll
  348. for(int w = 0; w<256; w++) {
  349. setValue(permutations, w, w);
  350. }
  351. for(int index = 0; index<256; index++) {
  352. uint32_t randomIndex = lcg::dynamic_next_int(*random, 256ull - index) + index;
  353. //if (randomIndex != index) {
  354. // swap
  355. uint8_t v1 = getValue(permutations,index);
  356. uint8_t v2 = getValue(permutations,randomIndex);
  357. setValue(permutations,index, v2);
  358. setValue(permutations, randomIndex, v1);
  359. //}
  360. }
  361. double octaveWidth = 1.0 / octaveSize;
  362. int32_t i2 = -1;
  363. double x1 = 0.0;
  364. double x2 = 0.0;
  365. double xx1 = 0.0;
  366. double xx2 = 0.0;
  367. double t;
  368. double w;
  369. int columnIndex = 0;
  370. for (int X = 0; X < sizeX; X++) {
  371. double xCoord = (x + (double) X) * noiseFactorX + xo;
  372. auto clampedXcoord = (int32_t) xCoord;
  373. if (xCoord < (double) clampedXcoord) {
  374. clampedXcoord--;
  375. }
  376. auto xBottoms = (uint8_t) ((uint32_t) clampedXcoord & 0xffu);
  377. xCoord -= clampedXcoord;
  378. t = xCoord * 6 - 15;
  379. w = (xCoord * t + 10);
  380. double fadeX = xCoord * xCoord * xCoord * w;
  381. for (int Z = 0; Z < sizeZ; Z++) {
  382. double zCoord = zo;
  383. auto clampedZCoord = (int32_t) zCoord;
  384. if (zCoord < (double) clampedZCoord) {
  385. clampedZCoord--;
  386. }
  387. auto zBottoms = (uint8_t) ((uint32_t) clampedZCoord & 0xffu);
  388. zCoord -= clampedZCoord;
  389. t = zCoord * 6 - 15;
  390. w = (zCoord * t + 10);
  391. double fadeZ = zCoord * zCoord * zCoord * w;
  392. for (int Y = 0; Y < sizeY; Y++) {
  393. double yCoords = (y + (double) Y) * noiseFactorY + yo;
  394. auto clampedYCoords = (int32_t) yCoords;
  395. if (yCoords < (double) clampedYCoords) {
  396. clampedYCoords--;
  397. }
  398. auto yBottoms = (uint8_t) ((uint32_t) clampedYCoords & 0xffu);
  399. yCoords -= clampedYCoords;
  400. t = yCoords * 6 - 15;
  401. w = yCoords * t + 10;
  402. double fadeY = yCoords * yCoords * yCoords * w;
  403. // ZCoord
  404. if (Y == 0 || yBottoms != i2) { // this is wrong on so many levels, same ybottoms doesnt mean x and z were the same...
  405. i2 = yBottoms;
  406. uint16_t k2 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)(xBottoms& 0xffu)) + yBottoms)& 0xffu)) + zBottoms;
  407. uint16_t l2 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)(xBottoms& 0xffu)) + yBottoms + 1u )& 0xffu)) + zBottoms;
  408. uint16_t k3 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)((xBottoms + 1u)& 0xffu)) + yBottoms )& 0xffu)) + zBottoms;
  409. uint16_t l3 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)((xBottoms + 1u)& 0xffu)) + yBottoms + 1u) & 0xffu)) + zBottoms;
  410. x1 = lerp(fadeX, grad(getValue(permutations,(uint8_t)(k2& 0xffu)), xCoord, yCoords, zCoord), grad(getValue(permutations,(uint8_t)(k3& 0xffu)), xCoord - 1.0, yCoords, zCoord));
  411. x2 = lerp(fadeX, grad(getValue(permutations,(uint8_t)(l2& 0xffu)), xCoord, yCoords - 1.0, zCoord), grad(getValue(permutations,(uint8_t)(l3& 0xffu)), xCoord - 1.0, yCoords - 1.0, zCoord));
  412. xx1 = lerp(fadeX, grad(getValue(permutations,(uint8_t)((k2+1u)& 0xffu)), xCoord, yCoords, zCoord - 1.0), grad(getValue(permutations,(uint8_t)((k3+1u)& 0xffu)), xCoord - 1.0, yCoords, zCoord - 1.0));
  413. xx2 = lerp(fadeX, grad(getValue(permutations,(uint8_t)((l2+1u)& 0xffu)), xCoord, yCoords - 1.0, zCoord - 1.0), grad(getValue(permutations,(uint8_t)((l3+1u)& 0xffu)), xCoord - 1.0, yCoords - 1.0, zCoord - 1.0));
  414. }
  415. if (columnIndex%16 >= INNER_X_START_2 && columnIndex%16 <= INNER_X_END_2 &&
  416. DIRT_HEIGHT_2D_2[columnIndex/16 - INNER_Z_START_2][columnIndex%16 - INNER_X_START_2] != 15){
  417. double y1 = lerp(fadeY, x1, x2);
  418. double y2 = lerp(fadeY, xx1, xx2);
  419. (buffer)[columnIndex] = (buffer)[columnIndex] + lerp(fadeZ, y1, y2) * octaveWidth;
  420. }
  421. columnIndex++;
  422. }
  423. }
  424. }
  425. }
  426. __device__ static inline void generateNormalPermutations_3(double *buffer, double x, double y, double z, int sizeX, int sizeY, int sizeZ, double noiseFactorX, double noiseFactorY, double noiseFactorZ, double octaveSize, Random* random) {
  427. double xo = lcg::next_double(*random) * 256.0;
  428. double yo = lcg::next_double(*random) * 256.0;
  429. double zo = lcg::next_double(*random) * 256.0;
  430. //Setup the permutation fresh xD
  431. #pragma unroll
  432. for(int w = 0; w<256; w++) {
  433. setValue(permutations, w, w);
  434. }
  435. for(int index = 0; index<256; index++) {
  436. uint32_t randomIndex = lcg::dynamic_next_int(*random, 256ull - index) + index;
  437. //if (randomIndex != index) {
  438. // swap
  439. uint8_t v1 = getValue(permutations,index);
  440. uint8_t v2 = getValue(permutations,randomIndex);
  441. setValue(permutations,index, v2);
  442. setValue(permutations, randomIndex, v1);
  443. //}
  444. }
  445. double octaveWidth = 1.0 / octaveSize;
  446. int32_t i2 = -1;
  447. double x1 = 0.0;
  448. double x2 = 0.0;
  449. double xx1 = 0.0;
  450. double xx2 = 0.0;
  451. double t;
  452. double w;
  453. int columnIndex = 0;
  454. for (int X = 0; X < sizeX; X++) {
  455. double xCoord = (x + (double) X) * noiseFactorX + xo;
  456. auto clampedXcoord = (int32_t) xCoord;
  457. if (xCoord < (double) clampedXcoord) {
  458. clampedXcoord--;
  459. }
  460. auto xBottoms = (uint8_t) ((uint32_t) clampedXcoord & 0xffu);
  461. xCoord -= clampedXcoord;
  462. t = xCoord * 6 - 15;
  463. w = (xCoord * t + 10);
  464. double fadeX = xCoord * xCoord * xCoord * w;
  465. for (int Z = 0; Z < sizeZ; Z++) {
  466. double zCoord = zo;
  467. auto clampedZCoord = (int32_t) zCoord;
  468. if (zCoord < (double) clampedZCoord) {
  469. clampedZCoord--;
  470. }
  471. auto zBottoms = (uint8_t) ((uint32_t) clampedZCoord & 0xffu);
  472. zCoord -= clampedZCoord;
  473. t = zCoord * 6 - 15;
  474. w = (zCoord * t + 10);
  475. double fadeZ = zCoord * zCoord * zCoord * w;
  476. for (int Y = 0; Y < sizeY; Y++) {
  477. double yCoords = (y + (double) Y) * noiseFactorY + yo;
  478. auto clampedYCoords = (int32_t) yCoords;
  479. if (yCoords < (double) clampedYCoords) {
  480. clampedYCoords--;
  481. }
  482. auto yBottoms = (uint8_t) ((uint32_t) clampedYCoords & 0xffu);
  483. yCoords -= clampedYCoords;
  484. t = yCoords * 6 - 15;
  485. w = yCoords * t + 10;
  486. double fadeY = yCoords * yCoords * yCoords * w;
  487. // ZCoord
  488. if (Y == 0 || yBottoms != i2) { // this is wrong on so many levels, same ybottoms doesnt mean x and z were the same...
  489. i2 = yBottoms;
  490. uint16_t k2 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)(xBottoms& 0xffu)) + yBottoms)& 0xffu)) + zBottoms;
  491. uint16_t l2 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)(xBottoms& 0xffu)) + yBottoms + 1u )& 0xffu)) + zBottoms;
  492. uint16_t k3 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)((xBottoms + 1u)& 0xffu)) + yBottoms )& 0xffu)) + zBottoms;
  493. uint16_t l3 = getValue(permutations,(uint8_t)((uint16_t)(getValue(permutations,(uint8_t)((xBottoms + 1u)& 0xffu)) + yBottoms + 1u) & 0xffu)) + zBottoms;
  494. x1 = lerp(fadeX, grad(getValue(permutations,(uint8_t)(k2& 0xffu)), xCoord, yCoords, zCoord), grad(getValue(permutations,(uint8_t)(k3& 0xffu)), xCoord - 1.0, yCoords, zCoord));
  495. x2 = lerp(fadeX, grad(getValue(permutations,(uint8_t)(l2& 0xffu)), xCoord, yCoords - 1.0, zCoord), grad(getValue(permutations,(uint8_t)(l3& 0xffu)), xCoord - 1.0, yCoords - 1.0, zCoord));
  496. xx1 = lerp(fadeX, grad(getValue(permutations,(uint8_t)((k2+1u)& 0xffu)), xCoord, yCoords, zCoord - 1.0), grad(getValue(permutations,(uint8_t)((k3+1u)& 0xffu)), xCoord - 1.0, yCoords, zCoord - 1.0));
  497. xx2 = lerp(fadeX, grad(getValue(permutations,(uint8_t)((l2+1u)& 0xffu)), xCoord, yCoords - 1.0, zCoord - 1.0), grad(getValue(permutations,(uint8_t)((l3+1u)& 0xffu)), xCoord - 1.0, yCoords - 1.0, zCoord - 1.0));
  498. }
  499. if (columnIndex%16 >= INNER_X_START_3 && columnIndex%16 <= INNER_X_END_3 &&
  500. DIRT_HEIGHT_2D_3[columnIndex/16 - INNER_Z_START_3][columnIndex%16 - INNER_X_START_3] != 15){
  501. double y1 = lerp(fadeY, x1, x2);
  502. double y2 = lerp(fadeY, xx1, xx2);
  503. (buffer)[columnIndex] = (buffer)[columnIndex] + lerp(fadeZ, y1, y2) * octaveWidth;
  504. }
  505. columnIndex++;
  506. }
  507. }
  508. }
  509. }
  510. __device__ static inline void generateNoise(double *buffer, double chunkX, double chunkY, double chunkZ, int sizeX, int sizeY, int sizeZ, double offsetX, double offsetY, double offsetZ, Random random, int nbOctaves) {
  511. //memset(buffer, 0, sizeof(double) * sizeX * sizeZ * sizeY);
  512. double octavesFactor = 1.0;
  513. for (int octave = 0; octave < nbOctaves; octave++) {
  514. generateNormalPermutations(buffer, chunkX, chunkY, chunkZ, sizeX, sizeY, sizeZ, offsetX * octavesFactor, offsetY * octavesFactor, offsetZ * octavesFactor, octavesFactor, &random);
  515. octavesFactor /= 2.0;
  516. }
  517. }
  518. __device__ static inline void generateNoise_2(double *buffer, double chunkX, double chunkY, double chunkZ, int sizeX, int sizeY, int sizeZ, double offsetX, double offsetY, double offsetZ, Random random, int nbOctaves) {
  519. //memset(buffer, 0, sizeof(double) * sizeX * sizeZ * sizeY);
  520. double octavesFactor = 1.0;
  521. for (int octave = 0; octave < nbOctaves; octave++) {
  522. generateNormalPermutations_2(buffer, chunkX, chunkY, chunkZ, sizeX, sizeY, sizeZ, offsetX * octavesFactor, offsetY * octavesFactor, offsetZ * octavesFactor, octavesFactor, &random);
  523. octavesFactor /= 2.0;
  524. }
  525. }
  526. __device__ static inline void generateNoise_3(double *buffer, double chunkX, double chunkY, double chunkZ, int sizeX, int sizeY, int sizeZ, double offsetX, double offsetY, double offsetZ, Random random, int nbOctaves) {
  527. //memset(buffer, 0, sizeof(double) * sizeX * sizeZ * sizeY);
  528. double octavesFactor = 1.0;
  529. for (int octave = 0; octave < nbOctaves; octave++) {
  530. generateNormalPermutations_3(buffer, chunkX, chunkY, chunkZ, sizeX, sizeY, sizeZ, offsetX * octavesFactor, offsetY * octavesFactor, offsetZ * octavesFactor, octavesFactor, &random);
  531. octavesFactor /= 2.0;
  532. }
  533. }
  534. }
  535. using namespace noise;
  536. __device__ static inline bool match(uint64_t seed) {
  537. seed = get_random(seed);
  538. //SkipNoiseGen(16+16+8, &seed);
  539. lcg::advance<10480>(seed);//VERY VERY DODGY
  540. double heightField[EARLY_RETURN+1];
  541. #pragma unroll
  542. for(uint16_t i = 0; i<EARLY_RETURN+1;i++)
  543. heightField[i] = 0;
  544. const double noiseFactor = 0.03125;
  545. generateNoise(heightField, (double) (CHUNK_X <<4), (double) (CHUNK_Z<<4), 0.0, 16, 16, 1, noiseFactor, noiseFactor, 1.0, seed, 4);
  546. for(uint8_t z = 0; z < INNER_Z_END - INNER_Z_START + 1; z++) {
  547. for(uint8_t x = 0; x < INNER_X_END - INNER_X_START + 1; x++) {
  548. if (DIRT_HEIGHT_2D[z][x] != 15) {
  549. uint8_t dirty = heightField[INNER_X_START + x + (INNER_Z_START + z) * 16] + LocalNoise2D[z][x] * 0.2 > 0.0 ? 0 : 1;
  550. if (dirty!=(int8_t)DIRT_HEIGHT_2D[z][x])
  551. return false;
  552. }
  553. }
  554. }
  555. return true;
  556. }
  557. __device__ static inline bool match2(uint64_t seed) {
  558. seed = get_random(seed);
  559. //SkipNoiseGen(16+16+8, &seed);
  560. lcg::advance<10480>(seed);//VERY VERY DODGY
  561. double heightField[256];
  562. #pragma unroll
  563. for(uint16_t i = 0; i<256;i++)
  564. heightField[i] = 0;
  565. const double noiseFactor = 0.03125;
  566. generateNoise_2(heightField, (double) (CHUNK_X_2 <<4), (double) (CHUNK_Z_2<<4), 0.0, 16, 16, 1, noiseFactor, noiseFactor, 1.0, seed, 4);
  567. for(uint8_t z = 0; z < INNER_Z_END_2 - INNER_Z_START_2 + 1; z++) {
  568. for(uint8_t x = 0; x < INNER_X_END_2 - INNER_X_START_2 + 1; x++) {
  569. if (DIRT_HEIGHT_2D_2[z][x] != 15) {
  570. uint8_t dirty = heightField[INNER_X_START_2 + x + (INNER_Z_START_2 + z) * 16] + LocalNoise2D_2[z][x] * 0.2 > 0.0 ? 0 : 1;
  571. if (dirty!=(int8_t)DIRT_HEIGHT_2D_2[z][x])
  572. return false;
  573. }
  574. }
  575. }
  576. return true;
  577. }
  578. __device__ static inline bool match3(uint64_t seed) {
  579. seed = get_random(seed);
  580. //SkipNoiseGen(16+16+8, &seed);
  581. lcg::advance<10480>(seed);//VERY VERY DODGY
  582. double heightField[256];
  583. #pragma unroll
  584. for(uint16_t i = 0; i<256;i++)
  585. heightField[i] = 0;
  586. const double noiseFactor = 0.03125;
  587. generateNoise_3(heightField, (double) (CHUNK_X_3 <<4), (double) (CHUNK_Z_3<<4), 0.0, 16, 16, 1, noiseFactor, noiseFactor, 1.0, seed, 4);
  588. for(uint8_t z = 0; z < INNER_Z_END_3 - INNER_Z_START_3 + 1; z++) {
  589. for(uint8_t x = 0; x < INNER_X_END_3 - INNER_X_START_3 + 1; x++) {
  590. if (DIRT_HEIGHT_2D_3[z][x] != 15) {
  591. uint8_t dirty = heightField[INNER_X_START_3 + x + (INNER_Z_START_3 + z) * 16] + LocalNoise2D_3[z][x] * 0.2 > 0.0 ? 0 : 1;
  592. if (dirty!=(int8_t)DIRT_HEIGHT_2D_3[z][x])
  593. return false;
  594. }
  595. }
  596. }
  597. return true;
  598. }
  599. __global__ __launch_bounds__(BLOCK_SIZE,2) static void tempCheck(uint64_t offset, uint64_t* buffer, uint32_t* counter) {
  600. uint64_t seed = blockIdx.x * blockDim.x + threadIdx.x + offset;
  601. if (match(seed)) {
  602. buffer[atomicAdd(counter,1)] = seed;
  603. }
  604. }
  605. __global__ __launch_bounds__(BLOCK_SIZE,2) static void tempCheck2(uint32_t count, uint64_t* buffer) {
  606. uint64_t seedIndex = blockIdx.x * blockDim.x + threadIdx.x;
  607. if (seedIndex>=count)
  608. return;
  609. if (!match2(buffer[seedIndex])) {
  610. buffer[seedIndex] = 0;
  611. }
  612. }
  /**
   * Third-stage filter kernel: re-tests each surviving seed in `buffer` with
   * match3() and overwrites rejected entries with 0.
   *
   * Launch layout: 1D grid, 1D blocks, one thread per buffer entry; threads
   * whose index is >= count exit immediately. Entries that are already 0
   * are skipped without calling match3().
   *
   * @param count  number of valid entries in `buffer`.
   * @param buffer seeds to test; entries failing match3() are set to 0 in place.
   */
  __global__ __launch_bounds__(BLOCK_SIZE,2) static void tempCheck3(uint32_t count, uint64_t* buffer) {
      // Widen before multiplying so the index cannot wrap in 32-bit arithmetic
      // when count is close to the uint32_t limit.
      const uint64_t seedIndex = (uint64_t) blockIdx.x * blockDim.x + threadIdx.x;
      if (seedIndex >= count)
          return;
      const uint64_t seed = buffer[seedIndex];
      if (seed == 0)
          return;
      if (!match3(seed)) {
          buffer[seedIndex] = 0;
      }
  }
  // Input stream for seeds; not opened anywhere in the visible part of the file.
  std::ifstream inSeeds;

  // Output stream for seeds that pass all filter stages (opened in main()).
  std::ofstream outSeeds;

  // Seed buffer shared by the filter kernels and the host loop;
  // main() allocates it with cudaMallocManaged.
  uint64_t* buffer = NULL;

  // Count of seeds stored in `buffer` by the first-stage kernel;
  // main() allocates it with cudaMallocManaged.
  uint32_t* counter = NULL;

  // Defined further down in the file; declared here because setup() calls it.
  double getNextDoubleForLocNoise(int x, int z);
  /**
   * Selects the CUDA device and uploads the three local-noise tables.
   *
   * For each of the three configured regions, a host table with one entry per
   * (z, x) position of the inner region is filled from
   * getNextDoubleForLocNoise() using absolute block coordinates, then copied
   * to the matching device symbol (LocalNoise2D, LocalNoise2D_2,
   * LocalNoise2D_3) with cudaMemcpyToSymbol.
   *
   * @param gpu_device index of the CUDA device to use.
   */
  void setup(int gpu_device) {
      cudaSetDevice(gpu_device);
      GPU_ASSERT(cudaPeekAtLastError());
      GPU_ASSERT(cudaDeviceSynchronize());

      // Region 1.
      double noiseTable1[INNER_Z_END - INNER_Z_START + 1][INNER_X_END - INNER_X_START + 1];
      for (uint8_t row = 0; row < INNER_Z_END - INNER_Z_START + 1; row++) {
          for (uint8_t col = 0; col < INNER_X_END - INNER_X_START + 1; col++) {
              noiseTable1[row][col] = getNextDoubleForLocNoise((CHUNK_X << 4) + INNER_X_START + col,
                                                               (CHUNK_Z << 4) + INNER_Z_START + row);
          }
      }
      GPU_ASSERT(cudaMemcpyToSymbol(LocalNoise2D, &noiseTable1, sizeof(noiseTable1)));
      GPU_ASSERT(cudaPeekAtLastError());

      // Region 2.
      double noiseTable2[INNER_Z_END_2 - INNER_Z_START_2 + 1][INNER_X_END_2 - INNER_X_START_2 + 1];
      for (uint8_t row = 0; row < INNER_Z_END_2 - INNER_Z_START_2 + 1; row++) {
          for (uint8_t col = 0; col < INNER_X_END_2 - INNER_X_START_2 + 1; col++) {
              noiseTable2[row][col] = getNextDoubleForLocNoise((CHUNK_X_2 << 4) + INNER_X_START_2 + col,
                                                               (CHUNK_Z_2 << 4) + INNER_Z_START_2 + row);
          }
      }
      GPU_ASSERT(cudaMemcpyToSymbol(LocalNoise2D_2, &noiseTable2, sizeof(noiseTable2)));
      GPU_ASSERT(cudaPeekAtLastError());

      // Region 3.
      double noiseTable3[INNER_Z_END_3 - INNER_Z_START_3 + 1][INNER_X_END_3 - INNER_X_START_3 + 1];
      for (uint8_t row = 0; row < INNER_Z_END_3 - INNER_Z_START_3 + 1; row++) {
          for (uint8_t col = 0; col < INNER_X_END_3 - INNER_X_START_3 + 1; col++) {
              noiseTable3[row][col] = getNextDoubleForLocNoise((CHUNK_X_3 << 4) + INNER_X_START_3 + col,
                                                               (CHUNK_Z_3 << 4) + INNER_Z_START_3 + row);
          }
      }
      GPU_ASSERT(cudaMemcpyToSymbol(LocalNoise2D_3, &noiseTable3, sizeof(noiseTable3)));
      GPU_ASSERT(cudaPeekAtLastError());
  }
  // Run time in seconds restored from the checkpoint file; main() adds the
  // current run's elapsed time to it when writing a checkpoint and when
  // reporting the total. Stays 0 when no checkpoint was loaded.
  time_t elapsed_chkpoint = 0;

  // Record stored in the checkpoint file. main() reads and writes it as raw
  // bytes with fread/fwrite, so field order and types define the file format.
  struct checkpoint_vars {
      // Seed offset (relative to the start seed) at which a resumed run
      // re-enters the main loop.
      unsigned long long offset;
      // Total run time in seconds at the moment the checkpoint was written.
      time_t elapsed_chkpoint;
  };
  /**
   * Host entry point: parses the command line, restores a checkpoint if one
   * exists, then runs the three-stage GPU seed filter over
   * [START, START + COUNT) in batches of SEEDS_PER_CALL seeds.
   *
   * Command line (flag/value pairs):
   *   -d / --device <n>   CUDA device index (overridden by BOINC when it
   *                       supplies a device number)
   *   -s / --start  <n>   first seed to test
   *   -e / --count  <n>   number of seeds to test
   *
   * Seeds that survive all three stages are printed to stdout and written to
   * the file "seedsout". Progress is checkpointed to "packpoint.txt".
   *
   * NOTE(review): boinc_fopen / boinc_delete_file / boinc_time_to_checkpoint /
   * boinc_finish are called outside `#ifdef BOINC`, exactly as before; they
   * are assumed to have fallbacks for non-BOINC builds elsewhere in the file.
   */
  int main(int argc, char *argv[]) {
      int gpu_device = 0;
      // START and COUNT used to be left uninitialized when their flags were
      // missing; default both to 0 so the run is well-defined (and empty).
      uint64_t START = 0;
      uint64_t offsetStart = 0;
      uint64_t COUNT = 0;
      bool resumed = false;
  #ifdef BOINC
      BOINC_OPTIONS options;
      boinc_options_defaults(options);
      options.normal_thread_priority = true;
      boinc_init_options(&options);
  #endif
      for (int i = 1; i < argc; i += 2) {
          const char *param = argv[i];
          const bool isDevice = strcmp(param, "-d") == 0 || strcmp(param, "--device") == 0;
          const bool isStart = strcmp(param, "-s") == 0 || strcmp(param, "--start") == 0;
          const bool isCount = strcmp(param, "-e") == 0 || strcmp(param, "--count") == 0;
          if (!isDevice && !isStart && !isCount) {
              fprintf(stderr,"Unknown parameter: %s\n", param);
              continue;
          }
          // A trailing flag without a value would otherwise hand argv[argc]
          // (a null pointer) to atoi/sscanf.
          if (i + 1 >= argc) {
              fprintf(stderr, "Missing value for parameter: %s\n", param);
              break;
          }
          const char *value = argv[i + 1];
          if (isDevice) {
              gpu_device = atoi(value);
              continue;
          }
          // Parse into the exact type %llu expects, then assign to uint64_t.
          unsigned long long parsed = 0;
          if (sscanf(value, "%llu", &parsed) != 1) {
              fprintf(stderr, "Invalid value for parameter %s: %s\n", param, value);
              continue;
          }
          if (isStart) {
              START = parsed;
          } else {
              COUNT = parsed;
          }
      }

      FILE *checkpoint_data = boinc_fopen("packpoint.txt", "rb");
      if (!checkpoint_data) {
          fprintf(stderr, "No checkpoint to load\n");
      } else {
  #ifdef BOINC
          boinc_begin_critical_section();
  #endif
          struct checkpoint_vars data_store;
          // Only trust the record when it was read completely.
          if (fread(&data_store, sizeof(data_store), 1, checkpoint_data) == 1) {
              offsetStart = data_store.offset;
              elapsed_chkpoint = data_store.elapsed_chkpoint;
              resumed = true;
              fprintf(stderr, "Checkpoint loaded, task time %lld s, seed pos: %llu\n",
                      (long long) elapsed_chkpoint, (unsigned long long) START);
          } else {
              fprintf(stderr, "Checkpoint file unreadable, starting from the beginning\n");
          }
          fclose(checkpoint_data);
  #ifdef BOINC
          boinc_end_critical_section();
  #endif
      }

  #ifdef BOINC
      APP_INIT_DATA aid;
      boinc_get_init_data(aid);
      if (aid.gpu_device_num >= 0) {
          gpu_device = aid.gpu_device_num;
          fprintf(stderr,"boinc gpu %i gpuindex: %i \n", aid.gpu_device_num, gpu_device);
      } else {
          fprintf(stderr,"stndalone gpuindex %i \n", gpu_device);
      }
  #endif

      setup(gpu_device);
      uint64_t seedCount = COUNT;
      std::cout << "Processing " << seedCount << " seeds" << std::endl;

      // When resuming, append: truncating here would discard every seed that
      // was found before the checkpoint was written.
      outSeeds.open("seedsout", resumed ? (std::ios::out | std::ios::app) : std::ios::out);

      GPU_ASSERT(cudaMallocManaged(&buffer, sizeof(*buffer) * SEEDS_PER_CALL));
      GPU_ASSERT(cudaPeekAtLastError());
      GPU_ASSERT(cudaMallocManaged(&counter, sizeof(*counter)));
      GPU_ASSERT(cudaPeekAtLastError());

      time_t start_time = time(NULL);
      unsigned long long outCount = 0;
      uint64_t checkpointTemp = 0; // seeds processed since the last checkpoint

      for (uint64_t offset = offsetStart; offset < seedCount; offset += SEEDS_PER_CALL) {
          double frac = (double) offset / (double)(seedCount);
  #ifdef BOINC
          boinc_fraction_done(frac);
  #endif
          // Stage 1: test SEEDS_PER_CALL consecutive seeds, collect matches.
          *counter = 0;
          tempCheck<<<1ULL<<WORK_SIZE_BITS,BLOCK_SIZE>>>(START + offset, buffer,counter);
          GPU_ASSERT(cudaPeekAtLastError());
          GPU_ASSERT(cudaDeviceSynchronize());

          // Stages 2 and 3: zero out collected seeds that fail the stricter checks.
          const uint32_t found = *counter;
          tempCheck2<<<(found/BLOCK_SIZE)+1,BLOCK_SIZE>>>(found, buffer);
          GPU_ASSERT(cudaPeekAtLastError());
          GPU_ASSERT(cudaDeviceSynchronize());
          tempCheck3<<<(found/BLOCK_SIZE)+1,BLOCK_SIZE>>>(found, buffer);
          GPU_ASSERT(cudaPeekAtLastError());
          GPU_ASSERT(cudaDeviceSynchronize());

          for (uint32_t i = 0; i < found; i++) {
              if (buffer[i] != 0) {
                  uint64_t seed = buffer[i];
                  std::cout << "3rd level seed found: " << seed << std::endl;
                  outSeeds << seed << std::endl;
                  outCount++;
              }
          }

          if (checkpointTemp >= 180000000 || boinc_time_to_checkpoint()) {
  #ifdef BOINC
              boinc_begin_critical_section(); // Boinc should not interrupt this
  #endif
              time_t elapsed = time(NULL) - start_time;
              boinc_delete_file("packpoint.txt"); // Don't touch, same func as normal fdel
              FILE *checkpoint_out = boinc_fopen("packpoint.txt", "wb");
              if (checkpoint_out) {
                  struct checkpoint_vars data_store = {};
                  // This batch is finished and its seeds are already flushed,
                  // so a resumed run must start at the next batch.
                  data_store.offset = offset + SEEDS_PER_CALL;
                  data_store.elapsed_chkpoint = elapsed_chkpoint + elapsed;
                  if (fwrite(&data_store, sizeof(data_store), 1, checkpoint_out) != 1) {
                      fprintf(stderr, "Failed to write checkpoint\n");
                  }
                  fclose(checkpoint_out);
              } else {
                  fprintf(stderr, "Failed to open checkpoint file for writing\n");
              }
              checkpointTemp = 0;
  #ifdef BOINC
              boinc_end_critical_section();
              boinc_checkpoint_completed(); // Checkpointing completed
  #endif
          }
          checkpointTemp += SEEDS_PER_CALL;

          // Clamp at zero: the last batch may run past seedCount and the
          // unsigned subtraction would otherwise wrap to a huge number.
          const uint64_t batchEnd = offset + SEEDS_PER_CALL;
          const uint64_t seedsLeft = batchEnd < seedCount ? seedCount - batchEnd : 0;
          std::cout << "Seeds left:" << seedsLeft << std::endl;
      }
      std::cout << "Done processing" << std::endl;

      GPU_ASSERT(cudaFree(buffer));
      GPU_ASSERT(cudaFree(counter));

  #ifdef BOINC
      boinc_begin_critical_section();
  #endif
      time_t elapsed = time(NULL) - start_time;
      // Speed covers only this run: seeds skipped via the checkpoint were not
      // processed during `elapsed`.
      const uint64_t processedThisRun = COUNT > offsetStart ? COUNT - offsetStart : 0;
      double done = (double) processedThisRun / 1000000.0;
      double speed = elapsed > 0 ? done / (double) elapsed : 0.0;
      fprintf(stderr, "\nSpeed: %.2lfm/s\n", speed );
      fprintf(stderr, "Done\n");
      fprintf(stderr, "Processed: %llu seeds in %.2lfs seconds\n", (unsigned long long) COUNT, (double) elapsed_chkpoint + (double) elapsed );
      fprintf(stderr, "Have %llu output seeds.\n", outCount);
      fflush(stderr);
      outSeeds.close();
      boinc_delete_file("packpoint.txt");
  #ifdef BOINC
      boinc_end_critical_section();
  #endif
      boinc_finish(0);
      return 0;
  }
  /**
   * Returns the random double drawn for block column (x, z).
   *
   * The generator is seeded from the chunk coordinates (x >> 4, z >> 4).
   * Columns of the chunk are visited with the x offset as the outer index and
   * the z offset as the inner index; every column before the requested one
   * consumes three advance2() steps followed by 128 random_next_int_nonpow(5)
   * draws. The value returned is the next_double() taken on reaching the
   * requested column.
   *
   * @param x absolute block x coordinate.
   * @param z absolute block z coordinate.
   * @return the next_double() result at the requested column.
   */
  double getNextDoubleForLocNoise(int x, int z) {
      Random rand = get_random((((int64_t)x) >> 4) * 341873128712LL + (((int64_t)z) >> 4) * 132897987541LL);

      // Number of columns that precede (x & 15, z & 15) in visiting order.
      // Both masked values lie in 0..15, so the target column always exists
      // and the former exit(-99) fallback could never be reached.
      const int columnsToSkip = (x & 15) * 16 + (z & 15);

      for (int column = 0; column < columnsToSkip; column++) {
          advance2(&rand);
          advance2(&rand);
          advance2(&rand);
          for (int draw = 0; draw < 128; draw++) {
              random_next_int_nonpow(&rand, 5);
          }
      }
      return next_double(&rand);
  }