main-nv.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <unistd.h>
  4. #include <sys/time.h>
  5. #include <string.h>
  6. #include <time.h>
  7. #define __STDC_FORMAT_MACROS
  8. #include <inttypes.h>
  9. #include <stdbool.h>
  10. #define CL_TARGET_OPENCL_VERSION 200
  11. #ifdef __APPLE__
  12. #include <OpenCL/opencl.h>
  13. #include <OpenCL/cl_platform.h>
  14. #else
  15. #include <CL/cl.h>
  16. #include <CL/cl_platform.h>
  17. #endif
  18. #include "clutil.h"
  19. #ifdef _WIN64
  20. #include "boinc_win.h"
  21. #else
  22. #ifdef _WIN32
  23. #include "boinc_win.h"
  24. #endif
  25. #endif
  26. #include "boinc_api.h"
  27. #include "boinc_opencl.h"
  28. #define KERNEL_BUFFER_SIZE (0x4000)
  29. #define MAX_SEED_BUFFER_SIZE (0x10000)
  30. int main(int argc, char *argv[]) {
  31. BOINC_OPTIONS options;
  32. boinc_options_defaults(options);
  33. options.normal_thread_priority = true;
  34. boinc_init_options(&options);
  35. boinc_set_min_checkpoint_period(30);
  36. //boinc_init();
  37. int gpuIndex = 0; // Won't do anything for now
  38. cl_ulong start = 0;
  39. cl_ulong end = 0;
  40. cl_ulong chunkSeed = 0;
  41. int chunkSeedBottom4Bits = 0;
  42. int chunkSeedBit5 = 0;
  43. int neighbor1 = 0;
  44. int neighbor2 = 0;
  45. int neighbor3 = 0;
  46. int diagonalIndex = 0;
  47. int cactusHeight = 0;
  48. int retval = 0;
  49. int floor_level = 63;
  50. char *strend;
  51. size_t seedbuffer_size;
  52. struct checkpoint_vars {
  53. cl_ulong offset;
  54. cl_ulong start;
  55. cl_ulong end;
  56. int block;
  57. double elapsed_chkpoint;
  58. int total_seed_count;
  59. };
  60. if (argc % 2 != 1) {
  61. printf("Failed to parse arguments\n");
  62. exit(EXIT_FAILURE);
  63. }
  64. for (int i = 1; i < argc; i += 2) {
  65. const char *param = argv[i];
  66. if (strcmp(param, "-d") == 0 || strcmp(param, "--device") == 0) {
  67. gpuIndex = atoi(argv[i + 1]);
  68. } else if (strcmp(param, "-s") == 0 || strcmp(param, "--start") == 0) {
  69. sscanf(argv[i + 1], "%"
  70. SCNd64, &start);
  71. } else if (strcmp(param, "-e") == 0 || strcmp(param, "--end") == 0) {
  72. sscanf(argv[i + 1], "%"
  73. SCNd64, &end);
  74. } else if (strcmp(param, "-cs") == 0 || strcmp(param, "--chunkseed") == 0) {
  75. sscanf(argv[i + 1], "%"
  76. SCNd64, &chunkSeed);
  77. chunkSeedBottom4Bits = (int) (chunkSeed & 15U);
  78. chunkSeedBit5 = (int) ((chunkSeed >> 4U) & 1U);
  79. } else if (strcmp(param, "-n1") == 0 || strcmp(param, "--neighbor1") == 0) {
  80. neighbor1 = atoi(argv[i + 1]);
  81. } else if (strcmp(param, "-n2") == 0 || strcmp(param, "--neighbor2") == 0) {
  82. neighbor2 = atoi(argv[i + 1]);
  83. } else if (strcmp(param, "-n3") == 0 || strcmp(param, "--neighbor3") == 0) {
  84. neighbor3 = atoi(argv[i + 1]);
  85. } else if (strcmp(param, "-di") == 0 || strcmp(param, "--diagonalindex") == 0) {
  86. diagonalIndex = atoi(argv[i + 1]);
  87. } else if (strcmp(param, "-ch") == 0 || strcmp(param, "--cactusheight") == 0) {
  88. cactusHeight = atoi(argv[i + 1]);
  89. } else if (strcmp(param, "-f") == 0 || strcmp(param, "--floorlevel") == 0) {
  90. floor_level = atoi(argv[i + 1]);
  91. } else {
  92. printf("Unknown parameter: %s\n", param);
  93. }
  94. }
  95. fprintf(stderr, "Received work unit: %"
  96. SCNd64
  97. "\n", chunkSeed);
  98. fprintf(stderr, "Data: n1: %d, n2: %d, n3: %d, di: %d, ch: %d, f: %d, s: %" SCNd64 ", e: %" SCNd64 "\n",
  99. neighbor1,
  100. neighbor2,
  101. neighbor3,
  102. diagonalIndex,
  103. cactusHeight,
  104. floor_level,
  105. start,
  106. end);
  107. int arguments[10] = {
  108. 0,
  109. 0,
  110. 0,
  111. neighbor1,
  112. neighbor2,
  113. neighbor3,
  114. diagonalIndex,
  115. cactusHeight,
  116. chunkSeedBottom4Bits,
  117. chunkSeedBit5
  118. };
  119. fflush(stderr);
  120. cl_platform_id platform_id = NULL;
  121. cl_device_id device_ids;
  122. cl_int err;
  123. cl_uint num_devices_standalone;
  124. num_devices_standalone = 1;
  125. cl_uint num_entries;
  126. num_entries = 1;
  127. const char* kernel_name = "kaktwoos.cl";
  128. // Third arg is 1 for Nvidia
  129. retval = boinc_get_opencl_ids(argc, argv, 1, &device_ids, &platform_id);
  130. if (retval) {
  131. //Probably standalone mode
  132. fprintf(stderr, "Error: boinc_get_opencl_ids() failed with error %d\n", retval);
  133. retval = clGetPlatformIDs(num_entries, &platform_id, &num_devices_standalone);
  134. if (retval) {
  135. fprintf(stderr, "Error: clGetPlatformIDs() failed with error %d\n", retval);
  136. return 1;
  137. }
  138. retval = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_entries, &device_ids, &num_devices_standalone);
  139. if (retval) {
  140. fprintf(stderr, "Error: clGetDeviceIDs() failed with error %d\n", retval);
  141. return 1;
  142. }
  143. }
  144. char buffer[1024]; // Buffer to store rec'd GPU chip
  145. char *rtx="RTX"; // CASE SENSITIVE
  146. char *gtx16="GTX 16";
  147. clGetDeviceInfo(device_ids, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
  148. fprintf(stderr,"DEVICE_NAME = %s\n", buffer);
  149. char* tmpBuffer;
  150. if ( 'N' == buffer[0] ){
  151. tmpBuffer = buffer+15;
  152. } else {
  153. tmpBuffer = buffer+8; // buffer+8 for GeForce name, +15 for Nvidia Geforce
  154. }
  155. tmpBuffer[3] = '\0';
  156. if (strcmp(rtx, tmpBuffer) == 0 ) {
  157. kernel_name = "kaktwoos-nv.cl";
  158. fprintf(stderr,"RTX, Optimizations applied!\n");
  159. }
  160. tmpBuffer[3]=' ';
  161. tmpBuffer[6] = '\0';
  162. if (strcmp(gtx16, tmpBuffer) == 0 ) {
  163. kernel_name = "kaktwoos-nv.cl";
  164. fprintf(stderr,"GTX 16XX, Optimizations applied!\n");
  165. }
  166. FILE *kernel_file = boinc_fopen(kernel_name, "r");
  167. if (!kernel_file) {
  168. fprintf(stderr,"Failed to open kernel\n");
  169. exit(1);
  170. }
  171. char *kernel_src = (char *)malloc(KERNEL_BUFFER_SIZE);
  172. size_t kernel_length = fread(kernel_src, 1, KERNEL_BUFFER_SIZE, kernel_file);
  173. fclose(kernel_file);
  174. cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform_id, 0};
  175. cl_context context = clCreateContext(cps, 1, &device_ids, NULL, NULL, &err);
  176. check(err, "clCreateContext ");
  177. cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device_ids, 0, &err);
  178. check(err, "clCreateCommandQueueWithProperties ");
  179. seedbuffer_size = 0x40 * sizeof(cl_ulong);
  180. // 16 Kb of memory for seeds
  181. cl_mem seeds = clCreateBuffer(context, CL_MEM_READ_WRITE, seedbuffer_size, NULL, &err);
  182. check(err, "clCreateBuffer (seeds) ");
  183. cl_mem data = clCreateBuffer(context, CL_MEM_READ_ONLY, 10 * sizeof(int), NULL, &err);
  184. check(err, "clCreateBuffer (data) ");
  185. cl_program program = clCreateProgramWithSource(
  186. context,
  187. 1,
  188. (const char **) &kernel_src,
  189. &kernel_length,
  190. &err);
  191. check(err, "clCreateProgramWithSource ");
  192. char *opt = (char *) malloc(20 * sizeof(char));
  193. sprintf(opt, "-DFLOOR_LEVEL=%d", floor_level);
  194. err = clBuildProgram(program, 1, &device_ids, opt, NULL, NULL);
  195. if (err != CL_SUCCESS) {
  196. size_t len;
  197. clGetProgramBuildInfo(program, device_ids, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
  198. char *info = (char *) malloc(len);
  199. clGetProgramBuildInfo(program, device_ids, CL_PROGRAM_BUILD_LOG, len, info, NULL);
  200. printf("%s\n", info);
  201. free(info);
  202. }
  203. cl_kernel kernel = clCreateKernel(program, "crack", &err);
  204. check(err, "clCreateKernel ");
  205. check(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &data), "clSetKernelArg (0) ");
  206. check(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &seeds), "clSetKernelArg (1) ");
  207. size_t work_unit_size = 1048576;
  208. size_t block_size = 256;
  209. arguments[1] = work_unit_size;
  210. cl_ulong offset = start;
  211. int block = 0;
  212. int total_seed_count = 0;
  213. int chkpoint_ready = 0;
  214. double seedrange = (end - start);
  215. cl_ulong found_seeds[MAX_SEED_BUFFER_SIZE];
  216. clock_t start_time, end_time, elapsed_chkpoint;
  217. start_time = clock();
  218. FILE *checkpoint_data = boinc_fopen("kaktpoint.txt", "rb");
  219. if (!checkpoint_data) {
  220. fprintf(stderr, "No checkpoint to load\n");
  221. } else {
  222. boinc_begin_critical_section();
  223. struct checkpoint_vars data_store;
  224. fread(&data_store, sizeof(data_store), 1, checkpoint_data);
  225. offset = data_store.offset;
  226. start = data_store.start;
  227. end = data_store.end;
  228. block = data_store.block;
  229. elapsed_chkpoint = data_store.elapsed_chkpoint;
  230. total_seed_count = data_store.total_seed_count;
  231. fread(found_seeds, sizeof(cl_ulong), total_seed_count, checkpoint_data);
  232. fprintf(stderr, "Checkpoint loaded, task time %d s \n", elapsed_chkpoint);
  233. fclose(checkpoint_data);
  234. boinc_end_critical_section();
  235. }
  236. while (offset < end) {
  237. arguments[0] = block + start / work_unit_size;
  238. check(clEnqueueWriteBuffer(command_queue, data, CL_TRUE, 0, 10 * sizeof(int), arguments, 0, NULL, NULL),
  239. "clEnqueueWriteBuffer ");
  240. check(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &work_unit_size, &block_size, 0, NULL, NULL),
  241. "clEnqueueNDRangeKernel ");
  242. int *data_out = (int *) malloc(sizeof(int) * 10);
  243. check(clEnqueueReadBuffer(command_queue, data, CL_TRUE, 0, sizeof(int) * 10, data_out, 0, NULL, NULL),
  244. "clEnqueueReadBuffer (data) ");
  245. int seed_count = data_out[2];
  246. seedbuffer_size = sizeof(cl_ulong) + sizeof(cl_ulong) * seed_count;
  247. cl_ulong *result = (cl_ulong *) malloc(sizeof(cl_ulong) + sizeof(cl_ulong) * seed_count);
  248. check(clEnqueueReadBuffer(command_queue, seeds, CL_TRUE, 0, seedbuffer_size, result, 0, NULL, NULL),
  249. "clEnqueueReadBuffer (seeds) ");
  250. end_time = clock();
  251. for (int i = 0; i < seed_count; i++) {
  252. fprintf(stderr, " Found seed: %"
  253. SCNd64
  254. ", %llu, height: %d\n",
  255. result[i],
  256. result[i] & ((1ULL << 48ULL) - 1ULL),
  257. (int) (result[i] >> 58ULL));
  258. fprintf(stderr, "%"
  259. SCNd64
  260. "\n", (cl_ulong) result[i]);
  261. found_seeds[total_seed_count++] = result[i];
  262. }
  263. double elapsed = (double) (end_time - start_time) / CLOCKS_PER_SEC;
  264. offset += work_unit_size;
  265. block++;
  266. chkpoint_ready++;
  267. if (chkpoint_ready >= 200 || boinc_time_to_checkpoint()) { // 200 for 0.2bil seeds before checkpoint
  268. boinc_begin_critical_section(); // Boinc should not interrupt this
  269. boinc_delete_file("kaktpoint.txt");
  270. FILE *checkpoint_data = boinc_fopen("kaktpoint.txt", "wb");
  271. struct checkpoint_vars data_store;
  272. data_store.offset = offset;
  273. data_store.start = start;
  274. data_store.end = end;
  275. data_store.block = block;
  276. data_store.elapsed_chkpoint = (elapsed_chkpoint + (double) (end_time - start_time) / CLOCKS_PER_SEC);
  277. data_store.total_seed_count = total_seed_count;
  278. fwrite(&data_store, sizeof(data_store), 1, checkpoint_data);
  279. fwrite(found_seeds, sizeof(cl_ulong), total_seed_count, checkpoint_data);
  280. chkpoint_ready = 0;
  281. fclose(checkpoint_data);
  282. double fraction_done = ((offset - start) / (seedrange));
  283. boinc_fraction_done(fraction_done);
  284. boinc_end_critical_section();
  285. boinc_checkpoint_completed(); // Checkpointing completed
  286. }
  287. free(result);
  288. free(data_out);
  289. } // End of seed feed and processing loop
  290. boinc_begin_critical_section();
  291. double elapsed = (double) (end_time - start_time) / CLOCKS_PER_SEC;
  292. fprintf(stderr, "Speed: %.2fm/s \n", (offset - start) / (elapsed_chkpoint + elapsed) / 1000000);
  293. fprintf(stderr, "Done\n");
  294. fprintf(stderr, "Processed %"
  295. SCNd64
  296. " seeds in %f seconds\n",
  297. end - start,
  298. elapsed_chkpoint + ((double) (end_time - start_time) / CLOCKS_PER_SEC));
  299. fprintf(stderr, "Found seeds: \n");
  300. for (int i = 0; i < total_seed_count; i++) {
  301. fprintf(stderr, " %"
  302. SCNd64
  303. "\n", found_seeds[i]);
  304. }
  305. boinc_delete_file("kaktpoint.txt");
  306. check(clFlush(command_queue), "clFlush ");
  307. check(clFinish(command_queue), "clFinish ");
  308. check(clReleaseKernel(kernel), "clReleaseKernel ");
  309. check(clReleaseProgram(program), "clReleaseProgram ");
  310. check(clReleaseMemObject(seeds), "clReleaseMemObject (seeds) ");
  311. check(clReleaseMemObject(data), "clReleaseMemObject (data) ");
  312. check(clReleaseCommandQueue(command_queue), "clReleaseCommandQueue ");
  313. check(clReleaseContext(context), "clReleaseContext ");
  314. fflush(stderr);
  315. boinc_end_critical_section();
  316. boinc_finish(0);
  317. }