ethash_cl_miner.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
  1. /*
  2. This file is part of c-ethash.
  3. c-ethash is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 3 of the License, or
  6. (at your option) any later version.
  7. c-ethash is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
  13. */
  14. /** @file ethash_cl_miner.cpp
  15. * @author Tim Hughes <tim@twistedfury.com>
  16. * @date 2015
  17. */
  18. #define _CRT_SECURE_NO_WARNINGS
  #include <assert.h>
  #include <algorithm>
  #include <atomic>
  #include <chrono>
  #include <cmath>
  #include <cstdint>
  #include <cstdio>
  #include <cstdlib>
  #include <cstring>
  #include <fstream>
  #include <functional>
  #include <iostream>
  #include <limits>
  #include <queue>
  #include <random>
  #include <sstream>
  #include <string>
  #include <vector>
  #include <libethash/util.h>
  #include <libethash/ethash.h>
  #include <libethash/internal.h>
  #include "ethash_cl_miner.h"
  #include "ethash_cl_miner_kernel.h"
  #define ETHASH_BYTES 32

  // When the OpenCL headers do not announce version 1.2, provide stand-ins for the
  // two symbols this file uses that would otherwise be missing.
  #if !defined(CL_VERSION_1_2) || !CL_VERSION_1_2
  #    define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE
  #    define CL_MEM_HOST_READ_ONLY 0
  #endif

  // Remove any min/max macros so the min<...>() / max<...>() templates used below
  // are not rewritten by the preprocessor.
  #undef max
  #undef min
  44. using namespace std;
  45. unsigned const ethash_cl_miner::c_defaultLocalWorkSize = 64;
  46. unsigned const ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier = 4096; // * CL_DEFAULT_LOCAL_WORK_SIZE
  47. unsigned const ethash_cl_miner::c_defaultMSPerBatch = 0;
  // TODO: If at any point we can use libdevcore in here then we should switch to using a LogChannel
  #ifdef _WIN32
  extern "C" __declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString);
  static std::atomic_flag s_logSpin = ATOMIC_FLAG_INIT;
  // Windows: the message is formatted first, then written to the debugger output and
  // to stderr while the s_logSpin flag is held.
  #define ETHCL_LOG(_contents) \
      do \
      { \
          std::stringstream ethclLogStream; \
          ethclLogStream << _contents; \
          std::string const ethclLogText = ethclLogStream.str(); \
          while (s_logSpin.test_and_set(std::memory_order_acquire)) \
          { \
          } \
          OutputDebugStringA(ethclLogText.c_str()); \
          cerr << ethclLogText << endl << flush; \
          s_logSpin.clear(std::memory_order_release); \
      } while (false)
  #else
  // Everywhere else: the message goes straight to stdout behind an "[OPENCL]:" tag.
  #define ETHCL_LOG(_contents) cout << "[OPENCL]:" << _contents << endl
  #endif

  // Device classes requested from a platform unless CPU devices have been allowed
  // (see getDevices()).
  #define ETHCL_QUERIED_DEVICE_TYPES (CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_GPU)
  /// Prepends the preprocessor line "#define <_id> <_value>u\n" to _source.
  ///
  /// The line is assembled in a std::string, so the length of _id is not limited
  /// by a fixed-size scratch buffer.
  ///
  /// @param _source Program text that receives the new first line.
  /// @param _id     Macro name as a NUL-terminated string.
  /// @param _value  Value, emitted in decimal followed by the 'u' suffix.
  static void addDefinition(std::string& _source, char const* _id, unsigned _value)
  {
      std::string definition("#define ");
      definition += _id;
      definition += ' ';
      definition += std::to_string(_value);
      definition += "u\n";
      _source.insert(0, definition);
  }
  73. ethash_cl_miner::search_hook::~search_hook() {}
  74. ethash_cl_miner::ethash_cl_miner()
  75. : m_openclOnePointOne()
  76. {
  77. }
  78. ethash_cl_miner::~ethash_cl_miner()
  79. {
  80. finish();
  81. }
  82. std::vector<cl::Platform> ethash_cl_miner::getPlatforms()
  83. {
  84. vector<cl::Platform> platforms;
  85. try
  86. {
  87. cl::Platform::get(&platforms);
  88. }
  89. catch(cl::Error const& err)
  90. {
  91. #if defined(CL_PLATFORM_NOT_FOUND_KHR)
  92. if (err.err() == CL_PLATFORM_NOT_FOUND_KHR)
  93. ETHCL_LOG("No OpenCL platforms found");
  94. else
  95. #endif
  96. throw err;
  97. }
  98. return platforms;
  99. }
  100. string ethash_cl_miner::platform_info(unsigned _platformId, unsigned _deviceId)
  101. {
  102. vector<cl::Platform> platforms = getPlatforms();
  103. if (platforms.empty())
  104. return {};
  105. // get GPU device of the selected platform
  106. unsigned platform_num = min<unsigned>(_platformId, platforms.size() - 1);
  107. vector<cl::Device> devices = getDevices(platforms, _platformId);
  108. if (devices.empty())
  109. {
  110. ETHCL_LOG("No OpenCL devices found.");
  111. return {};
  112. }
  113. // use selected default device
  114. unsigned device_num = min<unsigned>(_deviceId, devices.size() - 1);
  115. cl::Device& device = devices[device_num];
  116. string device_version = device.getInfo<CL_DEVICE_VERSION>();
  117. return "{ \"platform\": \"" + platforms[platform_num].getInfo<CL_PLATFORM_NAME>() + "\", \"device\": \"" + device.getInfo<CL_DEVICE_NAME>() + "\", \"version\": \"" + device_version + "\" }";
  118. }
  119. std::vector<cl::Device> ethash_cl_miner::getDevices(std::vector<cl::Platform> const& _platforms, unsigned _platformId)
  120. {
  121. vector<cl::Device> devices;
  122. unsigned platform_num = min<unsigned>(_platformId, _platforms.size() - 1);
  123. try
  124. {
  125. _platforms[platform_num].getDevices(
  126. s_allowCPU ? CL_DEVICE_TYPE_ALL : ETHCL_QUERIED_DEVICE_TYPES,
  127. &devices
  128. );
  129. }
  130. catch (cl::Error const& err)
  131. {
  132. // if simply no devices found return empty vector
  133. if (err.err() != CL_DEVICE_NOT_FOUND)
  134. throw err;
  135. }
  136. return devices;
  137. }
  138. unsigned ethash_cl_miner::getNumPlatforms()
  139. {
  140. vector<cl::Platform> platforms = getPlatforms();
  141. if (platforms.empty())
  142. return 0;
  143. return platforms.size();
  144. }
  145. unsigned ethash_cl_miner::getNumDevices(unsigned _platformId)
  146. {
  147. vector<cl::Platform> platforms = getPlatforms();
  148. if (platforms.empty())
  149. return 0;
  150. vector<cl::Device> devices = getDevices(platforms, _platformId);
  151. if (devices.empty())
  152. {
  153. ETHCL_LOG("No OpenCL devices found.");
  154. return 0;
  155. }
  156. return devices.size();
  157. }
  158. bool ethash_cl_miner::configureGPU(
  159. unsigned _platformId,
  160. unsigned _localWorkSize,
  161. unsigned _globalWorkSize,
  162. unsigned _msPerBatch,
  163. bool _allowCPU,
  164. unsigned _extraGPUMemory,
  165. uint64_t _currentBlock
  166. )
  167. {
  168. s_workgroupSize = _localWorkSize;
  169. s_initialGlobalWorkSize = _globalWorkSize;
  170. s_msPerBatch = _msPerBatch;
  171. s_allowCPU = _allowCPU;
  172. s_extraRequiredGPUMem = _extraGPUMemory;
  173. // by default let's only consider the DAG of the first epoch
  174. uint64_t dagSize = ethash_get_datasize(_currentBlock);
  175. uint64_t requiredSize = dagSize + _extraGPUMemory;
  176. return searchForAllDevices(_platformId, [&requiredSize](cl::Device const& _device) -> bool
  177. {
  178. cl_ulong result;
  179. _device.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &result);
  180. if (result >= requiredSize)
  181. {
  182. ETHCL_LOG(
  183. "Found suitable OpenCL device [" << _device.getInfo<CL_DEVICE_NAME>()
  184. << "] with " << result << " bytes of GPU memory"
  185. );
  186. return true;
  187. }
  188. ETHCL_LOG(
  189. "OpenCL device " << _device.getInfo<CL_DEVICE_NAME>()
  190. << " has insufficient GPU memory." << result <<
  191. " bytes of memory found < " << requiredSize << " bytes of memory required"
  192. );
  193. return false;
  194. }
  195. );
  196. }
  197. bool ethash_cl_miner::s_allowCPU = false;
  198. unsigned ethash_cl_miner::s_extraRequiredGPUMem;
  199. unsigned ethash_cl_miner::s_msPerBatch = ethash_cl_miner::c_defaultMSPerBatch;
  200. unsigned ethash_cl_miner::s_workgroupSize = ethash_cl_miner::c_defaultLocalWorkSize;
  201. unsigned ethash_cl_miner::s_initialGlobalWorkSize = ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier * ethash_cl_miner::c_defaultLocalWorkSize;
  202. bool ethash_cl_miner::searchForAllDevices(function<bool(cl::Device const&)> _callback)
  203. {
  204. vector<cl::Platform> platforms = getPlatforms();
  205. if (platforms.empty())
  206. return false;
  207. for (unsigned i = 0; i < platforms.size(); ++i)
  208. if (searchForAllDevices(i, _callback))
  209. return true;
  210. return false;
  211. }
  212. bool ethash_cl_miner::searchForAllDevices(unsigned _platformId, function<bool(cl::Device const&)> _callback)
  213. {
  214. vector<cl::Platform> platforms = getPlatforms();
  215. if (platforms.empty())
  216. return false;
  217. if (_platformId >= platforms.size())
  218. return false;
  219. vector<cl::Device> devices = getDevices(platforms, _platformId);
  220. for (cl::Device const& device: devices)
  221. if (_callback(device))
  222. return true;
  223. return false;
  224. }
  225. void ethash_cl_miner::doForAllDevices(function<void(cl::Device const&)> _callback)
  226. {
  227. vector<cl::Platform> platforms = getPlatforms();
  228. if (platforms.empty())
  229. return;
  230. for (unsigned i = 0; i < platforms.size(); ++i)
  231. doForAllDevices(i, _callback);
  232. }
  233. void ethash_cl_miner::doForAllDevices(unsigned _platformId, function<void(cl::Device const&)> _callback)
  234. {
  235. vector<cl::Platform> platforms = getPlatforms();
  236. if (platforms.empty())
  237. return;
  238. if (_platformId >= platforms.size())
  239. return;
  240. vector<cl::Device> devices = getDevices(platforms, _platformId);
  241. for (cl::Device const& device: devices)
  242. _callback(device);
  243. }
  244. void ethash_cl_miner::listDevices()
  245. {
  246. string outString ="\nListing OpenCL devices.\nFORMAT: [deviceID] deviceName\n";
  247. unsigned int i = 0;
  248. doForAllDevices([&outString, &i](cl::Device const _device)
  249. {
  250. outString += "[" + to_string(i) + "] " + _device.getInfo<CL_DEVICE_NAME>() + "\n";
  251. outString += "\tCL_DEVICE_TYPE: ";
  252. switch (_device.getInfo<CL_DEVICE_TYPE>())
  253. {
  254. case CL_DEVICE_TYPE_CPU:
  255. outString += "CPU\n";
  256. break;
  257. case CL_DEVICE_TYPE_GPU:
  258. outString += "GPU\n";
  259. break;
  260. case CL_DEVICE_TYPE_ACCELERATOR:
  261. outString += "ACCELERATOR\n";
  262. break;
  263. default:
  264. outString += "DEFAULT\n";
  265. break;
  266. }
  267. outString += "\tCL_DEVICE_GLOBAL_MEM_SIZE: " + to_string(_device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()) + "\n";
  268. outString += "\tCL_DEVICE_MAX_MEM_ALLOC_SIZE: " + to_string(_device.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>()) + "\n";
  269. outString += "\tCL_DEVICE_MAX_WORK_GROUP_SIZE: " + to_string(_device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>()) + "\n";
  270. ++i;
  271. }
  272. );
  273. ETHCL_LOG(outString);
  274. }
  275. void ethash_cl_miner::finish()
  276. {
  277. if (m_queue())
  278. m_queue.finish();
  279. }
  280. bool ethash_cl_miner::init(
  281. uint8_t const* _dag,
  282. uint64_t _dagSize,
  283. unsigned _platformId,
  284. unsigned _deviceId
  285. )
  286. {
  287. // get all platforms
  288. try
  289. {
  290. vector<cl::Platform> platforms = getPlatforms();
  291. if (platforms.empty())
  292. return false;
  293. // use selected platform
  294. _platformId = min<unsigned>(_platformId, platforms.size() - 1);
  295. ETHCL_LOG("Using platform: " << platforms[_platformId].getInfo<CL_PLATFORM_NAME>().c_str());
  296. // get GPU device of the default platform
  297. vector<cl::Device> devices = getDevices(platforms, _platformId);
  298. if (devices.empty())
  299. {
  300. ETHCL_LOG("No OpenCL devices found.");
  301. return false;
  302. }
  303. // use selected device
  304. cl::Device& device = devices[min<unsigned>(_deviceId, devices.size() - 1)];
  305. string device_version = device.getInfo<CL_DEVICE_VERSION>();
  306. ETHCL_LOG("Using device: " << device.getInfo<CL_DEVICE_NAME>().c_str() << "(" << device_version.c_str() << ")");
  307. if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0)
  308. {
  309. ETHCL_LOG("OpenCL 1.0 is not supported.");
  310. return false;
  311. }
  312. if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0)
  313. m_openclOnePointOne = true;
  314. // create context
  315. m_context = cl::Context(vector<cl::Device>(&device, &device + 1));
  316. m_queue = cl::CommandQueue(m_context, device);
  317. // make sure that global work size is evenly divisible by the local workgroup size
  318. m_globalWorkSize = s_initialGlobalWorkSize;
  319. if (m_globalWorkSize % s_workgroupSize != 0)
  320. m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize;
  321. // remember the device's address bits
  322. m_deviceBits = device.getInfo<CL_DEVICE_ADDRESS_BITS>();
  323. // make sure first step of global work size adjustment is large enough
  324. m_stepWorkSizeAdjust = pow(2, m_deviceBits / 2 + 1);
  325. // patch source code
  326. // note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
  327. // into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
  328. string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
  329. addDefinition(code, "GROUP_SIZE", s_workgroupSize);
  330. addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
  331. addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
  332. addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
  333. //debugf("%s", code.c_str());
  334. // create miner OpenCL program
  335. cl::Program::Sources sources;
  336. sources.push_back({ code.c_str(), code.size() });
  337. cl::Program program(m_context, sources);
  338. try
  339. {
  340. program.build({ device });
  341. ETHCL_LOG("Printing program log");
  342. ETHCL_LOG(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
  343. }
  344. catch (cl::Error const&)
  345. {
  346. ETHCL_LOG(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
  347. return false;
  348. }
  349. // create buffer for dag
  350. try
  351. {
  352. m_dagChunksCount = 1;
  353. ETHCL_LOG("Creating one big buffer for the DAG");
  354. m_dagChunks.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize));
  355. ETHCL_LOG("Loading single big chunk kernels");
  356. m_hashKernel = cl::Kernel(program, "ethash_hash");
  357. m_searchKernel = cl::Kernel(program, "ethash_search");
  358. ETHCL_LOG("Mapping one big chunk.");
  359. m_queue.enqueueWriteBuffer(m_dagChunks[0], CL_TRUE, 0, _dagSize, _dag);
  360. }
  361. catch (cl::Error const& err)
  362. {
  363. ETHCL_LOG("Allocating/mapping single buffer failed with: " << err.what() << "(" << err.err() << "). GPU can't allocate the DAG in a single chunk. Bailing.");
  364. return false;
  365. #if 0 // Disabling chunking for release since it seems not to work. Never manages to mine a block. TODO: Fix when time is found.
  366. int errCode = err.err();
  367. if (errCode != CL_INVALID_BUFFER_SIZE || errCode != CL_MEM_OBJECT_ALLOCATION_FAILURE)
  368. ETHCL_LOG("Allocating/mapping single buffer failed with: " << err.what() << "(" << errCode << ")");
  369. cl_ulong result;
  370. // if we fail midway on the try above make sure we start clean
  371. m_dagChunks.clear();
  372. device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &result);
  373. ETHCL_LOG(
  374. "Failed to allocate 1 big chunk. Max allocateable memory is "
  375. << result << ". Trying to allocate 4 chunks."
  376. );
  377. // The OpenCL kernel has a hard coded number of 4 chunks at the moment
  378. m_dagChunksCount = 4;
  379. for (unsigned i = 0; i < m_dagChunksCount; i++)
  380. {
  381. // TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
  382. ETHCL_LOG("Creating buffer for chunk " << i);
  383. m_dagChunks.push_back(cl::Buffer(
  384. m_context,
  385. CL_MEM_READ_ONLY,
  386. (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7
  387. ));
  388. }
  389. ETHCL_LOG("Loading chunk kernels");
  390. m_hashKernel = cl::Kernel(program, "ethash_hash_chunks");
  391. m_searchKernel = cl::Kernel(program, "ethash_search_chunks");
  392. // TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
  393. void* dag_ptr[4];
  394. for (unsigned i = 0; i < m_dagChunksCount; i++)
  395. {
  396. ETHCL_LOG("Mapping chunk " << i);
  397. dag_ptr[i] = m_queue.enqueueMapBuffer(m_dagChunks[i], true, m_openclOnePointOne ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
  398. }
  399. for (unsigned i = 0; i < m_dagChunksCount; i++)
  400. {
  401. memcpy(dag_ptr[i], (char *)_dag + i*((_dagSize >> 9) << 7), (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
  402. m_queue.enqueueUnmapMemObject(m_dagChunks[i], dag_ptr[i]);
  403. }
  404. #endif
  405. }
  406. // create buffer for header
  407. ETHCL_LOG("Creating buffer for header.");
  408. m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
  409. // create mining buffers
  410. for (unsigned i = 0; i != c_bufferCount; ++i)
  411. {
  412. ETHCL_LOG("Creating mining buffer " << i);
  413. m_hashBuffer[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_openclOnePointOne ? CL_MEM_HOST_READ_ONLY : 0), 32 * c_hashBatchSize);
  414. m_searchBuffer[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_maxSearchResults + 1) * sizeof(uint32_t));
  415. }
  416. }
  417. catch (cl::Error const& err)
  418. {
  419. ETHCL_LOG(err.what() << "(" << err.err() << ")");
  420. return false;
  421. }
  422. return true;
  423. }
  424. void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
  425. {
  426. try
  427. {
  428. struct pending_batch
  429. {
  430. uint64_t start_nonce;
  431. unsigned buf;
  432. };
  433. queue<pending_batch> pending;
  434. // this can't be a static because in MacOSX OpenCL implementation a segfault occurs when a static is passed to OpenCL functions
  435. uint32_t const c_zero = 0;
  436. // update header constant buffer
  437. m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
  438. for (unsigned i = 0; i != c_bufferCount; ++i)
  439. m_queue.enqueueWriteBuffer(m_searchBuffer[i], false, 0, 4, &c_zero);
  440. #if CL_VERSION_1_2 && 0
  441. cl::Event pre_return_event;
  442. if (!m_opencl_1_1)
  443. m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
  444. else
  445. #endif
  446. m_queue.finish();
  447. unsigned argPos = 2;
  448. m_searchKernel.setArg(1, m_header);
  449. for (unsigned i = 0; i < m_dagChunksCount; ++i, ++argPos)
  450. m_searchKernel.setArg(argPos, m_dagChunks[i]);
  451. // pass these to stop the compiler unrolling the loops
  452. m_searchKernel.setArg(argPos + 1, target);
  453. m_searchKernel.setArg(argPos + 2, ~0u);
  454. unsigned buf = 0;
  455. random_device engine;
  456. uint64_t start_nonce = uniform_int_distribution<uint64_t>()(engine);
  457. for (;; start_nonce += m_globalWorkSize)
  458. {
  459. auto t = chrono::high_resolution_clock::now();
  460. // supply output buffer to kernel
  461. m_searchKernel.setArg(0, m_searchBuffer[buf]);
  462. if (m_dagChunksCount == 1)
  463. m_searchKernel.setArg(3, start_nonce);
  464. else
  465. m_searchKernel.setArg(6, start_nonce);
  466. // execute it!
  467. m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize);
  468. pending.push({ start_nonce, buf });
  469. buf = (buf + 1) % c_bufferCount;
  470. // read results
  471. if (pending.size() == c_bufferCount)
  472. {
  473. pending_batch const& batch = pending.front();
  474. // could use pinned host pointer instead
  475. uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_searchBuffer[batch.buf], true, CL_MAP_READ, 0, (1 + c_maxSearchResults) * sizeof(uint32_t));
  476. unsigned num_found = min<unsigned>(results[0], c_maxSearchResults);
  477. uint64_t nonces[c_maxSearchResults];
  478. for (unsigned i = 0; i != num_found; ++i)
  479. nonces[i] = batch.start_nonce + results[i + 1];
  480. m_queue.enqueueUnmapMemObject(m_searchBuffer[batch.buf], results);
  481. bool exit = num_found && hook.found(nonces, num_found);
  482. exit |= hook.searched(batch.start_nonce, m_globalWorkSize); // always report searched before exit
  483. if (exit)
  484. break;
  485. // reset search buffer if we're still going
  486. if (num_found)
  487. m_queue.enqueueWriteBuffer(m_searchBuffer[batch.buf], true, 0, 4, &c_zero);
  488. pending.pop();
  489. }
  490. // adjust global work size depending on last search time
  491. if (s_msPerBatch)
  492. {
  493. // Global work size must be:
  494. // - less than or equal to 2 ^ DEVICE_BITS - 1
  495. // - divisible by lobal work size (workgroup size)
  496. auto d = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - t);
  497. if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished
  498. {
  499. if (d > chrono::milliseconds(s_msPerBatch * 10 / 9))
  500. {
  501. // Divide the step by 2 when adjustment way change
  502. if (m_wayWorkSizeAdjust > -1)
  503. m_stepWorkSizeAdjust = max<unsigned>(1, m_stepWorkSizeAdjust / 2);
  504. m_wayWorkSizeAdjust = -1;
  505. // cerr << "m_stepWorkSizeAdjust: " << m_stepWorkSizeAdjust << ", m_wayWorkSizeAdjust: " << m_wayWorkSizeAdjust << endl;
  506. // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << s_msPerBatch << " ms." << endl;
  507. m_globalWorkSize = max<unsigned>(128, m_globalWorkSize - m_stepWorkSizeAdjust);
  508. // cerr << "New global work size" << m_globalWorkSize << endl;
  509. }
  510. else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10))
  511. {
  512. // Divide the step by 2 when adjustment way change
  513. if (m_wayWorkSizeAdjust < 1)
  514. m_stepWorkSizeAdjust = max<unsigned>(1, m_stepWorkSizeAdjust / 2);
  515. m_wayWorkSizeAdjust = 1;
  516. // cerr << "m_stepWorkSizeAdjust: " << m_stepWorkSizeAdjust << ", m_wayWorkSizeAdjust: " << m_wayWorkSizeAdjust << endl;
  517. // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << s_msPerBatch << " ms." << endl;
  518. m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize + m_stepWorkSizeAdjust);
  519. // Global work size should never be less than the workgroup size
  520. m_globalWorkSize = max<unsigned>(s_workgroupSize, m_globalWorkSize);
  521. // cerr << "New global work size" << m_globalWorkSize << endl;
  522. }
  523. }
  524. }
  525. }
  526. // not safe to return until this is ready
  527. #if CL_VERSION_1_2 && 0
  528. if (!m_opencl_1_1)
  529. pre_return_event.wait();
  530. #endif
  531. }
  532. catch (cl::Error const& err)
  533. {
  534. ETHCL_LOG(err.what() << "(" << err.err() << ")");
  535. }
  536. }