// CLMiner.cpp
  1. /* Copyright (C) 1883 Thomas Edison - All Rights Reserved
  2. * You may use, distribute and modify this code under the
  3. * terms of the GPLv3 license, which unfortunately won't be
  4. * written for another century.
  5. *
  6. * You should have received a copy of the LICENSE file with
  7. * this file.
  8. */
#include <string>

#include <boost/dll.hpp>
#include <ethash/ethash.hpp>
#include <libeth/Farm.h>
#include "CLMiner.h"
#include "ethash.h"
  14. using namespace dev;
  15. using namespace eth;
  16. namespace dev {
  17. namespace eth {
  18. const size_t c_maxSearchResults = 4;
  19. /**
  20. * Returns the name of a numerical cl_int error
  21. * Takes constants from CL/cl.h and returns them in a readable format
  22. */
  23. static const char* strClError(cl_int err) {
  24. switch (err) {
  25. case CL_SUCCESS:
  26. return "CL_SUCCESS";
  27. case CL_DEVICE_NOT_FOUND:
  28. return "CL_DEVICE_NOT_FOUND";
  29. case CL_DEVICE_NOT_AVAILABLE:
  30. return "CL_DEVICE_NOT_AVAILABLE";
  31. case CL_COMPILER_NOT_AVAILABLE:
  32. return "CL_COMPILER_NOT_AVAILABLE";
  33. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  34. return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  35. case CL_OUT_OF_RESOURCES:
  36. return "CL_OUT_OF_RESOURCES";
  37. case CL_OUT_OF_HOST_MEMORY:
  38. return "CL_OUT_OF_HOST_MEMORY";
  39. case CL_PROFILING_INFO_NOT_AVAILABLE:
  40. return "CL_PROFILING_INFO_NOT_AVAILABLE";
  41. case CL_MEM_COPY_OVERLAP:
  42. return "CL_MEM_COPY_OVERLAP";
  43. case CL_IMAGE_FORMAT_MISMATCH:
  44. return "CL_IMAGE_FORMAT_MISMATCH";
  45. case CL_IMAGE_FORMAT_NOT_SUPPORTED:
  46. return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
  47. case CL_BUILD_PROGRAM_FAILURE:
  48. return "CL_BUILD_PROGRAM_FAILURE";
  49. case CL_MAP_FAILURE:
  50. return "CL_MAP_FAILURE";
  51. case CL_MISALIGNED_SUB_BUFFER_OFFSET:
  52. return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
  53. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  54. return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  55. #ifdef CL_VERSION_1_2
  56. case CL_COMPILE_PROGRAM_FAILURE:
  57. return "CL_COMPILE_PROGRAM_FAILURE";
  58. case CL_LINKER_NOT_AVAILABLE:
  59. return "CL_LINKER_NOT_AVAILABLE";
  60. case CL_LINK_PROGRAM_FAILURE:
  61. return "CL_LINK_PROGRAM_FAILURE";
  62. case CL_DEVICE_PARTITION_FAILED:
  63. return "CL_DEVICE_PARTITION_FAILED";
  64. case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
  65. return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
  66. #endif // CL_VERSION_1_2
  67. case CL_INVALID_VALUE:
  68. return "CL_INVALID_VALUE";
  69. case CL_INVALID_DEVICE_TYPE:
  70. return "CL_INVALID_DEVICE_TYPE";
  71. case CL_INVALID_PLATFORM:
  72. return "CL_INVALID_PLATFORM";
  73. case CL_INVALID_DEVICE:
  74. return "CL_INVALID_DEVICE";
  75. case CL_INVALID_CONTEXT:
  76. return "CL_INVALID_CONTEXT";
  77. case CL_INVALID_QUEUE_PROPERTIES:
  78. return "CL_INVALID_QUEUE_PROPERTIES";
  79. case CL_INVALID_COMMAND_QUEUE:
  80. return "CL_INVALID_COMMAND_QUEUE";
  81. case CL_INVALID_HOST_PTR:
  82. return "CL_INVALID_HOST_PTR";
  83. case CL_INVALID_MEM_OBJECT:
  84. return "CL_INVALID_MEM_OBJECT";
  85. case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
  86. return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
  87. case CL_INVALID_IMAGE_SIZE:
  88. return "CL_INVALID_IMAGE_SIZE";
  89. case CL_INVALID_SAMPLER:
  90. return "CL_INVALID_SAMPLER";
  91. case CL_INVALID_BINARY:
  92. return "CL_INVALID_BINARY";
  93. case CL_INVALID_BUILD_OPTIONS:
  94. return "CL_INVALID_BUILD_OPTIONS";
  95. case CL_INVALID_PROGRAM:
  96. return "CL_INVALID_PROGRAM";
  97. case CL_INVALID_PROGRAM_EXECUTABLE:
  98. return "CL_INVALID_PROGRAM_EXECUTABLE";
  99. case CL_INVALID_KERNEL_NAME:
  100. return "CL_INVALID_KERNEL_NAME";
  101. case CL_INVALID_KERNEL_DEFINITION:
  102. return "CL_INVALID_KERNEL_DEFINITION";
  103. case CL_INVALID_KERNEL:
  104. return "CL_INVALID_KERNEL";
  105. case CL_INVALID_ARG_INDEX:
  106. return "CL_INVALID_ARG_INDEX";
  107. case CL_INVALID_ARG_VALUE:
  108. return "CL_INVALID_ARG_VALUE";
  109. case CL_INVALID_ARG_SIZE:
  110. return "CL_INVALID_ARG_SIZE";
  111. case CL_INVALID_KERNEL_ARGS:
  112. return "CL_INVALID_KERNEL_ARGS";
  113. case CL_INVALID_WORK_DIMENSION:
  114. return "CL_INVALID_WORK_DIMENSION";
  115. case CL_INVALID_WORK_GROUP_SIZE:
  116. return "CL_INVALID_WORK_GROUP_SIZE";
  117. case CL_INVALID_WORK_ITEM_SIZE:
  118. return "CL_INVALID_WORK_ITEM_SIZE";
  119. case CL_INVALID_GLOBAL_OFFSET:
  120. return "CL_INVALID_GLOBAL_OFFSET";
  121. case CL_INVALID_EVENT_WAIT_LIST:
  122. return "CL_INVALID_EVENT_WAIT_LIST";
  123. case CL_INVALID_EVENT:
  124. return "CL_INVALID_EVENT";
  125. case CL_INVALID_OPERATION:
  126. return "CL_INVALID_OPERATION";
  127. case CL_INVALID_GL_OBJECT:
  128. return "CL_INVALID_GL_OBJECT";
  129. case CL_INVALID_BUFFER_SIZE:
  130. return "CL_INVALID_BUFFER_SIZE";
  131. case CL_INVALID_MIP_LEVEL:
  132. return "CL_INVALID_MIP_LEVEL";
  133. case CL_INVALID_GLOBAL_WORK_SIZE:
  134. return "CL_INVALID_GLOBAL_WORK_SIZE";
  135. case CL_INVALID_PROPERTY:
  136. return "CL_INVALID_PROPERTY";
  137. #ifdef CL_VERSION_1_2
  138. case CL_INVALID_IMAGE_DESCRIPTOR:
  139. return "CL_INVALID_IMAGE_DESCRIPTOR";
  140. case CL_INVALID_COMPILER_OPTIONS:
  141. return "CL_INVALID_COMPILER_OPTIONS";
  142. case CL_INVALID_LINKER_OPTIONS:
  143. return "CL_INVALID_LINKER_OPTIONS";
  144. case CL_INVALID_DEVICE_PARTITION_COUNT:
  145. return "CL_INVALID_DEVICE_PARTITION_COUNT";
  146. #endif // CL_VERSION_1_2
  147. #ifdef CL_VERSION_2_0
  148. case CL_INVALID_PIPE_SIZE:
  149. return "CL_INVALID_PIPE_SIZE";
  150. case CL_INVALID_DEVICE_QUEUE:
  151. return "CL_INVALID_DEVICE_QUEUE";
  152. #endif // CL_VERSION_2_0
  153. #ifdef CL_VERSION_2_2
  154. case CL_INVALID_SPEC_ID:
  155. return "CL_INVALID_SPEC_ID";
  156. case CL_MAX_SIZE_RESTRICTION_EXCEEDED:
  157. return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
  158. #endif // CL_VERSION_2_2
  159. }
  160. return "Unknown CL error encountered";
  161. }
  162. /**
  163. * Prints cl::Errors in a uniform way
  164. * @param msg text prepending the error message
  165. * @param clerr cl:Error object
  166. *
  167. * Prints errors in the format:
  168. * msg: what(), string err() (numeric err())
  169. */
  170. static std::string ethCLErrorHelper(const char* msg, cl::Error const& clerr) {
  171. std::ostringstream osstream;
  172. osstream << msg << ": " << clerr.what() << ": " << strClError(clerr.err()) << " (" << clerr.err() << ")";
  173. return osstream.str();
  174. }
  175. namespace {
  176. void addDefinition(std::string& _source, char const* _id, unsigned _value) {
  177. char buf[256];
  178. sprintf(buf, "#define %s %uu\n", _id, _value);
  179. _source.insert(_source.begin(), buf, buf + strlen(buf));
  180. }
  181. std::vector<cl::Platform> getPlatforms() {
  182. std::vector<cl::Platform> platforms;
  183. try {
  184. cl::Platform::get(&platforms);
  185. } catch (cl::Error const& err) {
  186. #if defined(CL_PLATFORM_NOT_FOUND_KHR)
  187. if (err.err() == CL_PLATFORM_NOT_FOUND_KHR)
  188. std::cerr << "No OpenCL platforms found\n";
  189. else
  190. #endif
  191. std::cerr << "OpenCL error : " << err.what();
  192. }
  193. return platforms;
  194. }
  195. std::vector<cl::Device> getDevices(std::vector<cl::Platform> const& _platforms, unsigned _platformId) {
  196. std::vector<cl::Device> devices;
  197. size_t platform_num = std::min<size_t>(_platformId, _platforms.size() - 1);
  198. try {
  199. _platforms[platform_num].getDevices(CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR, &devices);
  200. } catch (cl::Error const& err) {
  201. // if simply no devices found return empty vector
  202. if (err.err() != CL_DEVICE_NOT_FOUND)
  203. throw err;
  204. }
  205. return devices;
  206. }
  207. } // namespace
  208. } // namespace eth
  209. } // namespace dev
  210. CLMiner::CLMiner(unsigned _index, DeviceDescriptor& _device) : Miner("cl-", _index) {
  211. m_deviceDescriptor = _device;
  212. m_block_multiple = 200000;
  213. }
  214. CLMiner::~CLMiner() {
  215. stopWorking();
  216. kick_miner();
  217. }
  218. // NOTE: The following struct must match the one defined in
  219. // ethash.cl
  220. struct SearchResults {
  221. uint32_t count;
  222. uint32_t hashCount;
  223. uint32_t abort;
  224. uint32_t gid[c_maxSearchResults];
  225. };
  226. const static uint32_t zerox3[3] = {0, 0, 0};
  227. void CLMiner::workLoop() {
  228. // Memory for zero-ing buffers. Cannot be static or const because crashes on macOS.
  229. uint64_t startNonce = 0;
  230. // The work package currently processed by GPU.
  231. WorkPackage current;
  232. current.header = h256();
  233. if (!initDevice())
  234. return;
  235. try {
  236. while (!shouldStop()) {
  237. // Read results.
  238. SearchResults results;
  239. if (m_queue) {
  240. // synchronize and read the results.
  241. m_queue->enqueueReadBuffer(*m_searchBuffer, CL_TRUE, 0, sizeof(results), (void*)&results);
  242. // clear the solution count, hash count, and abort flag
  243. m_queue->enqueueWriteBuffer(*m_searchBuffer, CL_FALSE, 0, sizeof(zerox3), zerox3);
  244. } else
  245. results.count = 0;
  246. // Wait for work or 3 seconds (whichever the first)
  247. WorkPackage w = work();
  248. if (!w) {
  249. m_hung_miner.store(false);
  250. std::unique_lock<std::mutex> l(miner_work_mutex);
  251. m_new_work_signal.wait_for(l, std::chrono::seconds(3));
  252. continue;
  253. }
  254. if (current.header != w.header) {
  255. if (current.epoch != w.epoch) {
  256. setEpoch(w);
  257. if (g_seqDAG)
  258. g_seqDAGMutex.lock();
  259. bool b = initEpoch();
  260. if (g_seqDAG)
  261. g_seqDAGMutex.unlock();
  262. if (!b)
  263. break;
  264. freeCache();
  265. w = work();
  266. }
  267. startNonce = w.startNonce;
  268. // Update header constant buffer.
  269. m_queue->enqueueWriteBuffer(*m_header, CL_FALSE, 0, w.header.size, w.header.data());
  270. // zero the result count
  271. m_queue->enqueueWriteBuffer(*m_searchBuffer, CL_FALSE, offsetof(SearchResults, count), sizeof(zerox3),
  272. zerox3);
  273. m_searchKernel.setArg(6, (uint64_t)(u64)((u256)w.boundary >> 192));
  274. #ifdef DEV_BUILD
  275. if (g_logOptions & LOG_SWITCH)
  276. cnote << "Switch time: "
  277. << chrono::duration_cast<chrono::microseconds>(chrono::steady_clock::now() -
  278. m_workSwitchStart)
  279. .count()
  280. << " us.";
  281. #endif
  282. }
  283. float hr = RetrieveHashRate();
  284. if (hr > 1e7)
  285. m_block_multiple = uint32_t(hr * CL_TARGET_BATCH_TIME / m_deviceDescriptor.clGroupSize);
  286. uint32_t batch_blocks = m_deviceDescriptor.clGroupSize * m_block_multiple;
  287. // Run the kernel.
  288. m_searchKernel.setArg(5, startNonce);
  289. m_hung_miner.store(false);
  290. m_queue->enqueueNDRangeKernel(m_searchKernel, cl::NullRange, batch_blocks, m_deviceDescriptor.clGroupSize);
  291. // Report results while the kernel is running.
  292. if (results.count > c_maxSearchResults)
  293. results.count = c_maxSearchResults;
  294. for (uint32_t i = 0; i < results.count; i++) {
  295. uint64_t nonce = current.startNonce + results.gid[i];
  296. Farm::f().submitProof(Solution{nonce, h256(), current, std::chrono::steady_clock::now(), m_index});
  297. ReportSolution(current.header, nonce);
  298. }
  299. current = w; // kernel now processing newest work
  300. current.startNonce = startNonce;
  301. // Increase start nonce for following kernel execution.
  302. startNonce += batch_blocks;
  303. // Report hash count
  304. updateHashRate(m_deviceDescriptor.clGroupSize, results.hashCount);
  305. }
  306. if (m_queue)
  307. m_queue->finish();
  308. free_buffers();
  309. m_abortMutex.unlock();
  310. } catch (cl::Error const& _e) {
  311. std::string _what = ethCLErrorHelper("OpenCL Error", _e);
  312. free_buffers();
  313. m_abortMutex.unlock();
  314. throw std::runtime_error(_what);
  315. }
  316. }
  317. void CLMiner::kick_miner() {
  318. m_abortMutex.lock();
  319. // Memory for abort Cannot be static because crashes on macOS.
  320. if (m_abortqueue) {
  321. static uint32_t one = 1;
  322. m_abortqueue->enqueueWriteBuffer(*m_searchBuffer, CL_FALSE, offsetof(SearchResults, abort), sizeof(one), &one);
  323. }
  324. m_abortMutex.unlock();
  325. m_new_work_signal.notify_one();
  326. }
  327. void CLMiner::enumDevices(minerMap& _DevicesCollection) {
  328. // Load available platforms
  329. std::vector<cl::Platform> platforms = getPlatforms();
  330. if (platforms.empty())
  331. return;
  332. unsigned int dIdx = 0;
  333. for (unsigned int pIdx = 0; pIdx < platforms.size(); pIdx++) {
  334. std::string platformName = platforms.at(pIdx).getInfo<CL_PLATFORM_NAME>();
  335. ClPlatformTypeEnum platformType = ClPlatformTypeEnum::Unknown;
  336. if (platformName == "AMD Accelerated Parallel Processing")
  337. platformType = ClPlatformTypeEnum::Amd;
  338. else if (platformName == "Clover" || platformName == "Intel Gen OCL Driver")
  339. platformType = ClPlatformTypeEnum::Clover;
  340. else if (platformName == "NVIDIA CUDA")
  341. platformType = ClPlatformTypeEnum::Nvidia;
  342. else if (platformName.find("Intel") != std::string::npos)
  343. platformType = ClPlatformTypeEnum::Intel;
  344. else
  345. continue;
  346. std::string platformVersion = platforms.at(pIdx).getInfo<CL_PLATFORM_VERSION>();
  347. unsigned int platformVersionMajor = stoi(platformVersion.substr(7, 1));
  348. unsigned int platformVersionMinor = stoi(platformVersion.substr(9, 1));
  349. dIdx = 0;
  350. std::vector<cl::Device> devices = getDevices(platforms, pIdx);
  351. for (auto const& device : devices) {
  352. DeviceTypeEnum clDeviceType = DeviceTypeEnum::Unknown;
  353. cl_device_type detectedType = device.getInfo<CL_DEVICE_TYPE>();
  354. if (detectedType == CL_DEVICE_TYPE_GPU)
  355. clDeviceType = DeviceTypeEnum::Gpu;
  356. else if (detectedType == CL_DEVICE_TYPE_CPU)
  357. clDeviceType = DeviceTypeEnum::Cpu;
  358. else if (detectedType == CL_DEVICE_TYPE_ACCELERATOR)
  359. clDeviceType = DeviceTypeEnum::Accelerator;
  360. else
  361. continue;
  362. std::string uniqueId;
  363. DeviceDescriptor deviceDescriptor;
  364. if (clDeviceType == DeviceTypeEnum::Gpu && platformType == ClPlatformTypeEnum::Nvidia) {
  365. cl_int bus_id, slot_id;
  366. if (clGetDeviceInfo(device.get(), 0x4008 /*CL_DEVICE_PCI_BUS_ID_NV*/, sizeof(bus_id), &bus_id, NULL) ==
  367. CL_SUCCESS &&
  368. clGetDeviceInfo(device.get(), 0x4009 /*CL_DEVICE_PCI_SLOT_ID_NV*/, sizeof(slot_id), &slot_id,
  369. NULL) == CL_SUCCESS) {
  370. std::ostringstream s;
  371. s << "0000:" << std::setfill('0') << std::setw(2) << std::hex << bus_id << ":" << std::setw(2)
  372. << (unsigned int)(slot_id >> 3) << "." << (unsigned int)(slot_id & 0x7);
  373. uniqueId = s.str();
  374. }
  375. } else if (clDeviceType == DeviceTypeEnum::Gpu &&
  376. (platformType == ClPlatformTypeEnum::Amd || platformType == ClPlatformTypeEnum::Clover)) {
  377. cl_char t[24];
  378. if (clGetDeviceInfo(device.get(), 0x4037 /*CL_DEVICE_TOPOLOGY_AMD*/, sizeof(t), &t, NULL) ==
  379. CL_SUCCESS) {
  380. // NOTE" Till we can upgrade to opencl 2.x, there's no way to determine
  381. // the bus domain id. So we plug in a 0!
  382. std::ostringstream s;
  383. s << "0000:" << std::setfill('0') << std::setw(2) << std::hex << (unsigned int)(t[21]) << ":" << std::setw(2)
  384. << (unsigned int)(t[22]) << "." << (unsigned int)(t[23]);
  385. uniqueId = s.str();
  386. }
  387. } else if (clDeviceType == DeviceTypeEnum::Gpu && platformType == ClPlatformTypeEnum::Intel) {
  388. std::ostringstream s;
  389. s << "Intel GPU " << pIdx << "." << dIdx;
  390. uniqueId = s.str();
  391. } else if (clDeviceType == DeviceTypeEnum::Cpu) {
  392. std::ostringstream s;
  393. s << "CPU " << std::setfill('0') << std::setw(2) << std::hex << (pIdx + dIdx);
  394. uniqueId = s.str();
  395. } else {
  396. // We're not prepared (yet) to handle other platforms or types
  397. ++dIdx;
  398. continue;
  399. }
  400. if (_DevicesCollection.find(uniqueId) != _DevicesCollection.end())
  401. deviceDescriptor = _DevicesCollection[uniqueId];
  402. else
  403. deviceDescriptor = DeviceDescriptor();
  404. // Fill the blanks by OpenCL means
  405. deviceDescriptor.type = clDeviceType;
  406. deviceDescriptor.uniqueId = uniqueId;
  407. deviceDescriptor.clDetected = true;
  408. deviceDescriptor.clPlatformId = pIdx;
  409. deviceDescriptor.clPlatformName = platformName;
  410. deviceDescriptor.clPlatformType = platformType;
  411. deviceDescriptor.clPlatformVersion = platformVersion;
  412. deviceDescriptor.clPlatformVersionMajor = platformVersionMajor;
  413. deviceDescriptor.clPlatformVersionMinor = platformVersionMinor;
  414. deviceDescriptor.clDeviceOrdinal = dIdx;
  415. deviceDescriptor.clDeviceVersion = device.getInfo<CL_DEVICE_VERSION>();
  416. deviceDescriptor.clDeviceVersionMajor = stoi(deviceDescriptor.clDeviceVersion.substr(7, 1));
  417. deviceDescriptor.clDeviceVersionMinor = stoi(deviceDescriptor.clDeviceVersion.substr(9, 1));
  418. deviceDescriptor.totalMemory = device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
  419. deviceDescriptor.clGroupSize = 64;
  420. // Is it an NVIDIA card ?
  421. if (platformType == ClPlatformTypeEnum::Nvidia) {
  422. size_t siz;
  423. clGetDeviceInfo(device.get(), CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
  424. sizeof(deviceDescriptor.clNvComputeMajor), &deviceDescriptor.clNvComputeMajor, &siz);
  425. clGetDeviceInfo(device.get(), CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
  426. sizeof(deviceDescriptor.clNvComputeMinor), &deviceDescriptor.clNvComputeMinor, &siz);
  427. deviceDescriptor.clNvCompute =
  428. std::to_string(deviceDescriptor.clNvComputeMajor) + "." + std::to_string(deviceDescriptor.clNvComputeMinor);
  429. deviceDescriptor.boardName = device.getInfo<CL_DEVICE_NAME>();
  430. }
  431. // AMD GPU
  432. else {
  433. deviceDescriptor.clArch = device.getInfo<CL_DEVICE_NAME>();
  434. size_t s1;
  435. char s[256];
  436. #define CL_DEVICE_BOARD_NAME_AMD 0x4038
  437. clGetDeviceInfo(device.get(), CL_DEVICE_BOARD_NAME_AMD, sizeof(s), s, &s1);
  438. deviceDescriptor.boardName = s;
  439. }
  440. // Upsert Devices Collection
  441. _DevicesCollection[uniqueId] = deviceDescriptor;
  442. ++dIdx;
  443. }
  444. }
  445. }
  446. bool CLMiner::initDevice() {
  447. m_initialized = false;
  448. // LookUp device
  449. // Load available platforms
  450. std::vector<cl::Platform> platforms = getPlatforms();
  451. if (platforms.empty())
  452. return false;
  453. std::vector<cl::Device> devices = getDevices(platforms, m_deviceDescriptor.clPlatformId);
  454. if (devices.empty())
  455. return false;
  456. m_device = devices.at(m_deviceDescriptor.clDeviceOrdinal);
  457. // Set Hardware Monitor Info
  458. if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Nvidia) {
  459. m_hwmoninfo.deviceType = HwMonitorInfoType::NVIDIA;
  460. m_hwmoninfo.devicePciId = m_deviceDescriptor.uniqueId;
  461. m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
  462. } else if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Amd) {
  463. m_hwmoninfo.deviceType = HwMonitorInfoType::AMD;
  464. m_hwmoninfo.devicePciId = m_deviceDescriptor.uniqueId;
  465. m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
  466. } else if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Clover) {
  467. m_hwmoninfo.deviceType = HwMonitorInfoType::UNKNOWN;
  468. m_hwmoninfo.devicePciId = m_deviceDescriptor.uniqueId;
  469. m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
  470. } else if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Intel) {
  471. m_hwmoninfo.deviceType = HwMonitorInfoType::UNKNOWN;
  472. m_hwmoninfo.devicePciId = m_deviceDescriptor.uniqueId;
  473. m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
  474. } else {
  475. // Don't know what to do with this
  476. cwarn << "Unrecognized Platform";
  477. return false;
  478. }
  479. if (m_deviceDescriptor.clPlatformVersionMajor == 1 &&
  480. (m_deviceDescriptor.clPlatformVersionMinor == 0 || m_deviceDescriptor.clPlatformVersionMinor == 1)) {
  481. if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Clover) {
  482. cwarn << "OpenCL " << m_deviceDescriptor.clPlatformVersion
  483. << " not supported, but platform Clover might work nevertheless. USE AT OWN RISK!";
  484. } else {
  485. cwarn << "OpenCL " << m_deviceDescriptor.clPlatformVersion
  486. << " not supported. Minimum required version is 1.2";
  487. throw std::runtime_error("OpenCL 1.2 required");
  488. }
  489. }
  490. std::ostringstream s;
  491. s << "Using Pci " << m_deviceDescriptor.uniqueId << ": " << m_deviceDescriptor.boardName;
  492. if (!m_deviceDescriptor.clNvCompute.empty())
  493. s << " (Compute " + m_deviceDescriptor.clNvCompute + ")";
  494. else
  495. s << " (" << m_deviceDescriptor.clDeviceVersion;
  496. s << ") Memory : " << dev::getFormattedMemory((double)m_deviceDescriptor.totalMemory);
  497. cextr << s.str();
  498. return true;
  499. }
  500. bool CLMiner::initEpoch() {
  501. m_initialized = false;
  502. auto startInit = std::chrono::steady_clock::now();
  503. size_t RequiredMemory = m_epochContext.dagSize + m_epochContext.lightSize + sizeof(SearchResults) + 32;
  504. ReportGPUMemoryRequired(m_epochContext.lightSize, m_epochContext.dagSize, sizeof(SearchResults) + 32);
  505. // Check whether the current device has sufficient memory every time we recreate the dag
  506. if (m_deviceDescriptor.totalMemory < RequiredMemory) {
  507. ReportGPUNoMemoryAndPause("total", RequiredMemory, m_deviceDescriptor.totalMemory);
  508. return false; // This will prevent to exit the thread and
  509. // Eventually resume mining when changing coin or epoch (NiceHash)
  510. }
  511. try {
  512. char options[256] = {0};
  513. int computeCapability = 0;
  514. // Nvidia
  515. if (!m_deviceDescriptor.clNvCompute.empty()) {
  516. computeCapability = m_deviceDescriptor.clNvComputeMajor * 10 + m_deviceDescriptor.clNvComputeMinor;
  517. int maxregs = computeCapability >= 35 ? 72 : 63;
  518. sprintf(options, "-cl-nv-maxrregcount=%d", maxregs);
  519. }
  520. free_buffers();
  521. // create context
  522. m_context = new cl::Context(std::vector<cl::Device>(&m_device, &m_device + 1));
  523. // create new queue with default in order execution property
  524. m_queue = new cl::CommandQueue(*m_context, m_device);
  525. m_abortqueue = new cl::CommandQueue(*m_context, m_device);
  526. m_dagItems = m_epochContext.dagNumItems;
  527. bool dagOk = true;
  528. // create buffer for dag
  529. try {
  530. // Create mining buffers
  531. m_searchBuffer = new cl::Buffer(*m_context, CL_MEM_WRITE_ONLY, sizeof(SearchResults));
  532. m_header = new cl::Buffer(*m_context, CL_MEM_READ_ONLY, 32);
  533. m_light = new cl::Buffer(*m_context, CL_MEM_READ_ONLY, m_epochContext.lightSize);
  534. if (!m_deviceDescriptor.clSplit) {
  535. try {
  536. m_dag[0] = new cl::Buffer(*m_context, CL_MEM_READ_ONLY, m_epochContext.dagSize);
  537. m_dag[1] = nullptr;
  538. } catch (cl::Error const&) {
  539. dagOk = false;
  540. }
  541. } else
  542. dagOk = false;
  543. if (!dagOk) {
  544. unsigned delta = (m_epochContext.dagNumItems & 1) ? 64 : 0;
  545. m_dag[0] = new cl::Buffer(*m_context, CL_MEM_READ_ONLY, m_epochContext.dagSize / 2 + delta);
  546. m_dag[1] = new cl::Buffer(*m_context, CL_MEM_READ_ONLY, m_epochContext.dagSize / 2 - delta);
  547. }
  548. } catch (cl::Error const& err) {
  549. if ((err.err() == CL_OUT_OF_RESOURCES) || (err.err() == CL_OUT_OF_HOST_MEMORY)) {
  550. cwarn << ethCLErrorHelper("Creating DAG buffer failed", err);
  551. pause(MinerPauseEnum::PauseDueToInitEpochError);
  552. free_buffers();
  553. return false;
  554. } else
  555. throw;
  556. }
  557. // Release the pause flag if any
  558. resume(MinerPauseEnum::PauseDueToInsufficientMemory);
  559. resume(MinerPauseEnum::PauseDueToInitEpochError);
  560. // patch source code
  561. // note: The kernels here are simply compiled version of the respective .cl kernels
  562. // into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
  563. // See libcl/CMakeLists.txt: add_custom_command()
  564. // TODO: Just use C++ raw string literal.
  565. std::string code{ethash_cl, ethash_cl + sizeof(ethash_cl)};
  566. addDefinition(code, "WORKSIZE", m_deviceDescriptor.clGroupSize);
  567. addDefinition(code, "ACCESSES", 64);
  568. addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
  569. addDefinition(code, "PLATFORM", static_cast<unsigned>(m_deviceDescriptor.clPlatformType));
  570. addDefinition(code, "COMPUTE", computeCapability);
  571. if (!dagOk)
  572. addDefinition(code, "SPLIT_DAG", 1);
  573. // create miner OpenCL program
  574. cl::Program::Sources sources{{code.data(), code.size()}};
  575. cl::Program program(*m_context, sources);
  576. try {
  577. program.build({m_device}, options);
  578. } catch (cl::BuildError const& buildErr) {
  579. ccrit << "OpenCL kernel build log:\n" << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_device);
  580. ccrit << "OpenCL kernel build error (" << buildErr.err() << "):\n" << buildErr.what();
  581. pause(MinerPauseEnum::PauseDueToInitEpochError);
  582. free_buffers();
  583. return false;
  584. }
  585. try {
  586. m_searchKernel = cl::Kernel(program, "search");
  587. m_dagKernel = cl::Kernel(program, "GenerateDAG");
  588. m_queue->enqueueWriteBuffer(*m_light, CL_TRUE, 0, m_epochContext.lightSize, m_epochContext.lightCache);
  589. } catch (cl::Error const& err) {
  590. cwarn << ethCLErrorHelper("Creating opencl failed", err);
  591. pause(MinerPauseEnum::PauseDueToInitEpochError);
  592. free_buffers();
  593. return false;
  594. }
  595. // create buffer for header
  596. m_searchKernel.setArg(1, m_header[0]);
  597. m_searchKernel.setArg(2, *m_dag[0]);
  598. m_searchKernel.setArg(3, *m_dag[1]);
  599. m_searchKernel.setArg(4, m_dagItems);
  600. m_queue->enqueueWriteBuffer(*m_searchBuffer, CL_FALSE, 0, sizeof(zerox3), zerox3);
  601. m_dagKernel.setArg(1, *m_light);
  602. m_dagKernel.setArg(2, *m_dag[0]);
  603. m_dagKernel.setArg(3, *m_dag[1]);
  604. m_dagKernel.setArg(4, (uint32_t)(m_epochContext.lightSize / 64));
  605. const uint32_t workItems = m_dagItems * 2; // GPU computes partial 512-bit DAG items.
  606. uint32_t start, chunk = m_deviceDescriptor.clGroupSize * m_block_multiple;
  607. if (chunk > workItems)
  608. chunk = workItems;
  609. for (start = 0; start <= workItems - chunk; start += chunk) {
  610. m_dagKernel.setArg(0, start);
  611. m_queue->enqueueNDRangeKernel(m_dagKernel, cl::NullRange, chunk, m_deviceDescriptor.clGroupSize);
  612. m_queue->finish();
  613. }
  614. if (start < workItems) {
  615. uint32_t groupsLeft = workItems - start;
  616. groupsLeft = (groupsLeft + m_deviceDescriptor.clGroupSize - 1) / m_deviceDescriptor.clGroupSize;
  617. m_dagKernel.setArg(0, start);
  618. m_queue->enqueueNDRangeKernel(m_dagKernel, cl::NullRange, groupsLeft * m_deviceDescriptor.clGroupSize,
  619. m_deviceDescriptor.clGroupSize);
  620. m_queue->finish();
  621. }
  622. auto dagTime = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - startInit);
  623. m_searchKernel.setArg(0, *m_searchBuffer); // Supply output buffer to kernel.
  624. m_searchKernel.setArg(1, *m_header); // Supply header buffer to kernel.
  625. m_searchKernel.setArg(2, *m_dag[0]); // Supply DAG buffer to kernel.
  626. m_searchKernel.setArg(3, *m_dag[1]); // Supply DAG buffer to kernel.
  627. m_searchKernel.setArg(4, m_dagItems);
  628. ReportDAGDone(m_epochContext.dagSize, uint32_t(dagTime.count()), dagOk);
  629. } catch (cl::Error const& err) {
  630. ccrit << ethCLErrorHelper("OpenCL init failed", err);
  631. pause(MinerPauseEnum::PauseDueToInitEpochError);
  632. free_buffers();
  633. return false;
  634. }
  635. m_initialized = true;
  636. m_abortMutex.unlock();
  637. return true;
  638. }