cycles_cubin_cc.cpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. /*
  2. * Copyright 2017 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  #include <errno.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sstream>
  #include <string>
  #include <vector>
  #include <OpenImageIO/argparse.h>
  #include <OpenImageIO/filesystem.h>
  #include "cuew.h"
  #ifdef _MSC_VER
  # include <Windows.h>
  #endif
  26. using std::string;
  27. using std::vector;
  namespace std {

  /* Generic string conversion: formats `n` by streaming it into an
   * std::ostringstream and returning the accumulated text.
   *
   * NOTE(review): the C++ standard does not allow programs to declare new
   * templates inside namespace std. The shim is kept because the rest of this
   * file calls std::to_string(). */
  template<typename T> std::string to_string(const T &n)
  {
    std::ostringstream stream;
    stream << n;
    return stream.str();
  }

  }  // namespace std
  /* Command-line options plus the state handed from the compile step to the
   * link step. */
  class CompilationSettings {
   public:
    /* Defaults: no target architecture (0 is rejected by the argument parser),
     * 64-bit, quiet, precise math. */
    CompilationSettings()
        : target_arch(0),
          bits(64),
          verbose(false),
          fast_math(false)
    {
    }

    /* CUDA toolkit directory; "/bin" is appended to it to locate ptxas (and,
     * on MSVC, to extend the DLL search path). */
    std::string cuda_toolkit_dir;
    /* CUDA source file to compile (-i). */
    std::string input_file;
    /* Output file passed to ptxas via -o. */
    std::string output_file;
    /* Intermediate PTX file; filled in by the compile step, consumed by the
     * link step. */
    std::string ptx_file;
    /* Preprocessor definitions, each forwarded to NVRTC with a -D prefix. */
    std::vector<std::string> defines;
    /* Include directories, each forwarded to NVRTC with a -I prefix. */
    std::vector<std::string> includes;
    /* Number used for both -arch=compute_<N> and --gpu-name sm_<N>. */
    int target_arch;
    /* Value passed to ptxas as -m<bits>. */
    int bits;
    /* Echo the ptxas command line and pass --verbose to it. */
    bool verbose;
    /* Pass --use_fast_math to NVRTC. */
    bool fast_math;
  };
  52. static bool compile_cuda(CompilationSettings &settings)
  53. {
  54. const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"};
  55. const char *header_content[] = {"\n", "\n", "\n", "\n"};
  56. printf("Building %s\n", settings.input_file.c_str());
  57. string code;
  58. if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
  59. fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
  60. return false;
  61. }
  62. vector<string> options;
  63. for (size_t i = 0; i < settings.includes.size(); i++) {
  64. options.push_back("-I" + settings.includes[i]);
  65. }
  66. for (size_t i = 0; i < settings.defines.size(); i++) {
  67. options.push_back("-D" + settings.defines[i]);
  68. }
  69. options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
  70. options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
  71. options.push_back("--device-as-default-execution-space");
  72. if (settings.fast_math)
  73. options.push_back("--use_fast_math");
  74. nvrtcProgram prog;
  75. nvrtcResult result = nvrtcCreateProgram(&prog,
  76. code.c_str(), // buffer
  77. NULL, // name
  78. sizeof(headers) / sizeof(void *), // numHeaders
  79. header_content, // headers
  80. headers); // includeNames
  81. if (result != NVRTC_SUCCESS) {
  82. fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
  83. return false;
  84. }
  85. /* Tranfer options to a classic C array. */
  86. vector<const char *> opts(options.size());
  87. for (size_t i = 0; i < options.size(); i++) {
  88. opts[i] = options[i].c_str();
  89. }
  90. result = nvrtcCompileProgram(prog, options.size(), &opts[0]);
  91. if (result != NVRTC_SUCCESS) {
  92. fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);
  93. size_t log_size;
  94. nvrtcGetProgramLogSize(prog, &log_size);
  95. vector<char> log(log_size);
  96. nvrtcGetProgramLog(prog, &log[0]);
  97. fprintf(stderr, "%s\n", &log[0]);
  98. return false;
  99. }
  100. /* Retrieve the ptx code. */
  101. size_t ptx_size;
  102. result = nvrtcGetPTXSize(prog, &ptx_size);
  103. if (result != NVRTC_SUCCESS) {
  104. fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
  105. return false;
  106. }
  107. vector<char> ptx_code(ptx_size);
  108. result = nvrtcGetPTX(prog, &ptx_code[0]);
  109. if (result != NVRTC_SUCCESS) {
  110. fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
  111. return false;
  112. }
  113. /* Write a file in the temp folder with the ptx code. */
  114. settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
  115. OIIO::Filesystem::unique_path();
  116. FILE *f = fopen(settings.ptx_file.c_str(), "wb");
  117. fwrite(&ptx_code[0], 1, ptx_size, f);
  118. fclose(f);
  119. return true;
  120. }
  121. static bool link_ptxas(CompilationSettings &settings)
  122. {
  123. string cudapath = "";
  124. if (settings.cuda_toolkit_dir.size())
  125. cudapath = settings.cuda_toolkit_dir + "/bin/";
  126. string ptx = "\"" + cudapath + "ptxas\" " + settings.ptx_file + " -o " + settings.output_file +
  127. " --gpu-name sm_" + std::to_string(settings.target_arch) + " -m" +
  128. std::to_string(settings.bits);
  129. if (settings.verbose) {
  130. ptx += " --verbose";
  131. printf("%s\n", ptx.c_str());
  132. }
  133. int pxresult = system(ptx.c_str());
  134. if (pxresult) {
  135. fprintf(stderr, "Error: ptxas failed (%d)\n\n", pxresult);
  136. return false;
  137. }
  138. if (!OIIO::Filesystem::remove(settings.ptx_file)) {
  139. fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
  140. }
  141. return true;
  142. }
  143. static bool init(CompilationSettings &settings)
  144. {
  145. #ifdef _MSC_VER
  146. if (settings.cuda_toolkit_dir.size()) {
  147. SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
  148. }
  149. #else
  150. (void)settings;
  151. #endif
  152. int cuewresult = cuewInit(CUEW_INIT_NVRTC);
  153. if (cuewresult != CUEW_SUCCESS) {
  154. fprintf(stderr, "Error: cuew init fialed (0x%d)\n\n", cuewresult);
  155. return false;
  156. }
  157. if (cuewNvrtcVersion() < 80) {
  158. fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", cuewCompilerVersion());
  159. return false;
  160. }
  161. if (!nvrtcCreateProgram) {
  162. fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
  163. return false;
  164. }
  165. if (!nvrtcCompileProgram) {
  166. fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
  167. return false;
  168. }
  169. if (!nvrtcGetProgramLogSize) {
  170. fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
  171. return false;
  172. }
  173. if (!nvrtcGetProgramLog) {
  174. fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
  175. return false;
  176. }
  177. if (!nvrtcGetPTXSize) {
  178. fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
  179. return false;
  180. }
  181. if (!nvrtcGetPTX) {
  182. fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
  183. return false;
  184. }
  185. return true;
  186. }
  187. static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
  188. {
  189. OIIO::ArgParse ap;
  190. ap.options("Usage: cycles_cubin_cc [options]",
  191. "-target %d",
  192. &settings.target_arch,
  193. "target shader model",
  194. "-m %d",
  195. &settings.bits,
  196. "Cuda architecture bits",
  197. "-i %s",
  198. &settings.input_file,
  199. "Input source filename",
  200. "-o %s",
  201. &settings.output_file,
  202. "Output cubin filename",
  203. "-I %L",
  204. &settings.includes,
  205. "Add additional includepath",
  206. "-D %L",
  207. &settings.defines,
  208. "Add additional defines",
  209. "-v",
  210. &settings.verbose,
  211. "Use verbose logging",
  212. "--use_fast_math",
  213. &settings.fast_math,
  214. "Use fast math",
  215. "-cuda-toolkit-dir %s",
  216. &settings.cuda_toolkit_dir,
  217. "path to the cuda toolkit binary directory",
  218. NULL);
  219. if (ap.parse(argc, argv) < 0) {
  220. fprintf(stderr, "%s\n", ap.geterror().c_str());
  221. ap.usage();
  222. return false;
  223. }
  224. if (!settings.output_file.size()) {
  225. fprintf(stderr, "Error: Output file not set(-o), required\n\n");
  226. return false;
  227. }
  228. if (!settings.input_file.size()) {
  229. fprintf(stderr, "Error: Input file not set(-i, required\n\n");
  230. return false;
  231. }
  232. if (!settings.target_arch) {
  233. fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
  234. return false;
  235. }
  236. return true;
  237. }
  238. int main(int argc, const char **argv)
  239. {
  240. CompilationSettings settings;
  241. if (!parse_parameters(argc, argv, settings)) {
  242. fprintf(stderr, "Error: invalid parameters, exiting\n");
  243. exit(EXIT_FAILURE);
  244. }
  245. if (!init(settings)) {
  246. fprintf(stderr, "Error: initialization error, exiting\n");
  247. exit(EXIT_FAILURE);
  248. }
  249. if (!compile_cuda(settings)) {
  250. fprintf(stderr, "Error: compilation error, exiting\n");
  251. exit(EXIT_FAILURE);
  252. }
  253. if (!link_ptxas(settings)) {
  254. exit(EXIT_FAILURE);
  255. }
  256. return 0;
  257. }