unstable_cuda.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. # Copyright 2017 The Meson development team
  2. # Licensed under the Apache License, Version 2.0 (the "License");
  3. # you may not use this file except in compliance with the License.
  4. # You may obtain a copy of the License at
  5. # http://www.apache.org/licenses/LICENSE-2.0
  6. # Unless required by applicable law or agreed to in writing, software
  7. # distributed under the License is distributed on an "AS IS" BASIS,
  8. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. # See the License for the specific language governing permissions and
  10. # limitations under the License.
  11. import re
  12. from ..mesonlib import version_compare
  13. from ..interpreter import CompilerHolder
  14. from ..compilers import CudaCompiler
  15. from . import ExtensionModule, ModuleReturnValue
  16. from ..interpreterbase import (
  17. flatten, permittedKwargs, noKwargs,
  18. InvalidArguments, FeatureNew
  19. )
  20. class CudaModule(ExtensionModule):
  @FeatureNew('CUDA module', '0.50.0')
  def __init__(self, *args, **kwargs):
      """Create the module; every argument is forwarded to the base class."""
      super().__init__(*args, **kwargs)
  @noKwargs
  def min_driver_version(self, state, args, kwargs):
      """
      Look up the driver version listed for a CUDA Toolkit version.

      args must hold exactly one item: an NVCC compiler object or a
      version string. The returned ModuleReturnValue carries the driver
      version string for state.host_machine.system (the Linux column is
      used for any system without its own column), or 'unknown' when the
      toolkit version satisfies none of the rows.

      Raises InvalidArguments when the argument count is not one or the
      argument cannot be turned into a version.
      """
      bad_args = InvalidArguments(
          'min_driver_version must have exactly one positional argument: '
          'an NVCC compiler object, or its version string.')
      if len(args) != 1:
          raise bad_args

      toolkit_version = self._version_from_compiler(args[0])
      if toolkit_version == 'unknown':
          raise bad_args

      # Ordered newest first, so the first satisfied row is the answer.
      table = [
          {'cuda_version': '>=10.2.89', 'windows': '441.22', 'linux': '440.33'},
          {'cuda_version': '>=10.1.105', 'windows': '418.96', 'linux': '418.39'},
          {'cuda_version': '>=10.0.130', 'windows': '411.31', 'linux': '410.48'},
          {'cuda_version': '>=9.2.148', 'windows': '398.26', 'linux': '396.37'},
          {'cuda_version': '>=9.2.88', 'windows': '397.44', 'linux': '396.26'},
          {'cuda_version': '>=9.1.85', 'windows': '391.29', 'linux': '390.46'},
          {'cuda_version': '>=9.0.76', 'windows': '385.54', 'linux': '384.81'},
          {'cuda_version': '>=8.0.61', 'windows': '376.51', 'linux': '375.26'},
          {'cuda_version': '>=8.0.44', 'windows': '369.30', 'linux': '367.48'},
          {'cuda_version': '>=7.5.16', 'windows': '353.66', 'linux': '352.31'},
          {'cuda_version': '>=7.0.28', 'windows': '347.62', 'linux': '346.46'},
      ]

      row = next(
          (entry for entry in table
           if version_compare(toolkit_version, entry['cuda_version'])),
          None)
      if row is None:
          driver_version = 'unknown'
      else:
          driver_version = row.get(state.host_machine.system, row['linux'])

      return ModuleReturnValue(driver_version, [driver_version])
  @permittedKwargs(['detected'])
  def nvcc_arch_flags(self, state, args, kwargs):
      """
      Return the NVCC flag list for the requested architectures.

      The arguments are checked by _validate_nvcc_arch_args(); the result
      is the first element of what _nvcc_arch_flags() computes, wrapped in
      a ModuleReturnValue.
      """
      cuda_version, arch_list, detected = self._validate_nvcc_arch_args(state, args, kwargs)
      flags, _ = self._nvcc_arch_flags(cuda_version, arch_list, detected)
      return ModuleReturnValue(flags, [flags])
  @permittedKwargs(['detected'])
  def nvcc_arch_readable(self, state, args, kwargs):
      """
      Return the readable architecture names for the requested architectures.

      The arguments are checked by _validate_nvcc_arch_args(); the result
      is the second element of what _nvcc_arch_flags() computes, wrapped
      in a ModuleReturnValue.
      """
      cuda_version, arch_list, detected = self._validate_nvcc_arch_args(state, args, kwargs)
      _, readable = self._nvcc_arch_flags(cuda_version, arch_list, detected)
      return ModuleReturnValue(readable, [readable])
  63. @staticmethod
  64. def _break_arch_string(s):
  65. s = re.sub('[ \t\r\n,;]+', ';', s)
  66. s = s.strip(';').split(';')
  67. return s
  @staticmethod
  def _detected_cc_from_compiler(c):
      """
      Return the detected_cc attribute of a CUDA compiler, or '' otherwise.

      A CompilerHolder is unwrapped to its .compiler first; anything that
      is not a CudaCompiler after unwrapping yields the empty string.
      """
      compiler = c.compiler if isinstance(c, CompilerHolder) else c
      return compiler.detected_cc if isinstance(compiler, CudaCompiler) else ''
  @staticmethod
  def _version_from_compiler(c):
      """
      Turn a compiler object or version string into a version string.

      A CompilerHolder is unwrapped to its .compiler first. A CudaCompiler
      gives its .version, a plain string is returned unchanged, and any
      other value gives the sentinel 'unknown'.
      """
      candidate = c.compiler if isinstance(c, CompilerHolder) else c
      if isinstance(candidate, CudaCompiler):
          return candidate.version
      return candidate if isinstance(candidate, str) else 'unknown'
  def _validate_nvcc_arch_args(self, state, args, kwargs):
      """
      Check and normalise the arguments of the nvcc_arch_* methods.

      args[0] must be an NVCC compiler object or a version string; the
      remaining positional arguments are architecture entries, given as
      strings and/or nested lists. The optional 'detected' keyword
      argument defaults to what the compiler object reports.

      Returns (cuda_version, arch_list, detected). arch_list is a single
      string when exactly one entry was given and a list otherwise;
      detected is always a list.

      Raises InvalidArguments when the first argument is missing or
      unusable, when 'All', 'Common' or 'Auto' is mixed with other
      positional entries, or when one of them appears in 'detected'.
      """
      special_names = {'All', 'Common', 'Auto'}
      not_alone = "The special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!"
      bad_first_arg = InvalidArguments('The first argument must be an NVCC compiler object, or its version string!')

      if len(args) < 1:
          raise bad_first_arg
      compiler = args[0]
      cuda_version = self._version_from_compiler(compiler)
      if cuda_version == 'unknown':
          raise bad_first_arg

      # Positional architectures: flatten nesting, then split each string.
      requested = flatten(args[1:]) if len(args) > 1 else []
      arch_list = flatten([self._break_arch_string(entry) for entry in requested])
      if len(arch_list) > 1 and special_names.intersection(arch_list):
          raise InvalidArguments(not_alone)
      if len(arch_list) == 1:
          arch_list = arch_list[0]

      # 'detected' gets the same flatten-and-split treatment.
      fallback = self._detected_cc_from_compiler(compiler)
      detected = flatten([kwargs.get('detected', fallback)])
      detected = flatten([self._break_arch_string(entry) for entry in detected])
      if special_names.intersection(detected):
          raise InvalidArguments(not_alone)

      return cuda_version, arch_list, detected
  def _nvcc_arch_flags(self, cuda_version, cuda_arch_list='Auto', detected=''):
      """
      Using the CUDA Toolkit version (the NVCC version) and the target
      architectures, compute the NVCC architecture flags.

      cuda_version is compared with version_compare() against toolkit
      release thresholds to decide which architecture names are known,
      which compute capabilities are "common", and from which compute
      capability on a detected architecture is replaced by the newest
      common one.

      cuda_arch_list is 'All', 'Common', 'Auto', a separator-delimited
      string, or a list of entries. Each entry is a numeric compute
      capability such as '6.1' or '2.1(2.0)', or one of the architecture
      names in the table below; a '+PTX' suffix additionally requests PTX
      for that entry. An empty value is treated as 'Auto'.

      detected is only consulted for 'Auto'. It is a string or a list of
      compute capabilities; empty entries are ignored, and when nothing
      usable remains the common architectures are used instead.

      Returns a (nvcc_flags, nvcc_archs_readable) tuple of lists: the
      '-gencode' arguments and the matching 'sm_XY' / 'compute_XY' names.

      Raises InvalidArguments for an entry that is neither numeric nor a
      known architecture name.
      """
      cuda_known_gpu_architectures = ['Fermi', 'Kepler', 'Maxwell']
      cuda_common_gpu_architectures = ['3.0', '3.5', '5.0']
      # Detected architectures at or above this value are replaced by the
      # newest common architecture; None means no replacement is done.
      cuda_limit_gpu_architecture = None

      if version_compare(cuda_version, '<7.0'):
          cuda_limit_gpu_architecture = '5.2'

      if version_compare(cuda_version, '>=7.0'):
          cuda_known_gpu_architectures += ['Kepler+Tegra', 'Kepler+Tesla', 'Maxwell+Tegra']
          cuda_common_gpu_architectures += ['5.2']

          if version_compare(cuda_version, '<8.0'):
              cuda_common_gpu_architectures += ['5.2+PTX']
              cuda_limit_gpu_architecture = '6.0'

      if version_compare(cuda_version, '>=8.0'):
          cuda_known_gpu_architectures += ['Pascal', 'Pascal+Tegra']
          cuda_common_gpu_architectures += ['6.0', '6.1']

          if version_compare(cuda_version, '<9.0'):
              cuda_common_gpu_architectures += ['6.1+PTX']
              cuda_limit_gpu_architecture = '7.0'

      if version_compare(cuda_version, '>=9.0'):
          cuda_known_gpu_architectures += ['Volta', 'Xavier']
          cuda_common_gpu_architectures += ['7.0', '7.0+PTX']

          if version_compare(cuda_version, '<10.0'):
              cuda_limit_gpu_architecture = '7.5'

      if version_compare(cuda_version, '>=10.0'):
          cuda_known_gpu_architectures += ['Turing']
          cuda_common_gpu_architectures += ['7.5', '7.5+PTX']

          if version_compare(cuda_version, '<11.0'):
              cuda_limit_gpu_architecture = '8.0'

      if not cuda_arch_list:
          cuda_arch_list = 'Auto'

      if cuda_arch_list == 'All':
          cuda_arch_list = cuda_known_gpu_architectures
      elif cuda_arch_list == 'Common':
          cuda_arch_list = cuda_common_gpu_architectures
      elif cuda_arch_list == 'Auto':
          if isinstance(detected, str):
              detected = self._break_arch_string(detected)
          # Drop empty entries before testing for "anything detected":
          # a list such as [''] is truthy but names no architecture, and
          # would otherwise end up producing no flags at all.
          detected = [arch for arch in (detected or []) if arch]

          if not detected:
              cuda_arch_list = cuda_common_gpu_architectures
          elif cuda_limit_gpu_architecture:
              newest_common = cuda_common_gpu_architectures[-1]
              filtered_cuda_arch_list = []
              for arch in detected:
                  if version_compare(arch, '>=' + cuda_limit_gpu_architecture):
                      arch = newest_common
                  if arch not in filtered_cuda_arch_list:
                      filtered_cuda_arch_list.append(arch)
              cuda_arch_list = filtered_cuda_arch_list
          else:
              cuda_arch_list = detected
      elif isinstance(cuda_arch_list, str):
          cuda_arch_list = self._break_arch_string(cuda_arch_list)

      cuda_arch_list = sorted(x for x in set(cuda_arch_list) if x)

      # name -> (binary compute capabilities, PTX compute capabilities)
      named_architectures = {
          'Fermi': (['2.0', '2.1(2.0)'], []),
          'Kepler+Tegra': (['3.2'], []),
          'Kepler+Tesla': (['3.7'], []),
          'Kepler': (['3.0', '3.5'], ['3.5']),
          'Maxwell+Tegra': (['5.3'], []),
          'Maxwell': (['5.0', '5.2'], ['5.2']),
          'Pascal': (['6.0', '6.1'], ['6.1']),
          'Pascal+Tegra': (['6.2'], []),
          'Volta': (['7.0'], ['7.0']),
          'Xavier': (['7.2'], []),
          'Turing': (['7.5'], ['7.5']),
      }
      # 'X.Y', optionally followed by '(A.B)'.
      numeric_arch = re.compile(r'[0-9]+\.[0-9](\([0-9]+\.[0-9]\))?')

      cuda_arch_bin = []
      cuda_arch_ptx = []
      for arch_name in cuda_arch_list:
          add_ptx = arch_name.endswith('+PTX')
          if add_ptx:
              arch_name = arch_name[:-len('+PTX')]

          if numeric_arch.fullmatch(arch_name):
              arch_bin, arch_ptx = [arch_name], [arch_name]
          else:
              arch_bin, arch_ptx = named_architectures.get(arch_name, (None, None))

          if arch_bin is None:
              raise InvalidArguments('Unknown CUDA Architecture Name {}!'
                                     .format(arch_name))

          cuda_arch_bin += arch_bin

          if add_ptx:
              # A name with no dedicated PTX list reuses its binary list.
              if not arch_ptx:
                  arch_ptx = arch_bin
              cuda_arch_ptx += arch_ptx

      # '6.1' -> '61', '2.1(2.0)' -> '21(20)'; then de-duplicate and sort.
      cuda_arch_bin = re.findall('[0-9()]+', ' '.join(cuda_arch_bin).replace('.', ''))
      cuda_arch_ptx = re.findall('[0-9]+', ' '.join(cuda_arch_ptx).replace('.', ''))
      cuda_arch_bin = sorted(set(cuda_arch_bin))
      cuda_arch_ptx = sorted(set(cuda_arch_ptx))

      nvcc_flags = []
      nvcc_archs_readable = []

      for arch in cuda_arch_bin:
          m = re.match(r'([0-9]+)\(([0-9]+)\)', arch)
          if m:
              # 'XY(AB)': generate sm_XY code from the compute_AB virtual arch.
              sm, compute = m.group(1), m.group(2)
              nvcc_flags += ['-gencode', 'arch=compute_' + compute + ',code=sm_' + sm]
              nvcc_archs_readable += ['sm_' + sm]
          else:
              nvcc_flags += ['-gencode', 'arch=compute_' + arch + ',code=sm_' + arch]
              nvcc_archs_readable += ['sm_' + arch]

      for arch in cuda_arch_ptx:
          nvcc_flags += ['-gencode', 'arch=compute_' + arch + ',code=compute_' + arch]
          nvcc_archs_readable += ['compute_' + arch]

      return nvcc_flags, nvcc_archs_readable
  def initialize(*args, **kwargs):
      """Build and return a CudaModule from the given arguments."""
      module = CudaModule(*args, **kwargs)
      return module