opencv-4.10.0-cuda-fp16.patch 7.3 KB

https://github.com/opencv/opencv/issues/25711
https://github.com/opencv/opencv/pull/25880
From 5115dc62f8af616c6e75e4b3df3eb8f201298432 Mon Sep 17 00:00:00 2001
From: Aliaksei Urbanski <aliaksei.urbanski@gmail.com>
Date: Tue, 9 Jul 2024 01:46:12 +0300
Subject: [PATCH 1/3] 🐛 Fix CUDA for old GPUs without FP16 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
--- a/modules/dnn/src/cuda4dnn/init.hpp
+++ b/modules/dnn/src/cuda4dnn/init.hpp
@@ -15,7 +15,7 @@
namespace cv { namespace dnn { namespace cuda4dnn {
- void checkVersions()
+ inline void checkVersions()
{
// https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#programming-model
// cuDNN API Compatibility
@@ -44,19 +44,19 @@ namespace cv { namespace dnn { namespace cuda4dnn {
}
}
- int getDeviceCount()
+ inline int getDeviceCount()
{
return cuda::getCudaEnabledDeviceCount();
}
- int getDevice()
+ inline int getDevice()
{
int device_id = -1;
CUDA4DNN_CHECK_CUDA(cudaGetDevice(&device_id));
return device_id;
}
- bool isDeviceCompatible()
+ inline bool isDeviceCompatible()
{
int device_id = getDevice();
if (device_id < 0)
@@ -76,7 +76,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
return false;
}
- bool doesDeviceSupportFP16()
+ inline bool doesDeviceSupportFP16()
{
int device_id = getDevice();
if (device_id < 0)
--- a/modules/dnn/src/registry.cpp
+++ b/modules/dnn/src/registry.cpp
@@ -18,6 +18,10 @@
#include "backend.hpp"
#include "factory.hpp"
+#ifdef HAVE_CUDA
+#include "cuda4dnn/init.hpp"
+#endif
+
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
@@ -121,7 +125,8 @@ class BackendRegistry
if (haveCUDA())
{
backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
- backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
+ if (cuda4dnn::doesDeviceSupportFP16())
+ backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
}
#endif
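
Note (not part of the patch): with this first patch applied, the registry only advertises DNN_TARGET_CUDA_FP16 when the device actually supports FP16, so applications can probe the available targets instead of hard-coding the FP16 target. A minimal usage sketch, assuming an OpenCV build with the CUDA backend; configureNet is an illustrative helper, not something from the patch:

#include <algorithm>
#include <vector>
#include <opencv2/dnn.hpp>

// Ask the DNN registry which CUDA targets are available and fall back to
// plain DNN_TARGET_CUDA when FP16 is not advertised.
static void configureNet(cv::dnn::Net& net)
{
    using namespace cv::dnn;
    std::vector<Target> targets = getAvailableTargets(DNN_BACKEND_CUDA);
    bool hasFP16 = std::find(targets.begin(), targets.end(), DNN_TARGET_CUDA_FP16) != targets.end();
    net.setPreferableBackend(DNN_BACKEND_CUDA);
    net.setPreferableTarget(hasFP16 ? DNN_TARGET_CUDA_FP16 : DNN_TARGET_CUDA);
}
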
From cfb2bc34acd7699707110523f067a7452a404206 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Tue, 9 Jul 2024 11:21:58 +0300
Subject: [PATCH 2/3] Added CUDA FP16 availability check for target management.
--- a/modules/dnn/src/cuda4dnn/init.hpp
+++ b/modules/dnn/src/cuda4dnn/init.hpp
@@ -56,9 +56,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
return device_id;
}
- inline bool isDeviceCompatible()
+ inline bool isDeviceCompatible(int device_id = -1)
{
- int device_id = getDevice();
+ if (device_id < 0)
+ device_id = getDevice();
+
if (device_id < 0)
return false;
@@ -76,9 +78,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
return false;
}
- inline bool doesDeviceSupportFP16()
+ inline bool doesDeviceSupportFP16(int device_id = -1)
{
- int device_id = getDevice();
+ if (device_id < 0)
+ device_id = getDevice();
+
if (device_id < 0)
return false;
@@ -87,9 +91,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
CUDA4DNN_CHECK_CUDA(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_id));
int version = major * 10 + minor;
- if (version < 53)
- return false;
- return true;
+ return (version >= 53);
}
}}} /* namespace cv::dnn::cuda4dnn */
--- a/modules/dnn/src/net_impl_backend.cpp
+++ b/modules/dnn/src/net_impl_backend.cpp
@@ -10,6 +10,10 @@
#include "backend.hpp"
#include "factory.hpp"
+#ifdef HAVE_CUDA
+#include "cuda4dnn/init.hpp"
+#endif
+
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
@@ -242,6 +246,16 @@ void Net::Impl::setPreferableTarget(int targetId)
#endif
}
+ if (IS_DNN_CUDA_TARGET(targetId))
+ {
+ preferableTarget = DNN_TARGET_CPU;
+#ifdef HAVE_CUDA
+ if (cuda4dnn::doesDeviceSupportFP16() && targetId == DNN_TARGET_CUDA_FP16)
+ preferableTarget = DNN_TARGET_CUDA_FP16;
+ else
+ preferableTarget = DNN_TARGET_CUDA;
+#endif
+ }
#if !defined(__arm64__) || !__arm64__
if (targetId == DNN_TARGET_CPU_FP16)
{
--- a/modules/dnn/src/registry.cpp
+++ b/modules/dnn/src/registry.cpp
@@ -122,10 +122,24 @@ class BackendRegistry
#endif
#ifdef HAVE_CUDA
- if (haveCUDA())
+ cuda4dnn::checkVersions();
+
+ bool hasCudaCompatible = false;
+ bool hasCudaFP16 = false;
+ for (int i = 0; i < cuda4dnn::getDeviceCount(); i++)
+ {
+ if (cuda4dnn::isDeviceCompatible(i))
+ {
+ hasCudaCompatible = true;
+ if (cuda4dnn::doesDeviceSupportFP16(i))
+ hasCudaFP16 = true;
+ }
+ }
+
+ if (hasCudaCompatible)
{
backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
- if (cuda4dnn::doesDeviceSupportFP16())
+ if (hasCudaFP16)
backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
}
#endif
--- a/modules/dnn/test/test_common.hpp
+++ b/modules/dnn/test/test_common.hpp
@@ -211,7 +211,7 @@ class DNNTestLayer : public TestWithParam<tuple<Backend, Target> >
if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
{
hasFallbacks = true;
- std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
+ std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to have backend implementation" << endl;
}
}
if (hasFallbacks && raiseError)
--- a/modules/dnn/test/test_onnx_conformance.cpp
+++ b/modules/dnn/test/test_onnx_conformance.cpp
@@ -1008,7 +1008,7 @@ class Test_ONNX_conformance : public TestWithParam<ONNXConfParams>
if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
{
hasFallbacks = true;
- std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
+ std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to have backend implementation" << endl;
}
}
return hasFallbacks;
From cc9178903daff229bc396db718bf347c4eafd33b Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <2536374+asmorkalov@users.noreply.github.com>
Date: Wed, 10 Jul 2024 09:06:09 +0300
Subject: [PATCH 3/3] Update modules/dnn/src/registry.cpp
Co-authored-by: Aliaksei Urbanski <aliaksei.urbanski@gmail.com>
--- a/modules/dnn/src/registry.cpp
+++ b/modules/dnn/src/registry.cpp
@@ -132,7 +132,10 @@ class BackendRegistry
{
hasCudaCompatible = true;
if (cuda4dnn::doesDeviceSupportFP16(i))
+ {
hasCudaFP16 = true;
+ break; // we already have all we need here
+ }
}
}
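
Note (not part of the patch series): the FP16 gate introduced above reduces to a compute-capability check: doesDeviceSupportFP16() returns true only when major * 10 + minor >= 53. A standalone sketch of the same check against the CUDA runtime API, for illustration only; deviceSupportsFP16 is a hypothetical helper:

#include <cuda_runtime_api.h>

// Mirrors cuda4dnn::doesDeviceSupportFP16(): FP16 inference needs compute
// capability 5.3 or newer, so Kepler and desktop Maxwell devices report false.
static bool deviceSupportsFP16(int device_id)
{
    int major = 0, minor = 0;
    if (cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device_id) != cudaSuccess)
        return false;
    if (cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_id) != cudaSuccess)
        return false;
    return major * 10 + minor >= 53;
}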