Backport of the upstream OpenCV fix for the CUDA DNN backend on GPUs without FP16 support (compute capability below 5.3); see the issue and pull request below:
- https://github.com/opencv/opencv/issues/25711
- https://github.com/opencv/opencv/pull/25880
- From 5115dc62f8af616c6e75e4b3df3eb8f201298432 Mon Sep 17 00:00:00 2001
- From: Aliaksei Urbanski <aliaksei.urbanski@gmail.com>
- Date: Tue, 9 Jul 2024 01:46:12 +0300
- Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=90=9B=20Fix=20CUDA=20for=20old=20GPU?=
- =?UTF-8?q?s=20without=20FP16=20support?=
- MIME-Version: 1.0
- Content-Type: text/plain; charset=UTF-8
- Content-Transfer-Encoding: 8bit
- --- a/modules/dnn/src/cuda4dnn/init.hpp
- +++ b/modules/dnn/src/cuda4dnn/init.hpp
- @@ -15,7 +15,7 @@
-
- namespace cv { namespace dnn { namespace cuda4dnn {
-
- - void checkVersions()
- + inline void checkVersions()
- {
- // https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#programming-model
- // cuDNN API Compatibility
- @@ -44,19 +44,19 @@ namespace cv { namespace dnn { namespace cuda4dnn {
- }
- }
-
- - int getDeviceCount()
- + inline int getDeviceCount()
- {
- return cuda::getCudaEnabledDeviceCount();
- }
-
- - int getDevice()
- + inline int getDevice()
- {
- int device_id = -1;
- CUDA4DNN_CHECK_CUDA(cudaGetDevice(&device_id));
- return device_id;
- }
-
- - bool isDeviceCompatible()
- + inline bool isDeviceCompatible()
- {
- int device_id = getDevice();
- if (device_id < 0)
- @@ -76,7 +76,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
- return false;
- }
-
- - bool doesDeviceSupportFP16()
- + inline bool doesDeviceSupportFP16()
- {
- int device_id = getDevice();
- if (device_id < 0)
- --- a/modules/dnn/src/registry.cpp
- +++ b/modules/dnn/src/registry.cpp
- @@ -18,6 +18,10 @@
- #include "backend.hpp"
- #include "factory.hpp"
-
- +#ifdef HAVE_CUDA
- +#include "cuda4dnn/init.hpp"
- +#endif
- +
- namespace cv {
- namespace dnn {
- CV__DNN_INLINE_NS_BEGIN
- @@ -121,7 +125,8 @@ class BackendRegistry
- if (haveCUDA())
- {
- backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
- - backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
- + if (cuda4dnn::doesDeviceSupportFP16())
- + backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
- }
- #endif
-
- From cfb2bc34acd7699707110523f067a7452a404206 Mon Sep 17 00:00:00 2001
- From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
- Date: Tue, 9 Jul 2024 11:21:58 +0300
- Subject: [PATCH 2/3] Added CUDA FP16 availability check for target management.
- --- a/modules/dnn/src/cuda4dnn/init.hpp
- +++ b/modules/dnn/src/cuda4dnn/init.hpp
- @@ -56,9 +56,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
- return device_id;
- }
-
- - inline bool isDeviceCompatible()
- + inline bool isDeviceCompatible(int device_id = -1)
- {
- - int device_id = getDevice();
- + if (device_id < 0)
- + device_id = getDevice();
- +
- if (device_id < 0)
- return false;
-
- @@ -76,9 +78,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
- return false;
- }
-
- - inline bool doesDeviceSupportFP16()
- + inline bool doesDeviceSupportFP16(int device_id = -1)
- {
- - int device_id = getDevice();
- + if (device_id < 0)
- + device_id = getDevice();
- +
- if (device_id < 0)
- return false;
-
- @@ -87,9 +91,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
- CUDA4DNN_CHECK_CUDA(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_id));
-
- int version = major * 10 + minor;
- - if (version < 53)
- - return false;
- - return true;
- + return (version >= 53);
- }
-
- }}} /* namespace cv::dnn::cuda4dnn */
- --- a/modules/dnn/src/net_impl_backend.cpp
- +++ b/modules/dnn/src/net_impl_backend.cpp
- @@ -10,6 +10,10 @@
- #include "backend.hpp"
- #include "factory.hpp"
-
- +#ifdef HAVE_CUDA
- +#include "cuda4dnn/init.hpp"
- +#endif
- +
- namespace cv {
- namespace dnn {
- CV__DNN_INLINE_NS_BEGIN
- @@ -242,6 +246,16 @@ void Net::Impl::setPreferableTarget(int targetId)
- #endif
- }
-
- + if (IS_DNN_CUDA_TARGET(targetId))
- + {
- + preferableTarget = DNN_TARGET_CPU;
- +#ifdef HAVE_CUDA
- + if (cuda4dnn::doesDeviceSupportFP16() && targetId == DNN_TARGET_CUDA_FP16)
- + preferableTarget = DNN_TARGET_CUDA_FP16;
- + else
- + preferableTarget = DNN_TARGET_CUDA;
- +#endif
- + }
- #if !defined(__arm64__) || !__arm64__
- if (targetId == DNN_TARGET_CPU_FP16)
- {
- --- a/modules/dnn/src/registry.cpp
- +++ b/modules/dnn/src/registry.cpp
- @@ -122,10 +122,24 @@ class BackendRegistry
- #endif
-
- #ifdef HAVE_CUDA
- - if (haveCUDA())
- + cuda4dnn::checkVersions();
- +
- + bool hasCudaCompatible = false;
- + bool hasCudaFP16 = false;
- + for (int i = 0; i < cuda4dnn::getDeviceCount(); i++)
- + {
- + if (cuda4dnn::isDeviceCompatible(i))
- + {
- + hasCudaCompatible = true;
- + if (cuda4dnn::doesDeviceSupportFP16(i))
- + hasCudaFP16 = true;
- + }
- + }
- +
- + if (hasCudaCompatible)
- {
- backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
- - if (cuda4dnn::doesDeviceSupportFP16())
- + if (hasCudaFP16)
- backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
- }
- #endif
- --- a/modules/dnn/test/test_common.hpp
- +++ b/modules/dnn/test/test_common.hpp
- @@ -211,7 +211,7 @@ class DNNTestLayer : public TestWithParam<tuple<Backend, Target> >
- if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
- {
- hasFallbacks = true;
- - std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
- + std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to have backend implementation" << endl;
- }
- }
- if (hasFallbacks && raiseError)
- --- a/modules/dnn/test/test_onnx_conformance.cpp
- +++ b/modules/dnn/test/test_onnx_conformance.cpp
- @@ -1008,7 +1008,7 @@ class Test_ONNX_conformance : public TestWithParam<ONNXConfParams>
- if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
- {
- hasFallbacks = true;
- - std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
- + std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to have backend implementation" << endl;
- }
- }
- return hasFallbacks;
- From cc9178903daff229bc396db718bf347c4eafd33b Mon Sep 17 00:00:00 2001
- From: Alexander Smorkalov <2536374+asmorkalov@users.noreply.github.com>
- Date: Wed, 10 Jul 2024 09:06:09 +0300
- Subject: [PATCH 3/3] Update modules/dnn/src/registry.cpp
- Co-authored-by: Aliaksei Urbanski <aliaksei.urbanski@gmail.com>
- --- a/modules/dnn/src/registry.cpp
- +++ b/modules/dnn/src/registry.cpp
- @@ -132,7 +132,10 @@ class BackendRegistry
- {
- hasCudaCompatible = true;
- if (cuda4dnn::doesDeviceSupportFP16(i))
- + {
- hasCudaFP16 = true;
- + break; // we already have all we need here
- + }
- }
- }
-
|