// b3GpuRaycast.cpp
  1. #include "b3GpuRaycast.h"
  2. #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
  3. #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
  4. #include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
  5. #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
  6. #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
  7. #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
  8. #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
  9. #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
  10. #include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
  11. #include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h"
  12. #include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h"
  13. #define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl"
  14. struct b3GpuRaycastInternalData
  15. {
  16. cl_context m_context;
  17. cl_device_id m_device;
  18. cl_command_queue m_q;
  19. cl_kernel m_raytraceKernel;
  20. cl_kernel m_raytracePairsKernel;
  21. cl_kernel m_findRayRigidPairIndexRanges;
  22. b3GpuParallelLinearBvh* m_plbvh;
  23. b3RadixSort32CL* m_radixSorter;
  24. b3FillCL* m_fill;
  25. //1 element per ray
  26. b3OpenCLArray<b3RayInfo>* m_gpuRays;
  27. b3OpenCLArray<b3RayHit>* m_gpuHitResults;
  28. b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
  29. b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
  30. //1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB
  31. b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
  32. b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs; //x == ray index, y == rigid index
  33. int m_test;
  34. };
  35. b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q)
  36. {
  37. m_data = new b3GpuRaycastInternalData;
  38. m_data->m_context = ctx;
  39. m_data->m_device = device;
  40. m_data->m_q = q;
  41. m_data->m_raytraceKernel = 0;
  42. m_data->m_raytracePairsKernel = 0;
  43. m_data->m_findRayRigidPairIndexRanges = 0;
  44. m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q);
  45. m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q);
  46. m_data->m_fill = new b3FillCL(ctx, device, q);
  47. m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q);
  48. m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q);
  49. m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q);
  50. m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q);
  51. m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q);
  52. m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q);
  53. {
  54. cl_int errNum=0;
  55. cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,rayCastKernelCL,&errNum,"",B3_RAYCAST_PATH);
  56. b3Assert(errNum==CL_SUCCESS);
  57. m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog);
  58. b3Assert(errNum==CL_SUCCESS);
  59. m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastPairsKernel",&errNum,prog);
  60. b3Assert(errNum==CL_SUCCESS);
  61. m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "findRayRigidPairIndexRanges",&errNum,prog);
  62. b3Assert(errNum==CL_SUCCESS);
  63. clReleaseProgram(prog);
  64. }
  65. }
  66. b3GpuRaycast::~b3GpuRaycast()
  67. {
  68. clReleaseKernel(m_data->m_raytraceKernel);
  69. clReleaseKernel(m_data->m_raytracePairsKernel);
  70. clReleaseKernel(m_data->m_findRayRigidPairIndexRanges);
  71. delete m_data->m_plbvh;
  72. delete m_data->m_radixSorter;
  73. delete m_data->m_fill;
  74. delete m_data->m_gpuRays;
  75. delete m_data->m_gpuHitResults;
  76. delete m_data->m_firstRayRigidPairIndexPerRay;
  77. delete m_data->m_numRayRigidPairsPerRay;
  78. delete m_data->m_gpuNumRayRigidPairs;
  79. delete m_data->m_gpuRayRigidPairs;
  80. delete m_data;
  81. }
  82. bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction)
  83. {
  84. b3Vector3 rs = rayFrom - spherePos;
  85. b3Vector3 rayDir = rayTo-rayFrom;
  86. float A = b3Dot(rayDir,rayDir);
  87. float B = b3Dot(rs, rayDir);
  88. float C = b3Dot(rs, rs) - (radius * radius);
  89. float D = B * B - A*C;
  90. if (D > 0.0)
  91. {
  92. float t = (-B - sqrt(D))/A;
  93. if ( (t >= 0.0f) && (t < hitFraction) )
  94. {
  95. hitFraction = t;
  96. return true;
  97. }
  98. }
  99. return false;
  100. }
  101. bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly,
  102. const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal)
  103. {
  104. float exitFraction = hitFraction;
  105. float enterFraction = -0.1f;
  106. b3Vector3 curHitNormal=b3MakeVector3(0,0,0);
  107. for (int i=0;i<poly.m_numFaces;i++)
  108. {
  109. const b3GpuFace& face = faces[poly.m_faceOffset+i];
  110. float fromPlaneDist = b3Dot(rayFromLocal,face.m_plane)+face.m_plane.w;
  111. float toPlaneDist = b3Dot(rayToLocal,face.m_plane)+face.m_plane.w;
  112. if (fromPlaneDist<0.f)
  113. {
  114. if (toPlaneDist >= 0.f)
  115. {
  116. float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
  117. if (exitFraction>fraction)
  118. {
  119. exitFraction = fraction;
  120. }
  121. }
  122. } else
  123. {
  124. if (toPlaneDist<0.f)
  125. {
  126. float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
  127. if (enterFraction <= fraction)
  128. {
  129. enterFraction = fraction;
  130. curHitNormal = face.m_plane;
  131. curHitNormal.w = 0.f;
  132. }
  133. } else
  134. {
  135. return false;
  136. }
  137. }
  138. if (exitFraction <= enterFraction)
  139. return false;
  140. }
  141. if (enterFraction < 0.f)
  142. return false;
  143. hitFraction = enterFraction;
  144. hitNormal = curHitNormal;
  145. return true;
  146. }
  147. void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
  148. int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
  149. {
  150. // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
  151. B3_PROFILE("castRaysHost");
  152. for (int r=0;r<rays.size();r++)
  153. {
  154. b3Vector3 rayFrom = rays[r].m_from;
  155. b3Vector3 rayTo = rays[r].m_to;
  156. float hitFraction = hitResults[r].m_hitFraction;
  157. int hitBodyIndex= -1;
  158. b3Vector3 hitNormal;
  159. for (int b=0;b<numBodies;b++)
  160. {
  161. const b3Vector3& pos = bodies[b].m_pos;
  162. //const b3Quaternion& orn = bodies[b].m_quat;
  163. switch (collidables[bodies[b].m_collidableIdx].m_shapeType)
  164. {
  165. case SHAPE_SPHERE:
  166. {
  167. b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius;
  168. if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction))
  169. {
  170. hitBodyIndex = b;
  171. b3Vector3 hitPoint;
  172. hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
  173. hitNormal = (hitPoint-bodies[b].m_pos).normalize();
  174. }
  175. }
  176. case SHAPE_CONVEX_HULL:
  177. {
  178. b3Transform convexWorldTransform;
  179. convexWorldTransform.setIdentity();
  180. convexWorldTransform.setOrigin(bodies[b].m_pos);
  181. convexWorldTransform.setRotation(bodies[b].m_quat);
  182. b3Transform convexWorld2Local = convexWorldTransform.inverse();
  183. b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
  184. b3Vector3 rayToLocal = convexWorld2Local(rayTo);
  185. int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
  186. const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
  187. if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal))
  188. {
  189. hitBodyIndex = b;
  190. }
  191. break;
  192. }
  193. default:
  194. {
  195. static bool once=true;
  196. if (once)
  197. {
  198. once=false;
  199. b3Warning("Raytest: unsupported shape type\n");
  200. }
  201. }
  202. }
  203. }
  204. if (hitBodyIndex>=0)
  205. {
  206. hitResults[r].m_hitFraction = hitFraction;
  207. hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
  208. hitResults[r].m_hitNormal = hitNormal;
  209. hitResults[r].m_hitBody = hitBodyIndex;
  210. }
  211. }
  212. }
  213. ///todo: add some acceleration structure (AABBs, tree etc)
  214. void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
  215. int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
  216. const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase)
  217. {
  218. //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);
  219. B3_PROFILE("castRaysGPU");
  220. {
  221. B3_PROFILE("raycast copyFromHost");
  222. m_data->m_gpuRays->copyFromHost(rays);
  223. m_data->m_gpuHitResults->copyFromHost(hitResults);
  224. }
  225. int numRays = hitResults.size();
  226. {
  227. m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
  228. m_data->m_numRayRigidPairsPerRay->resize(numRays);
  229. m_data->m_gpuNumRayRigidPairs->resize(1);
  230. m_data->m_gpuRayRigidPairs->resize(numRays * 16);
  231. }
  232. //run kernel
  233. const bool USE_BRUTE_FORCE_RAYCAST = false;
  234. if(USE_BRUTE_FORCE_RAYCAST)
  235. {
  236. B3_PROFILE("raycast launch1D");
  237. b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
  238. int numRays = rays.size();
  239. launcher.setConst(numRays);
  240. launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
  241. launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());
  242. launcher.setConst(numBodies);
  243. launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
  244. launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
  245. launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
  246. launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
  247. launcher.launch1D(numRays);
  248. clFinish(m_data->m_q);
  249. }
  250. else
  251. {
  252. m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() );
  253. m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
  254. int numRayRigidPairs = -1;
  255. m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
  256. if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() )
  257. {
  258. numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
  259. m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
  260. }
  261. m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct
  262. //Sort ray-rigid pairs by ray index
  263. {
  264. B3_PROFILE("sort ray-rigid pairs");
  265. m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) );
  266. }
  267. //detect start,count of each ray pair
  268. {
  269. B3_PROFILE("detect ray-rigid pair index ranges");
  270. {
  271. B3_PROFILE("reset ray-rigid pair index ranges");
  272. m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index
  273. m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
  274. clFinish(m_data->m_q);
  275. }
  276. b3BufferInfoCL bufferInfo[] =
  277. {
  278. b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ),
  279. b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
  280. b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() )
  281. };
  282. b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
  283. launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
  284. launcher.setConst(numRayRigidPairs);
  285. launcher.launch1D(numRayRigidPairs);
  286. clFinish(m_data->m_q);
  287. }
  288. {
  289. B3_PROFILE("ray-rigid intersection");
  290. b3BufferInfoCL bufferInfo[] =
  291. {
  292. b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ),
  293. b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ),
  294. b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
  295. b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ),
  296. b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ),
  297. b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ),
  298. b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ),
  299. b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ),
  300. b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() )
  301. };
  302. b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
  303. launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
  304. launcher.setConst(numRays);
  305. launcher.launch1D(numRays);
  306. clFinish(m_data->m_q);
  307. }
  308. }
  309. //copy results
  310. {
  311. B3_PROFILE("raycast copyToHost");
  312. m_data->m_gpuHitResults->copyToHost(hitResults);
  313. }
  314. }