RayTracingAccelerationStructurePass.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include <Atom/RHI/BufferFrameAttachment.h>
  9. #include <Atom/RHI/BufferScopeAttachment.h>
  10. #include <Atom/RHI/CommandList.h>
  11. #include <Atom/RHI/FrameScheduler.h>
  12. #include <Atom/RHI/RHISystemInterface.h>
  13. #include <Atom/RHI/ScopeProducerFunction.h>
  14. #include <Atom/RPI.Public/Buffer/Buffer.h>
  15. #include <Atom/RPI.Public/Buffer/BufferSystemInterface.h>
  16. #include <Atom/RPI.Public/GpuQuery/QueryPool.h>
  17. #include <Atom/RPI.Public/RenderPipeline.h>
  18. #include <Atom/RPI.Public/Scene.h>
  19. #include <Mesh/MeshFeatureProcessor.h>
  20. #include <RayTracing/RayTracingAccelerationStructurePass.h>
  21. #include <RayTracing/RayTracingFeatureProcessor.h>
  22. namespace AZ
  23. {
  24. namespace Render
  25. {
  26. RPI::Ptr<RayTracingAccelerationStructurePass> RayTracingAccelerationStructurePass::Create(const RPI::PassDescriptor& descriptor)
  27. {
  28. RPI::Ptr<RayTracingAccelerationStructurePass> rayTracingAccelerationStructurePass =
  29. aznew RayTracingAccelerationStructurePass(descriptor);
  30. return AZStd::move(rayTracingAccelerationStructurePass);
  31. }
  32. RayTracingAccelerationStructurePass::RayTracingAccelerationStructurePass(const RPI::PassDescriptor& descriptor)
  33. : Pass(descriptor)
  34. {
  35. // disable this pass if we're on a platform that doesn't support raytracing
  36. if (RHI::RHISystemInterface::Get()->GetRayTracingSupport() == RHI::MultiDevice::NoDevices)
  37. {
  38. SetEnabled(false);
  39. }
  40. }
  41. void RayTracingAccelerationStructurePass::BuildInternal()
  42. {
  43. // [GFX TODO][ATOM-18111] Ideally, this would be done on the Compute queue, but that has multiple issues (see also 18305).
  44. auto deviceIndex = Pass::GetDeviceIndex();
  45. InitScope(
  46. RHI::ScopeId(AZStd::string(GetPathName().GetCStr() + AZStd::to_string(deviceIndex))),
  47. AZ::RHI::HardwareQueueClass::Graphics,
  48. deviceIndex);
  49. }
  50. void RayTracingAccelerationStructurePass::FrameBeginInternal(FramePrepareParams params)
  51. {
  52. if (IsTimestampQueryEnabled())
  53. {
  54. m_timestampResult = AZ::RPI::TimestampResult();
  55. }
  56. if (GetScopeId().IsEmpty())
  57. {
  58. InitScope(RHI::ScopeId(GetPathName()), RHI::HardwareQueueClass::Graphics, Pass::GetDeviceIndex());
  59. }
  60. params.m_frameGraphBuilder->ImportScopeProducer(*this);
  61. RPI::Scene* scene = m_pipeline->GetScene();
  62. RayTracingFeatureProcessor* rayTracingFeatureProcessor = scene->GetFeatureProcessor<RayTracingFeatureProcessor>();
  63. if (rayTracingFeatureProcessor)
  64. {
  65. rayTracingFeatureProcessor->BeginFrame();
  66. auto revision = rayTracingFeatureProcessor->GetRevision();
  67. m_rayTracingRevisionOutDated = revision != m_rayTracingRevision;
  68. m_rayTracingRevision = revision;
  69. if (m_rayTracingRevisionOutDated || rayTracingFeatureProcessor->GetSkinnedMeshCount() != 0)
  70. {
  71. ReadbackScopeQueryResults();
  72. }
  73. }
  74. }
  75. RHI::Ptr<RPI::Query> RayTracingAccelerationStructurePass::GetQuery(RPI::ScopeQueryType queryType)
  76. {
  77. auto typeIndex{ static_cast<uint32_t>(queryType) };
  78. if (!m_scopeQueries[typeIndex])
  79. {
  80. RHI::Ptr<RPI::Query> query;
  81. switch (queryType)
  82. {
  83. case RPI::ScopeQueryType::Timestamp:
  84. query = RPI::GpuQuerySystemInterface::Get()->CreateQuery(
  85. RHI::QueryType::Timestamp, RHI::QueryPoolScopeAttachmentType::Global, RHI::ScopeAttachmentAccess::Write);
  86. break;
  87. case RPI::ScopeQueryType::PipelineStatistics:
  88. query = RPI::GpuQuerySystemInterface::Get()->CreateQuery(
  89. RHI::QueryType::PipelineStatistics, RHI::QueryPoolScopeAttachmentType::Global, RHI::ScopeAttachmentAccess::Write);
  90. break;
  91. }
  92. m_scopeQueries[typeIndex] = query;
  93. }
  94. return m_scopeQueries[typeIndex];
  95. }
  96. template<typename Func>
  97. inline void RayTracingAccelerationStructurePass::ExecuteOnTimestampQuery(Func&& func)
  98. {
  99. if (IsTimestampQueryEnabled())
  100. {
  101. auto query{ GetQuery(RPI::ScopeQueryType::Timestamp) };
  102. if (query)
  103. {
  104. func(query);
  105. }
  106. }
  107. }
  108. template<typename Func>
  109. inline void RayTracingAccelerationStructurePass::ExecuteOnPipelineStatisticsQuery(Func&& func)
  110. {
  111. if (IsPipelineStatisticsQueryEnabled())
  112. {
  113. auto query{ GetQuery(RPI::ScopeQueryType::PipelineStatistics) };
  114. if (query)
  115. {
  116. func(query);
  117. }
  118. }
  119. }
  120. RPI::TimestampResult RayTracingAccelerationStructurePass::GetTimestampResultInternal() const
  121. {
  122. return m_timestampResult;
  123. }
  124. RPI::PipelineStatisticsResult RayTracingAccelerationStructurePass::GetPipelineStatisticsResultInternal() const
  125. {
  126. return m_statisticsResult;
  127. }
  128. void RayTracingAccelerationStructurePass::SetupFrameGraphDependencies(RHI::FrameGraphInterface frameGraph)
  129. {
  130. RPI::Scene* scene = m_pipeline->GetScene();
  131. RayTracingFeatureProcessor* rayTracingFeatureProcessor = scene->GetFeatureProcessor<RayTracingFeatureProcessor>();
  132. if (rayTracingFeatureProcessor)
  133. {
  134. if (m_rayTracingRevisionOutDated)
  135. {
  136. // create the TLAS buffers based on the descriptor
  137. RHI::Ptr<RHI::RayTracingTlas>& rayTracingTlas = rayTracingFeatureProcessor->GetTlas();
  138. // import and attach the TLAS buffer
  139. const RHI::Ptr<RHI::Buffer>& rayTracingTlasBuffer = rayTracingTlas->GetTlasBuffer();
  140. if (rayTracingTlasBuffer && rayTracingFeatureProcessor->HasGeometry())
  141. {
  142. AZ::RHI::AttachmentId tlasAttachmentId = rayTracingFeatureProcessor->GetTlasAttachmentId();
  143. if (frameGraph.GetAttachmentDatabase().IsAttachmentValid(tlasAttachmentId) == false)
  144. {
  145. [[maybe_unused]] RHI::ResultCode result =
  146. frameGraph.GetAttachmentDatabase().ImportBuffer(tlasAttachmentId, rayTracingTlasBuffer);
  147. AZ_Assert(result == RHI::ResultCode::Success, "Failed to import ray tracing TLAS buffer with error %d", result);
  148. }
  149. uint32_t tlasBufferByteCount = aznumeric_cast<uint32_t>(rayTracingTlasBuffer->GetDescriptor().m_byteCount);
  150. RHI::BufferViewDescriptor tlasBufferViewDescriptor =
  151. RHI::BufferViewDescriptor::CreateRayTracingTLAS(tlasBufferByteCount);
  152. RHI::BufferScopeAttachmentDescriptor desc;
  153. desc.m_attachmentId = tlasAttachmentId;
  154. desc.m_bufferViewDescriptor = tlasBufferViewDescriptor;
  155. desc.m_loadStoreAction.m_loadAction = AZ::RHI::AttachmentLoadAction::DontCare;
  156. frameGraph.UseShaderAttachment(
  157. desc, RHI::ScopeAttachmentAccess::Write, RHI::ScopeAttachmentStage::RayTracingShader);
  158. }
  159. }
  160. // Attach output data from the skinning pass. This is needed to ensure that this pass is executed after
  161. // the skinning pass has finished. We assume that the pipeline has a skinning pass with this output available.
  162. if (rayTracingFeatureProcessor->GetSkinnedMeshCount() > 0)
  163. {
  164. RHI::Ptr<RPI::Pass> skinningPassPtr;
  165. for (const auto& sibling : m_parent->GetChildren())
  166. {
  167. if (sibling->GetPassTemplate() && sibling->GetPassTemplate()->m_name == AZ::Name{ "SkinningPassTemplate" } &&
  168. sibling->GetDeviceIndex() == Pass::GetDeviceIndex())
  169. {
  170. skinningPassPtr = sibling;
  171. break;
  172. }
  173. }
  174. AZ_Assert(skinningPassPtr, "Failed to find SkinningPass");
  175. auto skinnedMeshOutputStreamBindingPtr = skinningPassPtr->FindAttachmentBinding(AZ::Name("SkinnedMeshOutputStream"));
  176. [[maybe_unused]] auto result = frameGraph.UseShaderAttachment(
  177. skinnedMeshOutputStreamBindingPtr->m_unifiedScopeDesc.GetAsBuffer(),
  178. RHI::ScopeAttachmentAccess::Read,
  179. RHI::ScopeAttachmentStage::RayTracingShader);
  180. AZ_Assert(
  181. result == AZ::RHI::ResultCode::Success, "Failed to attach SkinnedMeshOutputStream buffer with error %d", result);
  182. }
  183. AddScopeQueryToFrameGraph(frameGraph);
  184. }
  185. }
  186. void RayTracingAccelerationStructurePass::BuildCommandList(const RHI::FrameGraphExecuteContext& context)
  187. {
  188. RPI::Scene* scene = m_pipeline->GetScene();
  189. RayTracingFeatureProcessor* rayTracingFeatureProcessor = scene->GetFeatureProcessor<RayTracingFeatureProcessor>();
  190. if (!rayTracingFeatureProcessor)
  191. {
  192. return;
  193. }
  194. if (!rayTracingFeatureProcessor->GetTlas()->GetTlasBuffer())
  195. {
  196. return;
  197. }
  198. if (!m_rayTracingRevisionOutDated && rayTracingFeatureProcessor->GetSkinnedMeshCount() == 0)
  199. {
  200. // TLAS is up to date
  201. return;
  202. }
  203. if (!rayTracingFeatureProcessor->HasGeometry())
  204. {
  205. // no ray tracing meshes in the scene
  206. return;
  207. }
  208. BeginScopeQuery(context);
  209. AZStd::vector<const AZ::RHI::DeviceRayTracingBlas*> changedBlasList;
  210. AZStd::vector<AZStd::pair<RHI::DeviceRayTracingBlas*, RHI::DeviceRayTracingCompactionQuery*>> compactionQueries;
  211. RayTracingFeatureProcessor::BlasInstanceMap& blasInstances = rayTracingFeatureProcessor->GetBlasInstances();
  212. // Build newly added Blas instances
  213. auto& toBuildList = rayTracingFeatureProcessor->GetBlasBuildList(context.GetDeviceIndex());
  214. for (auto assetId : toBuildList)
  215. {
  216. auto it = blasInstances.find(assetId);
  217. if (it == blasInstances.end())
  218. {
  219. continue;
  220. }
  221. bool enqueuedForCompaction = false;
  222. auto& blasInstance = it->second;
  223. for (auto& submeshBlasInstance : blasInstance.m_subMeshes)
  224. {
  225. changedBlasList.push_back(submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()).get());
  226. context.GetCommandList()->BuildBottomLevelAccelerationStructure(
  227. *submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()));
  228. auto query = submeshBlasInstance.m_compactionSizeQuery;
  229. if (query)
  230. {
  231. auto deviceQuery = query->GetDeviceRayTracingCompactionQuery(context.GetDeviceIndex());
  232. compactionQueries.push_back(
  233. { submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()).get(), deviceQuery.get() });
  234. enqueuedForCompaction = true;
  235. }
  236. else
  237. {
  238. AZ_Assert(!enqueuedForCompaction, "All or none Blas of an asset need to be compacted");
  239. }
  240. }
  241. if (enqueuedForCompaction)
  242. {
  243. rayTracingFeatureProcessor->MarkBlasInstanceForCompaction(context.GetDeviceIndex(), assetId);
  244. }
  245. {
  246. // Lock is needed because multiple RayTracingAccelerationPasses for multiple devices may be built simultaneously
  247. AZStd::lock_guard lock(rayTracingFeatureProcessor->GetBlasBuiltMutex());
  248. blasInstance.m_blasBuilt |= RHI::MultiDevice::DeviceMask(1 << context.GetDeviceIndex());
  249. }
  250. }
  251. toBuildList.clear();
  252. // Build, update, or rebuild skinned mesh Blas instances
  253. for (auto assetId : rayTracingFeatureProcessor->GetSkinnedMeshBlasList())
  254. {
  255. auto it = blasInstances.find(assetId);
  256. if (it == blasInstances.end())
  257. {
  258. continue;
  259. }
  260. auto& blasInstance = it->second;
  261. const bool buildBlas =
  262. (blasInstance.m_blasBuilt & RHI::MultiDevice::DeviceMask(1 << context.GetDeviceIndex())) == RHI::MultiDevice::NoDevices;
  263. for (auto submeshIndex = 0; submeshIndex < blasInstance.m_subMeshes.size(); ++submeshIndex)
  264. {
  265. auto& submeshBlasInstance = blasInstance.m_subMeshes[submeshIndex];
  266. // Determine if a skinned mesh BLAS needs to be updated or completely rebuilt. For now, we want to rebuild a BLAS
  267. // every SKINNED_BLAS_REBUILD_FRAME_INTERVAL frames, while updating it all other frames. This is based on the
  268. // assumption that by adding together the asset ID hash, submesh index, and frame count, we get a value that allows
  269. // us to uniformly distribute rebuilding all skinned mesh BLASs over all frames.
  270. auto assetGuid = it->first.m_guid.GetHash();
  271. if (!buildBlas && ((assetGuid + submeshIndex + m_frameCount) % SKINNED_BLAS_REBUILD_FRAME_INTERVAL != 0))
  272. {
  273. // Skinned mesh that simply needs an update
  274. context.GetCommandList()->UpdateBottomLevelAccelerationStructure(
  275. *submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()));
  276. }
  277. else
  278. {
  279. // Fall back to building the BLAS in any case
  280. context.GetCommandList()->BuildBottomLevelAccelerationStructure(
  281. *submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()));
  282. }
  283. changedBlasList.push_back(submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()).get());
  284. }
  285. {
  286. // Lock is needed because multiple RayTracingAccelerationPasses for multiple devices may be built simultaneously
  287. AZStd::lock_guard lock(rayTracingFeatureProcessor->GetBlasBuiltMutex());
  288. blasInstance.m_blasBuilt |= RHI::MultiDevice::DeviceMask(1 << context.GetDeviceIndex());
  289. }
  290. }
  291. // Compact Blas instances
  292. auto& toCompactList = rayTracingFeatureProcessor->GetBlasCompactionList(context.GetDeviceIndex());
  293. for (auto assetId : toCompactList)
  294. {
  295. auto it = blasInstances.find(assetId);
  296. if (it == blasInstances.end())
  297. {
  298. continue;
  299. }
  300. auto& blasInstance = it->second;
  301. for (auto& submeshBlasInstance : blasInstance.m_subMeshes)
  302. {
  303. auto query = submeshBlasInstance.m_compactionSizeQuery;
  304. context.GetCommandList()->CompactBottomLevelAccelerationStructure(
  305. *submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()),
  306. *submeshBlasInstance.m_compactBlas->GetDeviceRayTracingBlas(context.GetDeviceIndex()));
  307. changedBlasList.push_back(submeshBlasInstance.m_compactBlas->GetDeviceRayTracingBlas(context.GetDeviceIndex()).get());
  308. }
  309. AZStd::lock_guard lock(rayTracingFeatureProcessor->GetBlasBuiltMutex());
  310. rayTracingFeatureProcessor->MarkBlasInstanceAsCompactionEnqueued(context.GetDeviceIndex(), assetId);
  311. }
  312. toCompactList.clear();
  313. // build the TLAS object
  314. context.GetCommandList()->BuildTopLevelAccelerationStructure(
  315. *rayTracingFeatureProcessor->GetTlas()->GetDeviceRayTracingTlas(context.GetDeviceIndex()), changedBlasList);
  316. if (!compactionQueries.empty())
  317. {
  318. context.GetCommandList()->QueryBlasCompactionSizes(compactionQueries);
  319. }
  320. ++m_frameCount;
  321. EndScopeQuery(context);
  322. }
  323. void RayTracingAccelerationStructurePass::AddScopeQueryToFrameGraph(RHI::FrameGraphInterface frameGraph)
  324. {
  325. const auto addToFrameGraph = [&frameGraph](RHI::Ptr<RPI::Query> query)
  326. {
  327. query->AddToFrameGraph(frameGraph);
  328. };
  329. ExecuteOnTimestampQuery(addToFrameGraph);
  330. ExecuteOnPipelineStatisticsQuery(addToFrameGraph);
  331. }
  332. void RayTracingAccelerationStructurePass::BeginScopeQuery(const RHI::FrameGraphExecuteContext& context)
  333. {
  334. const auto beginQuery = [&context, this](RHI::Ptr<RPI::Query> query)
  335. {
  336. if (query->BeginQuery(context) == RPI::QueryResultCode::Fail)
  337. {
  338. AZ_UNUSED(this); // Prevent unused warning in release builds
  339. AZ_WarningOnce(
  340. "RayTracingAccelerationStructurePass",
  341. false,
  342. "BeginScopeQuery failed. Make sure AddScopeQueryToFrameGraph was called in SetupFrameGraphDependencies"
  343. " for this pass: %s",
  344. this->RTTI_GetTypeName());
  345. }
  346. };
  347. ExecuteOnTimestampQuery(beginQuery);
  348. ExecuteOnPipelineStatisticsQuery(beginQuery);
  349. }
  350. void RayTracingAccelerationStructurePass::EndScopeQuery(const RHI::FrameGraphExecuteContext& context)
  351. {
  352. const auto endQuery = [&context](const RHI::Ptr<RPI::Query>& query)
  353. {
  354. query->EndQuery(context);
  355. };
  356. // This scope query implementation should be replaced by the feature linked below on GitHub:
  357. // [GHI-16945] Feature Request - Add GPU timestamp and pipeline statistic support for scopes
  358. ExecuteOnTimestampQuery(endQuery);
  359. ExecuteOnPipelineStatisticsQuery(endQuery);
  360. m_lastDeviceIndex = context.GetDeviceIndex();
  361. }
  362. void RayTracingAccelerationStructurePass::ReadbackScopeQueryResults()
  363. {
  364. ExecuteOnTimestampQuery(
  365. [this](const RHI::Ptr<RPI::Query>& query)
  366. {
  367. const uint32_t TimestampResultQueryCount{ 2u };
  368. uint64_t timestampResult[TimestampResultQueryCount] = { 0 };
  369. query->GetLatestResult(&timestampResult, sizeof(uint64_t) * TimestampResultQueryCount, m_lastDeviceIndex);
  370. m_timestampResult = RPI::TimestampResult(timestampResult[0], timestampResult[1], RHI::HardwareQueueClass::Graphics);
  371. });
  372. ExecuteOnPipelineStatisticsQuery(
  373. [this](const RHI::Ptr<RPI::Query>& query)
  374. {
  375. query->GetLatestResult(&m_statisticsResult, sizeof(RPI::PipelineStatisticsResult), m_lastDeviceIndex);
  376. });
  377. }
  378. } // namespace Render
  379. } // namespace AZ