RayTracingAccelerationStructurePass.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include <Atom/RHI/BufferFrameAttachment.h>
  9. #include <Atom/RHI/BufferScopeAttachment.h>
  10. #include <Atom/RHI/CommandList.h>
  11. #include <Atom/RHI/FrameScheduler.h>
  12. #include <Atom/RHI/RHISystemInterface.h>
  13. #include <Atom/RHI/ScopeProducerFunction.h>
  14. #include <Atom/RPI.Public/Buffer/Buffer.h>
  15. #include <Atom/RPI.Public/Buffer/BufferSystemInterface.h>
  16. #include <Atom/RPI.Public/GpuQuery/QueryPool.h>
  17. #include <Atom/RPI.Public/RenderPipeline.h>
  18. #include <Atom/RPI.Public/Scene.h>
  19. #include <Mesh/MeshFeatureProcessor.h>
  20. #include <RayTracing/RayTracingAccelerationStructurePass.h>
  21. #include <RayTracing/RayTracingFeatureProcessor.h>
  22. namespace AZ
  23. {
  24. namespace Render
  25. {
  26. RPI::Ptr<RayTracingAccelerationStructurePass> RayTracingAccelerationStructurePass::Create(const RPI::PassDescriptor& descriptor)
  27. {
  28. RPI::Ptr<RayTracingAccelerationStructurePass> rayTracingAccelerationStructurePass =
  29. aznew RayTracingAccelerationStructurePass(descriptor);
  30. return AZStd::move(rayTracingAccelerationStructurePass);
  31. }
  32. RayTracingAccelerationStructurePass::RayTracingAccelerationStructurePass(const RPI::PassDescriptor& descriptor)
  33. : Pass(descriptor)
  34. {
  35. // disable this pass if we're on a platform that doesn't support raytracing
  36. if (RHI::RHISystemInterface::Get()->GetRayTracingSupport() == RHI::MultiDevice::NoDevices)
  37. {
  38. SetEnabled(false);
  39. }
  40. }
  41. void RayTracingAccelerationStructurePass::BuildInternal()
  42. {
  43. // [GFX TODO][ATOM-18111] Ideally, this would be done on the Compute queue, but that has multiple issues (see also 18305).
  44. auto deviceIndex = Pass::GetDeviceIndex();
  45. InitScope(
  46. RHI::ScopeId(AZStd::string(GetPathName().GetCStr() + AZStd::to_string(deviceIndex))),
  47. AZ::RHI::HardwareQueueClass::Graphics,
  48. deviceIndex);
  49. }
  50. void RayTracingAccelerationStructurePass::FrameBeginInternal(FramePrepareParams params)
  51. {
  52. if (IsTimestampQueryEnabled())
  53. {
  54. m_timestampResult = AZ::RPI::TimestampResult();
  55. }
  56. if (GetScopeId().IsEmpty() || (ScopeProducer::GetDeviceIndex() != Pass::GetDeviceIndex()))
  57. {
  58. InitScope(RHI::ScopeId(GetPathName()), RHI::HardwareQueueClass::Graphics, Pass::GetDeviceIndex());
  59. }
  60. params.m_frameGraphBuilder->ImportScopeProducer(*this);
  61. RPI::Scene* scene = m_pipeline->GetScene();
  62. RayTracingFeatureProcessor* rayTracingFeatureProcessor = scene->GetFeatureProcessor<RayTracingFeatureProcessor>();
  63. if (rayTracingFeatureProcessor)
  64. {
  65. rayTracingFeatureProcessor->BeginFrame(static_cast<RHI::ScopeProducer*>(this)->GetDeviceIndex());
  66. if (RaytracingRevisionOutdated())
  67. {
  68. ReadbackScopeQueryResults();
  69. }
  70. }
  71. }
  72. bool RayTracingAccelerationStructurePass::RaytracingRevisionOutdated() const
  73. {
  74. auto rayTracingFeatureProcessor = GetScene()->GetFeatureProcessor<RayTracingFeatureProcessor>();
  75. if (rayTracingFeatureProcessor)
  76. {
  77. return rayTracingFeatureProcessor->GetRevision() != rayTracingFeatureProcessor->GetBuiltRevision(RHI::ScopeProducer::GetDeviceIndex()) || rayTracingFeatureProcessor->GetSkinnedMeshCount() != 0;
  78. }
  79. return false;
  80. }
  81. RHI::Ptr<RPI::Query> RayTracingAccelerationStructurePass::GetQuery(RPI::ScopeQueryType queryType)
  82. {
  83. auto typeIndex{ static_cast<uint32_t>(queryType) };
  84. if (!m_scopeQueries[typeIndex])
  85. {
  86. RHI::Ptr<RPI::Query> query;
  87. switch (queryType)
  88. {
  89. case RPI::ScopeQueryType::Timestamp:
  90. query = RPI::GpuQuerySystemInterface::Get()->CreateQuery(
  91. RHI::QueryType::Timestamp, RHI::QueryPoolScopeAttachmentType::Global, RHI::ScopeAttachmentAccess::Write);
  92. break;
  93. case RPI::ScopeQueryType::PipelineStatistics:
  94. query = RPI::GpuQuerySystemInterface::Get()->CreateQuery(
  95. RHI::QueryType::PipelineStatistics, RHI::QueryPoolScopeAttachmentType::Global, RHI::ScopeAttachmentAccess::Write);
  96. break;
  97. }
  98. m_scopeQueries[typeIndex] = query;
  99. }
  100. return m_scopeQueries[typeIndex];
  101. }
  102. template<typename Func>
  103. inline void RayTracingAccelerationStructurePass::ExecuteOnTimestampQuery(Func&& func)
  104. {
  105. if (IsTimestampQueryEnabled())
  106. {
  107. auto query{ GetQuery(RPI::ScopeQueryType::Timestamp) };
  108. if (query)
  109. {
  110. func(query);
  111. }
  112. }
  113. }
  114. template<typename Func>
  115. inline void RayTracingAccelerationStructurePass::ExecuteOnPipelineStatisticsQuery(Func&& func)
  116. {
  117. if (IsPipelineStatisticsQueryEnabled())
  118. {
  119. auto query{ GetQuery(RPI::ScopeQueryType::PipelineStatistics) };
  120. if (query)
  121. {
  122. func(query);
  123. }
  124. }
  125. }
  126. RPI::TimestampResult RayTracingAccelerationStructurePass::GetTimestampResultInternal() const
  127. {
  128. return m_timestampResult;
  129. }
  130. RPI::PipelineStatisticsResult RayTracingAccelerationStructurePass::GetPipelineStatisticsResultInternal() const
  131. {
  132. return m_statisticsResult;
  133. }
  134. void RayTracingAccelerationStructurePass::SetupFrameGraphDependencies(RHI::FrameGraphInterface frameGraph)
  135. {
  136. RPI::Scene* scene = m_pipeline->GetScene();
  137. RayTracingFeatureProcessor* rayTracingFeatureProcessor = scene->GetFeatureProcessor<RayTracingFeatureProcessor>();
  138. if (rayTracingFeatureProcessor)
  139. {
  140. if (RaytracingRevisionOutdated())
  141. {
  142. // create the TLAS buffers based on the descriptor
  143. RHI::Ptr<RHI::RayTracingTlas>& rayTracingTlas = rayTracingFeatureProcessor->GetTlas();
  144. // import and attach the TLAS buffer
  145. const RHI::Ptr<RHI::Buffer>& rayTracingTlasBuffer = rayTracingTlas->GetTlasBuffer();
  146. if (rayTracingTlasBuffer && rayTracingFeatureProcessor->HasGeometry())
  147. {
  148. AZ::RHI::AttachmentId tlasAttachmentId = rayTracingFeatureProcessor->GetTlasAttachmentId();
  149. if (frameGraph.GetAttachmentDatabase().IsAttachmentValid(tlasAttachmentId) == false)
  150. {
  151. [[maybe_unused]] RHI::ResultCode result =
  152. frameGraph.GetAttachmentDatabase().ImportBuffer(tlasAttachmentId, rayTracingTlasBuffer);
  153. AZ_Assert(result == RHI::ResultCode::Success, "Failed to import ray tracing TLAS buffer with error %d", result);
  154. }
  155. uint32_t tlasBufferByteCount = aznumeric_cast<uint32_t>(rayTracingTlasBuffer->GetDescriptor().m_byteCount);
  156. RHI::BufferViewDescriptor tlasBufferViewDescriptor =
  157. RHI::BufferViewDescriptor::CreateRayTracingTLAS(tlasBufferByteCount);
  158. RHI::BufferScopeAttachmentDescriptor desc;
  159. desc.m_attachmentId = tlasAttachmentId;
  160. desc.m_bufferViewDescriptor = tlasBufferViewDescriptor;
  161. desc.m_loadStoreAction.m_loadAction = AZ::RHI::AttachmentLoadAction::DontCare;
  162. frameGraph.UseShaderAttachment(
  163. desc, RHI::ScopeAttachmentAccess::Write, RHI::ScopeAttachmentStage::RayTracingShader);
  164. }
  165. }
  166. // Attach output data from the skinning pass. This is needed to ensure that this pass is executed after
  167. // the skinning pass has finished. We assume that the pipeline has a skinning pass with this output available.
  168. if (rayTracingFeatureProcessor->GetSkinnedMeshCount() > 0)
  169. {
  170. RHI::Ptr<RPI::Pass> skinningPassPtr;
  171. for (const auto& sibling : m_parent->GetChildren())
  172. {
  173. if (sibling->GetPassTemplate() && sibling->GetPassTemplate()->m_name == AZ::Name{ "SkinningPassTemplate" } &&
  174. sibling->GetDeviceIndex() == Pass::GetDeviceIndex())
  175. {
  176. skinningPassPtr = sibling;
  177. break;
  178. }
  179. }
  180. AZ_Assert(skinningPassPtr, "Failed to find SkinningPass");
  181. auto skinnedMeshOutputStreamBindingPtr = skinningPassPtr->FindAttachmentBinding(AZ::Name("SkinnedMeshOutputStream"));
  182. [[maybe_unused]] auto result = frameGraph.UseShaderAttachment(
  183. skinnedMeshOutputStreamBindingPtr->m_unifiedScopeDesc.GetAsBuffer(),
  184. RHI::ScopeAttachmentAccess::Read,
  185. RHI::ScopeAttachmentStage::RayTracingShader);
  186. AZ_Assert(
  187. result == AZ::RHI::ResultCode::Success, "Failed to attach SkinnedMeshOutputStream buffer with error %d", result);
  188. }
  189. AddScopeQueryToFrameGraph(frameGraph);
  190. }
  191. }
  192. void RayTracingAccelerationStructurePass::BuildCommandList(const RHI::FrameGraphExecuteContext& context)
  193. {
  194. RPI::Scene* scene = m_pipeline->GetScene();
  195. RayTracingFeatureProcessor* rayTracingFeatureProcessor = scene->GetFeatureProcessor<RayTracingFeatureProcessor>();
  196. if (!rayTracingFeatureProcessor)
  197. {
  198. return;
  199. }
  200. if (!rayTracingFeatureProcessor->GetTlas()->GetTlasBuffer())
  201. {
  202. return;
  203. }
  204. if (!RaytracingRevisionOutdated())
  205. {
  206. // TLAS is up to date
  207. return;
  208. }
  209. if (!rayTracingFeatureProcessor->HasGeometry())
  210. {
  211. // no ray tracing meshes in the scene
  212. return;
  213. }
  214. rayTracingFeatureProcessor->SetBuiltRevision(context.GetDeviceIndex(), rayTracingFeatureProcessor->GetRevision());
  215. BeginScopeQuery(context);
  216. AZStd::vector<const AZ::RHI::DeviceRayTracingBlas*> changedBlasList;
  217. AZStd::vector<AZStd::pair<RHI::DeviceRayTracingBlas*, RHI::DeviceRayTracingCompactionQuery*>> compactionQueries;
  218. RayTracingFeatureProcessor::BlasInstanceMap& blasInstances = rayTracingFeatureProcessor->GetBlasInstances();
  219. // Build newly added Blas instances
  220. auto& toBuildList = rayTracingFeatureProcessor->GetBlasBuildList(context.GetDeviceIndex());
  221. for (auto assetId : toBuildList)
  222. {
  223. auto it = blasInstances.find(assetId);
  224. if (it == blasInstances.end())
  225. {
  226. continue;
  227. }
  228. bool enqueuedForCompaction = false;
  229. auto& blasInstance = it->second;
  230. for (auto& submeshBlasInstance : blasInstance.m_subMeshes)
  231. {
  232. changedBlasList.push_back(submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()).get());
  233. context.GetCommandList()->BuildBottomLevelAccelerationStructure(
  234. *submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()));
  235. auto query = submeshBlasInstance.m_compactionSizeQuery;
  236. if (query)
  237. {
  238. auto deviceQuery = query->GetDeviceRayTracingCompactionQuery(context.GetDeviceIndex());
  239. compactionQueries.push_back(
  240. { submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()).get(), deviceQuery.get() });
  241. enqueuedForCompaction = true;
  242. }
  243. else
  244. {
  245. AZ_Assert(!enqueuedForCompaction, "All or none Blas of an asset need to be compacted");
  246. }
  247. }
  248. if (enqueuedForCompaction)
  249. {
  250. rayTracingFeatureProcessor->MarkBlasInstanceForCompaction(context.GetDeviceIndex(), assetId);
  251. }
  252. {
  253. // Lock is needed because multiple RayTracingAccelerationPasses for multiple devices may be built simultaneously
  254. AZStd::lock_guard lock(rayTracingFeatureProcessor->GetBlasBuiltMutex());
  255. blasInstance.m_blasBuilt |= RHI::MultiDevice::DeviceMask(1 << context.GetDeviceIndex());
  256. }
  257. }
  258. toBuildList.clear();
  259. // Build, update, or rebuild skinned mesh Blas instances
  260. for (auto assetId : rayTracingFeatureProcessor->GetSkinnedMeshBlasList())
  261. {
  262. auto it = blasInstances.find(assetId);
  263. if (it == blasInstances.end())
  264. {
  265. continue;
  266. }
  267. auto& blasInstance = it->second;
  268. const bool buildBlas =
  269. (blasInstance.m_blasBuilt & RHI::MultiDevice::DeviceMask(1 << context.GetDeviceIndex())) == RHI::MultiDevice::NoDevices;
  270. for (auto submeshIndex = 0; submeshIndex < blasInstance.m_subMeshes.size(); ++submeshIndex)
  271. {
  272. auto& submeshBlasInstance = blasInstance.m_subMeshes[submeshIndex];
  273. // Determine if a skinned mesh BLAS needs to be updated or completely rebuilt. For now, we want to rebuild a BLAS
  274. // every SKINNED_BLAS_REBUILD_FRAME_INTERVAL frames, while updating it all other frames. This is based on the
  275. // assumption that by adding together the asset ID hash, submesh index, and frame count, we get a value that allows
  276. // us to uniformly distribute rebuilding all skinned mesh BLASs over all frames.
  277. auto assetGuid = it->first.m_guid.GetHash();
  278. if (!buildBlas && ((assetGuid + submeshIndex + m_frameCount) % SKINNED_BLAS_REBUILD_FRAME_INTERVAL != 0))
  279. {
  280. // Skinned mesh that simply needs an update
  281. context.GetCommandList()->UpdateBottomLevelAccelerationStructure(
  282. *submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()));
  283. }
  284. else
  285. {
  286. // Fall back to building the BLAS in any case
  287. context.GetCommandList()->BuildBottomLevelAccelerationStructure(
  288. *submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()));
  289. }
  290. changedBlasList.push_back(submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()).get());
  291. }
  292. {
  293. // Lock is needed because multiple RayTracingAccelerationPasses for multiple devices may be built simultaneously
  294. AZStd::lock_guard lock(rayTracingFeatureProcessor->GetBlasBuiltMutex());
  295. blasInstance.m_blasBuilt |= RHI::MultiDevice::DeviceMask(1 << context.GetDeviceIndex());
  296. }
  297. }
  298. // Compact Blas instances
  299. auto& toCompactList = rayTracingFeatureProcessor->GetBlasCompactionList(context.GetDeviceIndex());
  300. for (auto assetId : toCompactList)
  301. {
  302. auto it = blasInstances.find(assetId);
  303. if (it == blasInstances.end())
  304. {
  305. continue;
  306. }
  307. auto& blasInstance = it->second;
  308. for (auto& submeshBlasInstance : blasInstance.m_subMeshes)
  309. {
  310. auto query = submeshBlasInstance.m_compactionSizeQuery;
  311. context.GetCommandList()->CompactBottomLevelAccelerationStructure(
  312. *submeshBlasInstance.m_blas->GetDeviceRayTracingBlas(context.GetDeviceIndex()),
  313. *submeshBlasInstance.m_compactBlas->GetDeviceRayTracingBlas(context.GetDeviceIndex()));
  314. changedBlasList.push_back(submeshBlasInstance.m_compactBlas->GetDeviceRayTracingBlas(context.GetDeviceIndex()).get());
  315. }
  316. AZStd::lock_guard lock(rayTracingFeatureProcessor->GetBlasBuiltMutex());
  317. rayTracingFeatureProcessor->MarkBlasInstanceAsCompactionEnqueued(context.GetDeviceIndex(), assetId);
  318. }
  319. toCompactList.clear();
  320. // build the TLAS object
  321. context.GetCommandList()->BuildTopLevelAccelerationStructure(
  322. *rayTracingFeatureProcessor->GetTlas()->GetDeviceRayTracingTlas(context.GetDeviceIndex()), changedBlasList);
  323. if (!compactionQueries.empty())
  324. {
  325. context.GetCommandList()->QueryBlasCompactionSizes(compactionQueries);
  326. }
  327. ++m_frameCount;
  328. EndScopeQuery(context);
  329. }
  330. void RayTracingAccelerationStructurePass::AddScopeQueryToFrameGraph(RHI::FrameGraphInterface frameGraph)
  331. {
  332. const auto addToFrameGraph = [&frameGraph](RHI::Ptr<RPI::Query> query)
  333. {
  334. query->AddToFrameGraph(frameGraph);
  335. };
  336. ExecuteOnTimestampQuery(addToFrameGraph);
  337. ExecuteOnPipelineStatisticsQuery(addToFrameGraph);
  338. }
  339. void RayTracingAccelerationStructurePass::BeginScopeQuery(const RHI::FrameGraphExecuteContext& context)
  340. {
  341. const auto beginQuery = [&context, this](RHI::Ptr<RPI::Query> query)
  342. {
  343. if (query->BeginQuery(context) == RPI::QueryResultCode::Fail)
  344. {
  345. AZ_UNUSED(this); // Prevent unused warning in release builds
  346. AZ_WarningOnce(
  347. "RayTracingAccelerationStructurePass",
  348. false,
  349. "BeginScopeQuery failed. Make sure AddScopeQueryToFrameGraph was called in SetupFrameGraphDependencies"
  350. " for this pass: %s",
  351. this->RTTI_GetTypeName());
  352. }
  353. };
  354. ExecuteOnTimestampQuery(beginQuery);
  355. ExecuteOnPipelineStatisticsQuery(beginQuery);
  356. }
  357. void RayTracingAccelerationStructurePass::EndScopeQuery(const RHI::FrameGraphExecuteContext& context)
  358. {
  359. const auto endQuery = [&context](const RHI::Ptr<RPI::Query>& query)
  360. {
  361. query->EndQuery(context);
  362. };
  363. // This scope query implementation should be replaced by the feature linked below on GitHub:
  364. // [GHI-16945] Feature Request - Add GPU timestamp and pipeline statistic support for scopes
  365. ExecuteOnTimestampQuery(endQuery);
  366. ExecuteOnPipelineStatisticsQuery(endQuery);
  367. m_lastDeviceIndex = context.GetDeviceIndex();
  368. }
  369. void RayTracingAccelerationStructurePass::ReadbackScopeQueryResults()
  370. {
  371. ExecuteOnTimestampQuery(
  372. [this](const RHI::Ptr<RPI::Query>& query)
  373. {
  374. const uint32_t TimestampResultQueryCount{ 2u };
  375. uint64_t timestampResult[TimestampResultQueryCount] = { 0 };
  376. query->GetLatestResult(&timestampResult, sizeof(uint64_t) * TimestampResultQueryCount, m_lastDeviceIndex);
  377. m_timestampResult = RPI::TimestampResult(timestampResult[0], timestampResult[1], RHI::HardwareQueueClass::Graphics);
  378. });
  379. ExecuteOnPipelineStatisticsQuery(
  380. [this](const RHI::Ptr<RPI::Query>& query)
  381. {
  382. query->GetLatestResult(&m_statisticsResult, sizeof(RPI::PipelineStatisticsResult), m_lastDeviceIndex);
  383. });
  384. }
  385. } // namespace Render
  386. } // namespace AZ