Fifo.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. // Copyright 2008 Dolphin Emulator Project
  2. // Licensed under GPLv2+
  3. // Refer to the license.txt file included.
  4. #include <atomic>
  5. #include "Common/Atomic.h"
  6. #include "Common/BlockingLoop.h"
  7. #include "Common/ChunkFile.h"
  8. #include "Common/CPUDetect.h"
  9. #include "Common/Event.h"
  10. #include "Common/FPURoundMode.h"
  11. #include "Common/MemoryUtil.h"
  12. #include "Common/Thread.h"
  13. #include "Core/ConfigManager.h"
  14. #include "Core/Core.h"
  15. #include "Core/CoreTiming.h"
  16. #include "Core/NetPlayProto.h"
  17. #include "Core/HW/Memmap.h"
  18. #include "VideoCommon/AsyncRequests.h"
  19. #include "VideoCommon/CommandProcessor.h"
  20. #include "VideoCommon/CPMemory.h"
  21. #include "VideoCommon/DataReader.h"
  22. #include "VideoCommon/Fifo.h"
  23. #include "VideoCommon/OpcodeDecoding.h"
  24. #include "VideoCommon/PixelEngine.h"
  25. #include "VideoCommon/VertexLoaderManager.h"
  26. #include "VideoCommon/VertexManagerBase.h"
  27. #include "VideoCommon/VideoConfig.h"
  28. bool g_bSkipCurrentFrame = false;
  29. static Common::BlockingLoop s_gpu_mainloop;
  30. static std::atomic<bool> s_emu_running_state;
  31. // Most of this array is unlikely to be faulted in...
  32. static u8 s_fifo_aux_data[FIFO_SIZE];
  33. static u8* s_fifo_aux_write_ptr;
  34. static u8* s_fifo_aux_read_ptr;
  35. bool g_use_deterministic_gpu_thread;
  36. // STATE_TO_SAVE
  37. static u8* s_video_buffer;
  38. static u8* s_video_buffer_read_ptr;
  39. static std::atomic<u8*> s_video_buffer_write_ptr;
  40. static std::atomic<u8*> s_video_buffer_seen_ptr;
  41. static u8* s_video_buffer_pp_read_ptr;
  42. // The read_ptr is always owned by the GPU thread. In normal mode, so is the
  43. // write_ptr, despite it being atomic. In g_use_deterministic_gpu_thread mode,
  44. // things get a bit more complicated:
  45. // - The seen_ptr is written by the GPU thread, and points to what it's already
  46. // processed as much of as possible - in the case of a partial command which
  47. // caused it to stop, not the same as the read ptr. It's written by the GPU,
  48. // under the lock, and updating the cond.
  49. // - The write_ptr is written by the CPU thread after it copies data from the
  50. // FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop
  51. // polls, it's just atomic.
  52. // - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
  53. static std::atomic<int> s_sync_ticks;
  54. static Common::Event s_sync_wakeup_event;
  55. void Fifo_DoState(PointerWrap &p)
  56. {
  57. p.DoArray(s_video_buffer, FIFO_SIZE);
  58. u8* write_ptr = s_video_buffer_write_ptr;
  59. p.DoPointer(write_ptr, s_video_buffer);
  60. s_video_buffer_write_ptr = write_ptr;
  61. p.DoPointer(s_video_buffer_read_ptr, s_video_buffer);
  62. if (p.mode == PointerWrap::MODE_READ && g_use_deterministic_gpu_thread)
  63. {
  64. // We're good and paused, right?
  65. s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
  66. }
  67. p.Do(g_bSkipCurrentFrame);
  68. }
  69. void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
  70. {
  71. if (doLock)
  72. {
  73. SyncGPU(SYNC_GPU_OTHER);
  74. EmulatorState(false);
  75. FlushGpu();
  76. }
  77. else
  78. {
  79. if (unpauseOnUnlock)
  80. EmulatorState(true);
  81. }
  82. }
  83. void Fifo_Init()
  84. {
  85. // Padded so that SIMD overreads in the vertex loader are safe
  86. s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE + 4);
  87. ResetVideoBuffer();
  88. if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
  89. s_gpu_mainloop.Prepare();
  90. s_sync_ticks.store(0);
  91. }
  92. void Fifo_Shutdown()
  93. {
  94. if (s_gpu_mainloop.IsRunning())
  95. PanicAlert("Fifo shutting down while active");
  96. FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4);
  97. s_video_buffer = nullptr;
  98. s_video_buffer_write_ptr = nullptr;
  99. s_video_buffer_pp_read_ptr = nullptr;
  100. s_video_buffer_read_ptr = nullptr;
  101. s_video_buffer_seen_ptr = nullptr;
  102. s_fifo_aux_write_ptr = nullptr;
  103. s_fifo_aux_read_ptr = nullptr;
  104. }
  105. void Fifo_SetRendering(bool enabled)
  106. {
  107. g_bSkipCurrentFrame = !enabled;
  108. }
  109. // May be executed from any thread, even the graphics thread.
  110. // Created to allow for self shutdown.
  111. void ExitGpuLoop()
  112. {
  113. // This should break the wait loop in CPU thread
  114. CommandProcessor::fifo.bFF_GPReadEnable = false;
  115. FlushGpu();
  116. // Terminate GPU thread loop
  117. s_emu_running_state.store(true);
  118. s_gpu_mainloop.Stop(false);
  119. }
  120. void EmulatorState(bool running)
  121. {
  122. s_emu_running_state.store(running);
  123. s_gpu_mainloop.Wakeup();
  124. }
  125. void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
  126. {
  127. if (g_use_deterministic_gpu_thread)
  128. {
  129. s_gpu_mainloop.Wait();
  130. if (!s_gpu_mainloop.IsRunning())
  131. return;
  132. // Opportunistically reset FIFOs so we don't wrap around.
  133. if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr)
  134. PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);
  135. memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
  136. s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data);
  137. s_fifo_aux_read_ptr = s_fifo_aux_data;
  138. if (may_move_read_ptr)
  139. {
  140. u8* write_ptr = s_video_buffer_write_ptr;
  141. // what's left over in the buffer
  142. size_t size = write_ptr - s_video_buffer_pp_read_ptr;
  143. memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size);
  144. // This change always decreases the pointers. We write seen_ptr
  145. // after write_ptr here, and read it before in RunGpuLoop, so
  146. // 'write_ptr > seen_ptr' there cannot become spuriously true.
  147. s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
  148. s_video_buffer_pp_read_ptr = s_video_buffer;
  149. s_video_buffer_read_ptr = s_video_buffer;
  150. s_video_buffer_seen_ptr = write_ptr;
  151. }
  152. }
  153. }
  154. void PushFifoAuxBuffer(void* ptr, size_t size)
  155. {
  156. if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
  157. {
  158. SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);
  159. if (!s_gpu_mainloop.IsRunning())
  160. {
  161. // GPU is shutting down
  162. return;
  163. }
  164. if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
  165. {
  166. // That will sync us up to the last 32 bytes, so this short region
  167. // of FIFO would have to point to a 2MB display list or something.
  168. PanicAlert("absurdly large aux buffer");
  169. return;
  170. }
  171. }
  172. memcpy(s_fifo_aux_write_ptr, ptr, size);
  173. s_fifo_aux_write_ptr += size;
  174. }
  175. void* PopFifoAuxBuffer(size_t size)
  176. {
  177. void* ret = s_fifo_aux_read_ptr;
  178. s_fifo_aux_read_ptr += size;
  179. return ret;
  180. }
  181. // Description: RunGpuLoop() sends data through this function.
  182. static void ReadDataFromFifo(u32 readPtr)
  183. {
  184. size_t len = 32;
  185. if (len > (size_t)(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
  186. {
  187. size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr;
  188. if (len > (size_t)(FIFO_SIZE - existing_len))
  189. {
  190. PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE);
  191. return;
  192. }
  193. memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len);
  194. s_video_buffer_write_ptr = s_video_buffer + existing_len;
  195. s_video_buffer_read_ptr = s_video_buffer;
  196. }
  197. // Copy new video instructions to s_video_buffer for future use in rendering the new picture
  198. Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
  199. s_video_buffer_write_ptr += len;
  200. }
  201. // The deterministic_gpu_thread version.
  202. static void ReadDataFromFifoOnCPU(u32 readPtr)
  203. {
  204. size_t len = 32;
  205. u8 *write_ptr = s_video_buffer_write_ptr;
  206. if (len > (size_t)(s_video_buffer + FIFO_SIZE - write_ptr))
  207. {
  208. // We can't wrap around while the GPU is working on the data.
  209. // This should be very rare due to the reset in SyncGPU.
  210. SyncGPU(SYNC_GPU_WRAPAROUND);
  211. if (!s_gpu_mainloop.IsRunning())
  212. {
  213. // GPU is shutting down, so the next asserts may fail
  214. return;
  215. }
  216. if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr)
  217. {
  218. PanicAlert("desynced read pointers");
  219. return;
  220. }
  221. write_ptr = s_video_buffer_write_ptr;
  222. size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr;
  223. if (len > (size_t)(FIFO_SIZE - existing_len))
  224. {
  225. PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE);
  226. return;
  227. }
  228. }
  229. Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
  230. s_video_buffer_pp_read_ptr = OpcodeDecoder_Run<true>(DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
  231. // This would have to be locked if the GPU thread didn't spin.
  232. s_video_buffer_write_ptr = write_ptr + len;
  233. }
  234. void ResetVideoBuffer()
  235. {
  236. s_video_buffer_read_ptr = s_video_buffer;
  237. s_video_buffer_write_ptr = s_video_buffer;
  238. s_video_buffer_seen_ptr = s_video_buffer;
  239. s_video_buffer_pp_read_ptr = s_video_buffer;
  240. s_fifo_aux_write_ptr = s_fifo_aux_data;
  241. s_fifo_aux_read_ptr = s_fifo_aux_data;
  242. }
  243. // Description: Main FIFO update loop
  244. // Purpose: Keep the Core HW updated about the CPU-GPU distance
  245. void RunGpuLoop()
  246. {
  247. AsyncRequests::GetInstance()->SetEnable(true);
  248. AsyncRequests::GetInstance()->SetPassthrough(false);
  249. s_gpu_mainloop.Run(
  250. [] {
  251. const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
  252. g_video_backend->PeekMessages();
  253. // Do nothing while paused
  254. if (!s_emu_running_state.load())
  255. return;
  256. if (g_use_deterministic_gpu_thread)
  257. {
  258. AsyncRequests::GetInstance()->PullEvents();
  259. // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
  260. u8* seen_ptr = s_video_buffer_seen_ptr;
  261. u8* write_ptr = s_video_buffer_write_ptr;
  262. // See comment in SyncGPU
  263. if (write_ptr > seen_ptr)
  264. {
  265. s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
  266. s_video_buffer_seen_ptr = write_ptr;
  267. }
  268. }
  269. else
  270. {
  271. SCPFifoStruct &fifo = CommandProcessor::fifo;
  272. AsyncRequests::GetInstance()->PullEvents();
  273. CommandProcessor::SetCPStatusFromGPU();
  274. // check if we are able to run this buffer
  275. while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
  276. {
  277. if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
  278. break;
  279. u32 cyclesExecuted = 0;
  280. u32 readPtr = fifo.CPReadPointer;
  281. ReadDataFromFifo(readPtr);
  282. if (readPtr == fifo.CPEnd)
  283. readPtr = fifo.CPBase;
  284. else
  285. readPtr += 32;
  286. _assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
  287. "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
  288. u8* write_ptr = s_video_buffer_write_ptr;
  289. s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
  290. Common::AtomicStore(fifo.CPReadPointer, readPtr);
  291. Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
  292. if ((write_ptr - s_video_buffer_read_ptr) == 0)
  293. Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
  294. CommandProcessor::SetCPStatusFromGPU();
  295. if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
  296. {
  297. cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
  298. int old = s_sync_ticks.fetch_sub(cyclesExecuted);
  299. if (old > 0 && old - (int)cyclesExecuted <= 0)
  300. s_sync_wakeup_event.Set();
  301. }
  302. // This call is pretty important in DualCore mode and must be called in the FIFO Loop.
  303. // If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
  304. // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
  305. AsyncRequests::GetInstance()->PullEvents();
  306. }
  307. // fast skip remaining GPU time if fifo is empty
  308. if (s_sync_ticks.load() > 0)
  309. {
  310. int old = s_sync_ticks.exchange(0);
  311. if (old > 0)
  312. s_sync_wakeup_event.Set();
  313. }
  314. // The fifo is empty and it's unlikely we will get any more work in the near future.
  315. // Make sure VertexManager finishes drawing any primitives it has stored in it's buffer.
  316. VertexManager::Flush();
  317. }
  318. }, 100);
  319. AsyncRequests::GetInstance()->SetEnable(false);
  320. AsyncRequests::GetInstance()->SetPassthrough(true);
  321. }
  322. void FlushGpu()
  323. {
  324. const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
  325. if (!param.bCPUThread || g_use_deterministic_gpu_thread)
  326. return;
  327. s_gpu_mainloop.Wait();
  328. }
  329. void GpuMaySleep()
  330. {
  331. s_gpu_mainloop.AllowSleep();
  332. }
  333. bool AtBreakpoint()
  334. {
  335. SCPFifoStruct &fifo = CommandProcessor::fifo;
  336. return fifo.bFF_BPEnable && (fifo.CPReadPointer == fifo.CPBreakpoint);
  337. }
  338. void RunGpu()
  339. {
  340. SCPFifoStruct &fifo = CommandProcessor::fifo;
  341. const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
  342. // execute GPU
  343. if (!param.bCPUThread || g_use_deterministic_gpu_thread)
  344. {
  345. bool reset_simd_state = false;
  346. while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
  347. {
  348. if (g_use_deterministic_gpu_thread)
  349. {
  350. ReadDataFromFifoOnCPU(fifo.CPReadPointer);
  351. s_gpu_mainloop.Wakeup();
  352. }
  353. else
  354. {
  355. if (!reset_simd_state)
  356. {
  357. FPURoundMode::SaveSIMDState();
  358. FPURoundMode::LoadDefaultSIMDState();
  359. reset_simd_state = true;
  360. }
  361. ReadDataFromFifo(fifo.CPReadPointer);
  362. s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
  363. }
  364. //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
  365. if (fifo.CPReadPointer == fifo.CPEnd)
  366. fifo.CPReadPointer = fifo.CPBase;
  367. else
  368. fifo.CPReadPointer += 32;
  369. fifo.CPReadWriteDistance -= 32;
  370. }
  371. CommandProcessor::SetCPStatusFromGPU();
  372. if (reset_simd_state)
  373. {
  374. FPURoundMode::LoadSIMDState();
  375. }
  376. }
  377. // wake up GPU thread
  378. if (param.bCPUThread)
  379. {
  380. s_gpu_mainloop.Wakeup();
  381. }
  382. }
  383. void Fifo_UpdateWantDeterminism(bool want)
  384. {
  385. // We are paused (or not running at all yet), so
  386. // it should be safe to change this.
  387. const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
  388. bool gpu_thread = false;
  389. switch (param.m_GPUDeterminismMode)
  390. {
  391. case GPU_DETERMINISM_AUTO:
  392. gpu_thread = want;
  393. // Hack: For now movies are an exception to this being on (but not
  394. // to wanting determinism in general). Once vertex arrays are
  395. // fixed, there should be no reason to want this off for movies by
  396. // default, so this can be removed.
  397. if (!NetPlay::IsNetPlayRunning())
  398. gpu_thread = false;
  399. break;
  400. case GPU_DETERMINISM_NONE:
  401. gpu_thread = false;
  402. break;
  403. case GPU_DETERMINISM_FAKE_COMPLETION:
  404. gpu_thread = true;
  405. break;
  406. }
  407. gpu_thread = gpu_thread && param.bCPUThread;
  408. if (g_use_deterministic_gpu_thread != gpu_thread)
  409. {
  410. g_use_deterministic_gpu_thread = gpu_thread;
  411. if (gpu_thread)
  412. {
  413. // These haven't been updated in non-deterministic mode.
  414. s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
  415. CopyPreprocessCPStateFromMain();
  416. VertexLoaderManager::MarkAllDirty();
  417. }
  418. }
  419. }
  420. int Fifo_Update(int ticks)
  421. {
  422. const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
  423. if (ticks == 0)
  424. {
  425. FlushGpu();
  426. return param.iSyncGpuMaxDistance;
  427. }
  428. // GPU is sleeping, so no need for synchronization
  429. if (s_gpu_mainloop.IsDone() || g_use_deterministic_gpu_thread)
  430. {
  431. if (s_sync_ticks.load() < 0)
  432. {
  433. int old = s_sync_ticks.fetch_add(ticks);
  434. if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
  435. RunGpu();
  436. }
  437. return param.iSyncGpuMaxDistance;
  438. }
  439. int old = s_sync_ticks.fetch_add(ticks);
  440. if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
  441. RunGpu();
  442. if (s_sync_ticks.load() >= param.iSyncGpuMaxDistance)
  443. {
  444. while (s_sync_ticks.load() > 0)
  445. {
  446. s_sync_wakeup_event.Wait();
  447. }
  448. }
  449. return param.iSyncGpuMaxDistance - s_sync_ticks.load();
  450. }