; test-vram-timing.code.asm
  ; Second battery of tests: measure the timing of VRAM accesses relative to
  ; the vertical interrupt (INT).
  ;
  ; TestVRAMTiming
  ;   1. Disables interrupts, fills the VRAM test area with the EEh/66h
  ;      pattern and reads it back; a mismatch jumps to _VerifyError.
  ;   2. Presets every FirstBadNN result (3 bytes each) with the CycFrm value.
  ;   3. Calls _PerformTest once per write spacing (12T, 14T, 17T..24T),
  ;      stores the returned HL:A in the matching FirstBadNN, and returns as
  ;      soon as a test comes back with carry clear.
  TestVRAMTiming proc
          di

          ; As a first precaution, put a known pattern in VRAM.
          ld      de,66EEh
          call    FillVRAM

          ; Sanity check: read the pattern back from 3000h onwards.
          xor     a
          out     (99h),a                 ; A0-A7 = 0
          ld      a,30h
          out     (99h),a                 ; A8-A13 = 30h (3000h), read mode
          ld      bc,0FF10h               ; B = inner (DJNZ) count, C = outer count:
                                          ; 255 + 15*256 = 4095 bytes
          ld      de,66EEh xor 0EE00h     ; E = first expected byte (EEh),
                                          ; D = 88h = EEh xor 66h (toggle mask)
  _VRAMverify:
          in      a,(98h)
          cp      e
          jp      nz,_VerifyError
          xor     d                       ; flip to the other pattern byte...
          ld      e,a                     ; ...which the next read must return
          djnz    _VRAMverify
          dec     c
          jp      nz,_VRAMverify

          ; Find all cycles after the vertical interrupt for which a given
          ; separation between two writes (12T for the first test) is not
          ; sufficient.
          ;
          ; Method: perform two consecutive writes, the second a fixed number
          ; of T-states after the first, then leave enough time for
          ; processing. Afterwards, compare the expected values with what is
          ; actually present in VRAM.
          ;
          ; This is done at every possible phase of the total loop length with
          ; respect to the vertical interrupt. Checking all 71364 (or
          ; whatever) cycles one per frame would take about 20 minutes, so the
          ; work is parallelized: many cycles are checked within the same
          ; frame, the written bytes are read back to find which writes
          ; failed, and then the phase is shifted to test the next batch until
          ; every cycle has been covered.

          ; Preset all results with the cycles-per-frame value
          ; (HL = CycFrm1/CycFrm2, A = CycFrm3).
          ld      hl,(CycFrm1)
          ld      a,(CycFrm3)
          ld      (FirstBad12),hl
          ld      (FirstBad12+2),a
          ld      (FirstBad14),hl
          ld      (FirstBad14+2),a
          ld      (FirstBad17),hl
          ld      (FirstBad17+2),a
          ld      (FirstBad18),hl
          ld      (FirstBad18+2),a
          ld      (FirstBad19),hl
          ld      (FirstBad19+2),a
          ld      (FirstBad20),hl
          ld      (FirstBad20+2),a
          ld      (FirstBad21),hl
          ld      (FirstBad21+2),a
          ld      (FirstBad22),hl
          ld      (FirstBad22+2),a
          ld      (FirstBad23),hl
          ld      (FirstBad23+2),a
          ld      (FirstBad24),hl
          ld      (FirstBad24+2),a

          ; Each test below: A = T-states per write-loop iteration,
          ; IX = write loop to run. _PerformTest returns the result in HL:A
          ; and carry clear when FirstBad >= CycFrm; LD does not touch the
          ; flags, so RET NC still sees that carry after the two stores.

          ; 12T between writes
          ld      a,59
          ld      ix,_WLoop_12_59
          call    _PerformTest
          ld      (FirstBad12),hl
          ld      (FirstBad12+2),a
          ret     nc

          ; 14T between writes
          ld      a,61
          ld      ix,_WLoop_14_61
          call    _PerformTest
          ld      (FirstBad14),hl
          ld      (FirstBad14+2),a
          ret     nc

          ; 17T between writes
          ld      a,59
          ld      ix,_WLoop_17_59
          call    _PerformTest
          ld      (FirstBad17),hl
          ld      (FirstBad17+2),a
          ret     nc

          ; 18T between writes
          ld      a,77
          ld      ix,_WLoop_18_77
          call    _PerformTest
          ld      (FirstBad18),hl
          ld      (FirstBad18+2),a
          ret     nc

          ; 19T between writes
          ld      a,66
          ld      ix,_WLoop_19_66
          call    _PerformTest
          ld      (FirstBad19),hl
          ld      (FirstBad19+2),a
          ret     nc

          ; 20T between writes
          ld      a,67
          ld      ix,_WLoop_20_67
          call    _PerformTest
          ld      (FirstBad20),hl
          ld      (FirstBad20+2),a
          ret     nc

          ; 21T between writes
          ld      a,68
          ld      ix,_WLoop_21_68
          call    _PerformTest
          ld      (FirstBad21),hl
          ld      (FirstBad21+2),a
          ret     nc

          ; 22T between writes
          ld      a,69
          ld      ix,_WLoop_22_69
          call    _PerformTest
          ld      (FirstBad22),hl
          ld      (FirstBad22+2),a
          ret     nc

          ; 23T between writes
          ld      a,70
          ld      ix,_WLoop_23_70
          call    _PerformTest
          ld      (FirstBad23),hl
          ld      (FirstBad23+2),a
          ret     nc

          ; 24T between writes (last test: return unconditionally)
          ld      a,66
          ld      ix,_WLoop_24_66
          call    _PerformTest
          ld      (FirstBad24),hl
          ld      (FirstBad24+2),a
          ret
  ; Trampoline: CALLing this label gives the effect of "CALL (IX)", so the
  ; write loop's final RET returns to the instruction after the CALL.
  _JpWriteLoop:
          jp      (ix)                    ; 10T

  ; _PerformTest: run one write-spacing test over every phase of the loop.
  ; Input:  A  = T-states per write-loop iteration
  ;         IX = pointer to the write loop
  ; Output: L  = [FirstBad1]
  ;         H  = [FirstBad2]
  ;         A  = [FirstBad3]
  ;         CF: set if FirstBad < CycFrm, reset otherwise
  ; Trashes: F,BC,DE,HL,IY,BC',DE',HL'
  _PerformTest:
          ld      (CycPerLoop),a
          ld      c,a
          call    DivCycFrmByC            ; HL = quotient, C = remainder
          ; We want (ceiling division) - 1, because the loop counter reaches
          ; -1, not 0, when counting down. XOR A / CP C sets carry exactly
          ; when the remainder is nonzero, so ADC adds -1 + carry:
          ; floor - 1 when the division was exact, floor otherwise.
          xor     a
          ld      (VRAMW_Phase),a         ; start at phase 0
          cp      c
          ld      de,-1
          adc     hl,de
          ld      (CycDivByLoop),hl

          ; FirstBad = 013880h = 80000
          ld      hl,FirstBad1
          ld      (hl),80h
          inc     hl
          ld      (hl),38h
          inc     hl
          ld      (hl),01h

  _NextPhase:
          ; Fill VRAM with 01h
          ld      de,0101h
          call    FillVRAM

          ; Alternate set: HL' = iteration counter, DE' = -1 (loop increment)
          exx
          ld      hl,(CycDivByLoop)
          ld      de,-1
          exx

          xor     a
          out     (99h),a                 ; A0-A7 = 0
          ld      a,70h
          out     (99h),a                 ; A8-A13 = 30h (3000h), write mode
          ld      bc,0FC98h               ; C = VRAM R/W port; B = byte for even addresses
          ld      hl,ScratchWLoop
          ld      (hl),0FEh               ; byte for odd addresses
          push    bc                      ; copy BC into BC' as well
          exx
          pop     bc
          exx

          call    SyncVInt
          ; State here per the original notes: di, IntVec trashed, int not
          ; acked, 9T into the interrupt.
          ;                               ;  9T ; from SyncVInt
          ; Start a fresh frame at the correct cycle. Wraparound could be
          ; handled instead, but this is much easier.
          ld      a,(VRAMW_Phase)         ; 14T ; delay by current phase (0..CycPerLoop-1)
          sub     122                     ;  8T ; 9+14+8+5+5+5+18+11+8+18+10+11 = 122
          ld      l,a                     ;  5T
          sbc     a,a                     ;  5T ; sign-extend the borrow into H
          ld      h,a                     ;  5T
          call    WaitFrmPlusHL           ; 18T
          ld      hl,ScratchWLoop         ; 11T
          ld      a,(hl)                  ;  8T ; value that goes to odd addresses
          call    _JpWriteLoop            ; 18T
          ;                               ; 10T ; JP (IX)
          ;                               ; 11T ; OUT (C),B (before the out is effective)

          ; Any violation of the FCh/FEh alternation is a failed write.
          ; We hope (and there are reasons behind it) that a failed write
          ; never produces exactly the same pattern as a successful one.
          ; Find the first position where the alternation fails, determine
          ; the corresponding cycle number and keep the minimum.
          ld      hl,VRAMW_Phase
          inc     (hl)                    ; advance phase for the next pass
          ld      l,(hl)                  ; Use the incremented value: the range
                                          ; comparison below is done in reverse
                                          ; order, which makes it off by one.

          ; Set up address 3000h for reading, writing the address register
          ; as fast as possible.
          ld      bc,99h                  ; B = 0, C = 99h
          ld      a,30h
          out     (c),b                   ; A0-A7 = 0
          out     (99h),a                 ; A8-A11 = 0, A12-A13 = 1 (3000h), read mode
          ld      iy,3000h-1              ; IY tracks the VRAM address for error reporting
                                          ; (only read by _CompareError1/2)
          ; E:H:L = cycle number of the current VRAM position (L = phase+1)
          ld      h,b
          ld      e,b
          ld      a,(CycPerLoop)
          ld      c,a
          ld      b,0                     ; BC = T-states per pair of bytes

  _AltCheck:
          in      a,(98h)                 ; 9T+3T
          cp      0FCh                    ; 8T
          jp      nz,_BadAlt              ; 11T ; 3+8+11+9 = 31, enough
          in      a,(98h)                 ; 9T+3T
          cp      0FEh
          ; WRONG: "If the fast write has succeeded, the slow write MUST
          ; succeed." The V9938 begs to disagree, so this is treated as an
          ; ordinary bad alternation rather than as _CompareError1.
          jp      nz,_BadAlt
          inc     iy
          inc     iy
          add     hl,bc                   ; E:H:L += CycPerLoop
          ld      a,e
          adc     a,b
          ld      e,a
          ld      a,(CycFrm1)             ; CycFrm - E:H:L
          sub     l
          ld      a,(CycFrm2)
          sbc     a,h
          ld      a,(CycFrm3)
          sbc     a,e
          jp      nc,_AltCheck            ; Reversed subtraction, hence off by one, but
                                          ; faster in this direction given the
                                          ; instruction set. That is why the
                                          ; incremented phase was used above.

  _BadAlt:
          ld      a,(FirstBad1)           ; FirstBad - E:H:L
          sub     l
          ld      a,(FirstBad2)
          sbc     a,h
          ld      a,(FirstBad3)
          sbc     a,e
          jr      c,_NoRecord             ; an earlier failure is already recorded
          ; We're still one above the real value: store E:H:L - 1
          ld      bc,-1
          add     hl,bc
          ld      (FirstBad1),hl
          ld      a,e
          adc     a,b                     ; E + FFh + carry
          ld      (FirstBad3),a

  _NoRecord:
          ; More phases to check?
          ld      hl,CycPerLoop
          ld      a,(VRAMW_Phase)
          cp      (hl)
          jp      nz,_NextPhase

          ; Calculate FirstBad minus CycFrm for the carry flag only
          ; (no carry indicates we're done).
          ld      hl,(CycFrm1)
          ld      a,(FirstBad1)
          sub     l
          ld      a,(FirstBad2)
          sbc     a,h
          ld      hl,(CycFrm2)            ; H = CycFrm3
          ld      a,(FirstBad3)
          sbc     a,h
          ld      hl,(FirstBad1)          ; LD leaves the carry untouched
          ld      a,(FirstBad3)
          ret
  ; Write loops, named _WLoop_SS_LL: SS T-states between the two writes of
  ; one iteration, LL T-states per iteration.
  ;
  ; As set up by _PerformTest before jumping here:
  ;   C  = 98h, B = 0FCh (byte written first), A = 0FEh (byte written second)
  ;   BC' = same as BC (needed where the second OUT (C) runs after EXX)
  ;   HL' = iteration counter, DE' = -1
  ;   HL  = ScratchWLoop, which holds 0FEh (needed by OUTI in _WLoop_18_77)
  ; Each loop ends with ADD HL,DE on the alternate set: adding -1 produces a
  ; carry unless HL' was 0, so the loop runs HL'+1 times.
  ; T-state figures give "before output" + 3T "after output" for each OUT.

  ; 12T between writes, 59T long
  _WLoop_12_59:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 47T after previous write
          out     (98h),a                 ;  9T+3T ; writes 0FEh, 12T after previous write!
          exx                             ;  5T
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_12_59          ; 11T    ; (11+3)+(9+3)+5+12+5+11 = 59T
          ret                             ; out of the timed area now

  ; 14T between writes, 61T long
  _WLoop_14_61:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 47T after previous write
          out     (c),a                   ; 11T+3T ; writes 0FEh, 14T after previous write!
          exx                             ;  5T
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_14_61          ; 11T    ; (11+3)+(11+3)+5+12+5+11 = 61T
          ret                             ; out of the timed area now

  ; 17T between writes, 59T long
  _WLoop_17_59:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 42T after previous write
          exx                             ;  5T
          out     (98h),a                 ;  9T+3T ; writes 0FEh, 17T after previous write!
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_17_59          ; 11T    ; (11+3)+5+(9+3)+12+5+11 = 59T
          ret                             ; out of the timed area now

  ; 18T between writes, 77T long
  _WLoop_18_77:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 59T after previous write
          outi                            ; 15T+3T ; writes 0FEh, 18T after previous write!
          dec     hl                      ;  7T    ; undo the changes made by OUTI...
          inc     b                       ;  5T    ; ...to HL and B
          exx                             ;  5T
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_18_77          ; 11T    ; (11+3)+(15+3)+7+5+5+12+5+11 = 77T
          ret                             ; out of the timed area now

  ; 19T between writes, 66T long
  _WLoop_19_66:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 47T after previous write
          inc     hl                      ;  7T    ; dummy, for delay
          out     (98h),a                 ;  9T+3T ; writes 0FEh, 19T after previous write!
          exx                             ;  5T
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_19_66          ; 11T    ; (11+3)+7+(9+3)+5+12+5+11 = 66T
          ret                             ; out of the timed area now

  ; 20T between writes, 67T long
  _WLoop_20_67:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 47T after previous write
          ld      l,0                     ;  8T    ; dummy, for delay
          out     (98h),a                 ;  9T+3T ; writes 0FEh, 20T after previous write!
          exx                             ;  5T
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_20_67          ; 11T    ; (11+3)+8+(9+3)+5+12+5+11 = 67T
          ret                             ; out of the timed area now

  ; 21T between writes, 68T long
  _WLoop_21_68:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 47T after previous write
          inc     hl                      ;  7T    ; dummy, for delay
          out     (c),a                   ; 11T+3T ; writes 0FEh, 21T after previous write!
          exx                             ;  5T
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_21_68          ; 11T    ; (11+3)+7+(11+3)+5+12+5+11 = 68T
          ret                             ; out of the timed area now

  ; 22T between writes, 69T long
  _WLoop_22_69:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 47T after previous write
          ld      l,0                     ;  8T    ; dummy, for delay
          out     (c),a                   ; 11T+3T ; writes 0FEh, 22T after previous write!
          exx                             ;  5T
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_22_69          ; 11T    ; (11+3)+8+(11+3)+5+12+5+11 = 69T
          ret                             ; out of the timed area now

  ; 23T between writes, 70T long
  _WLoop_23_70:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 47T after previous write
          ld      hl,0                    ; 11T    ; dummy, for delay
          out     (98h),a                 ;  9T+3T ; writes 0FEh, 23T after previous write!
          exx                             ;  5T
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_23_70          ; 11T    ; (11+3)+11+(9+3)+5+12+5+11 = 70T
          ret                             ; out of the timed area now

  ; 24T between writes, 66T long
  _WLoop_24_66:
          out     (c),b                   ; 11T+3T ; writes 0FCh, 42T after previous write
          nop                             ;  5T    ; dummy, for delay
          exx                             ;  5T
          out     (c),a                   ; 11T+3T ; writes 0FEh via C', 24T after previous write!
          add     hl,de                   ; 12T    ; dec counter
          exx                             ;  5T
          jp      c,_WLoop_24_66          ; 11T    ; (11+3)+5+5+(11+3)+12+5+11 = 66T
          ret                             ; out of the timed area now
  ; Error exits of TestVRAMTiming. Each one fills ErrParams, loads the error
  ; code into A and jumps to Finish.

  ; Error code 5: unexpected VRAM contents during analysis.
  ; In: A = byte read, IY = VRAM address tracker.
  ; ErrParams+0 = byte read, ErrParams+1 = expected byte (0FEh),
  ; ErrParams+2..3 = IY.
  ; NOTE(review): the only jump to this label in the visible code is commented
  ; out, so it appears to be unused.
  _CompareError1:
          ld      l,a
          ld      h,0FEh
          ld      (ErrParams),hl
          ld      (ErrParams+2),iy        ; VRAM address with error
          ld      a,5
          jp      Finish

  ; Same as _CompareError1, but with 0FCh as the expected byte.
  ; NOTE(review): no reference to this label is visible in this file section.
  _CompareError2:
          ld      l,a
          ld      h,0FCh
          ld      (ErrParams),hl
          ld      (ErrParams+2),iy
          ld      a,5
          jp      Finish

  ; Error code 4: VRAM verification error.
  ; In: B, C = loop counters of _VRAMverify at the moment of the mismatch.
  ; ErrParams+0..1 = VRAM address of the mismatching byte.
  ;
  ; _VRAMverify starts with B = 0FFh, C = 10h and reads from 3000h. When the
  ; byte at 3000h+k mismatches, decrementing both counters and swapping them
  ; gives DE = C:B = 0FFEh - k (this also holds across the B wrap-around).
  ; SBC with carry set computes HL - DE - 1, so the base must be 3FFFh:
  ;   3FFFh - 1 - (0FFEh - k) = 3000h + k
  ; (With the previous base of 4000h the result was one past the failing byte.)
  _VerifyError:
          dec     b
          dec     c
          ld      d,c                     ; swap bytes: DE = C:B
          ld      e,b
          ld      hl,3FFFh
          scf
          sbc     hl,de
          ld      (ErrParams),hl
          ld      a,4
          jp      Finish
  endp
  ; Fills 4095 bytes of VRAM, starting at 3000h, with an alternating
  ; two-byte pattern.
  ; Input: E = value for the first byte of every pair (even offsets)
  ;        D = value for the second byte of every pair (odd offsets)
  ;        e.g. if DE = 0305h, the values are 5, 3, 5, 3, 5, 3, ...
  ; Trashes: nothing (AF, BC and DE are saved and restored), but fiddles
  ;          with VDP registers and updates the register 14 shadow.
  FillVRAM proc
          ; We support up to 80,000 cycles/frame. With a 49 cycle loop, at
          ; 2 bytes per loop, that takes up to 3266 bytes. Therefore 4095
          ; bytes are enough, so we fill 4095 bytes starting at 3000h.
          push    bc
          push    de                      ; D is used as the toggle mask below
          push    af

          xor     a                       ; A14-A16 set to 0
          ld      (RG00SAV+14),a          ; Save the new value of register 14. This is
                                          ; done while A still holds that value (0),
                                          ; not the register-select byte.
                                          ; NOTE(review): RG00SAV differs from the
                                          ; RG0SAV symbol used below; confirm it is
                                          ; defined and intended.
          out     (99h),a
          ld      a,80h+14                ; register 14 in V9938; 6 in earlier ones
          out     (99h),a

          ld      a,(RG0SAV+6)
          out     (99h),a
          ld      a,80h+6
          out     (99h),a                 ; restore register 6 in case it was overwritten

          xor     a
          out     (99h),a                 ; A0-A7 set to 0
          ld      a,70h
          out     (99h),a                 ; A8-A13 = 30h (3000h), write mode

          ld      a,d
          xor     e
          ld      d,a                     ; D = value to XOR with to alternate
          ld      a,e                     ; first byte to write
          ld      bc,10FFh                ; C = inner count, B = outer count:
                                          ; 255 + 15*256 = 4095 bytes
                                          ; (prevents incrementing into A14,
                                          ; allowing us to avoid setting A14-A16 later)
  _FillVRAMloop:
          out     (98h),a                 ; 12T
          xor     d                       ;  5T
          dec     c                       ;  5T
          jp      nz,_FillVRAMloop        ; 11T ; inner loop: 12+5+5+11 = 33T, using the
                                          ; JP figure from the write loops in this file
                                          ; (an earlier comment here said 29T)
          djnz    _FillVRAMloop           ; 14T (-5T on exit); no fixed-time
                                          ; constraints here

          pop     af
          pop     de
          pop     bc
          ret
  endp
  ; Set VDP blank mode: writes the saved value of register 1 with bit 6
  ; (the /BLANK bit) cleared. The saved copy itself is not modified.
  ; Trashes: nothing (AF is saved and restored).
  BlankVideo proc
          push    af
          ld      a,(RG0SAV+1)            ; saved register 1
          and     0BFh                    ; clear bit 6 (/BLANK)
          out     (99h),a                 ; data byte
          ld      a,81h                   ; select register 1
          out     (99h),a
          pop     af
          ret
  endp
  ; Unset VDP blank mode: writes the saved value of register 1 with bit 6
  ; (the /BLANK bit) set, i.e. no blanking. The saved copy itself is not
  ; modified.
  ; Trashes: nothing (AF is saved and restored).
  UnblankVideo proc
          push    af
          ld      a,(RG0SAV+1)            ; saved register 1
          or      40h                     ; set bit 6 (/BLANK)
          out     (99h),a                 ; data byte
          ld      a,81h                   ; select register 1
          out     (99h),a
          pop     af
          ret
  endp
  ; Code adapted from multiple sources on the internet.
  ; Divide the 24-bit cycles-per-frame value (CycFrm1..CycFrm3) by C.
  ; Input:  C = divisor (assumes C > [CycFrm3] so that the result fits in 16 bits)
  ; Output: quotient in HL, remainder in C.
  ; Trashes: AF
  ; Uses exactly 729 T-states regardless of input (on MSX, running on Z80).
  ; Note CycFrm3 is typically < 2, so any divisor > 1 will probably do.
  ;
  ; Method: A starts as the top byte and acts as the running remainder; the
  ; low 16 bits are shifted out of HL one at a time while the inverted
  ; quotient bits are shifted in. HL is complemented at the end.
  DivCycFrmByC proc
          ld      hl,(CycFrm1)            ; 17T
          ld      a,(CycFrm3)             ; 14T
          add     hl,hl                   ; 12T ; shift out the first bit
          rept 16                         ; 16 * (
          adc     a,a                     ;  5T ; remainder = remainder*2 + next bit
          sub     c                       ;  5T
          jr      nc,$+3                  ; 13T ; skip the ADD; rept-local labels are
                                          ;     ; not working for us
                                          ; -5T ; when the branch is not taken
          add     a,c                     ;  5T ; subtracted once too much, adjust back;
                                          ;     ; also evens out the timing
          ; jump destination
          adc     hl,hl                   ; 17T ; shift in the inverted next quotient bit
          endm                            ; )
          ld      c,a                     ;  5T ; save remainder
          ld      a,l                     ;  5T ; complement HL
          cpl                             ;  5T
          ld      l,a                     ;  5T
          ld      a,h                     ;  5T
          cpl                             ;  5T
          ld      h,a                     ;  5T ; 7 * 5T in total (including LD C,A);
                                          ;     ; CCF inside the loop would cost 16 * 5T
          ret                             ; 11T
          ; 17+14+12+16*(5+5+13-5+5+17)+5+5+5+5+5+5+5+11 = 729
  endp
  ; Used for unit testing of the division routine.
  ; Reads the divisor byte from DAC+2, divides the cycles-per-frame value by
  ; it with DivCycFrmByC, and stores the 16-bit quotient back at DAC+2.
  ; The remainder (returned in C) is discarded.
  ; Trashes: AF, C, HL
  UnitTestDiv:
          ld      a,(DAC+2)
          ld      c,a                     ; DivCycFrmByC takes its divisor in C; without
                                          ; this the loaded value was never used
          call    DivCycFrmByC
          ld      (DAC+2),hl
          ret