mali_kbase_10969_workaround.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. /*
  2. *
  3. * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
  4. *
  5. * This program is free software and is provided to you under the terms of the
  6. * GNU General Public License version 2 as published by the Free Software
  7. * Foundation, and any use by you of this program is subject to the terms
  8. * of such GNU licence.
  9. *
  10. * A copy of the licence is included with the program, and can also be obtained
  11. * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  12. * Boston, MA 02110-1301, USA.
  13. *
  14. */
  15. #include <linux/dma-mapping.h>
  16. #include <mali_kbase.h>
  17. #include <mali_kbase_10969_workaround.h>
/* This function works around a HW issue with single iterator GPUs.
 * If a fragment job is soft-stopped on the edge of its bounding box, it can
 * happen that the restart index is out of bounds and the rerun causes a tile
 * range fault. If this happens we try to clamp the restart index to a correct
 * value and rerun the job.
 */
  23. /* Mask of X and Y coordinates for the coordinates words in the descriptors*/
  24. #define X_COORDINATE_MASK 0x00000FFF
  25. #define Y_COORDINATE_MASK 0x0FFF0000
  26. /* Max number of words needed from the fragment shader job descriptor */
  27. #define JOB_HEADER_SIZE_IN_WORDS 10
  28. #define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
  29. /* Word 0: Status Word */
  30. #define JOB_DESC_STATUS_WORD 0
  31. /* Word 1: Restart Index */
  32. #define JOB_DESC_RESTART_INDEX_WORD 1
  33. /* Word 2: Fault address low word */
  34. #define JOB_DESC_FAULT_ADDR_LOW_WORD 2
  35. /* Word 8: Minimum Tile Coordinates */
  36. #define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
  37. /* Word 9: Maximum Tile Coordinates */
  38. #define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
  39. int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
  40. {
  41. struct device *dev = katom->kctx->kbdev->dev;
  42. u32 clamped = 0;
  43. struct kbase_va_region *region;
  44. phys_addr_t *page_array;
  45. u64 page_index;
  46. u32 offset = katom->jc & (~PAGE_MASK);
  47. u32 *page_1 = NULL;
  48. u32 *page_2 = NULL;
  49. u32 job_header[JOB_HEADER_SIZE_IN_WORDS];
  50. void *dst = job_header;
  51. u32 minX, minY, maxX, maxY;
  52. u32 restartX, restartY;
  53. struct page *p;
  54. u32 copy_size;
  55. dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
  56. if (!(katom->core_req & BASE_JD_REQ_FS))
  57. return 0;
  58. kbase_gpu_vm_lock(katom->kctx);
  59. region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
  60. katom->jc);
  61. if (!region || (region->flags & KBASE_REG_FREE))
  62. goto out_unlock;
  63. page_array = kbase_get_cpu_phy_pages(region);
  64. if (!page_array)
  65. goto out_unlock;
  66. page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
  67. p = pfn_to_page(PFN_DOWN(page_array[page_index]));
  68. /* we need the first 10 words of the fragment shader job descriptor.
  69. * We need to check that the offset + 10 words is less that the page
  70. * size otherwise we need to load the next page.
  71. * page_size_overflow will be equal to 0 in case the whole descriptor
  72. * is within the page > 0 otherwise.
  73. */
  74. copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
  75. page_1 = kmap_atomic(p);
  76. /* page_1 is a u32 pointer, offset is expressed in bytes */
  77. page_1 += offset>>2;
  78. kbase_sync_single_for_cpu(katom->kctx->kbdev,
  79. kbase_dma_addr(p) + offset,
  80. copy_size, DMA_BIDIRECTIONAL);
  81. memcpy(dst, page_1, copy_size);
  82. /* The data needed overflows page the dimension,
  83. * need to map the subsequent page */
  84. if (copy_size < JOB_HEADER_SIZE) {
  85. p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
  86. page_2 = kmap_atomic(p);
  87. kbase_sync_single_for_cpu(katom->kctx->kbdev,
  88. kbase_dma_addr(p),
  89. JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
  90. memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
  91. }
  92. /* We managed to correctly map one or two pages (in case of overflow) */
  93. /* Get Bounding Box data and restart index from fault address low word */
  94. minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
  95. minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
  96. maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
  97. maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
  98. restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
  99. restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
  100. dev_warn(dev, "Before Clamping:\n"
  101. "Jobstatus: %08x\n"
  102. "restartIdx: %08x\n"
  103. "Fault_addr_low: %08x\n"
  104. "minCoordsX: %08x minCoordsY: %08x\n"
  105. "maxCoordsX: %08x maxCoordsY: %08x\n",
  106. job_header[JOB_DESC_STATUS_WORD],
  107. job_header[JOB_DESC_RESTART_INDEX_WORD],
  108. job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
  109. minX, minY,
  110. maxX, maxY);
  111. /* Set the restart index to the one which generated the fault*/
  112. job_header[JOB_DESC_RESTART_INDEX_WORD] =
  113. job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
  114. if (restartX < minX) {
  115. job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
  116. dev_warn(dev,
  117. "Clamping restart X index to minimum. %08x clamped to %08x\n",
  118. restartX, minX);
  119. clamped = 1;
  120. }
  121. if (restartY < minY) {
  122. job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
  123. dev_warn(dev,
  124. "Clamping restart Y index to minimum. %08x clamped to %08x\n",
  125. restartY, minY);
  126. clamped = 1;
  127. }
  128. if (restartX > maxX) {
  129. job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
  130. dev_warn(dev,
  131. "Clamping restart X index to maximum. %08x clamped to %08x\n",
  132. restartX, maxX);
  133. clamped = 1;
  134. }
  135. if (restartY > maxY) {
  136. job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
  137. dev_warn(dev,
  138. "Clamping restart Y index to maximum. %08x clamped to %08x\n",
  139. restartY, maxY);
  140. clamped = 1;
  141. }
  142. if (clamped) {
  143. /* Reset the fault address low word
  144. * and set the job status to STOPPED */
  145. job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
  146. job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
  147. dev_warn(dev, "After Clamping:\n"
  148. "Jobstatus: %08x\n"
  149. "restartIdx: %08x\n"
  150. "Fault_addr_low: %08x\n"
  151. "minCoordsX: %08x minCoordsY: %08x\n"
  152. "maxCoordsX: %08x maxCoordsY: %08x\n",
  153. job_header[JOB_DESC_STATUS_WORD],
  154. job_header[JOB_DESC_RESTART_INDEX_WORD],
  155. job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
  156. minX, minY,
  157. maxX, maxY);
  158. /* Flush CPU cache to update memory for future GPU reads*/
  159. memcpy(page_1, dst, copy_size);
  160. p = pfn_to_page(PFN_DOWN(page_array[page_index]));
  161. kbase_sync_single_for_device(katom->kctx->kbdev,
  162. kbase_dma_addr(p) + offset,
  163. copy_size, DMA_TO_DEVICE);
  164. if (copy_size < JOB_HEADER_SIZE) {
  165. memcpy(page_2, dst + copy_size,
  166. JOB_HEADER_SIZE - copy_size);
  167. p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
  168. kbase_sync_single_for_device(katom->kctx->kbdev,
  169. kbase_dma_addr(p),
  170. JOB_HEADER_SIZE - copy_size,
  171. DMA_TO_DEVICE);
  172. }
  173. }
  174. if (copy_size < JOB_HEADER_SIZE)
  175. kunmap_atomic(page_2);
  176. kunmap_atomic(page_1);
  177. out_unlock:
  178. kbase_gpu_vm_unlock(katom->kctx);
  179. return clamped;
  180. }