intel_workarounds.c 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. /*
  2. * SPDX-License-Identifier: MIT
  3. *
  4. * Copyright © 2018 Intel Corporation
  5. */
  6. #include "../i915_selftest.h"
  7. #include "igt_wedge_me.h"
  8. #include "mock_context.h"
  9. static struct drm_i915_gem_object *
  10. read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
  11. {
  12. struct drm_i915_gem_object *result;
  13. struct i915_request *rq;
  14. struct i915_vma *vma;
  15. const u32 base = engine->mmio_base;
  16. u32 srm, *cs;
  17. int err;
  18. int i;
  19. result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
  20. if (IS_ERR(result))
  21. return result;
  22. i915_gem_object_set_cache_level(result, I915_CACHE_LLC);
  23. cs = i915_gem_object_pin_map(result, I915_MAP_WB);
  24. if (IS_ERR(cs)) {
  25. err = PTR_ERR(cs);
  26. goto err_obj;
  27. }
  28. memset(cs, 0xc5, PAGE_SIZE);
  29. i915_gem_object_unpin_map(result);
  30. vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL);
  31. if (IS_ERR(vma)) {
  32. err = PTR_ERR(vma);
  33. goto err_obj;
  34. }
  35. err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
  36. if (err)
  37. goto err_obj;
  38. rq = i915_request_alloc(engine, ctx);
  39. if (IS_ERR(rq)) {
  40. err = PTR_ERR(rq);
  41. goto err_pin;
  42. }
  43. err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
  44. if (err)
  45. goto err_req;
  46. srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
  47. if (INTEL_GEN(ctx->i915) >= 8)
  48. srm++;
  49. cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS);
  50. if (IS_ERR(cs)) {
  51. err = PTR_ERR(cs);
  52. goto err_req;
  53. }
  54. for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
  55. *cs++ = srm;
  56. *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i));
  57. *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
  58. *cs++ = 0;
  59. }
  60. intel_ring_advance(rq, cs);
  61. i915_gem_object_get(result);
  62. i915_gem_object_set_active_reference(result);
  63. i915_request_add(rq);
  64. i915_vma_unpin(vma);
  65. return result;
  66. err_req:
  67. i915_request_add(rq);
  68. err_pin:
  69. i915_vma_unpin(vma);
  70. err_obj:
  71. i915_gem_object_put(result);
  72. return ERR_PTR(err);
  73. }
  74. static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i)
  75. {
  76. return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid;
  77. }
  78. static void print_results(const struct whitelist *w, const u32 *results)
  79. {
  80. unsigned int i;
  81. for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
  82. u32 expected = get_whitelist_reg(w, i);
  83. u32 actual = results[i];
  84. pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n",
  85. i, expected, actual);
  86. }
  87. }
  88. static int check_whitelist(const struct whitelist *w,
  89. struct i915_gem_context *ctx,
  90. struct intel_engine_cs *engine)
  91. {
  92. struct drm_i915_gem_object *results;
  93. struct igt_wedge_me wedge;
  94. u32 *vaddr;
  95. int err;
  96. int i;
  97. results = read_nonprivs(ctx, engine);
  98. if (IS_ERR(results))
  99. return PTR_ERR(results);
  100. err = 0;
  101. igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
  102. err = i915_gem_object_set_to_cpu_domain(results, false);
  103. if (i915_terminally_wedged(&ctx->i915->gpu_error))
  104. err = -EIO;
  105. if (err)
  106. goto out_put;
  107. vaddr = i915_gem_object_pin_map(results, I915_MAP_WB);
  108. if (IS_ERR(vaddr)) {
  109. err = PTR_ERR(vaddr);
  110. goto out_put;
  111. }
  112. for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
  113. u32 expected = get_whitelist_reg(w, i);
  114. u32 actual = vaddr[i];
  115. if (expected != actual) {
  116. print_results(w, vaddr);
  117. pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n",
  118. i, expected, actual);
  119. err = -EINVAL;
  120. break;
  121. }
  122. }
  123. i915_gem_object_unpin_map(results);
  124. out_put:
  125. i915_gem_object_put(results);
  126. return err;
  127. }
  128. static int do_device_reset(struct intel_engine_cs *engine)
  129. {
  130. i915_reset(engine->i915, ENGINE_MASK(engine->id), NULL);
  131. return 0;
  132. }
  133. static int do_engine_reset(struct intel_engine_cs *engine)
  134. {
  135. return i915_reset_engine(engine, NULL);
  136. }
  137. static int switch_to_scratch_context(struct intel_engine_cs *engine)
  138. {
  139. struct i915_gem_context *ctx;
  140. struct i915_request *rq;
  141. ctx = kernel_context(engine->i915);
  142. if (IS_ERR(ctx))
  143. return PTR_ERR(ctx);
  144. rq = i915_request_alloc(engine, ctx);
  145. kernel_context_close(ctx);
  146. if (IS_ERR(rq))
  147. return PTR_ERR(rq);
  148. i915_request_add(rq);
  149. return 0;
  150. }
  151. static int check_whitelist_across_reset(struct intel_engine_cs *engine,
  152. int (*reset)(struct intel_engine_cs *),
  153. const struct whitelist *w,
  154. const char *name)
  155. {
  156. struct i915_gem_context *ctx;
  157. int err;
  158. ctx = kernel_context(engine->i915);
  159. if (IS_ERR(ctx))
  160. return PTR_ERR(ctx);
  161. err = check_whitelist(w, ctx, engine);
  162. if (err) {
  163. pr_err("Invalid whitelist *before* %s reset!\n", name);
  164. goto out;
  165. }
  166. err = switch_to_scratch_context(engine);
  167. if (err)
  168. goto out;
  169. err = reset(engine);
  170. if (err) {
  171. pr_err("%s reset failed\n", name);
  172. goto out;
  173. }
  174. err = check_whitelist(w, ctx, engine);
  175. if (err) {
  176. pr_err("Whitelist not preserved in context across %s reset!\n",
  177. name);
  178. goto out;
  179. }
  180. kernel_context_close(ctx);
  181. ctx = kernel_context(engine->i915);
  182. if (IS_ERR(ctx))
  183. return PTR_ERR(ctx);
  184. err = check_whitelist(w, ctx, engine);
  185. if (err) {
  186. pr_err("Invalid whitelist *after* %s reset in fresh context!\n",
  187. name);
  188. goto out;
  189. }
  190. out:
  191. kernel_context_close(ctx);
  192. return err;
  193. }
  194. static int live_reset_whitelist(void *arg)
  195. {
  196. struct drm_i915_private *i915 = arg;
  197. struct intel_engine_cs *engine = i915->engine[RCS];
  198. struct i915_gpu_error *error = &i915->gpu_error;
  199. struct whitelist w;
  200. int err = 0;
  201. /* If we reset the gpu, we should not lose the RING_NONPRIV */
  202. if (!engine)
  203. return 0;
  204. if (!whitelist_build(engine, &w))
  205. return 0;
  206. pr_info("Checking %d whitelisted registers (RING_NONPRIV)\n", w.count);
  207. set_bit(I915_RESET_BACKOFF, &error->flags);
  208. set_bit(I915_RESET_ENGINE + engine->id, &error->flags);
  209. if (intel_has_reset_engine(i915)) {
  210. err = check_whitelist_across_reset(engine,
  211. do_engine_reset, &w,
  212. "engine");
  213. if (err)
  214. goto out;
  215. }
  216. if (intel_has_gpu_reset(i915)) {
  217. err = check_whitelist_across_reset(engine,
  218. do_device_reset, &w,
  219. "device");
  220. if (err)
  221. goto out;
  222. }
  223. out:
  224. clear_bit(I915_RESET_ENGINE + engine->id, &error->flags);
  225. clear_bit(I915_RESET_BACKOFF, &error->flags);
  226. return err;
  227. }
  228. int intel_workarounds_live_selftests(struct drm_i915_private *i915)
  229. {
  230. static const struct i915_subtest tests[] = {
  231. SUBTEST(live_reset_whitelist),
  232. };
  233. int err;
  234. if (i915_terminally_wedged(&i915->gpu_error))
  235. return 0;
  236. mutex_lock(&i915->drm.struct_mutex);
  237. err = i915_subtests(tests, i915);
  238. mutex_unlock(&i915->drm.struct_mutex);
  239. return err;
  240. }