trace.c 15 KB


  1. /*
  2. *
  3. * Copyright (C) 2017 Cafe Beverage. All rights reserved.
  4. *
  5. * This program is free software and is provided to you under the terms of the
  6. * GNU General Public License version 2 as published by the Free Software
  7. * Foundation, and any use by you of this program is subject to the terms
  8. * of such GNU licence.
  9. *
  10. * A copy of the licence is included with the program, and can also be obtained
  11. * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  12. * Boston, MA 02110-1301, USA.
  13. *
  14. */
  15. #include <stdio.h>
  16. #include <stdlib.h>
  17. #include <pandriver.h>
  18. #include <pantrace.h>
  19. /* TODO: Remove this dependency */
  20. #include "../panwrap/panwrap.h"
  /*
   * Assert that synthesised command stream is bit-identical with trace.
   *
   * Fetches `s` bytes at GPU address `addr` and compares them byte by byte
   * with `synth`. At the first mismatch both buffers are hex-dumped to the
   * log (expected first, then the fetched bytes) and the comparison stops.
   * A full match produces no output. If the fetch yields NULL, a diagnostic
   * is logged and nothing is compared.
   */
  static void assert_gpu_same(uint64_t addr, size_t s, uint8_t *synth)
  {
      uint8_t *buffer = fetch_mapped_gpu(addr, s);

      if (!buffer) {
          /* Cast so the argument matches %llX whatever uint64_t is built on */
          panwrap_log("Bad allocation in assert %llX\n",
                      (unsigned long long) addr);
          return;
      }

      /* Index with size_t: an unsigned int could wrap before reaching a
       * large `s` and never terminate. */
      for (size_t i = 0; i < s; ++i) {
          if (buffer[i] == synth[i]) {
              continue;
          }

          panwrap_log("At %llX, expected:\n", (unsigned long long) addr);
          panwrap_log_hexdump_trimmed(synth, s, "\t\t");
          panwrap_log("Instead got:\n");
          panwrap_log_hexdump_trimmed(buffer, s, "\t\t");
          break;
      }
  }
  /*
   * Check that the `s` bytes at GPU address `addr` are all zero.
   *
   * Prints the address to stdout, then compares the GPU memory against a
   * temporary zero-filled buffer via assert_gpu_same(), which logs any
   * mismatch.
   */
  static void assert_gpu_zeroes(uint64_t addr, size_t s)
  {
      uint8_t *zero = calloc(s, 1);

      /* "%LX" combined the long double length modifier with an integer
       * conversion, which the C standard leaves undefined; "ll" is the
       * modifier for a 64-bit integer argument. */
      printf("Zero address %llX\n", (unsigned long long) addr);

      /* calloc may legitimately return NULL for a zero-byte request, so only
       * treat NULL as a failure when bytes were actually asked for. */
      if (!zero && s != 0) {
          panwrap_log("Out of memory checking zeroes at %llX\n",
                      (unsigned long long) addr);
          return;
      }

      assert_gpu_same(addr, s, zero);
      free(zero);
  }
  /*
   * Hex-dump `s` bytes starting at GPU address `addr` to the log.
   *
   * A zero address is reported as "Null quick dump" and skipped. Otherwise
   * the dump header is logged after the fetch, followed by either the
   * trimmed hex dump or "Not found" when the fetch returned NULL.
   */
  static void quick_dump_gpu(uint64_t addr, size_t s)
  {
      if (addr == 0) {
          panwrap_log("Null quick dump\n");
          return;
      }

      uint8_t *mapped = fetch_mapped_gpu(addr, s);

      panwrap_log("Quick GPU dump (%llX)\n", addr);

      if (mapped != NULL) {
          panwrap_log_hexdump_trimmed(mapped, s, "\t\t");
          return;
      }

      panwrap_log("Not found\n");
  }
  61. #include "chai-notes.h"
  62. #define DEFINE_CASE(label) case label: return #label;
  63. static char *chai_job_type_name(int type)
  64. {
  65. switch (type) {
  66. DEFINE_CASE(JOB_NOT_STARTED)
  67. DEFINE_CASE(JOB_TYPE_NULL)
  68. DEFINE_CASE(JOB_TYPE_SET_VALUE)
  69. DEFINE_CASE(JOB_TYPE_CACHE_FLUSH)
  70. DEFINE_CASE(JOB_TYPE_COMPUTE)
  71. DEFINE_CASE(JOB_TYPE_VERTEX)
  72. DEFINE_CASE(JOB_TYPE_TILER)
  73. DEFINE_CASE(JOB_TYPE_FUSED)
  74. DEFINE_CASE(JOB_TYPE_FRAGMENT)
  75. default:
  76. panwrap_log("Requested job type %X\n", type);
  77. return "UNKNOWN";
  78. }
  79. }
  80. static char* chai_gl_mode_name(uint8_t b)
  81. {
  82. switch (b) {
  83. DEFINE_CASE(CHAI_POINTS)
  84. DEFINE_CASE(CHAI_LINES)
  85. DEFINE_CASE(CHAI_TRIANGLES)
  86. DEFINE_CASE(CHAI_TRIANGLE_STRIP)
  87. DEFINE_CASE(CHAI_TRIANGLE_FAN)
  88. default:
  89. panwrap_log("Unknown mode %X\n", b);
  90. return "GL_UNKNOWN";
  91. }
  92. }
  93. /* TODO: Figure out what "fbd" means */
  94. /* TODO: Corresponding SFBD decode (don't assume MFBD) */
  95. static void chai_trace_fbd(uintptr_t fbd)
  96. {
  97. struct tentative_mfbd *mfbd =
  98. fetch_mapped_gpu(fbd & FBD_POINTER_MASK, sizeof(*mfbd));
  99. uint8_t *buf;
  100. uintptr_t *buf32;
  101. panwrap_log("MFBD @ %X (%X)\n",
  102. fbd & FBD_POINTER_MASK, fbd & ~FBD_POINTER_MASK);
  103. panwrap_log("MFBD flags %X, heap free address %llX\n",
  104. mfbd->flags, mfbd->heap_free_address);
  105. panwrap_log_hexdump_trimmed((uint8_t *) mfbd->block1,
  106. sizeof(mfbd->block1), "\t\t");
  107. panwrap_log("unk2\n");
  108. buf = fetch_mapped_gpu(mfbd->unknown2, 64);
  109. panwrap_log_hexdump_trimmed(buf, 64, "\t\t");
  110. assert_gpu_zeroes(mfbd->block2[0], 64);
  111. assert_gpu_zeroes(mfbd->block2[1], 64);
  112. assert_gpu_zeroes(mfbd->ugaT, 64);
  113. if (mfbd->unknown_gpu_address) {
  114. printf("Nonzero uga?\n");
  115. assert_gpu_zeroes(mfbd->unknown_gpu_address, 64);
  116. } else {
  117. printf("No uga\n");
  118. }
  119. /* Somehow maybe sort of kind of framebufferish?
  120. * It changes predictably in the same way as the FB.
  121. * Unclear what exactly it is, though.
  122. *
  123. * Where the framebuffer is: 1A 33 00 00
  124. * This is: 71 B3 03 71 6C 4D 87 46
  125. * Where the framebuffer is: 1A 33 1A 00
  126. * This is: AB E4 43 9C E8 D6 D1 25
  127. *
  128. * It repeats, too, but everything 8 bytes rather than 4.
  129. *
  130. * It is a function of the colour painted. But the exact details
  131. * are elusive.
  132. *
  133. * Also, this is an output, not an input.
  134. * Assuming the framebuffer works as intended, RE may be
  135. * pointless.
  136. */
  137. panwrap_log("ugaT %llX, uga %llX\n",
  138. mfbd->ugaT, mfbd->unknown_gpu_address);
  139. if (mfbd->unknown_gpu_addressN) {
  140. panwrap_log("ugan %llX\n", mfbd->unknown_gpu_addressN);
  141. buf = fetch_mapped_gpu(mfbd->unknown_gpu_addressN, 64);
  142. panwrap_log_hexdump_trimmed(buf, 64, "\t\t");
  143. } else {
  144. printf("No ugan\n");
  145. }
  146. panwrap_log("unk1 %X, b1 %llX, b2 %llX, unk2 %llX, unk3 %llX, blah %llX\n",
  147. mfbd->unknown1,
  148. mfbd->block2[0],
  149. mfbd->block2[1],
  150. mfbd->unknown2,
  151. mfbd->unknown3,
  152. mfbd->blah);
  153. panwrap_log("Weights [ %X, %X, %X, %X, %X, %X, %X, %X ]\n",
  154. mfbd->weights[0], mfbd->weights[1],
  155. mfbd->weights[2], mfbd->weights[3],
  156. mfbd->weights[4], mfbd->weights[5],
  157. mfbd->weights[6], mfbd->weights[7]);
  158. panwrap_log_hexdump_trimmed((uint8_t *) mfbd->block3,
  159. sizeof(mfbd->block3), "\t\t");
  160. panwrap_log("---\n");
  161. panwrap_log_hexdump_trimmed((uint8_t *) mfbd->block4,
  162. sizeof(mfbd->block4), "\t\t");
  163. panwrap_log("--- (seriously though) --- %X\n", mfbd->block3[4]);
  164. buf32 = fetch_mapped_gpu(mfbd->block3[4], 128);
  165. if (buf32) {
  166. panwrap_log_hexdump_trimmed((uint8_t*) buf32, 128, "\t\t");
  167. quick_dump_gpu(buf32[6], 64);
  168. quick_dump_gpu(buf32[20], 64);
  169. quick_dump_gpu(buf32[23], 64);
  170. quick_dump_gpu(buf32[24], 64);
  171. quick_dump_gpu(buf32[25], 64);
  172. quick_dump_gpu(buf32[26], 64);
  173. quick_dump_gpu(buf32[27], 64);
  174. quick_dump_gpu(buf32[28], 64);
  175. quick_dump_gpu(buf32[31], 64);
  176. }
  177. quick_dump_gpu(mfbd->block3[16], 128);
  178. }
  /*
   * Log `count` consecutive floats starting at `p` as one vector literal,
   * e.g. "<x, y, z>,". Only 1 to 4 components are supported; any other
   * count is reported and `p` is not read.
   */
  static void chai_trace_vecN(float *p, size_t count)
  {
      switch (count) {
      case 1:
          panwrap_log("\t<%f>,\n", p[0]);
          break;
      case 2:
          panwrap_log("\t<%f, %f>,\n", p[0], p[1]);
          break;
      case 3:
          panwrap_log("\t<%f, %f, %f>,\n", p[0], p[1], p[2]);
          break;
      case 4:
          panwrap_log("\t<%f, %f, %f, %f>,\n", p[0], p[1], p[2], p[3]);
          break;
      default:
          /* count is a size_t, which %d does not match; %zu does */
          panwrap_log("Cannot print vec%zu\n", count);
          break;
      }
  }
  192. //#include "shim.c"
  193. static void chai_trace_attribute(uint64_t address)
  194. {
  195. uint64_t raw;
  196. uint64_t flags;
  197. size_t vertex_count;
  198. size_t component_count;
  199. float *v;
  200. float *p;
  201. struct attribute_buffer *vb =
  202. (struct attribute_buffer *) fetch_mapped_gpu(
  203. address,
  204. sizeof(struct attribute_buffer));
  205. if (!vb) return;
  206. vertex_count = vb->total_size / vb->element_size;
  207. component_count = vb->element_size / sizeof(float);
  208. raw = vb->elements & ~3;
  209. flags = vb->elements ^ raw;
  210. p = v = fetch_mapped_gpu(raw, vb->total_size);
  211. panwrap_log("attribute vec%d mem%llXflag%llX = {\n",
  212. component_count, raw, flags);
  213. for (unsigned int i = 0; i < vertex_count; i++, p += component_count) {
  214. chai_trace_vecN(p, component_count);
  215. /* I don't like these verts... let's add some flare! */
  216. /*p[0] += (float) (rand() & 0xFF) / 1024.0f;
  217. p[1] += (float) (rand() & 0xFF) / 1024.0f;
  218. p[2] += (float) (rand() & 0xFF) / 1024.0f;*/
  219. }
  220. panwrap_log("}\n");
  221. }
  /* TODO: Dump more than one */
  /*
   * Dump the first texture referenced by a vertex/tiler payload.
   *
   * `metadata` is the GPU address of an undecoded metadata array, which is
   * hex-dumped. `addresses` is the GPU address of a list of 64-bit texture
   * pointers, of which only the first entry is followed. Word 4 of the
   * region that entry points at is taken as the address of the bitmap data,
   * which is written unmodified to "swizzled.bin" in the current directory.
   *
   * Status lines go to stdout. Each fetch and file operation is checked, and
   * the function returns early with a message when one fails.
   */
  static void chai_dump_texture(uint64_t addresses, uint64_t metadata)
  {
      /* Undecoded metadata array */
      quick_dump_gpu(metadata, 0x40);

      uint64_t *texture = fetch_mapped_gpu(addresses, sizeof(*texture));

      if (!texture) {
          printf("Bad texture list\n");
          return;
      }

      printf("Texture address (more metadata) (?): %llx\n",
             (unsigned long long) *texture);
      quick_dump_gpu(*texture, 0x100);

      /* region[4] is read below, so five words must be available;
       * sizeof(region) would only have requested the size of a pointer. */
      uint64_t *region = fetch_mapped_gpu(*texture, 5 * sizeof(*region));

      if (!region) {
          printf("Bad region\n");
          return;
      }

      uint64_t int_addr = region[4];

      printf("Next address: %llx\n", (unsigned long long) int_addr);

      /* Bitmap data itself present, swizzled in an unknown fashion */
      /* TODO: Compute size */
  #define ANTISWIZZLE_SIZE 64
      const size_t bitmap_size = ANTISWIZZLE_SIZE * ANTISWIZZLE_SIZE * 3;
      uint8_t *bitmap = fetch_mapped_gpu(int_addr, bitmap_size);

      if (!bitmap) {
          printf("Missing bitmap\n");
          /* Nothing to write; previously execution fell through to fwrite */
          return;
      }

      FILE *fp = fopen("swizzled.bin", "wb");

      if (!fp) {
          perror("swizzled.bin");
          return;
      }

      if (fwrite(bitmap, 1, bitmap_size, fp) != bitmap_size) {
          printf("Short write to swizzled.bin\n");
      }

      fclose(fp);

  #if 0
      /* Rewrite the bitmap for fun! */
      uint8_t *bitmap = fetch_mapped_gpu(int_addr, 0x200);
      if (!bitmap) {
          printf("Missing bitmap\n");
          return;
      }
      for (int i = 0; i < (3 * 4); i += 3) {
          bitmap[i] = rand() & 0xFF;
          bitmap[i + 1] = 0;
          bitmap[i + 2] = 0;
      }
  #endif
  }
  262. static void chai_trace_hw_chain(uint64_t chain)
  263. {
  264. struct job_descriptor_header *h;
  265. uint8_t *gen_pay;
  266. u64 next;
  267. u64 payload;
  268. /* Trace descriptor */
  269. h = fetch_mapped_gpu(chain, sizeof(*h));
  270. if (!h) {
  271. panwrap_log("Failed to map the job chain %llX\n\n", chain);
  272. return;
  273. }
  274. panwrap_log("%s job, %d-bit, status %X, incomplete %X, fault %llX, barrier %d, index %hX, dependencies (%hX, %hX)\n",
  275. chai_job_type_name(h->job_type),
  276. h->job_descriptor_size ? 64 : 32,
  277. h->exception_status,
  278. h->first_incomplete_task,
  279. h->fault_pointer,
  280. h->job_barrier,
  281. h->job_index,
  282. h->job_dependency_index_1,
  283. h->job_dependency_index_2);
  284. payload = chain + sizeof(*h);
  285. switch (h->job_type) {
  286. case JOB_TYPE_SET_VALUE:
  287. {
  288. struct payload_set_value *s;
  289. s = fetch_mapped_gpu(payload, sizeof(*s));
  290. panwrap_log("set value -> %llX (%llX)\n",
  291. s->out, s->unknown);
  292. break;
  293. }
  294. case JOB_TYPE_VERTEX:
  295. case JOB_TYPE_TILER:
  296. {
  297. FILE *fp;
  298. struct payload_vertex_tiler *v;
  299. uint64_t *i_shader, s;
  300. uint8_t *shader;
  301. char *fn;
  302. v = fetch_mapped_gpu(payload, sizeof(*v));
  303. if ((v->shader & 0xFFF00000) == 0x5AB00000) {
  304. panwrap_log("Job sabotaged\n");
  305. break;
  306. }
  307. /* Mask out lower 128-bit (instruction word) for flags.
  308. *
  309. * TODO: Decode flags.
  310. */
  311. quick_dump_gpu(payload, sizeof(*v));
  312. i_shader = fetch_mapped_gpu(v->shader, sizeof(u64));
  313. panwrap_log("%s shader @ %llX (flags %llX)\n",
  314. h->job_type == JOB_TYPE_VERTEX ?
  315. "Vertex" : "Fragment",
  316. *i_shader & ~15, *i_shader & 15);
  317. shader = fetch_mapped_gpu(*i_shader & ~15,
  318. 0x880 - 0x540);
  319. if (shader) {
  320. panwrap_log_hexdump_trimmed(shader,
  321. 0x880 - 0x540, "\t\t");
  322. asprintf(&fn, "shader_%s.bin",
  323. h->job_type == JOB_TYPE_VERTEX ?
  324. "Vertex" : "Fragment");
  325. fp = fopen(fn, "wb");
  326. fwrite(shader, 1, 0x880 - 0x540, fp);
  327. free(fn);
  328. fclose(fp);
  329. }
  330. /* Trace attribute based on metadata */
  331. s = v->attribute_meta;
  332. while (true) {
  333. attribute_meta_t *attr_meta = fetch_mapped_gpu(
  334. s, sizeof(attribute_meta_t));
  335. if (!HAS_ATTRIBUTE(*attr_meta))
  336. break;
  337. panwrap_log("Attribute %llX (flags %llX)\n",
  338. ATTRIBUTE_NO(*attr_meta),
  339. ATTRIBUTE_FLAGS(*attr_meta));
  340. chai_trace_attribute(
  341. v->attributes + ATTRIBUTE_NO(*attr_meta) *
  342. sizeof(struct attribute_buffer));
  343. s += sizeof(attribute_meta_t);
  344. }
  345. if (h->job_type == JOB_TYPE_TILER) {
  346. panwrap_log(
  347. "Drawing in %s\n",
  348. chai_gl_mode_name(((uint8_t *) v->block1)[8]));
  349. }
  350. assert_gpu_zeroes(v->zeroes, 64);
  351. /* TODO: Rename appropriately */
  352. printf("nulls: %x, %x, %x\n",
  353. v->null1, v->null2, v->null4);
  354. if (v->null1 && v->null2) {
  355. chai_dump_texture(v->null1, v->null2);
  356. }
  357. if (v->null4)
  358. panwrap_log("Null tripped?\n");
  359. panwrap_log("%cFBD\n", v->fbd & FBD_TYPE ? 'M' : 'S');
  360. chai_trace_fbd(v->fbd);
  361. panwrap_log_hexdump_trimmed((uint8_t *) v->block1,
  362. sizeof(v->block1),
  363. "\t\t");
  364. for (int addr = 0; addr < 14; ++addr) {
  365. uintptr_t address =
  366. ((uintptr_t *) &(v->zeroes))[addr];
  367. uint8_t *buf;
  368. size_t sz = 64;
  369. /* Structure known. Skip hex dump */
  370. if (addr == 2) continue;
  371. if (addr == 3) continue;
  372. if (addr == 6) continue;
  373. if (addr == 10 && h->job_type == JOB_TYPE_VERTEX) continue;
  374. if (addr == 11) continue;
  375. if (addr == 12) continue;
  376. /* Likely not an address! */
  377. if (addr == 13) continue;
  378. /* Size known exactly but not structure; cull */
  379. if (addr == 0) sz = 0x100;
  380. if (addr == 1) sz = 0x10;
  381. if (addr == 4) sz = 0x40;
  382. if (addr == 5) sz = 0x20;
  383. if (addr == 7) sz = 0x20;
  384. if (addr == 8) sz = 0x20;
  385. panwrap_log("Addr %d %X\n", addr, address);
  386. if (!address)
  387. continue;
  388. buf = fetch_mapped_gpu(address, sz);
  389. panwrap_log_hexdump_trimmed(buf, sz, "\t\t");
  390. if (addr == 8) {
  391. uintptr_t sub =
  392. *((uintptr_t *) buf) & 0xFFFFFFFE;
  393. uint8_t *sbuf =
  394. fetch_mapped_gpu(sub, 64);
  395. panwrap_log("---\n");
  396. panwrap_log_hexdump_trimmed(
  397. sbuf, 64, "\t\t");
  398. }
  399. if (addr == 1) {
  400. uint64_t sub = *((uint64_t*) buf) >> 8;
  401. uint8_t *sbuf =
  402. fetch_mapped_gpu(sub, 64);
  403. panwrap_log("--- %llX\n", sub);
  404. panwrap_log_hexdump_trimmed(
  405. sbuf, 64, "\t\t");
  406. }
  407. if (addr == 4 &&
  408. h->job_type == JOB_TYPE_TILER) {
  409. __fp16 *uniforms = (__fp16*) buf;
  410. printf("uniform vec4 u = vec4(");
  411. for(int u = 0; u < 4; ++u) {
  412. float v = (float) uniforms[u];
  413. printf("%f, ", v);
  414. }
  415. printf("\b\b);\n");
  416. }
  417. }
  418. panwrap_log_hexdump_trimmed((uint8_t *) v->block2,
  419. sizeof(v->block2), "\t\t");
  420. break;
  421. }
  422. case JOB_TYPE_FRAGMENT: {
  423. struct payload_fragment *f;
  424. f = fetch_mapped_gpu(payload, sizeof(*f));
  425. /* Bit 31 of max_tile_coord clear on the first frame.
  426. * Set after.
  427. * TODO: Research.
  428. */
  429. panwrap_log("frag %X %X (%d, %d) -> (%d, %d), fbd type %cFBD at %llX (%llX) \n",
  430. f->min_tile_coord, f->max_tile_coord,
  431. TILE_COORD_X(f->min_tile_coord),
  432. TILE_COORD_Y(f->min_tile_coord),
  433. TILE_COORD_X(f->max_tile_coord),
  434. TILE_COORD_Y(f->max_tile_coord),
  435. f->fragment_fbd & FBD_TYPE ? 'M' : 'S',
  436. f->fragment_fbd,
  437. f->fragment_fbd & FBD_POINTER_MASK);
  438. chai_trace_fbd(f->fragment_fbd);
  439. break;
  440. }
  441. default:
  442. panwrap_log("Dumping payload %llX for job type %s\n",
  443. payload,
  444. chai_job_type_name(h->job_type));
  445. gen_pay = fetch_mapped_gpu(payload, 256);
  446. panwrap_log_hexdump_trimmed(gen_pay, 256, "\t\t");
  447. break;
  448. }
  449. next = h->job_descriptor_size ? h->next_job._64 : h->next_job._32;
  450. /* Traverse the job chain */
  451. if (next)
  452. chai_trace_hw_chain(next);
  453. }
  454. void chai_trace_atom(const struct mali_jd_atom_v2 *v)
  455. {
  456. uint64_t req = v->compat_core_req | v->core_req;
  457. if (req & MALI_JD_REQ_SOFT_JOB) {
  458. if (req & MALI_JD_REQ_SOFT_REPLAY) {
  459. struct mali_jd_replay_payload *payload;
  460. payload = (struct mali_jd_replay_payload *)
  461. fetch_mapped_gpu(v->jc, sizeof(*payload));
  462. panwrap_log(
  463. "tiler_jc_list = %llX, fragment_jc = %llX, "
  464. "tiler_heap_free = %llX, fragment hierarchy mask = %hX, "
  465. "tiler hierachy mask = %hX, hierarchy def weight %X, "
  466. "tiler core_req = %X, fragment core_req = %X\n",
  467. payload->tiler_jc_list,
  468. payload->fragment_jc,
  469. payload->tiler_heap_free,
  470. payload->fragment_hierarchy_mask,
  471. payload->tiler_hierarchy_mask,
  472. payload->hierarchy_default_weight,
  473. payload->tiler_core_req,
  474. payload->fragment_core_req);
  475. } else {
  476. /* TODO: Soft job decoding */
  477. panwrap_log("Unknown soft job\n");
  478. }
  479. } else {
  480. chai_trace_hw_chain(v->jc);
  481. }
  482. }