18 커밋 10c751e08a ... e3029fb90e

작성자 SHA1 메시지 날짜
  Cafe e3029fb90e Link with -lm 7 년 전
  Cafe 63f9fbd91d fix sizing in replayer 7 년 전
  Cafe 0da39a41f8 Fix pointerness 7 년 전
  Cafe 9c1f255887 Fix size? 7 년 전
  Cafe adc423ad74 Build with -ldl 7 년 전
  Cafe f7588d34ff Build replay in master cmakelists 7 년 전
  Cafe fc6ecc5d82 Open kernel ctx in replay 7 년 전
  Cafe 1b854136ee Cleanup errors in replay 7 년 전
  Cafe 8fc0e63f2c Replay pseudocode 7 년 전
  Cafe 4106712b7f Replay cmakelists 7 년 전
  Cafe 2362ea0740 Fix a bug and dump ioctls 7 년 전
  Cafe 94ef30b041 Mapped memory dump 7 년 전
  Cafe 2dddaa40f3 Antiswizzle instrumentation 7 년 전
  Cafe f50bfb957b Fix soft job decoding 7 년 전
  Cafe 69ba83e2f6 Instrument memory mapping to debug disappearing nodes 7 년 전
  Cafe 97f16d93ed Don't crash on null pointers 7 년 전
  Cafe 565f79013a Rewrite textures 7 년 전
  Cafe 2cb4da48d2 Disable attribute randomisation 7 년 전
6개의 변경된 파일, 241개의 추가 및 52개의 삭제
  1. 1 0
      CMakeLists.txt
  2. 69 21
      decoder/trace.c
  3. 60 31
      panwrap/panwrap-syscall.c
  4. 5 0
      panwrap/panwrap-util.c
  5. 6 0
      replay/CMakeLists.txt
  6. 100 0
      replay/replay.c

+ 1 - 0
CMakeLists.txt

@@ -68,6 +68,7 @@ include_directories("include")
 add_subdirectory(panwrap)
 add_subdirectory(driver)
 add_subdirectory(decoder)
+add_subdirectory(replay)
 
 if (BUILD_SYNTHESISER)
     add_subdirectory(prototype)

+ 69 - 21
decoder/trace.c

@@ -136,7 +136,13 @@ static void chai_trace_fbd(uintptr_t fbd)
 	assert_gpu_zeroes(mfbd->block2[0], 64);
 	assert_gpu_zeroes(mfbd->block2[1], 64);
 	assert_gpu_zeroes(mfbd->ugaT, 64);
-	assert_gpu_zeroes(mfbd->unknown_gpu_address, 64);
+
+	if (mfbd->unknown_gpu_address) {
+		printf("Nonzero uga?\n");
+		assert_gpu_zeroes(mfbd->unknown_gpu_address, 64);
+	} else {
+		printf("No uga\n");
+	}
 
 	/* Somehow maybe sort of kind of framebufferish?
 	 * It changes predictably in the same way as the FB.
@@ -159,9 +165,14 @@ static void chai_trace_fbd(uintptr_t fbd)
 
 	panwrap_log("ugaT %llX, uga %llX\n",
 		    mfbd->ugaT, mfbd->unknown_gpu_address);
-	panwrap_log("ugan %llX\n", mfbd->unknown_gpu_addressN);
-	buf = fetch_mapped_gpu(mfbd->unknown_gpu_addressN, 64);
-	panwrap_log_hexdump_trimmed(buf, 64, "\t\t");
+	
+	if (mfbd->unknown_gpu_addressN) {
+		panwrap_log("ugan %llX\n", mfbd->unknown_gpu_addressN);
+		buf = fetch_mapped_gpu(mfbd->unknown_gpu_addressN, 64);
+		panwrap_log_hexdump_trimmed(buf, 64, "\t\t");
+	} else {
+		printf("No ugan\n");
+	}
 
 	panwrap_log("unk1 %X, b1 %llX, b2 %llX, unk2 %llX, unk3 %llX, blah %llX\n",
 		    mfbd->unknown1,
@@ -250,9 +261,9 @@ static void chai_trace_attribute(uint64_t address)
 
 		/* I don't like these verts... let's add some flare! */
 
-		p[0] += (float) (rand() & 0xFF) / 1024.0f;
+		/*p[0] += (float) (rand() & 0xFF) / 1024.0f;
 		p[1] += (float) (rand() & 0xFF) / 1024.0f;
-		p[2] += (float) (rand() & 0xFF) / 1024.0f;
+		p[2] += (float) (rand() & 0xFF) / 1024.0f;*/
 	}
 
 	panwrap_log("}\n");
@@ -273,10 +284,43 @@ static void chai_dump_texture(uint64_t addresses, uint64_t metadata)
 
 	uint64_t* region = fetch_mapped_gpu(*texture, sizeof(region));
 
+	if (!region) {
+		printf("Bad region\n");
+		return;
+	}
+
 	uint64_t int_addr = region[4];
 	printf("Next address: %llx\n", int_addr);
 
-	quick_dump_gpu(int_addr, 0x200);
+	/* Bitmap data itself present, swizzled in an unknown fashion */
+	/* TODO: Compute size */
+
+#define ANTISWIZZLE_SIZE 64
+
+	uint8_t *bitmap = fetch_mapped_gpu(int_addr, ANTISWIZZLE_SIZE * ANTISWIZZLE_SIZE * 3);
+	if (!bitmap) {
+		printf("Missing bitmap\n");
+	}
+
+	FILE *fp = fopen("swizzled.bin", "wb");
+	fwrite(bitmap, 1, ANTISWIZZLE_SIZE * ANTISWIZZLE_SIZE * 3, fp);
+	fclose(fp);
+	
+#if 0
+	/* Rewrite the bitmap for fun! */
+	uint8_t *bitmap = fetch_mapped_gpu(int_addr, 0x200);
+
+	if (!bitmap) {
+		printf("Missing bitmap\n");
+		return;
+	}
+
+	for (int i = 0; i < (3 * 4); i += 3) {
+		bitmap[i] = rand() & 0xFF;
+		bitmap[i + 1] = 0;
+		bitmap[i + 2] = 0;
+	}
+#endif
 }
 
 static void chai_trace_hw_chain(uint64_t chain)
@@ -349,16 +393,18 @@ static void chai_trace_hw_chain(uint64_t chain)
 
 			shader = fetch_mapped_gpu(*i_shader & ~15,
 						  0x880 - 0x540);
-			panwrap_log_hexdump_trimmed(shader,
-						    0x880 - 0x540, "\t\t");
-
-			asprintf(&fn, "shader_%s.bin",
-				 h->job_type == JOB_TYPE_VERTEX ?
-				 "Vertex" : "Fragment");
-			fp = fopen(fn, "wb");
-			fwrite(shader, 1, 0x880 - 0x540, fp);
-			free(fn);
-			fclose(fp);
+			if (shader) {
+				panwrap_log_hexdump_trimmed(shader,
+							    0x880 - 0x540, "\t\t");
+
+				asprintf(&fn, "shader_%s.bin",
+					 h->job_type == JOB_TYPE_VERTEX ?
+					 "Vertex" : "Fragment");
+				fp = fopen(fn, "wb");
+				fwrite(shader, 1, 0x880 - 0x540, fp);
+				free(fn);
+				fclose(fp);
+			}
 
 			/* Trace attribute based on metadata */
 			s = v->attribute_meta;
@@ -527,18 +573,20 @@ static void chai_trace_hw_chain(uint64_t chain)
 
 void chai_trace_atom(const struct mali_jd_atom_v2 *v)
 {
-	if (v->core_req & MALI_JD_REQ_SOFT_JOB) {
-		if (v->core_req & MALI_JD_REQ_SOFT_REPLAY) {
+	uint64_t req = v->compat_core_req | v->core_req;
+
+	if (req & MALI_JD_REQ_SOFT_JOB) {
+		if (req & MALI_JD_REQ_SOFT_REPLAY) {
 			struct mali_jd_replay_payload *payload;
 
 			payload = (struct mali_jd_replay_payload *)
 				fetch_mapped_gpu(v->jc, sizeof(*payload));
 
 			panwrap_log(
-			    "tiler_jc_list = %llX, fragment_jc = %llX, \nt "
+			    "tiler_jc_list = %llX, fragment_jc = %llX, "
 			    "tiler_heap_free = %llX, fragment hierarchy mask = %hX, "
 			    "tiler hierachy mask = %hX, hierarchy def weight %X, "
-			    "tiler core_req = %X, fragment core_req = %X",
+			    "tiler core_req = %X, fragment core_req = %X\n",
 			    payload->tiler_jc_list,
 			    payload->fragment_jc,
 			    payload->tiler_heap_free,

+ 60 - 31
panwrap/panwrap-syscall.c

@@ -126,6 +126,8 @@ static int mali_fd = 0;
 static LIST_HEAD(allocations);
 static LIST_HEAD(mmaps);
 
+extern FILE *ioctl_fp;
+
 #define FLAG_INFO(flag) { MALI_MEM_##flag, #flag }
 static const struct panwrap_flag_info mem_flag_info[] = {
 	FLAG_INFO(PROT_CPU_RD),
@@ -225,23 +227,12 @@ static struct mapped_memory *find_mapped_mem(void *addr)
 	return NULL;
 }
 
-static struct mapped_memory *find_mapped_mem_containing(void *addr)
-{
-	struct mapped_memory *pos = NULL;
-
-	list_for_each_entry(pos, &mmaps, node) {
-		if (addr >= pos->addr && addr <= pos->addr + pos->length)
-			return pos;
-	}
-
-	return NULL;
-}
-
 static struct mapped_memory *find_gpu_mapped_mem(uint64_t addr)
 {
 	struct mapped_memory *pos = NULL;
 
 	list_for_each_entry(pos, &mmaps, node) {
+		printf("Candidate (%llx, %llx) for %llx\n", pos->gpu_va, pos->gpu_va + pos->length, addr);
 		if (addr >= pos->gpu_va && addr <= pos->gpu_va + pos->length)
 			return pos;
 	}
@@ -507,6 +498,46 @@ ioctl_decode_pre_stream_create(unsigned long int request, void *ptr)
 	panwrap_log("\tname = %s\n", args->name);
 }
 
+struct mapped_dump {	/* per-mapping record header written to memory-dump.bin */
+	u64 gpu_va;	/* copied from mapped_memory.gpu_va */
+	u64 length;	/* copied from mapped_memory.length; byte count of the payload */
+	/* Followed by buffer itself: `length` raw bytes, none when length is 0 */
+};
+
+static void
+dump_mapped_memory(void) {
+	/* Dump every mapping that has both a CPU and a GPU address to
+	 * memory-dump.bin as (struct mapped_dump, raw bytes) records, for
+	 * replay, offline decode, etc. Should be called once at the end of
+	 * a frame in a single-frame render. Best effort: an I/O failure is
+	 * reported and the rest of the dump is abandoned.
+	 */
+	FILE *fp = fopen("memory-dump.bin", "wb");
+	struct mapped_memory *pos = NULL;
+
+	if (!fp) {
+		perror("memory-dump.bin");
+		return;
+	}
+
+	list_for_each_entry(pos, &mmaps, node) {
+		/* We don't care about endianness, LE everywhere */
+		struct mapped_dump dump = {
+			.gpu_va = pos->gpu_va,
+			.length = pos->length
+		};
+		if (!pos->addr || !pos->gpu_va) {
+			printf("Warning, NULL mmap skipped in dump\n");
+		} else if (fwrite(&dump, 1, sizeof(dump), fp) != sizeof(dump) ||
+			   fwrite(pos->addr, 1, pos->length, fp) != pos->length) {
+			perror("memory-dump.bin");
+			break;
+		}
+	}
+
+	fclose(fp);
+}
+
 /* TODO: Decode offline */
 #define __PANWRAP
 #include <pantrace.h>
@@ -535,22 +566,7 @@ ioctl_decode_pre_job_submit(unsigned long int request, void *ptr)
 	panwrap_log("\tAtoms:\n");
 	for (unsigned int i = 0; i < args->nr_atoms; i++) {
 		const struct mali_jd_atom_v2 *a = &atoms[i];
-		struct mapped_memory *mem;
-
-		panwrap_log("\t\tjc = 0x%llx\n", a->jc);
-		mem = find_mapped_mem_containing((void*) (uintptr_t) a->jc);
-		if (mem) {
-			off_t offset = (void*) (uintptr_t) a->jc - mem->addr;
-
-			panwrap_log("\t\tAddress %llu bytes inside mmap %p - %p (length=%zd)\n",
-				    (loff_t) offset, mem->addr, mem->addr + mem->length,
-				    mem->length);
-
-			chai_trace_atom(a);
-
-		} else {
-			panwrap_log("\t\tERROR! jc contained in unknown memory region, cannot dump\n");
-		}
+		uint64_t core_req;
 
 		panwrap_log("\t\tudata = [0x%llx, 0x%llx]\n",
 			    a->udata.blob[0], a->udata.blob[1]);
@@ -573,7 +589,10 @@ ioctl_decode_pre_job_submit(unsigned long int request, void *ptr)
 			panwrap_log("\t\t<no external resources>\n");
 		}
 
+		core_req = a->core_req | a->compat_core_req;
+
 		panwrap_log("\t\tcompat_core_req = 0x%x\n", a->compat_core_req);
+		panwrap_log("\t\tcore_req = 0x%x\n", a->core_req);
 
 		panwrap_log("\t\tPre-dependencies:\n");
 		for (unsigned int j = 0; j < ARRAY_SIZE(a->pre_dep); j++) {
@@ -591,10 +610,16 @@ ioctl_decode_pre_job_submit(unsigned long int request, void *ptr)
 		panwrap_log("\t\tdevice_nr = %d\n", a->device_nr);
 
 		panwrap_log("\t\tJob type = %s\n",
-			    ioctl_get_job_type_from_jd_core_req(a->core_req));
-		panwrap_log("\t\tcore_req = ");
-		ioctl_log_decoded_jd_core_req(a->core_req);
+			    ioctl_get_job_type_from_jd_core_req(core_req));
+		panwrap_log("\t\tdecoded_core_req = ");
+		ioctl_log_decoded_jd_core_req(core_req);
 		panwrap_log_cont("\n");
+
+		dump_mapped_memory();
+		fflush(ioctl_fp);
+
+		chai_trace_atom(a);
+
 	}
 }
 
@@ -989,6 +1014,10 @@ int ioctl(int fd, unsigned long request, ...)
 		goto out;
 	}
 
+	/* Dump to ioctl file */
+	fwrite(&request, 1, sizeof(request), ioctl_fp);
+	fwrite(ptr, 1, _IOC_SIZE(request), ioctl_fp);
+
 	func = header->id;
 	panwrap_log("<%-20s> (%02lu) (%08lx) (%04lu) (%03d)\n",
 		    name, _IOC_NR(request), request, _IOC_SIZE(request), func);

+ 5 - 0
panwrap/panwrap-util.c

@@ -32,6 +32,7 @@ static bool time_is_frozen = false;
 static struct timespec start_time;
 static struct timespec total_time_frozen, start_freeze_time, frozen_timestamp;
 static FILE *log_output;
+FILE *ioctl_fp;
 
 void
 panwrap_log_decoded_flags(const struct panwrap_flag_info *flag_info,
@@ -369,4 +370,8 @@ panwrap_util_init()
 	} else {
 		log_output = stdout;
 	}
+
+	srand(time(NULL));
+
+	ioctl_fp = fopen("ioctls.bin", "wb");
 }

+ 6 - 0
replay/CMakeLists.txt

@@ -0,0 +1,6 @@
+# Standalone tool built from replay.c only.
+project(replay)
+
+# NOTE(review): dl and m are linked, but nothing in replay.c visibly uses them yet - confirm.
+add_executable(replay replay.c)
+target_link_libraries(replay dl m)

+ 100 - 0
replay/replay.c

@@ -0,0 +1,100 @@
+/*
+ *
+ * Copyright (C) 2017 Cafe Beverage. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+struct ioctl_buffer {	/* one ioctl record loaded from the log file */
+	unsigned long request;	/* raw request code exactly as read from the file */
+	void *ptr;	/* malloc'd copy of the _IOC_SIZE(request) argument bytes */
+};
+
+struct ioctl_buffer *ioctls = NULL;	/* realloc-grown array filled by main() */
+int ioctl_count = 0;	/* number of valid entries in ioctls */
+
+struct mmap_buffer {	/* one memory-dump record; no compiled code in this file fills it yet */
+	uint64_t gpu_va;
+	uint64_t length;	/* presumably the byte size of buffer - TODO confirm against the dump writer */
+	void *buffer;
+};
+
+struct mmap_buffer *mmaps = NULL;	/* list of mmap_buffer records; currently always NULL */
+int mmap_count = 0;	/* entry count for mmaps; currently always 0 */
+
+/* Replay the ioctl log argv[1] (records of a raw request followed by
+ * _IOC_SIZE(request) argument bytes) against /dev/mali0. Returns 0 once
+ * every recorded ioctl has been issued, 1 on usage, I/O or memory errors. */
+int main(int argc, const char **argv)
+{
+	if (argc != 3) {
+		printf("Usage: ./replay ioctl.bin memory.bin\n");
+		return 1;
+	}
+
+	FILE *fp = fopen(argv[1], "rb");
+	unsigned long request;
+
+	if (!fp) {
+		perror(argv[1]);
+		return 1;
+	}
+
+	while (fread(&request, 1, sizeof(request), fp) == sizeof(request)) {
+		size_t s = _IOC_SIZE(request);
+		/* Grow via a temporary: realloc failure must not lose ioctls */
+		void *grown = realloc(ioctls, sizeof(*ioctls) * (ioctl_count + 1));
+		if (!grown) {
+			perror("realloc");
+			return 1;
+		}
+		ioctls = grown;
+
+		/* malloc(0) may return NULL, so always ask for at least a byte */
+		void *arg = malloc(s ? s : 1);
+		printf("malloc(%zu) = %p\n", s, arg);
+
+		if (!arg || fread(arg, 1, s, fp) != s) {
+			fprintf(stderr, "%s: out of memory or truncated record\n", argv[1]);
+			return 1;
+		}
+		ioctls[ioctl_count++] = (struct ioctl_buffer) { .request = request, .ptr = arg };
+		printf("Request: %lx\n", request);
+	}
+
+	fclose(fp);
+
+	/* Open a context with the kernel */
+	int fd = open("/dev/mali0", O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		perror("/dev/mali0");
+		return 1;
+	}
+
+	/* Walk the ioctl tree; a failed call is reported but not fatal */
+	for (int i = 0; i < ioctl_count; ++i) {
+		printf("(%lx, %p)\n", ioctls[i].request, ioctls[i].ptr);
+		if (ioctl(fd, ioctls[i].request, ioctls[i].ptr) < 0) {
+			perror("ioctl");
+		}
+	}
+
+	/* TODO: load argv[2] (the memory dump) into mmaps and put it in
+	 * memory at the right places; nothing reads that file yet */
+	return 0;
+}