patch_solution_since_16.0.7.0.i386.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. #define _CRT_SECURE_NO_WARNINGS
  2. #include "i386_emulator.hpp"
  3. #include "keystone_assembler.hpp"
  4. #include "patch_solution_since_16.0.7.0.hpp"
  5. #include <algorithm>
  6. #include <fmt/format.h>
  7. namespace nkg {
  8. patch_solution_since<16, 0, 7, 0>::patch_solution_since(image_interpreter& libcc_interpreter) :
  9. m_libcc_interpreter(libcc_interpreter),
  10. m_va_CSRegistrationInfoFetcher_WIN_vtable(0),
  11. m_va_CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey(0),
  12. m_va_iat_entry_malloc(0) {}
  13. bool patch_solution_since<16, 0, 7, 0>::find_patch() {
  14. auto CSRegistrationInfoFetcher_WIN_type_descriptor_name =
  15. m_libcc_interpreter.search_section<const uint8_t*>(
  16. ".data",
  17. [](const uint8_t* p, size_t s) {
  18. if (s < sizeof(".?AVCSRegistrationInfoFetcher_WIN@@")) {
  19. return false;
  20. }
  21. return strcmp(reinterpret_cast<const char*>(p), ".?AVCSRegistrationInfoFetcher_WIN@@") == 0;
  22. }
  23. );
  24. if (CSRegistrationInfoFetcher_WIN_type_descriptor_name == nullptr) {
  25. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: RTTI info for CSRegistrationInfoFetcher_WIN is not found. (failure label 0)\n");
  26. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: This patch solution will be suppressed.\n");
  27. return false;
  28. }
  29. auto CSRegistrationInfoFetcher_WIN_rtti_type_descriptor = CSRegistrationInfoFetcher_WIN_type_descriptor_name - 0x8;
  30. auto CSRegistrationInfoFetcher_WIN_rtti_type_descriptor_va = m_libcc_interpreter.convert_ptr_to_va(CSRegistrationInfoFetcher_WIN_rtti_type_descriptor);
  31. auto CSRegistrationInfoFetcher_WIN_rtti_complete_object_locator_pTypeDescriptor =
  32. m_libcc_interpreter.search_section<const uint8_t*>(
  33. ".rdata",
  34. [this, CSRegistrationInfoFetcher_WIN_rtti_type_descriptor_va](const uint8_t* p, size_t s) {
  35. if (reinterpret_cast<uintptr_t>(p) % sizeof(uint32_t) != 0) {
  36. return false;
  37. }
  38. if (s < sizeof(uint32_t)) {
  39. return false;
  40. }
  41. if (*reinterpret_cast<const uint32_t*>(p) != CSRegistrationInfoFetcher_WIN_rtti_type_descriptor_va) {
  42. return false;
  43. }
  44. if (s < sizeof(uint32_t) * 2) {
  45. return false;
  46. }
  47. auto maybe_CSRegistrationInfoFetcher_WIN_rtti_class_hierarchy_descriptor_va = reinterpret_cast<const uint32_t*>(p)[1];
  48. try {
  49. return memcmp(m_libcc_interpreter.image_section_header_from_va(maybe_CSRegistrationInfoFetcher_WIN_rtti_class_hierarchy_descriptor_va)->Name, ".rdata\x00\x00", 8) == 0;
  50. } catch (nkg::exception&) {
  51. return false;
  52. }
  53. }
  54. );
  55. if (CSRegistrationInfoFetcher_WIN_rtti_complete_object_locator_pTypeDescriptor == nullptr) {
  56. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: RTTI info for CSRegistrationInfoFetcher_WIN is not found. (failure label 1)\n");
  57. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: This patch solution will be suppressed.\n");
  58. return false;
  59. }
  60. auto CSRegistrationInfoFetcher_WIN_rtti_complete_object_locator = CSRegistrationInfoFetcher_WIN_rtti_complete_object_locator_pTypeDescriptor - 0xC;
  61. auto CSRegistrationInfoFetcher_WIN_rtti_complete_object_locator_va = m_libcc_interpreter.convert_ptr_to_va(CSRegistrationInfoFetcher_WIN_rtti_complete_object_locator);
  62. auto CSRegistrationInfoFetcher_WIN_vtable_before =
  63. m_libcc_interpreter.search_section<const uint8_t*>(
  64. ".rdata",
  65. [CSRegistrationInfoFetcher_WIN_rtti_complete_object_locator_va](const uint8_t* p, size_t s) {
  66. if (reinterpret_cast<uintptr_t>(p) % sizeof(uint32_t) != 0) {
  67. return false;
  68. }
  69. if (s < sizeof(uint32_t)) {
  70. return false;
  71. }
  72. return *reinterpret_cast<const uint32_t*>(p) == CSRegistrationInfoFetcher_WIN_rtti_complete_object_locator_va;
  73. }
  74. );
  75. if (CSRegistrationInfoFetcher_WIN_vtable_before == nullptr) {
  76. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: Vftable for CSRegistrationInfoFetcher_WIN is not found.\n");
  77. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: This patch solution will be suppressed.\n");
  78. return false;
  79. }
  80. auto CSRegistrationInfoFetcher_WIN_vtable =
  81. reinterpret_cast<const uint32_t*>(CSRegistrationInfoFetcher_WIN_vtable_before + sizeof(uint32_t));
  82. m_va_CSRegistrationInfoFetcher_WIN_vtable = m_libcc_interpreter.convert_ptr_to_va(CSRegistrationInfoFetcher_WIN_vtable);
  83. m_va_CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey = CSRegistrationInfoFetcher_WIN_vtable[6];
  84. wprintf(L"[*] patch_solution_since<16, 0, 7, 0>: m_va_CSRegistrationInfoFetcher_WIN_vtable = 0x%08x\n", m_va_CSRegistrationInfoFetcher_WIN_vtable);
  85. wprintf(L"[*] patch_solution_since<16, 0, 7, 0>: m_va_CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey = 0x%08x\n", m_va_CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey);
  86. i386_emulator x86_emulator;
  87. x86_emulator.context_set("heap_base", uint32_t{ 0x7f000000 });
  88. x86_emulator.context_set("heap_size", size_t{ 0x1000 * 32 });
  89. x86_emulator.context_set("heap_records", std::map<uint32_t, uint32_t>{});
  90. x86_emulator.context_set("stack_base", uint32_t{ 0x7fff0000 });
  91. x86_emulator.context_set("stack_size", size_t{ 0x1000 * 32 });
  92. x86_emulator.context_set("stack_top", uint32_t{ x86_emulator.context_get<uint32_t>("stack_base") - x86_emulator.context_get<size_t>("stack_size") });
  93. x86_emulator.context_set("r0_to_r3_stub_area_base", uint32_t{ 0xffffe000 });
  94. x86_emulator.context_set("r0_to_r3_stub_area_size", size_t{ 0x1000 });
  95. x86_emulator.context_set("dead_area_base", uint32_t{ 0xfffff000 });
  96. x86_emulator.context_set("dead_area_size", size_t{ 0x1000 });
  97. x86_emulator.context_set("iat_base", uint32_t{ m_libcc_interpreter.convert_rva_to_va(m_libcc_interpreter.image_nt_headers()->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress) });
  98. x86_emulator.context_set("iat_size", size_t{ m_libcc_interpreter.image_nt_headers()->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].Size });
  99. x86_emulator.context_set("external_api_stub_area_base", uint32_t{ 0x80000000 });
  100. x86_emulator.context_set("external_api_stub_area_size", size_t{ (x86_emulator.context_get<size_t>("iat_size") / 8 + 0xfff) / 0x1000 * 0x1000 });
  101. x86_emulator.context_set("external_api_impl", std::map<std::string, uint32_t>{});
  102. x86_emulator.context_set("external_api_impl_area_base", uint32_t{ 0x90000000 });
  103. x86_emulator.context_set("external_api_impl_area_size", size_t{ 0 });
  104. x86_emulator.context_set("gdt_base", uint32_t{ 0xffff0000 });
  105. x86_emulator.context_set("gdt_size", size_t{ 0x1000 });
  106. x86_emulator.context_set("fs_base", uint32_t{ 0xa0000000 });
  107. x86_emulator.context_set("fs_size", size_t{ 0x1000 });
  108. x86_emulator.context_set("start_address", static_cast<uint32_t>(m_va_CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey));
  109. x86_emulator.context_set("dead_address", x86_emulator.context_get<uint32_t>("dead_area_base"));
  110. // allocate heap
  111. x86_emulator.mem_map(x86_emulator.context_get<uint32_t>("heap_base"), x86_emulator.context_get<size_t>("heap_size"), UC_PROT_READ | UC_PROT_WRITE);
  112. // allocate stack
  113. x86_emulator.mem_map(x86_emulator.context_get<uint32_t>("stack_top"), x86_emulator.context_get<size_t>("stack_size"), UC_PROT_READ | UC_PROT_WRITE);
  114. // allocate r0_to_r3_stub area
  115. x86_emulator.mem_map(x86_emulator.context_get<uint32_t>("r0_to_r3_stub_area_base"), x86_emulator.context_get<size_t>("r0_to_r3_stub_area_size"), UC_PROT_READ | UC_PROT_EXEC);
  116. x86_emulator.mem_write(x86_emulator.context_get<uint32_t>("r0_to_r3_stub_area_base"), keystone_assembler{ KS_ARCH_X86, KS_MODE_32 }.assemble("iretd;"));
  117. // allocate dead area
  118. x86_emulator.mem_map(x86_emulator.context_get<uint32_t>("dead_area_base"), x86_emulator.context_get<size_t>("dead_area_size"), UC_PROT_READ | UC_PROT_EXEC);
  119. // allocate and hook read access to IAT
  120. {
  121. auto iat_base = x86_emulator.context_get<uint32_t>("iat_base");
  122. auto iat_size = x86_emulator.context_get<size_t>("iat_size");
  123. auto external_api_stub_area_base = x86_emulator.context_get<uint32_t>("external_api_stub_area_base");
  124. auto iat_page_base = iat_base / 0x1000 * 0x1000;
  125. auto iat_page_count = (iat_base - iat_page_base + iat_size + 0xfff) / 0x1000;
  126. x86_emulator.mem_map(iat_page_base, iat_page_count * 0x1000, UC_PROT_READ);
  127. x86_emulator.hook_add<UC_HOOK_MEM_READ>(
  128. [this, &x86_emulator, iat_base, external_api_stub_area_base](uc_mem_type type, uint32_t address, size_t size, int32_t value) {
  129. auto rva = m_libcc_interpreter.convert_va_to_rva(address);
  130. auto import_lookup_entry = m_libcc_interpreter.import_lookup_entry_from_rva(rva);
  131. if (import_lookup_entry && !IMAGE_SNAP_BY_ORDINAL(import_lookup_entry->u1.Ordinal)) {
  132. auto import_by_name_entry = m_libcc_interpreter.convert_rva_to_ptr<PIMAGE_IMPORT_BY_NAME>(import_lookup_entry->u1.AddressOfData);
  133. if (strcmp(import_by_name_entry->Name, "memcpy") == 0) {
  134. uint32_t impl_address = x86_emulator.context_get<std::map<std::string, uint32_t>&>("external_api_impl")["memcpy"];
  135. x86_emulator.mem_write(address, &impl_address, sizeof(impl_address));
  136. } else {
  137. uint32_t stub_address = external_api_stub_area_base + (address - iat_base) / sizeof(IMAGE_THUNK_DATA);
  138. x86_emulator.mem_write(address, &stub_address, sizeof(stub_address));
  139. }
  140. } else {
  141. x86_emulator.emu_stop();
  142. }
  143. },
  144. iat_base,
  145. iat_base + iat_size - 1
  146. );
  147. }
  148. // allocate and setup external api stub area
  149. {
  150. auto external_api_stub_area_base = x86_emulator.context_get<uint32_t>("external_api_stub_area_base");
  151. auto external_api_stub_area_size = x86_emulator.context_get<size_t>("external_api_stub_area_size");
  152. x86_emulator.mem_map(external_api_stub_area_base, external_api_stub_area_size, UC_PROT_READ | UC_PROT_EXEC);
  153. x86_emulator.mem_write(external_api_stub_area_base, std::vector<uint8_t>(external_api_stub_area_size, 0xc3)); // c3 -> ret
  154. x86_emulator.hook_add<UC_HOOK_CODE>(
  155. [this, &x86_emulator, external_api_stub_area_base](uint32_t address, size_t size) {
  156. auto iat_base = x86_emulator.context_get<uint32_t>("iat_base");
  157. auto from_va = iat_base + (address - external_api_stub_area_base) * sizeof(IMAGE_THUNK_DATA);
  158. auto from_rva = m_libcc_interpreter.convert_va_to_rva(from_va);
  159. auto import_lookup_entry = m_libcc_interpreter.import_lookup_entry_from_rva(from_rva);
  160. if (import_lookup_entry && !IMAGE_SNAP_BY_ORDINAL(import_lookup_entry->u1.Ordinal)) {
  161. auto import_by_name_entry = m_libcc_interpreter.convert_rva_to_ptr<PIMAGE_IMPORT_BY_NAME>(import_lookup_entry->u1.AddressOfData);
  162. if (strcmp(import_by_name_entry->Name, "malloc") == 0) {
  163. m_va_iat_entry_malloc = from_va;
  164. uint32_t esp;
  165. x86_emulator.reg_read(UC_X86_REG_ESP, &esp);
  166. uint32_t alloc_size;
  167. x86_emulator.mem_read(esp + 4, &alloc_size, sizeof(alloc_size));
  168. auto& heap_records = x86_emulator.context_get<std::map<uint32_t, uint32_t>&>("heap_records");
  169. auto predecessor_chunk =
  170. std::adjacent_find(
  171. heap_records.begin(),
  172. heap_records.end(),
  173. [alloc_size](const auto& chunk0, const auto& chunk1) { return chunk1.first - (chunk0.first + chunk0.second) >= alloc_size; }
  174. );
  175. uint32_t alloc_p;
  176. if (predecessor_chunk != heap_records.end()) {
  177. alloc_p = predecessor_chunk->first + predecessor_chunk->second;
  178. } else {
  179. auto heap_base = x86_emulator.context_get<uint32_t>("heap_base");
  180. auto heap_size = x86_emulator.context_get<uint32_t>("heap_size");
  181. auto free_space_base = heap_records.size() > 0 ? heap_records.rbegin()->first + heap_records.rbegin()->second : heap_base;
  182. auto free_space_size = heap_base + heap_size - free_space_base;
  183. if (free_space_size < alloc_size) {
  184. auto heap_expand_base = heap_base + heap_size;
  185. auto heap_expand_size = (alloc_size - free_space_size + 0xfff) / 0x1000 * 0x1000;
  186. x86_emulator.mem_map(heap_expand_base, heap_expand_size, UC_PROT_READ | UC_PROT_WRITE);
  187. }
  188. alloc_p = free_space_base;
  189. }
  190. heap_records[alloc_p] = alloc_size;
  191. x86_emulator.reg_write(UC_X86_REG_EAX, &alloc_p);
  192. } else if (strcmp(import_by_name_entry->Name, "free") == 0) {
  193. uint32_t esp;
  194. x86_emulator.reg_read(UC_X86_REG_ESP, &esp);
  195. uint32_t alloc_p;
  196. x86_emulator.mem_read(esp + 4, &alloc_p, sizeof(alloc_p));
  197. auto& heap_records = x86_emulator.context_get<std::map<uint32_t, uint32_t>&>("heap_records");
  198. auto chunk = heap_records.find(alloc_p);
  199. if (chunk != heap_records.end()) {
  200. heap_records.erase(chunk);
  201. } else {
  202. x86_emulator.emu_stop();
  203. }
  204. } else {
  205. x86_emulator.emu_stop();
  206. }
  207. } else {
  208. x86_emulator.emu_stop();
  209. }
  210. },
  211. external_api_stub_area_base,
  212. external_api_stub_area_base + external_api_stub_area_size - 1
  213. );
  214. }
  215. // allocate and setup external api impl area
  216. {
  217. keystone_assembler x86_assembler{ KS_ARCH_X86, KS_MODE_32 };
  218. std::map<std::string, std::vector<uint8_t>> machine_code_list =
  219. {
  220. std::make_pair(
  221. "memcpy",
  222. x86_assembler.assemble(
  223. "push edi;"
  224. "push esi;"
  225. "mov eax, dword ptr [esp + 0x8 + 0x4];"
  226. "mov edi, eax;"
  227. "mov esi, dword ptr [esp + 0x8 + 0x8];"
  228. "mov ecx, dword ptr [esp + 0x8 + 0xc];"
  229. "rep movs byte ptr [edi], byte ptr [esi];"
  230. "pop esi;"
  231. "pop edi;"
  232. "ret;"
  233. )
  234. )
  235. };
  236. auto& external_api_impl = x86_emulator.context_get<std::map<std::string, uint32_t>&>("external_api_impl");
  237. auto& external_api_impl_area_base = x86_emulator.context_get<uint32_t&>("external_api_impl_area_base");
  238. auto& external_api_impl_area_size = x86_emulator.context_get<size_t&>("external_api_impl_area_size");
  239. auto p = external_api_impl_area_base;
  240. for (const auto& name_code_pair : machine_code_list) {
  241. external_api_impl[name_code_pair.first] = p;
  242. p = (p + name_code_pair.second.size() + 0xf) / 0x10 * 0x10;
  243. }
  244. external_api_impl_area_size = (p + 0xfff) / 0x1000 * 0x1000 - external_api_impl_area_base;
  245. x86_emulator.mem_map(external_api_impl_area_base, external_api_impl_area_size, UC_PROT_READ | UC_PROT_EXEC);
  246. for (const auto& name_code_pair : machine_code_list) {
  247. x86_emulator.mem_write(external_api_impl[name_code_pair.first], name_code_pair.second);
  248. }
  249. }
  250. // allocate and setup GDT, segment registers
  251. {
  252. auto gdt_base = x86_emulator.context_get<uint32_t>("gdt_base");
  253. auto gdt_size = x86_emulator.context_get<size_t>("gdt_size");
  254. x86_emulator.mem_map(gdt_base, gdt_size, UC_PROT_READ | UC_PROT_WRITE);
  255. x86_emulator.create_gdt_entry(gdt_base, 0, 0, 0, 0); // null segment descriptor
  256. // -------------------------------------------------------- access_byte
  257. // 0x80 -> present bit
  258. // (0 << 5) -> DPL is set to 0
  259. // 0x10 -> code/data segment
  260. // 0x08 -> executable segment
  261. // !(0x4) -> not conforming code segment
  262. // 0x02 -> code segment is readable
  263. // !(0x01) -> accessed bit, this bit is managed by CPU
  264. // -------------------------------------------------------- flags
  265. // 0x08 -> 4k granularity
  266. // 0x04 -> 32-bit protected mode segment
  267. // !(0x01) -> AVL bit is not used
  268. x86_emulator.create_gdt_entry(gdt_base + 1 * 0x8, 0x00000000, 0xfffff, 0x80 | (0 << 5) | 0x10 | 0x08 | !(0x04) | 0x02 | !(0x01), 0x08 | 0x04 | !(0x01)); // kernel code segment
  269. // -------------------------------------------------------- access_byte
  270. // 0x80 -> present bit
  271. // (0 << 5) -> DPL is set to 0
  272. // 0x10 -> code/data segment
  273. // !(0x08) -> data segment
  274. // !(0x4) -> segment grows up
  275. // 0x02 -> data segment is writable
  276. // !(0x01) -> accessed bit, this bit is managed by CPU
  277. // -------------------------------------------------------- flags
  278. // 0x08 -> 4k granularity
  279. // 0x04 -> 32-bit protected mode segment
  280. // !(0x01) -> AVL bit is not used
  281. x86_emulator.create_gdt_entry(gdt_base + 2 * 0x8, 0x00000000, 0xfffff, 0x80 | (0 << 5) | 0x10 | !(0x08) | !(0x04) | 0x02 | !(0x01), 0x08 | 0x04 | !(0x01)); // kernel data segment
  282. // -------------------------------------------------------- access_byte
  283. // 0x80 -> present bit
  284. // (3 << 5) -> DPL is set to 3
  285. // 0x10 -> code/data segment
  286. // 0x08 -> executable segment
  287. // !(0x4) -> not conforming code segment
  288. // 0x02 -> code segment is readable
  289. // !(0x01) -> accessed bit, this bit is managed by CPU
  290. // -------------------------------------------------------- flags
  291. // 0x08 -> 4k granularity
  292. // 0x04 -> 32-bit protected mode segment
  293. // !(0x01) -> AVL bit is not used
  294. x86_emulator.create_gdt_entry(gdt_base + 3 * 0x8, 0x00000000, 0xfffff, 0x80 | (3 << 5) | 0x10 | 0x08 | !(0x04) | 0x02 | !(0x01), 0x08 | 0x04 | !(0x01)); // user code segment
  295. // -------------------------------------------------------- access_byte
  296. // 0x80 -> present bit
  297. // (3 << 5) -> DPL is set to 3
  298. // 0x10 -> code/data segment
  299. // !(0x08) -> data segment
  300. // !(0x4) -> segment grows up
  301. // 0x02 -> data segment is writable
  302. // !(0x01) -> accessed bit, this bit is managed by CPU
  303. // -------------------------------------------------------- flags
  304. // 0x08 -> 4k granularity
  305. // 0x04 -> 32-bit protected mode segment
  306. // !(0x01) -> AVL bit is not used
  307. x86_emulator.create_gdt_entry(gdt_base + 4 * 0x8, 0x00000000, 0xfffff, 0x80 | (3 << 5) | 0x10 | !(0x08) | !(0x04) | 0x02 | !(0x01), 0x08 | 0x04 | !(0x01)); // user data segment
  308. // -------------------------------------------------------- access_byte
  309. // 0x80 -> present bit
  310. // (3 << 5) -> DPL is set to 3
  311. // 0x10 -> code/data segment
  312. // !(0x08) -> data segment
  313. // !(0x4) -> segment grows up
  314. // 0x02 -> data segment is writable
  315. // !(0x01) -> accessed bit, this bit is managed by CPU
  316. // -------------------------------------------------------- flags
  317. // !(0x08) -> 1-byte granularity
  318. // 0x04 -> 32-bit protected mode segment
  319. // !(0x01) -> AVL bit is not used
  320. auto fs_base = x86_emulator.context_get<uint32_t>("fs_base");
  321. auto fs_size = x86_emulator.context_get<size_t>("fs_size");
  322. x86_emulator.create_gdt_entry(gdt_base + 7 * 0x8, fs_base, fs_size - 1, 0x80 | (3 << 5) | 0x10 | !(0x08) | !(0x04) | 0x02 | !(0x01), !(0x08) | 0x04 | !(0x01)); // user fs segment
  323. uc_x86_mmr gdtr = {};
  324. gdtr.base = gdt_base;
  325. gdtr.limit = gdt_base + gdt_size - 1;
  326. x86_emulator.reg_write(UC_X86_REG_GDTR, &gdtr);
  327. uint16_t cs, ds, es, fs, gs, ss;
  328. cs = (1 << 3) | (0 << 2) | (0); // use kernel code segmet
  329. ss = (2 << 3) | (0 << 2) | (0); // use kernel data segmet
  330. ds = es = (4 << 3) | (0 << 2) | (3); // use user data segment
  331. fs = (7 << 3) | (0 << 2) | (3); // use user fs segment
  332. gs = 0; // not used
  333. uint32_t eflags;
  334. x86_emulator.reg_read(UC_X86_REG_EFLAGS, &eflags);
  335. x86_emulator.reg_write(UC_X86_REG_CS, &cs);
  336. x86_emulator.reg_write(UC_X86_REG_SS, &ss);
  337. x86_emulator.reg_write(UC_X86_REG_DS, &ds);
  338. x86_emulator.reg_write(UC_X86_REG_ES, &es);
  339. x86_emulator.reg_write(UC_X86_REG_FS, &fs);
  340. x86_emulator.reg_write(UC_X86_REG_GS, &gs);
  341. }
  342. // allocate and hook access to fs area
  343. {
  344. auto fs_base = x86_emulator.context_get<uint32_t>("fs_base");
  345. auto fs_size = x86_emulator.context_get<size_t>("fs_size");
  346. x86_emulator.mem_map(fs_base, fs_size, UC_PROT_READ | UC_PROT_WRITE);
  347. x86_emulator.hook_add<UC_HOOK_MEM_READ>(
  348. [this, &x86_emulator, fs_base](uc_mem_type access, uint32_t address, size_t size, int64_t value) {
  349. switch (address - fs_base) {
  350. case 0:
  351. if (size == 4) {
  352. // Current Structured Exception Handling (SEH) frame, leave it NULL
  353. } else {
  354. x86_emulator.emu_stop();
  355. }
  356. break;
  357. default:
  358. x86_emulator.emu_stop();
  359. break;
  360. }
  361. },
  362. fs_base,
  363. fs_base + fs_size - 1
  364. );
  365. }
  366. // x86_emulator.hook_add<UC_HOOK_CODE>([](uint32_t address, uint32_t size) { wprintf_s(L"code_trace, address = 0x%08x\n", address); });
  367. x86_emulator.hook_add<UC_HOOK_MEM_UNMAPPED>(
  368. [this, &x86_emulator](uc_mem_type access, uint32_t address, size_t size, int64_t value) -> bool {
  369. try {
  370. auto fault_section = m_libcc_interpreter.image_section_header_from_va(address);
  371. auto page_base = address / 0x1000 * 0x1000;
  372. auto page_size = 0x1000;
  373. uint32_t page_perms = UC_PROT_NONE;
  374. if (fault_section->Characteristics & IMAGE_SCN_MEM_READ) {
  375. page_perms |= UC_PROT_READ;
  376. }
  377. if (fault_section->Characteristics & IMAGE_SCN_MEM_WRITE) {
  378. page_perms |= UC_PROT_WRITE;
  379. }
  380. if (fault_section->Characteristics & IMAGE_SCN_MEM_EXECUTE) {
  381. page_perms |= UC_PROT_EXEC;
  382. }
  383. x86_emulator.mem_map(page_base, page_size, page_perms);
  384. x86_emulator.mem_write(page_base, m_libcc_interpreter.convert_va_to_ptr<const void*>(page_base), page_size);
  385. return true;
  386. } catch (::nkg::exception&) {
  387. return false;
  388. }
  389. }
  390. );
  391. // set ebp, esp
  392. uint32_t init_ebp = x86_emulator.context_get<uint32_t>("stack_base") - x86_emulator.context_get<size_t>("stack_size") / 4;
  393. uint32_t init_esp = x86_emulator.context_get<uint32_t>("stack_base") - x86_emulator.context_get<size_t>("stack_size") / 2;
  394. x86_emulator.reg_write(UC_X86_REG_EBP, &init_ebp);
  395. x86_emulator.reg_write(UC_X86_REG_ESP, &init_esp);
  396. // setup iretd context
  397. uint32_t ring3_eip = x86_emulator.context_get<uint32_t>("start_address");
  398. uint32_t ring3_cs = (3 << 3) | (0 << 2) | (3); // use user code segment
  399. uint32_t ring3_eflags; x86_emulator.reg_read(UC_X86_REG_EFLAGS, &ring3_eflags);
  400. uint32_t ring3_esp = init_esp + 5 * 4;
  401. uint32_t ring3_ss = (4 << 3) | (0 << 2) | (3); // use user data segment
  402. x86_emulator.mem_write(init_esp, &ring3_eip, sizeof(ring3_eip));
  403. x86_emulator.mem_write(init_esp + 0x4, &ring3_cs, sizeof(ring3_cs));
  404. x86_emulator.mem_write(init_esp + 0x8, &ring3_eflags, sizeof(ring3_eflags));
  405. x86_emulator.mem_write(init_esp + 0xc, &ring3_esp, sizeof(ring3_esp));
  406. x86_emulator.mem_write(init_esp + 0x10, &ring3_ss, sizeof(ring3_ss));
  407. // set ring3 retaddr
  408. uint32_t ring3_retaddr = x86_emulator.context_get<uint32_t>("dead_address");
  409. x86_emulator.mem_write(ring3_esp, &ring3_retaddr, sizeof(ring3_retaddr));
  410. // set argument registers
  411. uint32_t init_ecx = 0; // `this` pointer of CSRegistrationInfoFetcher_WIN, but we don't need it for now.
  412. uint32_t retval_addr = ring3_esp + 0x40; // a pointer to stack memory which stores return value
  413. x86_emulator.reg_write(UC_X86_REG_ECX, &init_ecx);
  414. x86_emulator.mem_write(ring3_esp + 4, &retval_addr, sizeof(retval_addr)); // write to dword ptr [ring3_esp + 4]
  415. //
  416. // start emulate
  417. //
  418. try {
  419. x86_emulator.emu_start(x86_emulator.context_get<uint32_t>("r0_to_r3_stub_area_base"), x86_emulator.context_get<uint32_t>("dead_address"));
  420. } catch (nkg::exception&) {
  421. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: Code emulation failed.\n");
  422. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: This patch solution will be suppressed.\n");
  423. return false;
  424. }
  425. wprintf_s(L"[*] patch_solution_since<16, 0, 7, 0>: m_va_iat_entry_malloc = 0x%08x\n", m_va_iat_entry_malloc);
  426. //
  427. // get result
  428. //
  429. // on I386 platform, `std::string` has follow memory layout:
  430. // ------------------------------
  431. // | offset | size |
  432. // ------------------------------
  433. // | +0 | 0x10 | `char[16]: a small string buffer` OR `char*: a large string buffer pointer`
  434. // ------------------------------
  435. // | +0x10 | 0x4 | size_t: string length
  436. // ------------------------------
  437. // | +0x14 | 0x4 | size_t: capacity
  438. // ------------------------------
  439. //
  440. uint32_t encoded_key_length;
  441. x86_emulator.mem_read(retval_addr + 0x10, &encoded_key_length, sizeof(encoded_key_length));
  442. if (encoded_key_length != official_encoded_key.length()) {
  443. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: Unexpected encoded key length(%u).\n", encoded_key_length);
  444. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: This patch solution will be suppressed.\n");
  445. return false;
  446. }
  447. uint32_t encoded_key_ptr;
  448. x86_emulator.mem_read(retval_addr, &encoded_key_ptr, sizeof(encoded_key_ptr));
  449. auto encoded_key = x86_emulator.mem_read(encoded_key_ptr, encoded_key_length);
  450. if (memcmp(encoded_key.data(), official_encoded_key.data(), encoded_key.size()) == 0) {
  451. wprintf_s(L"[+] patch_solution_since<16, 0, 7, 0>: Official encoded key is found.\n");
  452. return true;
  453. } else {
  454. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: Official encoded key is not found.\n");
  455. wprintf_s(L"[-] patch_solution_since<16, 0, 7, 0>: This patch solution will be suppressed.\n");
  456. return false;
  457. }
  458. }
  459. bool patch_solution_since<16, 0, 7, 0>::check_rsa_privkey(const rsa_cipher& cipher) {
  460. return true; // no requirements
  461. }
  462. void patch_solution_since<16, 0, 7, 0>::make_patch(const rsa_cipher& cipher) {
  463. auto encoded_key = _build_encoded_key(cipher);
  464. auto CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey =
  465. m_libcc_interpreter.convert_va_to_ptr<uint8_t*>(m_va_CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey);
  466. std::vector<std::string> patch_code_chunks;
  467. patch_code_chunks.emplace_back("push edi;");
  468. patch_code_chunks.emplace_back("push esi;");
  469. patch_code_chunks.emplace_back("push ebx;");
  470. patch_code_chunks.emplace_back("push ebp;");
  471. patch_code_chunks.emplace_back("mov ebp, esp;");
  472. patch_code_chunks.emplace_back("call label; label: pop ebx; sub ebx, label;"); // ebx <- relocation shift value
  473. patch_code_chunks.emplace_back(fmt::format("mov eax, {:#08x};", m_va_iat_entry_malloc));
  474. patch_code_chunks.emplace_back("add eax, ebx;");
  475. patch_code_chunks.emplace_back("mov eax, dword ptr [eax];"); // eax <- address of `malloc`
  476. patch_code_chunks.emplace_back(fmt::format("push {:#x};", encoded_key.length() + 1));
  477. patch_code_chunks.emplace_back("call eax;");
  478. patch_code_chunks.emplace_back("add esp, 0x4;");
  479. {
  480. std::vector<uint32_t> push_values((encoded_key.length() + 1 + 3) / 4, 0);
  481. memcpy(push_values.data(), encoded_key.data(), encoded_key.length());
  482. std::for_each(push_values.crbegin(), push_values.crend(), [&patch_code_chunks](uint32_t x) { patch_code_chunks.emplace_back(fmt::format("push {:#08x};", x)); });
  483. }
  484. patch_code_chunks.emplace_back("mov edi, eax;");
  485. patch_code_chunks.emplace_back("mov esi, esp;");
  486. patch_code_chunks.emplace_back(fmt::format("mov ecx, {:#x};", encoded_key.length() + 1));
  487. patch_code_chunks.emplace_back("rep movs byte ptr [edi], byte ptr [esi];");
  488. patch_code_chunks.emplace_back("mov edx, dword ptr [ebp + 0x14];");
  489. patch_code_chunks.emplace_back("mov dword ptr [edx], eax;");
  490. patch_code_chunks.emplace_back(fmt::format("mov dword ptr [edx + 0x10], {:#x};", encoded_key.length()));
  491. patch_code_chunks.emplace_back(fmt::format("mov dword ptr [edx + 0x14], {:#x};", encoded_key.length() + 1));
  492. patch_code_chunks.emplace_back("mov eax, edx;");
  493. patch_code_chunks.emplace_back("leave;");
  494. patch_code_chunks.emplace_back("pop ebx;");
  495. patch_code_chunks.emplace_back("pop esi;");
  496. patch_code_chunks.emplace_back("pop edi;");
  497. patch_code_chunks.emplace_back("ret 4;");
  498. std::vector<uint8_t> assembled_patch_code;
  499. {
  500. keystone_assembler x86_assembler{ KS_ARCH_X86, KS_MODE_32 };
  501. auto current_va = m_va_CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey;
  502. auto next_reloc = m_libcc_interpreter.relocation_distribute().lower_bound(m_libcc_interpreter.convert_va_to_rva(current_va));
  503. for (const auto& patch_code_chunk : patch_code_chunks) {
  504. auto assembled_patch_code_chunk = x86_assembler.assemble(patch_code_chunk, current_va);
  505. while (true) {
  506. auto next_reloc_va = m_libcc_interpreter.convert_rva_to_va(next_reloc->first);
  507. auto next_reloc_size = next_reloc->second;
  508. if (current_va + assembled_patch_code_chunk.size() + 2 <= next_reloc_va) { // 2 -> size of machine code "jmp rel8"
  509. assembled_patch_code.insert(assembled_patch_code.end(), assembled_patch_code_chunk.begin(), assembled_patch_code_chunk.end());
  510. current_va += assembled_patch_code_chunk.size();
  511. break;
  512. } else if (current_va + 2 <= next_reloc_va) {
  513. auto next_va = next_reloc_va + next_reloc_size;
  514. auto assembled_jmp = x86_assembler.assemble(fmt::format("jmp {:#08x};", next_va), current_va);
  515. auto assembled_padding = std::vector<uint8_t>(next_va - (current_va + assembled_jmp.size()), 0xcc); // 0xcc -> int3
  516. assembled_patch_code.insert(assembled_patch_code.end(), assembled_jmp.begin(), assembled_jmp.end());
  517. assembled_patch_code.insert(assembled_patch_code.end(), assembled_padding.begin(), assembled_padding.end());
  518. current_va = next_va;
  519. ++next_reloc;
  520. } else {
  521. __assume(false); // impossible to reach here
  522. }
  523. }
  524. }
  525. }
  526. memcpy(CSRegistrationInfoFetcher_WIN_GenerateRegistrationKey, assembled_patch_code.data(), assembled_patch_code.size());
  527. wprintf_s(L"[*] patch_solution_since<16, 0, 7, 0>: Patch has been done.\n");
  528. }
  529. }