// Elf64Interpreter.cpp
  1. #include "Elf64Interpreter.hpp"
  2. #include "Exception.hpp"
  3. #include "ExceptionGeneric.hpp"
  4. #include <memory.h>
  5. #include <string.h>
  6. namespace nkg {
  7. [[nodiscard]]
  8. Elf64Interpreter Elf64Interpreter::Parse(const void* lpImage, size_t cbImage) {
  9. Elf64Interpreter Interpreter;
  10. //
  11. // Checking ELF header
  12. //
  13. Interpreter.m_ElfSize = cbImage;
  14. Interpreter.m_lpElfHdr = reinterpret_cast<const Elf64_Ehdr*>(lpImage);
  15. if (ARL::AddressIsInRangeEx(Interpreter.m_lpElfHdr, sizeof(Elf64_Ehdr), lpImage, cbImage) == false) {
  16. throw ARL::Exception(__BASE_FILE__, __LINE__, "Bad ELF file: image is corrupted.");
  17. }
  18. if (memcmp(Interpreter.m_lpElfHdr->e_ident, ELFMAG, SELFMAG) != 0) {
  19. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: header magic check failure.");
  20. }
  21. if (Interpreter.m_lpElfHdr->e_ident[EI_CLASS] != ELFCLASS64) {
  22. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Unsupported ELF file: not ELF64 image.");
  23. }
  24. switch (Interpreter.m_lpElfHdr->e_ident[EI_DATA]) {
  25. case ELFDATA2LSB:
  26. if (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__) {
  27. throw ARL::NotImplementedError(__BASE_FILE__, __LINE__, "Unsupported ELF file: unsupported endian.");
  28. }
  29. break;
  30. case ELFDATA2MSB:
  31. if (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) {
  32. throw ARL::NotImplementedError(__BASE_FILE__, __LINE__, "Unsupported ELF file: unsupported endian.");
  33. }
  34. break;
  35. default:
  36. throw ARL::Exception(__BASE_FILE__, __LINE__, "Bad ELF file: unknown endian.");
  37. }
  38. if (Interpreter.m_lpElfHdr->e_ident[EI_VERSION] != EV_CURRENT) {
  39. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_ident[EI_VERSION] check failure.");
  40. }
  41. // Interpreter.m_lpElfHdr->e_ident[EI_OSABI]
  42. // Interpreter.m_lpElfHdr->e_ident[EI_ABIVERSION]
  43. for (int i = EI_PAD; i < sizeof(Interpreter.m_lpElfHdr->e_ident); ++i) {
  44. if (Interpreter.m_lpElfHdr->e_ident[i] != 0) {
  45. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_ident padding contains non-zero byte(s).");
  46. }
  47. }
  48. if (Interpreter.m_lpElfHdr->e_version != EV_CURRENT) {
  49. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_version check failure.");
  50. }
  51. if (Interpreter.m_lpElfHdr->e_ehsize != sizeof(Elf64_Ehdr)) {
  52. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_ehsize check failure.");
  53. }
  54. if (Interpreter.m_lpElfHdr->e_phoff && Interpreter.m_lpElfHdr->e_phentsize && Interpreter.m_lpElfHdr->e_phnum) {
  55. if (Interpreter.m_lpElfHdr->e_phentsize != sizeof(Elf64_Phdr)) {
  56. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_phentsize check failure.");
  57. }
  58. Interpreter.m_lpElfProgramHdr =
  59. ARL::AddressOffsetWithCast<Elf64_Phdr*>(lpImage, Interpreter.m_lpElfHdr->e_phoff);
  60. auto a1 = Interpreter.m_lpElfProgramHdr;
  61. auto a2 = Interpreter.m_lpElfProgramHdr + Interpreter.m_lpElfHdr->e_phnum;
  62. if (a1 < a2) {
  63. if (ARL::AddressIsInRangeEx(a1, a2, lpImage, cbImage) == false) {
  64. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: image is corrupted.");
  65. }
  66. } else {
  67. throw ARL::OverflowError(__BASE_FILE__, __LINE__, "Bad ELF file: program header table overflowed.");
  68. }
  69. } else if (Interpreter.m_lpElfHdr->e_phoff == 0 && Interpreter.m_lpElfHdr->e_phentsize == 0 && Interpreter.m_lpElfHdr->e_phnum == 0) {
  70. Interpreter.m_lpElfProgramHdr = nullptr;
  71. } else {
  72. throw ARL::ValueError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_ph* check failure.");
  73. }
  74. if (Interpreter.m_lpElfHdr->e_shoff && Interpreter.m_lpElfHdr->e_shentsize && Interpreter.m_lpElfHdr->e_shnum) {
  75. if (Interpreter.m_lpElfHdr->e_shentsize != sizeof(Elf64_Shdr)) {
  76. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_shentsize check failure.");
  77. }
  78. Interpreter.m_lpElfSectionHdr =
  79. ARL::AddressOffsetWithCast<Elf64_Shdr*>(lpImage, Interpreter.m_lpElfHdr->e_shoff);
  80. auto b1 = Interpreter.m_lpElfSectionHdr;
  81. auto b2 = Interpreter.m_lpElfSectionHdr + Interpreter.m_lpElfHdr->e_shnum;
  82. if (b1 < b2) {
  83. if (ARL::AddressIsInRangeEx(b1, b2, lpImage, cbImage) == false) {
  84. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: image is corrupted.");
  85. }
  86. } else {
  87. throw ARL::OverflowError(__BASE_FILE__, __LINE__, "Bad ELF file: section header table overflowed.");
  88. }
  89. } else if (Interpreter.m_lpElfHdr->e_shoff == 0 && Interpreter.m_lpElfHdr->e_shentsize == 0 && Interpreter.m_lpElfHdr->e_shnum == 0) {
  90. Interpreter.m_lpElfSectionHdr = nullptr;
  91. } else {
  92. throw ARL::ValueError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_sh* check failure.");
  93. }
  94. if (Interpreter.m_lpElfHdr->e_shstrndx != SHN_UNDEF) {
  95. if (Interpreter.m_lpElfHdr->e_shstrndx >= Interpreter.m_lpElfHdr->e_shnum) {
  96. throw ARL::IndexError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Ehdr::e_shstrndx is out of range.");
  97. }
  98. }
  99. //
  100. // Checking program header table and section header table
  101. //
  102. if (Interpreter.m_lpElfProgramHdr && Interpreter.m_lpElfSectionHdr) {
  103. auto a1 = Interpreter.m_lpElfProgramHdr;
  104. auto a2 = Interpreter.m_lpElfProgramHdr + Interpreter.m_lpElfHdr->e_phnum;
  105. auto b1 = Interpreter.m_lpElfSectionHdr;
  106. auto b2 = Interpreter.m_lpElfSectionHdr + Interpreter.m_lpElfHdr->e_shnum;
  107. bool NotOverlapped =
  108. (ARL::AddressDelta(a1, b1) < 0 && ARL::AddressDelta(a2, b1) <= 0) ||
  109. (ARL::AddressDelta(b1, a1) < 0 && ARL::AddressDelta(b2, a1) <= 0);
  110. if (NotOverlapped == false) {
  111. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: program header table and section header table overlap.");
  112. }
  113. }
  114. //
  115. // Parsing program header
  116. //
  117. {
  118. for (decltype(Elf64_Ehdr::e_phnum) i = 0; i < Interpreter.m_lpElfHdr->e_phnum; ++i) {
  119. const auto& proghdr = Interpreter.m_lpElfProgramHdr[i];
  120. if (ARL::AddressIsInRangeEx(ARL::AddressOffset(lpImage, proghdr.p_offset), proghdr.p_filesz, lpImage, cbImage) == false) {
  121. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: image is corrupted.");
  122. }
  123. if (auto p_align = proghdr.p_align; p_align) {
  124. // align must be a power of 2
  125. if ((p_align & (p_align - 1)) != 0) {
  126. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Phdr[%u]: p_align is not a power of 2.", i);
  127. }
  128. if (proghdr.p_offset % p_align != proghdr.p_vaddr % p_align) {
  129. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Phdr[%u]: p_offset !== p_vaddr (mod p_align).", i);
  130. }
  131. }
  132. // todo
  133. }
  134. }
  135. //
  136. // Parsing section header
  137. //
  138. {
  139. const Elf64_Shdr* sechdr_shstrtab;
  140. const char* secview_shstrtab;
  141. if (Interpreter.m_lpElfHdr->e_shstrndx != SHN_UNDEF) {
  142. sechdr_shstrtab = &Interpreter.m_lpElfSectionHdr[Interpreter.m_lpElfHdr->e_shstrndx];
  143. secview_shstrtab = ARL::AddressOffsetWithCast<const char*>(lpImage, sechdr_shstrtab->sh_offset);
  144. if (sechdr_shstrtab->sh_type != SHT_STRTAB) {
  145. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: sechdr_shstrtab->sh_type != SHT_STRTAB.");
  146. }
  147. if (ARL::AddressIsInRangeEx(secview_shstrtab, sechdr_shstrtab->sh_size, lpImage, cbImage) == false) {
  148. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: image is corrupted.");
  149. }
  150. } else {
  151. sechdr_shstrtab = nullptr;
  152. secview_shstrtab = nullptr;
  153. }
  154. for (decltype(Elf64_Ehdr::e_shnum) i = 0; i < Interpreter.m_lpElfHdr->e_shnum; ++i) {
  155. auto& sechdr = Interpreter.m_lpElfSectionHdr[i];
  156. //
  157. // checking sh_type
  158. //
  159. switch (sechdr.sh_type) {
  160. case SHT_SYMTAB:
  161. if (sechdr.sh_entsize != sizeof(Elf64_Sym)) {
  162. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_entsize != sizeof(Elf64_Dyn).", i);
  163. }
  164. break;
  165. case SHT_RELA:
  166. if (sechdr.sh_entsize != sizeof(Elf64_Rela)) {
  167. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_entsize != sizeof(Elf64_Rela).", i);
  168. }
  169. break;
  170. case SHT_DYNAMIC:
  171. if (sechdr.sh_entsize != sizeof(Elf64_Dyn)) {
  172. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_entsize != sizeof(Elf64_Dyn).", i);
  173. }
  174. break;
  175. case SHT_REL:
  176. if (sechdr.sh_entsize != sizeof(Elf64_Rel)) {
  177. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_entsize != sizeof(Elf64_Rel).", i);
  178. }
  179. break;
  180. case SHT_DYNSYM:
  181. if (sechdr.sh_entsize != sizeof(Elf64_Sym)) {
  182. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_entsize != sizeof(Elf64_Dyn).", i);
  183. }
  184. break;
  185. default:
  186. break;
  187. }
  188. //
  189. // checking sh_link and sh_info
  190. //
  191. switch (sechdr.sh_type) {
  192. case SHT_DYNAMIC:
  193. if (sechdr.sh_link == SHN_UNDEF) {
  194. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_link == SHN_UNDEF.", i);
  195. }
  196. if (sechdr.sh_link < Interpreter.m_lpElfHdr->e_shnum) {
  197. if (Interpreter.m_lpElfSectionHdr[sechdr.sh_link].sh_type != SHT_STRTAB) {
  198. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: incorrect value of sh_link.", i);
  199. }
  200. } else {
  201. throw ARL::IndexError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_link is out of range.", i);
  202. }
  203. if (sechdr.sh_info != 0) {
  204. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_info != 0.", i);
  205. }
  206. break;
  207. case SHT_HASH:
  208. if (sechdr.sh_link == SHN_UNDEF) {
  209. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_link == SHN_UNDEF.", i);
  210. }
  211. if (sechdr.sh_link < Interpreter.m_lpElfHdr->e_shnum) {
  212. if (Interpreter.m_lpElfSectionHdr[sechdr.sh_link].sh_type != SHT_SYMTAB && Interpreter.m_lpElfSectionHdr[sechdr.sh_link].sh_type != SHT_DYNSYM) {
  213. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: incorrect value of sh_link.", i);
  214. }
  215. } else {
  216. throw ARL::IndexError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_link is out of range.", i);
  217. }
  218. if (sechdr.sh_info != 0) {
  219. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_info != 0.", i);
  220. }
  221. break;
  222. case SHT_RELA:
  223. case SHT_REL:
  224. if (sechdr.sh_link == SHN_UNDEF) {
  225. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_link == SHN_UNDEF.", i);
  226. }
  227. if (sechdr.sh_link < Interpreter.m_lpElfHdr->e_shnum) {
  228. if (Interpreter.m_lpElfSectionHdr[sechdr.sh_link].sh_type != SHT_SYMTAB && Interpreter.m_lpElfSectionHdr[sechdr.sh_link].sh_type != SHT_DYNSYM) {
  229. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: incorrect value of sh_link.", i);
  230. }
  231. } else {
  232. throw ARL::IndexError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_link is out of range.", i);
  233. }
  234. if (sechdr.sh_flags & SHF_INFO_LINK) {
  235. if (sechdr.sh_info == SHN_UNDEF) {
  236. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_info == SHN_UNDEF.", i);
  237. }
  238. if (sechdr.sh_info >= Interpreter.m_lpElfHdr->e_shnum) {
  239. throw ARL::IndexError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_info is out of range.", i);
  240. }
  241. } else {
  242. if (sechdr.sh_info != 0) {
  243. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_info != 0.", i);
  244. }
  245. }
  246. break;
  247. case SHT_SYMTAB:
  248. case SHT_DYNSYM:
  249. if (sechdr.sh_link == SHN_UNDEF) {
  250. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_link == SHN_UNDEF.", i);
  251. }
  252. if (sechdr.sh_link < Interpreter.m_lpElfHdr->e_shnum) {
  253. if (Interpreter.m_lpElfSectionHdr[sechdr.sh_link].sh_type != SHT_STRTAB) {
  254. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: incorrect value of sh_link.", i);
  255. }
  256. } else {
  257. throw ARL::IndexError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_link is out of range.", i);
  258. }
  259. // todo: check sh_info
  260. break;
  261. default:
  262. break;
  263. }
  264. if (sechdr.sh_type != SHT_NOBITS) {
  265. if (ARL::AddressIsInRangeEx(ARL::AddressOffset(lpImage, sechdr.sh_offset), sechdr.sh_size, lpImage, cbImage) == false) {
  266. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: image is corrupted.", i);
  267. }
  268. }
  269. if (sechdr.sh_addr) {
  270. if (sechdr.sh_addralign && sechdr.sh_addr % sechdr.sh_addralign != 0) {
  271. throw ARL::AssertionError(__BASE_FILE__, __LINE__, "Bad ELF file: Elf64_Shdr[%u]: sh_addr is not aligned to sh_addralign.", i);
  272. }
  273. Interpreter.m_SectionRvaMap.emplace(
  274. std::make_pair(
  275. sechdr.sh_addr,
  276. Interpreter.m_lpElfSectionHdr + i
  277. )
  278. );
  279. }
  280. if (sechdr.sh_type != SHT_NOBITS) {
  281. Interpreter.m_SectionOffsetMap.emplace(
  282. std::make_pair(
  283. sechdr.sh_offset,
  284. Interpreter.m_lpElfSectionHdr + i
  285. )
  286. );
  287. }
  288. if (secview_shstrtab) {
  289. Interpreter.m_SectionNameMap.emplace(
  290. std::make_pair(
  291. std::string(ARL::AddressOffset(secview_shstrtab, sechdr.sh_name)),
  292. Interpreter.m_lpElfSectionHdr + i
  293. )
  294. );
  295. }
  296. // todo
  297. }
  298. }
  299. return Interpreter;
  300. }
  301. size_t Elf64Interpreter::ElfSize() const noexcept {
  302. return m_ElfSize;
  303. }
  304. [[nodiscard]]
  305. const Elf64_Phdr* Elf64Interpreter::ElfProgramHeader(size_t Idx) const {
  306. if (Idx < m_lpElfHdr->e_phnum) {
  307. return m_lpElfProgramHdr + Idx;
  308. } else {
  309. throw ARL::IndexError(__BASE_FILE__, __LINE__, "Elf64Interpreter: Idx is out of range.");
  310. }
  311. }
  312. [[nodiscard]]
  313. const Elf64_Shdr* Elf64Interpreter::ElfSectionHeader(size_t Idx) const {
  314. if (Idx < m_lpElfHdr->e_shnum) {
  315. return m_lpElfSectionHdr + Idx;
  316. } else {
  317. throw ARL::IndexError(__BASE_FILE__, __LINE__, "Elf64Interpreter: Idx is out of range.");
  318. }
  319. }
  320. [[nodiscard]]
  321. const Elf64_Shdr* Elf64Interpreter::ElfSectionHeader(std::string_view SectionName) const {
  322. auto it = m_SectionNameMap.find(std::string(SectionName));
  323. if (it != m_SectionNameMap.end()) {
  324. return it->second;
  325. } else {
  326. throw ARL::KeyError(__BASE_FILE__, __LINE__, "Elf64Interpreter: section %s is not found.", SectionName.data());
  327. }
  328. }
  329. [[nodiscard]]
  330. Elf64_Off Elf64Interpreter::ConvertRvaToOffset(Elf64_Addr Rva) const {
  331. auto it = m_SectionRvaMap.upper_bound(Rva);
  332. if (it != m_SectionRvaMap.begin()) {
  333. --it;
  334. if (it->second->sh_addr <= Rva && Rva < it->second->sh_addr + it->second->sh_size) {
  335. return it->second->sh_offset + (Rva - it->second->sh_addr);
  336. }
  337. }
  338. throw ARL::KeyError(__BASE_FILE__, __LINE__, "Elf64Interpreter: Invalid RVA.");
  339. }
  340. [[nodiscard]]
  341. Elf64_Addr Elf64Interpreter::ConvertOffsetToRva(Elf64_Off Offset) const {
  342. auto it = m_SectionOffsetMap.upper_bound(Offset);
  343. if (it != m_SectionOffsetMap.begin()) {
  344. --it;
  345. if (it->second->sh_offset <= Offset && Offset < it->second->sh_offset + it->second->sh_size) {
  346. return it->second->sh_addr + (Offset - it->second->sh_offset);
  347. }
  348. }
  349. throw ARL::KeyError(__BASE_FILE__, __LINE__, "Elf64Interpreter: Invalid Offset.");
  350. }
  351. }