vmcore.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198
  1. /*
  2. * fs/proc/vmcore.c Interface for accessing the crash
  3. * dump from the system's previous life.
  4. * Heavily borrowed from fs/proc/kcore.c
  5. * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
  6. * Copyright (C) IBM Corporation, 2004. All rights reserved
  7. *
  8. */
  9. #include <linux/mm.h>
  10. #include <linux/kcore.h>
  11. #include <linux/user.h>
  12. #include <linux/elf.h>
  13. #include <linux/elfcore.h>
  14. #include <linux/export.h>
  15. #include <linux/slab.h>
  16. #include <linux/highmem.h>
  17. #include <linux/printk.h>
  18. #include <linux/bootmem.h>
  19. #include <linux/init.h>
  20. #include <linux/crash_dump.h>
  21. #include <linux/list.h>
  22. #include <linux/vmalloc.h>
  23. #include <linux/pagemap.h>
  24. #include <asm/uaccess.h>
  25. #include <asm/io.h>
  26. #include "internal.h"
  27. /* List representing chunks of contiguous memory areas and their offsets in
  28. * vmcore file.
  29. */
  30. static LIST_HEAD(vmcore_list);
  31. /* Stores the pointer to the buffer containing kernel elf core headers. */
  32. static char *elfcorebuf;
  33. static size_t elfcorebuf_sz;
  34. static size_t elfcorebuf_sz_orig;
  35. static char *elfnotes_buf;
  36. static size_t elfnotes_sz;
  37. /* Total size of vmcore file. */
  38. static u64 vmcore_size;
  39. static struct proc_dir_entry *proc_vmcore;
  40. /*
  41. * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
  42. * The called function has to take care of module refcounting.
  43. */
  44. static int (*oldmem_pfn_is_ram)(unsigned long pfn);
  45. int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
  46. {
  47. if (oldmem_pfn_is_ram)
  48. return -EBUSY;
  49. oldmem_pfn_is_ram = fn;
  50. return 0;
  51. }
  52. EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
/*
 * unregister_oldmem_pfn_is_ram - remove the registered pfn-check callback.
 *
 * The wmb() orders the NULL store before whatever the caller does next
 * (e.g. unloading the module that provided the callback).  NOTE(review):
 * readers (pfn_is_ram()) have no pairing read barrier, so a racing reader
 * may briefly still see the old pointer -- callers must tolerate that.
 */
void unregister_oldmem_pfn_is_ram(void)
{
	oldmem_pfn_is_ram = NULL;
	wmb();
}
EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
  59. static int pfn_is_ram(unsigned long pfn)
  60. {
  61. int (*fn)(unsigned long pfn);
  62. /* pfn is ram unless fn() checks pagetype */
  63. int ret = 1;
  64. /*
  65. * Ask hypervisor if the pfn is really ram.
  66. * A ballooned page contains no data and reading from such a page
  67. * will cause high load in the hypervisor.
  68. */
  69. fn = oldmem_pfn_is_ram;
  70. if (fn)
  71. ret = fn(pfn);
  72. return ret;
  73. }
  74. /* Reads a page from the oldmem device from given offset. */
  75. static ssize_t read_from_oldmem(char *buf, size_t count,
  76. u64 *ppos, int userbuf)
  77. {
  78. unsigned long pfn, offset;
  79. size_t nr_bytes;
  80. ssize_t read = 0, tmp;
  81. if (!count)
  82. return 0;
  83. offset = (unsigned long)(*ppos % PAGE_SIZE);
  84. pfn = (unsigned long)(*ppos / PAGE_SIZE);
  85. do {
  86. if (count > (PAGE_SIZE - offset))
  87. nr_bytes = PAGE_SIZE - offset;
  88. else
  89. nr_bytes = count;
  90. /* If pfn is not ram, return zeros for sparse dump files */
  91. if (pfn_is_ram(pfn) == 0)
  92. memset(buf, 0, nr_bytes);
  93. else {
  94. tmp = copy_oldmem_page(pfn, buf, nr_bytes,
  95. offset, userbuf);
  96. if (tmp < 0)
  97. return tmp;
  98. }
  99. *ppos += nr_bytes;
  100. count -= nr_bytes;
  101. buf += nr_bytes;
  102. read += nr_bytes;
  103. ++pfn;
  104. offset = 0;
  105. } while (count);
  106. return read;
  107. }
  108. /*
  109. * Architectures may override this function to allocate ELF header in 2nd kernel
  110. */
int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
	/* Weak default: nothing to allocate; @addr/@size are untouched. */
	return 0;
}
  115. /*
  116. * Architectures may override this function to free header
  117. */
/* Weak default: nothing was allocated by elfcorehdr_alloc(), so no-op. */
void __weak elfcorehdr_free(unsigned long long addr)
{}
  120. /*
  121. * Architectures may override this function to read from ELF header
  122. */
ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
	/* Default: the header lives in old memory; kernel-space dest (0). */
	return read_from_oldmem(buf, count, ppos, 0);
}
  127. /*
  128. * Architectures may override this function to read from notes sections
  129. */
ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
{
	/* Default: note segments live in old memory; kernel-space dest (0). */
	return read_from_oldmem(buf, count, ppos, 0);
}
  134. /*
  135. * Architectures may override this function to map oldmem
  136. */
/*
 * Weak default: old memory is ordinary physical memory, so a plain
 * remap_pfn_range() suffices.  Architectures with special access
 * requirements (e.g. s390) override this.
 */
int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
				  unsigned long from, unsigned long pfn,
				  unsigned long size, pgprot_t prot)
{
	return remap_pfn_range(vma, from, pfn, size, prot);
}
  143. /*
  144. * Copy to either kernel or user space
  145. */
  146. static int copy_to(void *target, void *src, size_t size, int userbuf)
  147. {
  148. if (userbuf) {
  149. if (copy_to_user((char __user *) target, src, size))
  150. return -EFAULT;
  151. } else {
  152. memcpy(target, src, size);
  153. }
  154. return 0;
  155. }
  156. /* Read from the ELF header and then the crash dump. On error, negative value is
  157. * returned otherwise number of bytes read are returned.
  158. */
static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
			     int userbuf)
{
	ssize_t acc = 0, tmp;	/* acc: bytes delivered so far */
	size_t tsz;		/* size of the current sub-copy */
	u64 start;
	struct vmcore *m = NULL;

	if (buflen == 0 || *fpos >= vmcore_size)
		return 0;

	/* trim buflen to not go beyond EOF */
	if (buflen > vmcore_size - *fpos)
		buflen = vmcore_size - *fpos;

	/*
	 * The file layout is: ELF headers (elfcorebuf), then the merged
	 * ELF note segment (elfnotes_buf), then the PT_LOAD chunks on
	 * vmcore_list, each at the offset computed at init time.
	 */

	/* Read ELF core header */
	if (*fpos < elfcorebuf_sz) {
		tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen);
		if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf))
			return -EFAULT;
		buflen -= tsz;
		*fpos += tsz;
		buffer += tsz;
		acc += tsz;

		/* leave now if filled buffer already */
		if (buflen == 0)
			return acc;
	}

	/* Read Elf note segment */
	if (*fpos < elfcorebuf_sz + elfnotes_sz) {
		void *kaddr;

		tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
		/* note buffer is indexed relative to end of the headers */
		kaddr = elfnotes_buf + *fpos - elfcorebuf_sz;
		if (copy_to(buffer, kaddr, tsz, userbuf))
			return -EFAULT;
		buflen -= tsz;
		*fpos += tsz;
		buffer += tsz;
		acc += tsz;

		/* leave now if filled buffer already */
		if (buflen == 0)
			return acc;
	}

	/* Old-memory chunks, in file-offset order. */
	list_for_each_entry(m, &vmcore_list, list) {
		if (*fpos < m->offset + m->size) {
			tsz = (size_t)min_t(unsigned long long,
					    m->offset + m->size - *fpos,
					    buflen);
			/* translate file offset to physical address */
			start = m->paddr + *fpos - m->offset;
			tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
			if (tmp < 0)
				return tmp;
			buflen -= tsz;
			*fpos += tsz;
			buffer += tsz;
			acc += tsz;

			/* leave now if filled buffer already */
			if (buflen == 0)
				return acc;
		}
	}

	return acc;
}
/* read() handler for /proc/vmcore: @buffer is user memory (userbuf = 1). */
static ssize_t read_vmcore(struct file *file, char __user *buffer,
			   size_t buflen, loff_t *fpos)
{
	return __read_vmcore((__force char *) buffer, buflen, fpos, 1);
}
  224. /*
  225. * The vmcore fault handler uses the page cache and fills data using the
  226. * standard __vmcore_read() function.
  227. *
  228. * On s390 the fault handler is used for memory regions that can't be mapped
  229. * directly with remap_pfn_range().
  230. */
static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
#ifdef CONFIG_S390
	struct address_space *mapping = vma->vm_file->f_mapping;
	pgoff_t index = vmf->pgoff;
	struct page *page;
	loff_t offset;
	char *buf;
	int rc;

	/* Returns the page locked, whether found in cache or newly created. */
	page = find_or_create_page(mapping, index, GFP_KERNEL);
	if (!page)
		return VM_FAULT_OOM;
	if (!PageUptodate(page)) {
		offset = (loff_t) index << PAGE_SHIFT;
		/* Fill the page through the kernel direct mapping. */
		buf = __va((page_to_pfn(page) << PAGE_SHIFT));
		rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0);
		if (rc < 0) {
			unlock_page(page);
			put_page(page);
			return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
		}
		SetPageUptodate(page);
	}
	unlock_page(page);
	/* Hand the still-referenced page to the fault machinery. */
	vmf->page = page;
	return 0;
#else
	/*
	 * On other architectures everything is remapped up-front in
	 * mmap_vmcore(), so a fault here means a bad access.
	 */
	return VM_FAULT_SIGBUS;
#endif
}
/* vm_ops for mmap_vmcore(); only the (s390-only) fault path is needed. */
static const struct vm_operations_struct vmcore_mmap_ops = {
	.fault = mmap_vmcore_fault,
};
  264. /**
  265. * alloc_elfnotes_buf - allocate buffer for ELF note segment in
  266. * vmalloc memory
  267. *
  268. * @notes_sz: size of buffer
  269. *
  270. * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
  271. * the buffer to user-space by means of remap_vmalloc_range().
  272. *
  273. * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is
  274. * disabled and there's no need to allow users to mmap the buffer.
  275. */
static inline char *alloc_elfnotes_buf(size_t notes_sz)
{
#ifdef CONFIG_MMU
	/* zeroed and mmap-able via remap_vmalloc_range() */
	return vmalloc_user(notes_sz);
#else
	/* mmap_vmcore() is disabled; a plain zeroed buffer suffices */
	return vzalloc(notes_sz);
#endif
}
  284. /*
  285. * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is
  286. * essential for mmap_vmcore() in order to map physically
  287. * non-contiguous objects (ELF header, ELF note segment and memory
  288. * regions in the 1st kernel pointed to by PT_LOAD entries) into
  289. * virtually contiguous user-space in ELF layout.
  290. */
  291. #ifdef CONFIG_MMU
  292. /*
  293. * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
  294. * reported as not being ram with the zero page.
  295. *
  296. * @vma: vm_area_struct describing requested mapping
  297. * @from: start remapping from
  298. * @pfn: page frame number to start remapping to
  299. * @size: remapping size
  300. * @prot: protection bits
  301. *
  302. * Returns zero on success, -EAGAIN on failure.
  303. */
static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
				    unsigned long from, unsigned long pfn,
				    unsigned long size, pgprot_t prot)
{
	unsigned long map_size;
	unsigned long pos_start, pos_end, pos;
	unsigned long zeropage_pfn = my_zero_pfn(0);
	size_t len = 0;		/* bytes of @vma mapped so far */

	pos_start = pfn;
	pos_end = pfn + (size >> PAGE_SHIFT);

	for (pos = pos_start; pos < pos_end; ++pos) {
		if (!pfn_is_ram(pos)) {
			/*
			 * We hit a page which is not ram. Remap the continuous
			 * region between pos_start and pos-1 and replace
			 * the non-ram page at pos with the zero page.
			 */
			if (pos > pos_start) {
				/* Remap continuous region */
				map_size = (pos - pos_start) << PAGE_SHIFT;
				if (remap_oldmem_pfn_range(vma, from + len,
							   pos_start, map_size,
							   prot))
					goto fail;
				len += map_size;
			}
			/* Remap the zero page */
			if (remap_oldmem_pfn_range(vma, from + len,
						   zeropage_pfn,
						   PAGE_SIZE, prot))
				goto fail;
			len += PAGE_SIZE;
			/* next RAM run starts after the non-RAM page */
			pos_start = pos + 1;
		}
	}
	if (pos > pos_start) {
		/* Remap the rest (trailing run of RAM pages) */
		map_size = (pos - pos_start) << PAGE_SHIFT;
		if (remap_oldmem_pfn_range(vma, from + len, pos_start,
					   map_size, prot))
			goto fail;
	}
	return 0;
fail:
	/* Tear down the partial mapping established so far. */
	do_munmap(vma->vm_mm, from, len);
	return -EAGAIN;
}
  351. static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
  352. unsigned long from, unsigned long pfn,
  353. unsigned long size, pgprot_t prot)
  354. {
  355. /*
  356. * Check if oldmem_pfn_is_ram was registered to avoid
  357. * looping over all pages without a reason.
  358. */
  359. if (oldmem_pfn_is_ram)
  360. return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
  361. else
  362. return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
  363. }
  364. static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
  365. {
  366. size_t size = vma->vm_end - vma->vm_start;
  367. u64 start, end, len, tsz;
  368. struct vmcore *m;
  369. start = (u64)vma->vm_pgoff << PAGE_SHIFT;
  370. end = start + size;
  371. if (size > vmcore_size || end > vmcore_size)
  372. return -EINVAL;
  373. if (vma->vm_flags & (VM_WRITE | VM_EXEC))
  374. return -EPERM;
  375. vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
  376. vma->vm_flags |= VM_MIXEDMAP;
  377. vma->vm_ops = &vmcore_mmap_ops;
  378. len = 0;
  379. if (start < elfcorebuf_sz) {
  380. u64 pfn;
  381. tsz = min(elfcorebuf_sz - (size_t)start, size);
  382. pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT;
  383. if (remap_pfn_range(vma, vma->vm_start, pfn, tsz,
  384. vma->vm_page_prot))
  385. return -EAGAIN;
  386. size -= tsz;
  387. start += tsz;
  388. len += tsz;
  389. if (size == 0)
  390. return 0;
  391. }
  392. if (start < elfcorebuf_sz + elfnotes_sz) {
  393. void *kaddr;
  394. tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
  395. kaddr = elfnotes_buf + start - elfcorebuf_sz;
  396. if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
  397. kaddr, tsz))
  398. goto fail;
  399. size -= tsz;
  400. start += tsz;
  401. len += tsz;
  402. if (size == 0)
  403. return 0;
  404. }
  405. list_for_each_entry(m, &vmcore_list, list) {
  406. if (start < m->offset + m->size) {
  407. u64 paddr = 0;
  408. tsz = (size_t)min_t(unsigned long long,
  409. m->offset + m->size - start, size);
  410. paddr = m->paddr + start - m->offset;
  411. if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
  412. paddr >> PAGE_SHIFT, tsz,
  413. vma->vm_page_prot))
  414. goto fail;
  415. size -= tsz;
  416. start += tsz;
  417. len += tsz;
  418. if (size == 0)
  419. return 0;
  420. }
  421. }
  422. return 0;
  423. fail:
  424. do_munmap(vma->vm_mm, vma->vm_start, len);
  425. return -EAGAIN;
  426. }
  427. #else
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
{
	/* Without an MMU /proc/vmcore cannot be mmap()ed; see comment above. */
	return -ENOSYS;
}
  432. #endif
/* File operations backing /proc/vmcore. */
static const struct file_operations proc_vmcore_operations = {
	.read = read_vmcore,
	.llseek = default_llseek,
	.mmap = mmap_vmcore,
};
/* Allocate a zeroed vmcore list element; NULL on allocation failure. */
static struct vmcore* __init get_new_element(void)
{
	return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
}
  442. static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
  443. struct list_head *vc_list)
  444. {
  445. u64 size;
  446. struct vmcore *m;
  447. size = elfsz + elfnotesegsz;
  448. list_for_each_entry(m, vc_list, list) {
  449. size += m->size;
  450. }
  451. return size;
  452. }
  453. /**
  454. * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry
  455. *
  456. * @ehdr_ptr: ELF header
  457. *
  458. * This function updates p_memsz member of each PT_NOTE entry in the
  459. * program header table pointed to by @ehdr_ptr to real size of ELF
  460. * note segment.
  461. */
  462. static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
  463. {
  464. int i, rc=0;
  465. Elf64_Phdr *phdr_ptr;
  466. Elf64_Nhdr *nhdr_ptr;
  467. phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
  468. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  469. void *notes_section;
  470. u64 offset, max_sz, sz, real_sz = 0;
  471. if (phdr_ptr->p_type != PT_NOTE)
  472. continue;
  473. max_sz = phdr_ptr->p_memsz;
  474. offset = phdr_ptr->p_offset;
  475. notes_section = kmalloc(max_sz, GFP_KERNEL);
  476. if (!notes_section)
  477. return -ENOMEM;
  478. rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
  479. if (rc < 0) {
  480. kfree(notes_section);
  481. return rc;
  482. }
  483. nhdr_ptr = notes_section;
  484. while (nhdr_ptr->n_namesz != 0) {
  485. sz = sizeof(Elf64_Nhdr) +
  486. (((u64)nhdr_ptr->n_namesz + 3) & ~3) +
  487. (((u64)nhdr_ptr->n_descsz + 3) & ~3);
  488. if ((real_sz + sz) > max_sz) {
  489. pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
  490. nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
  491. break;
  492. }
  493. real_sz += sz;
  494. nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
  495. }
  496. kfree(notes_section);
  497. phdr_ptr->p_memsz = real_sz;
  498. if (real_sz == 0) {
  499. pr_warn("Warning: Zero PT_NOTE entries found\n");
  500. }
  501. }
  502. return 0;
  503. }
  504. /**
  505. * get_note_number_and_size_elf64 - get the number of PT_NOTE program
  506. * headers and sum of real size of their ELF note segment headers and
  507. * data.
  508. *
  509. * @ehdr_ptr: ELF header
  510. * @nr_ptnote: buffer for the number of PT_NOTE program headers
  511. * @sz_ptnote: buffer for size of unique PT_NOTE program header
  512. *
  513. * This function is used to merge multiple PT_NOTE program headers
  514. * into a unique single one. The resulting unique entry will have
  515. * @sz_ptnote in its phdr->p_mem.
  516. *
  517. * It is assumed that program headers with PT_NOTE type pointed to by
  518. * @ehdr_ptr has already been updated by update_note_header_size_elf64
  519. * and each of PT_NOTE program headers has actual ELF note segment
  520. * size in its p_memsz member.
  521. */
  522. static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr,
  523. int *nr_ptnote, u64 *sz_ptnote)
  524. {
  525. int i;
  526. Elf64_Phdr *phdr_ptr;
  527. *nr_ptnote = *sz_ptnote = 0;
  528. phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
  529. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  530. if (phdr_ptr->p_type != PT_NOTE)
  531. continue;
  532. *nr_ptnote += 1;
  533. *sz_ptnote += phdr_ptr->p_memsz;
  534. }
  535. return 0;
  536. }
  537. /**
  538. * copy_notes_elf64 - copy ELF note segments in a given buffer
  539. *
  540. * @ehdr_ptr: ELF header
  541. * @notes_buf: buffer into which ELF note segments are copied
  542. *
  543. * This function is used to copy ELF note segment in the 1st kernel
  544. * into the buffer @notes_buf in the 2nd kernel. It is assumed that
  545. * size of the buffer @notes_buf is equal to or larger than sum of the
  546. * real ELF note segment headers and data.
  547. *
  548. * It is assumed that program headers with PT_NOTE type pointed to by
  549. * @ehdr_ptr has already been updated by update_note_header_size_elf64
  550. * and each of PT_NOTE program headers has actual ELF note segment
  551. * size in its p_memsz member.
  552. */
  553. static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf)
  554. {
  555. int i, rc=0;
  556. Elf64_Phdr *phdr_ptr;
  557. phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1);
  558. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  559. u64 offset;
  560. if (phdr_ptr->p_type != PT_NOTE)
  561. continue;
  562. offset = phdr_ptr->p_offset;
  563. rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
  564. &offset);
  565. if (rc < 0)
  566. return rc;
  567. notes_buf += phdr_ptr->p_memsz;
  568. }
  569. return 0;
  570. }
  571. /* Merges all the PT_NOTE headers into one. */
static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
					   char **notes_buf, size_t *notes_sz)
{
	int i, nr_ptnote = 0, rc = 0;
	char *tmp;
	Elf64_Ehdr *ehdr_ptr;
	Elf64_Phdr phdr;
	u64 phdr_sz = 0, note_off;

	ehdr_ptr = (Elf64_Ehdr *)elfptr;

	/* First fix up each PT_NOTE's p_memsz to its real note size. */
	rc = update_note_header_size_elf64(ehdr_ptr);
	if (rc < 0)
		return rc;

	rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz);
	if (rc < 0)
		return rc;

	/* Note data is gathered into a separate page-aligned buffer. */
	*notes_sz = roundup(phdr_sz, PAGE_SIZE);
	*notes_buf = alloc_elfnotes_buf(*notes_sz);
	if (!*notes_buf)
		return -ENOMEM;

	rc = copy_notes_elf64(ehdr_ptr, *notes_buf);
	if (rc < 0)
		return rc;

	/* Prepare merged PT_NOTE program header. */
	phdr.p_type = PT_NOTE;
	phdr.p_flags = 0;
	/* note data follows the ELF header and the post-merge phdr table */
	note_off = sizeof(Elf64_Ehdr) +
		(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr);
	phdr.p_offset = roundup(note_off, PAGE_SIZE);
	phdr.p_vaddr = phdr.p_paddr = 0;
	phdr.p_filesz = phdr.p_memsz = phdr_sz;
	phdr.p_align = 0;

	/* Add merged PT_NOTE program header*/
	tmp = elfptr + sizeof(Elf64_Ehdr);
	memcpy(tmp, &phdr, sizeof(phdr));
	tmp += sizeof(phdr);

	/* Remove unwanted PT_NOTE program headers. */
	i = (nr_ptnote - 1) * sizeof(Elf64_Phdr);	/* bytes reclaimed */
	*elfsz = *elfsz - i;
	/* slide the remaining (non-note) phdrs up behind the merged one */
	memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr)));
	memset(elfptr + *elfsz, 0, i);
	*elfsz = roundup(*elfsz, PAGE_SIZE);

	/* Modify e_phnum to reflect merged headers. */
	ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;

	return 0;
}
  617. /**
  618. * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry
  619. *
  620. * @ehdr_ptr: ELF header
  621. *
  622. * This function updates p_memsz member of each PT_NOTE entry in the
  623. * program header table pointed to by @ehdr_ptr to real size of ELF
  624. * note segment.
  625. */
  626. static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
  627. {
  628. int i, rc=0;
  629. Elf32_Phdr *phdr_ptr;
  630. Elf32_Nhdr *nhdr_ptr;
  631. phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
  632. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  633. void *notes_section;
  634. u64 offset, max_sz, sz, real_sz = 0;
  635. if (phdr_ptr->p_type != PT_NOTE)
  636. continue;
  637. max_sz = phdr_ptr->p_memsz;
  638. offset = phdr_ptr->p_offset;
  639. notes_section = kmalloc(max_sz, GFP_KERNEL);
  640. if (!notes_section)
  641. return -ENOMEM;
  642. rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
  643. if (rc < 0) {
  644. kfree(notes_section);
  645. return rc;
  646. }
  647. nhdr_ptr = notes_section;
  648. while (nhdr_ptr->n_namesz != 0) {
  649. sz = sizeof(Elf32_Nhdr) +
  650. (((u64)nhdr_ptr->n_namesz + 3) & ~3) +
  651. (((u64)nhdr_ptr->n_descsz + 3) & ~3);
  652. if ((real_sz + sz) > max_sz) {
  653. pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
  654. nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
  655. break;
  656. }
  657. real_sz += sz;
  658. nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
  659. }
  660. kfree(notes_section);
  661. phdr_ptr->p_memsz = real_sz;
  662. if (real_sz == 0) {
  663. pr_warn("Warning: Zero PT_NOTE entries found\n");
  664. }
  665. }
  666. return 0;
  667. }
  668. /**
  669. * get_note_number_and_size_elf32 - get the number of PT_NOTE program
  670. * headers and sum of real size of their ELF note segment headers and
  671. * data.
  672. *
  673. * @ehdr_ptr: ELF header
  674. * @nr_ptnote: buffer for the number of PT_NOTE program headers
  675. * @sz_ptnote: buffer for size of unique PT_NOTE program header
  676. *
  677. * This function is used to merge multiple PT_NOTE program headers
  678. * into a unique single one. The resulting unique entry will have
  679. * @sz_ptnote in its phdr->p_mem.
  680. *
  681. * It is assumed that program headers with PT_NOTE type pointed to by
  682. * @ehdr_ptr has already been updated by update_note_header_size_elf32
  683. * and each of PT_NOTE program headers has actual ELF note segment
  684. * size in its p_memsz member.
  685. */
  686. static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr,
  687. int *nr_ptnote, u64 *sz_ptnote)
  688. {
  689. int i;
  690. Elf32_Phdr *phdr_ptr;
  691. *nr_ptnote = *sz_ptnote = 0;
  692. phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
  693. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  694. if (phdr_ptr->p_type != PT_NOTE)
  695. continue;
  696. *nr_ptnote += 1;
  697. *sz_ptnote += phdr_ptr->p_memsz;
  698. }
  699. return 0;
  700. }
  701. /**
  702. * copy_notes_elf32 - copy ELF note segments in a given buffer
  703. *
  704. * @ehdr_ptr: ELF header
  705. * @notes_buf: buffer into which ELF note segments are copied
  706. *
  707. * This function is used to copy ELF note segment in the 1st kernel
  708. * into the buffer @notes_buf in the 2nd kernel. It is assumed that
  709. * size of the buffer @notes_buf is equal to or larger than sum of the
  710. * real ELF note segment headers and data.
  711. *
  712. * It is assumed that program headers with PT_NOTE type pointed to by
  713. * @ehdr_ptr has already been updated by update_note_header_size_elf32
  714. * and each of PT_NOTE program headers has actual ELF note segment
  715. * size in its p_memsz member.
  716. */
  717. static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf)
  718. {
  719. int i, rc=0;
  720. Elf32_Phdr *phdr_ptr;
  721. phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1);
  722. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  723. u64 offset;
  724. if (phdr_ptr->p_type != PT_NOTE)
  725. continue;
  726. offset = phdr_ptr->p_offset;
  727. rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
  728. &offset);
  729. if (rc < 0)
  730. return rc;
  731. notes_buf += phdr_ptr->p_memsz;
  732. }
  733. return 0;
  734. }
  735. /* Merges all the PT_NOTE headers into one. */
static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
					   char **notes_buf, size_t *notes_sz)
{
	int i, nr_ptnote = 0, rc = 0;
	char *tmp;
	Elf32_Ehdr *ehdr_ptr;
	Elf32_Phdr phdr;
	u64 phdr_sz = 0, note_off;

	ehdr_ptr = (Elf32_Ehdr *)elfptr;

	/* First fix up each PT_NOTE's p_memsz to its real note size. */
	rc = update_note_header_size_elf32(ehdr_ptr);
	if (rc < 0)
		return rc;

	rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz);
	if (rc < 0)
		return rc;

	/* Note data is gathered into a separate page-aligned buffer. */
	*notes_sz = roundup(phdr_sz, PAGE_SIZE);
	*notes_buf = alloc_elfnotes_buf(*notes_sz);
	if (!*notes_buf)
		return -ENOMEM;

	rc = copy_notes_elf32(ehdr_ptr, *notes_buf);
	if (rc < 0)
		return rc;

	/* Prepare merged PT_NOTE program header. */
	phdr.p_type = PT_NOTE;
	phdr.p_flags = 0;
	/* note data follows the ELF header and the post-merge phdr table */
	note_off = sizeof(Elf32_Ehdr) +
		(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr);
	phdr.p_offset = roundup(note_off, PAGE_SIZE);
	phdr.p_vaddr = phdr.p_paddr = 0;
	phdr.p_filesz = phdr.p_memsz = phdr_sz;
	phdr.p_align = 0;

	/* Add merged PT_NOTE program header*/
	tmp = elfptr + sizeof(Elf32_Ehdr);
	memcpy(tmp, &phdr, sizeof(phdr));
	tmp += sizeof(phdr);

	/* Remove unwanted PT_NOTE program headers. */
	i = (nr_ptnote - 1) * sizeof(Elf32_Phdr);	/* bytes reclaimed */
	*elfsz = *elfsz - i;
	/* slide the remaining (non-note) phdrs up behind the merged one */
	memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr)));
	memset(elfptr + *elfsz, 0, i);
	*elfsz = roundup(*elfsz, PAGE_SIZE);

	/* Modify e_phnum to reflect merged headers. */
	ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;

	return 0;
}
  781. /* Add memory chunks represented by program headers to vmcore list. Also update
  782. * the new offset fields of exported program headers. */
static int __init process_ptload_program_headers_elf64(char *elfptr,
						       size_t elfsz,
						       size_t elfnotes_sz,
						       struct list_head *vc_list)
{
	int i;
	Elf64_Ehdr *ehdr_ptr;
	Elf64_Phdr *phdr_ptr;
	loff_t vmcore_off;
	struct vmcore *new;

	ehdr_ptr = (Elf64_Ehdr *)elfptr;
	phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */

	/* Skip Elf header, program headers and Elf note segment. */
	vmcore_off = elfsz + elfnotes_sz;

	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
		u64 paddr, start, end, size;

		if (phdr_ptr->p_type != PT_LOAD)
			continue;

		/*
		 * NOTE(review): p_offset is treated as the chunk's physical
		 * address -- the crash ELF headers appear to be prepared
		 * that way by the header producer; confirm before changing.
		 */
		paddr = phdr_ptr->p_offset;
		/* widen the chunk to whole pages */
		start = rounddown(paddr, PAGE_SIZE);
		end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
		size = end - start;

		/* Add this contiguous chunk of memory to vmcore list.*/
		new = get_new_element();
		if (!new)
			return -ENOMEM;
		new->paddr = start;
		new->size = size;
		list_add_tail(&new->list, vc_list);

		/* Update the program header offset (keep intra-page shift). */
		phdr_ptr->p_offset = vmcore_off + (paddr - start);
		vmcore_off = vmcore_off + size;
	}
	return 0;
}
static int __init process_ptload_program_headers_elf32(char *elfptr,
						       size_t elfsz,
						       size_t elfnotes_sz,
						       struct list_head *vc_list)
{
	int i;
	Elf32_Ehdr *ehdr_ptr;
	Elf32_Phdr *phdr_ptr;
	loff_t vmcore_off;
	struct vmcore *new;

	ehdr_ptr = (Elf32_Ehdr *)elfptr;
	phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */

	/* Skip Elf header, program headers and Elf note segment. */
	vmcore_off = elfsz + elfnotes_sz;

	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
		u64 paddr, start, end, size;

		if (phdr_ptr->p_type != PT_LOAD)
			continue;

		/*
		 * NOTE(review): p_offset is treated as the chunk's physical
		 * address -- the crash ELF headers appear to be prepared
		 * that way by the header producer; confirm before changing.
		 */
		paddr = phdr_ptr->p_offset;
		/* widen the chunk to whole pages */
		start = rounddown(paddr, PAGE_SIZE);
		end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
		size = end - start;

		/* Add this contiguous chunk of memory to vmcore list.*/
		new = get_new_element();
		if (!new)
			return -ENOMEM;
		new->paddr = start;
		new->size = size;
		list_add_tail(&new->list, vc_list);

		/* Update the program header offset (keep intra-page shift). */
		phdr_ptr->p_offset = vmcore_off + (paddr - start);
		vmcore_off = vmcore_off + size;
	}
	return 0;
}
  853. /* Sets offset fields of vmcore elements. */
  854. static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
  855. struct list_head *vc_list)
  856. {
  857. loff_t vmcore_off;
  858. struct vmcore *m;
  859. /* Skip Elf header, program headers and Elf note segment. */
  860. vmcore_off = elfsz + elfnotes_sz;
  861. list_for_each_entry(m, vc_list, list) {
  862. m->offset = vmcore_off;
  863. vmcore_off += m->size;
  864. }
  865. }
/*
 * Release the buffers built while parsing the crash ELF headers.
 * Safe to call on a partially initialized state: free_pages() and
 * vfree() both tolerate NULL, and the pointers are cleared so a
 * second call is a no-op.
 */
static void free_elfcorebuf(void)
{
	/* Header copy was obtained with __get_free_pages(). */
	free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig));
	elfcorebuf = NULL;
	/* Merged note buffer was vmalloc'ed by merge_note_headers_elf*(). */
	vfree(elfnotes_buf);
	elfnotes_buf = NULL;
}
/*
 * parse_crash_elf64_headers - build the ELF64 view of /proc/vmcore.
 *
 * Reads the crash ELF header from elfcorehdr_addr, validates it, copies
 * all ELF headers into elfcorebuf, merges the PT_NOTE segments into
 * elfnotes_buf and records PT_LOAD memory chunks on vmcore_list.
 *
 * Returns 0 on success or a negative errno; on failure all buffers
 * allocated here are released via free_elfcorebuf().
 */
static int __init parse_crash_elf64_headers(void)
{
	int rc=0;
	Elf64_Ehdr ehdr;
	u64 addr;

	addr = elfcorehdr_addr;

	/* Read Elf header */
	rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr);
	if (rc < 0)
		return rc;

	/* Do some basic Verification. */
	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
		(ehdr.e_type != ET_CORE) ||
		!vmcore_elf64_check_arch(&ehdr) ||
		ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
		ehdr.e_version != EV_CURRENT ||
		ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
		ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
		ehdr.e_phnum == 0) {
		pr_warn("Warning: Core image elf header is not sane\n");
		return -EINVAL;
	}

	/* Read in all elf headers. */
	elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) +
				ehdr.e_phnum * sizeof(Elf64_Phdr);
	elfcorebuf_sz = elfcorebuf_sz_orig;
	/* Page-backed allocation: merge code memmoves/rounds within it. */
	elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					      get_order(elfcorebuf_sz_orig));
	if (!elfcorebuf)
		return -ENOMEM;
	addr = elfcorehdr_addr;
	rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
	if (rc < 0)
		goto fail;

	/* Merge all PT_NOTE headers into one. */
	rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz,
				      &elfnotes_buf, &elfnotes_sz);
	if (rc)
		goto fail;
	rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz,
						  elfnotes_sz, &vmcore_list);
	if (rc)
		goto fail;
	set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
	return 0;
fail:
	free_elfcorebuf();
	return rc;
}
/*
 * parse_crash_elf32_headers - build the ELF32 view of /proc/vmcore.
 *
 * ELF32 counterpart of parse_crash_elf64_headers(): read and validate
 * the crash ELF header, copy all headers into elfcorebuf, merge the
 * PT_NOTE segments and record PT_LOAD memory chunks on vmcore_list.
 *
 * Returns 0 on success or a negative errno; on failure all buffers
 * allocated here are released via free_elfcorebuf().
 */
static int __init parse_crash_elf32_headers(void)
{
	int rc=0;
	Elf32_Ehdr ehdr;
	u64 addr;

	addr = elfcorehdr_addr;

	/* Read Elf header */
	rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr);
	if (rc < 0)
		return rc;

	/* Do some basic Verification. */
	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
		(ehdr.e_type != ET_CORE) ||
		!vmcore_elf32_check_arch(&ehdr) ||
		ehdr.e_ident[EI_CLASS] != ELFCLASS32||
		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
		ehdr.e_version != EV_CURRENT ||
		ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
		ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
		ehdr.e_phnum == 0) {
		pr_warn("Warning: Core image elf header is not sane\n");
		return -EINVAL;
	}

	/* Read in all elf headers. */
	elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
	elfcorebuf_sz = elfcorebuf_sz_orig;
	/* Page-backed allocation: merge code memmoves/rounds within it. */
	elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					      get_order(elfcorebuf_sz_orig));
	if (!elfcorebuf)
		return -ENOMEM;
	addr = elfcorehdr_addr;
	rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
	if (rc < 0)
		goto fail;

	/* Merge all PT_NOTE headers into one. */
	rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz,
				      &elfnotes_buf, &elfnotes_sz);
	if (rc)
		goto fail;
	rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz,
						  elfnotes_sz, &vmcore_list);
	if (rc)
		goto fail;
	set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
	return 0;
fail:
	free_elfcorebuf();
	return rc;
}
  972. static int __init parse_crash_elf_headers(void)
  973. {
  974. unsigned char e_ident[EI_NIDENT];
  975. u64 addr;
  976. int rc=0;
  977. addr = elfcorehdr_addr;
  978. rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr);
  979. if (rc < 0)
  980. return rc;
  981. if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
  982. pr_warn("Warning: Core image elf header not found\n");
  983. return -EINVAL;
  984. }
  985. if (e_ident[EI_CLASS] == ELFCLASS64) {
  986. rc = parse_crash_elf64_headers();
  987. if (rc)
  988. return rc;
  989. } else if (e_ident[EI_CLASS] == ELFCLASS32) {
  990. rc = parse_crash_elf32_headers();
  991. if (rc)
  992. return rc;
  993. } else {
  994. pr_warn("Warning: Core image elf header is not sane\n");
  995. return -EINVAL;
  996. }
  997. /* Determine vmcore size. */
  998. vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
  999. &vmcore_list);
  1000. return 0;
  1001. }
/* Init function for vmcore module. */
static int __init vmcore_init(void)
{
	int rc = 0;

	/* Allow architectures to allocate ELF header in 2nd kernel */
	rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size);
	if (rc)
		return rc;
	/*
	 * If elfcorehdr= has been passed in cmdline or created in 2nd kernel,
	 * then capture the dump.
	 */
	if (!(is_vmcore_usable()))
		return rc;
	rc = parse_crash_elf_headers();
	if (rc) {
		pr_warn("Kdump: vmcore not initialized\n");
		return rc;
	}
	/*
	 * The headers were copied into elfcorebuf above, so the original
	 * reserved region can be handed back before exporting the file.
	 */
	elfcorehdr_free(elfcorehdr_addr);
	elfcorehdr_addr = ELFCORE_ADDR_ERR;

	/* Root-readable only: the dump may contain sensitive memory. */
	proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
	if (proc_vmcore)
		proc_vmcore->size = vmcore_size;
	return 0;
}
fs_initcall(vmcore_init);
  1029. /* Cleanup function for vmcore module. */
  1030. void vmcore_cleanup(void)
  1031. {
  1032. struct list_head *pos, *next;
  1033. if (proc_vmcore) {
  1034. proc_remove(proc_vmcore);
  1035. proc_vmcore = NULL;
  1036. }
  1037. /* clear the vmcore list. */
  1038. list_for_each_safe(pos, next, &vmcore_list) {
  1039. struct vmcore *m;
  1040. m = list_entry(pos, struct vmcore, list);
  1041. list_del(&m->list);
  1042. kfree(m);
  1043. }
  1044. free_elfcorebuf();
  1045. }