/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048
/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
};
/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
};
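
/*
 * Grow the stack array by STACK_GROWTH entries. The array is reallocated, so
 * existing entries are preserved; on failure the old allocation is left
 * untouched and -ENOMEM is returned.
 */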
static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}
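
/*
 * Allocate and initialize a thread stack. kernel_start is taken from the
 * thread's machine when available; otherwise user and kernel addresses are
 * split at bit 63. crp may be NULL when the stack is only used to synthesize
 * callchains rather than to emit call/return records.
 */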
static struct thread_stack *thread_stack__new(struct thread *thread,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts;

	ts = zalloc(sizeof(struct thread_stack));
	if (!ts)
		return NULL;

	if (thread_stack__grow(ts)) {
		free(ts);
		return NULL;
	}

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return ts;
}
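
/*
 * Push a return address expected for a subsequent 'return' branch. If the
 * stack cannot be grown, the accumulated entries are discarded with a
 * warning and -ENOMEM is returned; the new address is still stored in the
 * now-empty stack.
 */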
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}
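
/*
 * Report one matched call/return pair (the entry at @idx) to the registered
 * call/return processor. The branch count is reported as the delta since the
 * entry was pushed, and the NO_CALL / NO_RETURN flags mark pairs where one
 * side was never actually seen.
 */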
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;

	return crp->process(&cr, crp->data);
}
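
/*
 * Empty the stack. With a call/return processor attached, every remaining
 * entry is reported first as a call with no return; without one, the entries
 * are simply dropped.
 */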
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	if (thread->ts)
		return __thread_stack__flush(thread, thread->ts);

	return 0;
}
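
/*
 * Feed one branch event into the thread stack. 'call' branches push the
 * address of the instruction following the call (from_ip + insn_len), and
 * 'return' branches pop down to the matching return address. A change of
 * trace_nr indicates a discontinuous trace, so the stack is flushed first.
 *
 * A hypothetical caller decoding a call branch might do, for example:
 *
 *	thread_stack__event(thread, PERF_IP_FLAG_CALL, from_ip, to_ip,
 *			    insn_len, trace_nr);
 */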
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	if (!thread)
		return -EINVAL;

	if (!thread->ts) {
		thread->ts = thread_stack__new(thread, NULL);
		if (!thread->ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		thread->ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (thread->ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(thread->ts, ret_addr);
	} else if (flags & PERF_IP_FLAG_RETURN) {
		if (!from_ip)
			return 0;
		thread_stack__pop(thread->ts, to_ip);
	}

	return 0;
}
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
	if (!thread || !thread->ts)
		return;

	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}
}

void thread_stack__free(struct thread *thread)
{
	if (thread->ts) {
		__thread_stack__flush(thread, thread->ts);
		zfree(&thread->ts->stack);
		zfree(&thread->ts);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}
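
/*
 * Synthesize a callchain from the current stack state. The chain starts with
 * a PERF_CONTEXT_USER/KERNEL marker followed by the sample ip, then the saved
 * return addresses from innermost to outermost, with a new context marker
 * inserted whenever the addresses cross the user/kernel boundary.
 */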
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!thread || !thread->ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
		ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}
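
/*
 * Allocate a call/return processor. @process is the callback invoked for
 * every matched call/return pair and @data is passed back to it unchanged;
 * the embedded call path root owns the tree of struct call_path nodes.
 */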
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}
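
/*
 * Push a stack entry carrying full call-path information (timestamp,
 * external reference and branch count) for later reporting by
 * thread_stack__call_return().
 */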
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call)
{
	struct thread_stack_entry *tse;
	int err;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;

	return 0;
}

static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}
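
/*
 * Seed an empty stack with the current location so that the first sample of
 * a thread still has a parent call path; the entry is marked no_call because
 * no 'call' was actually observed.
 */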
static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
				     true);
}
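
/*
 * Handle a 'return' whose matching 'call' was never seen. A return from
 * kernel to userspace first pops any kernel entries; then a temporary entry
 * for the returning function is pushed and immediately popped so that the
 * pair is still reported, flagged as having no call.
 */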
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
				    to_al->sym);
}
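
/*
 * A 'trace begin' branch closes the gap that a preceding 'trace end' opened,
 * so if the top of the stack is the dummy 'trace end' entry (NULL symbol,
 * ip 0) it is popped and reported here.
 */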
static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false);
}
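
/*
 * Process one branch sample with call/return reporting. This supersedes the
 * simpler thread_stack__event() path: the stack is rebuilt with call-path
 * entries, flushed on exec, seeded when empty, and each call / return /
 * trace-begin / trace-end flag is dispatched to the helpers above.
 *
 * A caller is expected to create the processor once and then feed it
 * samples, for example (illustrative sketch only; the callback and
 * surrounding variable names are assumed):
 *
 *	crp = call_return_processor__new(my_process_call_return, my_data);
 *	...
 *	err = thread_stack__process(thread, comm, sample, from_al, to_al,
 *				    ref, crp);
 *	...
 *	call_return_processor__free(crp);
 */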
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts;
	int err = 0;

	if (ts) {
		if (!ts->crp) {
			/* Supersede thread_stack__event() */
			thread_stack__free(thread);
			thread->ts = thread_stack__new(thread, crp);
			if (!thread->ts)
				return -ENOMEM;
			ts = thread->ts;
			ts->comm = comm;
		}
	} else {
		thread->ts = thread_stack__new(thread, crp);
		if (!thread->ts)
			return -ENOMEM;
		ts = thread->ts;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
					   ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}
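
/* Current number of entries on the thread's stack, or 0 if it has none. */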
size_t thread_stack__depth(struct thread *thread)
{
	if (!thread->ts)
		return 0;

	return thread->ts->cnt;
}