mmap.c

/*
 * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
#include "debug.h"
#include "event.h"
#include "mmap.h"
#include "util.h" /* page_size */

size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
	return map->mask + 1 + page_size;
}
/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
					 u64 *startp, u64 end)
{
	unsigned char *data = map->base + page_size;
	union perf_event *event = NULL;
	int diff = end - *startp;

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[*startp & map->mask];
		size = event->header.size;

		if (size < sizeof(event->header) || diff < (int)size)
			return NULL;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
			unsigned int offset = *startp;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = map->event_copy;

			do {
				cpy = min(map->mask + 1 - (offset & map->mask), len);
				memcpy(dst, &data[offset & map->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *)map->event_copy;
		}

		*startp += size;
	}

	return event;
}
/*
 * Read events from the ring buffer one by one.
 * Return one event for each call.
 *
 * Usage:
 * perf_mmap__read_init()
 * while(event = perf_mmap__read_event()) {
 *	//process the event
 *	perf_mmap__consume()
 * }
 * perf_mmap__read_done()
 *
 * (An illustrative sketch of this loop follows the function below.)
 */
union perf_event *perf_mmap__read_event(struct perf_mmap *map)
{
	union perf_event *event;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return NULL;

	/* non-overwrite doesn't pause the ring buffer */
	if (!map->overwrite)
		map->end = perf_mmap__read_head(map);

	event = perf_mmap__read(map, &map->start, map->end);

	if (!map->overwrite)
		map->prev = map->start;

	return event;
}
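
/*
 * Illustrative sketch of the read loop documented above: drain every event
 * currently available in one ring buffer. The 'process' callback and the
 * function name are assumptions made for this example only; they are not
 * part of the perf_mmap API.
 */
static void __maybe_unused perf_mmap__example_drain(struct perf_mmap *map,
						    void (*process)(union perf_event *event))
{
	union perf_event *event;

	if (perf_mmap__read_init(map) < 0)
		return;	/* empty (-EAGAIN) or already unmapped (-ENOENT) */

	while ((event = perf_mmap__read_event(map)) != NULL) {
		process(event);
		perf_mmap__consume(map);
	}

	perf_mmap__read_done(map);
}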
static bool perf_mmap__empty(struct perf_mmap *map)
{
	return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
}

void perf_mmap__get(struct perf_mmap *map)
{
	refcount_inc(&map->refcnt);
}

void perf_mmap__put(struct perf_mmap *map)
{
	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

	if (refcount_dec_and_test(&map->refcnt))
		perf_mmap__munmap(map);
}

void perf_mmap__consume(struct perf_mmap *map)
{
	if (!map->overwrite) {
		u64 old = map->prev;

		perf_mmap__write_tail(map, old);
	}

	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
		perf_mmap__put(map);
}

int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
				       off_t auxtrace_offset __maybe_unused,
				       unsigned int auxtrace_pages __maybe_unused,
				       bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
					  struct perf_evlist *evlist __maybe_unused,
					  int idx __maybe_unused,
					  bool per_cpu __maybe_unused)
{
}

void perf_mmap__munmap(struct perf_mmap *map)
{
	if (map->base != NULL) {
		munmap(map->base, perf_mmap__mmap_len(map));
		map->base = NULL;
		map->fd = -1;
		refcount_set(&map->refcnt, 0);
	}
	auxtrace_mmap__munmap(&map->auxtrace_mmap);
}
int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu)
{
	/*
	 * The last one will be done at perf_mmap__consume(), so that we
	 * make sure we don't prevent tools from consuming every last event in
	 * the ring buffer.
	 *
	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
	 * anymore, but the last events for it are still in the ring buffer,
	 * waiting to be consumed.
	 *
	 * Tools can choose to ignore this at their own discretion, but the
	 * evlist layer can't just drop it when filtering events in
	 * perf_evlist__filter_pollfd().
	 *
	 * (A perf_mmap__get()/perf_mmap__put() usage sketch follows this
	 * function.)
	 */
	refcount_set(&map->refcnt, 2);
	map->prev = 0;
	map->mask = mp->mask;
	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
			 MAP_SHARED, fd, 0);
	if (map->base == MAP_FAILED) {
		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
			  errno);
		map->base = NULL;
		return -1;
	}
	map->fd = fd;
	map->cpu = cpu;

	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
				&mp->auxtrace_mp, map->base, fd))
		return -1;

	return 0;
}
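
/*
 * Sketch of how an additional user of the map would interact with the
 * reference count set up above: take a reference for the duration of the
 * work and drop it afterwards, leaving the final put to perf_mmap__consume()
 * once the buffer is empty. The helper name and the 'use' callback are
 * assumptions made for this example only.
 */
static void __maybe_unused perf_mmap__example_borrow(struct perf_mmap *map,
						     void (*use)(struct perf_mmap *map))
{
	perf_mmap__get(map);	/* pin the mapping while 'use' runs */
	use(map);
	perf_mmap__put(map);	/* may munmap if this was the last reference */
}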
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = *start;
	int size = mask + 1;

	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);

	pheader = (struct perf_event_header *)(buf + (*start & mask));

	while (true) {
		if (evt_head - *start >= (unsigned int)size) {
			pr_debug("Finished reading overwrite ring buffer: rewind\n");
			if (evt_head - *start > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading overwrite ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}
/*
 * Report the start and end of the available data in the ring buffer.
 */
static int __perf_mmap__read_init(struct perf_mmap *md)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;

	md->start = md->overwrite ? head : old;
	md->end = md->overwrite ? old : head;

	if (md->start == md->end)
		return -EAGAIN;

	size = md->end - md->start;
	if (size > (unsigned long)(md->mask) + 1) {
		if (!md->overwrite) {
			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

			md->prev = head;
			perf_mmap__consume(md);
			return -EAGAIN;
		}

		/*
		 * Backward ring buffer is full. We still have a chance to read
		 * most of the data from it.
		 */
		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
			return -EINVAL;
	}

	return 0;
}

int perf_mmap__read_init(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return -ENOENT;

	return __perf_mmap__read_init(map);
}
int perf_mmap__push(struct perf_mmap *md, void *to,
		    int push(void *to, void *buf, size_t size))
{
	u64 head = perf_mmap__read_head(md);
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	rc = perf_mmap__read_init(md);
	if (rc < 0)
		return (rc == -EAGAIN) ? 0 : -1;

	size = md->end - md->start;

	if ((md->start & md->mask) + size != (md->end & md->mask)) {
		buf = &data[md->start & md->mask];
		size = md->mask + 1 - (md->start & md->mask);
		md->start += size;

		if (push(to, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[md->start & md->mask];
	size = md->end - md->start;
	md->start += size;

	if (push(to, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md);
out:
	return rc;
}
/*
 * Mandatory for overwrite mode.
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set tail to map->prev.
 * We need to correct map->prev to head, which is the end point of the next
 * read. (A two-snapshot sketch follows this function.)
 */
void perf_mmap__read_done(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return;

	map->prev = perf_mmap__read_head(map);
}
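
/*
 * Sketch of two consecutive snapshots of an overwrite (backward) ring
 * buffer, illustrating the correction above: perf_mmap__read_done() stores
 * the current head in map->prev, which becomes the end point of the next
 * perf_mmap__read_init(). The 'process' callback and the function name are
 * assumptions made for this example only.
 */
static void __maybe_unused perf_mmap__example_snapshots(struct perf_mmap *map,
							void (*process)(union perf_event *event))
{
	union perf_event *event;
	int round;

	for (round = 0; round < 2; round++) {
		if (perf_mmap__read_init(map) < 0)
			continue;	/* nothing new in this round */

		while ((event = perf_mmap__read_event(map)) != NULL) {
			process(event);
			perf_mmap__consume(map);
		}

		/* without this, the next round would report already-read data */
		perf_mmap__read_done(map);
	}
}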