virtualization_tlb.c 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. /*
  2. In this exercise, you are to measure the size and cost of accessing a TLB.
  3. The idea is based on work by Saavedra-Barrera, who developed a simple but
  4. beautiful method to measure numerous aspects of cache hierarchies, all
  5. with a very simple user-level program. Read his work for more details.
  6. The basic idea is to access some number of pages within a large data structure
  7. (e.g., an array) and to time those accesses. For example, let’s say the TLB
  8. size of a machine happens to be 4 (which would be very small, but useful for
  9. the purposes of this discussion). If you write a program that touches 4 or fewer
  10. pages, each access should be a TLB hit, and thus relatively fast. However, once
  11. you touch 5 pages or more, repeatedly in a loop, each access will suddenly jump
  12. in cost, to that of a TLB miss.
  13. */
  14. #include <unistd.h>
  15. #include <stdio.h>
  16. #include <signal.h>
  17. #include <stdlib.h>
  18. #include <stdint.h>
  19. #include <stdbool.h>
  20. #include <time.h>
  21. #include <linux/perf_event.h>
  22. #include <linux/hw_breakpoint.h>
  23. #include <sys/syscall.h>
  24. #include <sys/types.h>
  25. #include <sys/wait.h>
  26. #include <sys/prctl.h>
  27. #define __GNU_SOURCE
  28. #define __USE_GNU
  29. #include <sched.h>
  30. static const int tested_cpu = 0;
  31. static const int group_fd_leader = -1;
  32. struct read_format
  33. {
  34. uint64_t value;
  35. uint64_t time_enabled;
  36. uint64_t time_running;
  37. uint64_t id;
  38. };
  39. struct cpu_process_monitor_conf
  40. {
  41. pid_t pid;
  42. int cpu;
  43. };
  44. struct cpu_process_monitor_conf
  45. get_conf_monitor_each_process_on_cpu(int cpu)
  46. {
  47. return (struct cpu_process_monitor_conf){-1, cpu};
  48. }
  49. struct cpu_process_monitor_conf
  50. get_conf_monitor_calling_process(void)
  51. {
  52. return (struct cpu_process_monitor_conf){0, -1};
  53. }
  54. struct perf_event_attr
  55. get_perf_event_attr_for_page_fault(void)
  56. {
  57. struct perf_event_attr attr = {0};
  58. attr.type = PERF_TYPE_SOFTWARE;
  59. attr.size = sizeof(struct perf_event_attr);
  60. attr.config = PERF_COUNT_SW_PAGE_FAULTS;
  61. // attr.config = PERF_COUNT_SW_CPU_CLOCK;
  62. attr.disabled = 0;
  63. attr.exclude_user = 0;
  64. attr.read_format = PERF_FORMAT_ID;
  65. return attr;
  66. }
  67. void
  68. measure(void)
  69. {
  70. struct cpu_process_monitor_conf conf =
  71. get_conf_monitor_calling_process();
  72. // get_conf_monitor_each_process_on_cpu(tested_cpu);
  73. int fd;
  74. struct perf_event_attr attr = get_perf_event_attr_for_page_fault();
  75. if (fd = syscall(SYS_perf_event_open,
  76. &attr,
  77. conf.pid, conf.cpu,
  78. group_fd_leader, 0) == -1)
  79. {
  80. perror("perf_event_open");
  81. exit(EXIT_FAILURE);
  82. }
  83. unsigned char buffer[sizeof(struct read_format)];
  84. if (read(fd, buffer, sizeof(struct read_format)) > 0)
  85. {
  86. puts("something inside");
  87. }
  88. close(fd);
  89. }
  90. struct pipe_t
  91. {
  92. int pipefd[2];
  93. bool valid;
  94. };
  95. #define on_error(valid, msg) \
  96. if (!valid) { \
  97. perror(msg); \
  98. exit(EXIT_FAILURE); } \
  99. static void
  100. sig_handler(int signum)
  101. {
  102. puts("child terminated because of parent");
  103. exit(EXIT_SUCCESS);
  104. }
  105. static void reactor(long page_size)
  106. {
  107. const int area_pages = 10;
  108. const size_t ss = page_size / sizeof(int) * area_pages;
  109. int *area = malloc(ss);
  110. for (int i = 0; i < ss; i += page_size / sizeof(int) / 2)
  111. {
  112. clock_t start, end;
  113. start = clock();
  114. area[i] = rand() % 100 + 20;
  115. end = clock();
  116. printf("clocks[%d]: %ld\n", i, end - start);
  117. }
  118. free(area);
  119. }
  120. int
  121. main(void)
  122. {
  123. pid_t pid = getpid();
  124. cpu_set_t cpu_set = {0};
  125. CPU_SET(tested_cpu, &cpu_set);
  126. sched_setaffinity(pid, sizeof cpu_set, &cpu_set);
  127. printf("current pid: %ld\n", pid);
  128. printf("on cpu: %d\n", sched_getcpu());
  129. long page_size = sysconf(_SC_PAGESIZE);
  130. printf("pagesize: %ld\n", page_size);
  131. struct pipe_t a_pipe = {0};
  132. if (pipe(a_pipe.pipefd) == 0)
  133. a_pipe.valid = true;
  134. on_error(a_pipe.valid, "pipe");
  135. pid_t fpid = fork();
  136. if (fpid == -1)
  137. {
  138. perror("fork");
  139. exit(EXIT_FAILURE);
  140. }
  141. if (fpid == 0)
  142. {
  143. signal(SIGTERM, sig_handler);
  144. prctl(PR_SET_PDEATHSIG, SIGTERM);
  145. close(a_pipe.pipefd[0]);
  146. write(a_pipe.pipefd[1], "hello", 5);
  147. close(a_pipe.pipefd[1]);
  148. measure();
  149. exit(EXIT_SUCCESS);
  150. }
  151. close(a_pipe.pipefd[1]);
  152. char buffer[10];
  153. ssize_t ss;
  154. if ((ss = read(a_pipe.pipefd[0], buffer, 10)) > 0)
  155. {
  156. buffer[ss] = '\0';
  157. puts(buffer);
  158. }
  159. close(a_pipe.pipefd[0]);
  160. reactor(page_size);
  161. puts("reactor finished");
  162. wait(NULL);
  163. return 0;
  164. }