% talk.tex
  1. % Copyright (C) 2017 Koz Ross <koz.ross@retro-freedom.nz>
  2. % This work is licensed under the Creative Commons Attribution-ShareAlike 4.0
  3. % International License. To view a copy of this license, visit
  4. % http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative
  5. % Commons, PO Box 1866, Mountain View, CA 94042, USA.
  6. \documentclass{beamer}
  7. % \usepackage[utf8]{inputenc}% removed: this file uses fontspec (line below), which requires XeLaTeX/LuaLaTeX; inputenc is unnecessary there and must not be loaded alongside fontspec
  8. \usecolortheme{seahorse}
  9. \usefonttheme{professionalfonts}
  10. \usepackage{fontspec}
  11. \setmainfont{DejaVu Sans}
  12. \setbeamertemplate{navigation symbols}{}
  13. \usepackage{caption}
  14. \usepackage[font=itshape, begintext=``, endtext='']{quoting}
  15. \usepackage{fancyvrb}
  16. \usepackage{color}
  17. \usepackage[noend]{algpseudocode}
  18. \usepackage{amsmath}
  19. \newtheorem{observation}{Observation}
  20. \title{Dependency graphs}
  21. \subtitle{Or: why scheduling is {\em hard\/}}
  22. \titlegraphic{\includegraphics[scale=0.8]{img/logo.pdf}}
  23. \author{Koz Ross}
  24. \date{19th October, 2017}
  25. \begin{document}
  26. \begin{frame}[c]
  27. \titlepage{}
  28. \end{frame}
  29. \begin{frame}[c]
  30. \frametitle{Outline}
  31. \tableofcontents
  32. \end{frame}
  33. \section{Introduction}
  34. \begin{frame}[c]
  35. \frametitle{A simple recipe: tomato scrambled eggs}
  36. \pause{}
  37. \begin{enumerate}
  38. \item Chop tomatoes into small pieces.
  39. \item Break the eggs into a bowl.
  40. \item Beat the eggs.
  41. \item Mix the tomatoes into the eggs.
  42. \item Heat up a frying pan to medium heat.
  43. \item Dump the egg-and-tomato mixture into the frying pan.
  44. \item Cook the mixture until desired consistency.
  45. \item Season with salt and pepper.
  46. \end{enumerate}
  47. \pause{}
  48. \begin{itemize}
  49. \item Some tasks can be done simultaneously (e.g.\@ $1$ and
  50. $3$)\pause{}
  51. \item Other tasks {\em must\/} be done in a certain order (e.g.\@ $2$ must
  52. happen before $3$)\pause{}
  53. \item These together determine how (and how fast!) we can finish all the tasks
  54. \end{itemize}
  55. \end{frame}
  56. \begin{frame}[c]
  57. \frametitle{Why does it matter?}
  58. \begin{itemize}
  59. \item Knowing how many tasks (and which ones) can be done simultaneously
  60. helps us finish faster\pause{}
  61. \item Knowing how many additional {\em processes\/} we can benefit from
  62. (and how long they'll spend idle) lets us use our resources more
  63. efficiently\pause{}
  64. \item Finding the {\em critical path\/} gives us a hard bound on how quickly
  65. we can finish at all\pause{}
  66. \item {\em Especially\/} important for computers (parallel processing is how
  67. we get all of our speed gains these days)\pause{}
  68. \item A special (and limited) case of {\em scheduling\/} (this acts as a
  69. `difficulty floor' for the more general version)
  70. \end{itemize}
  71. \end{frame}
  72. \section{Preliminaries}
  73. \begin{frame}[c]
  74. \frametitle{The basics}
  75. Let $\mathbb{N} = \{0, 1, 2, \ldots\}$ be the set of {\em natural numbers}.
  76. We use $\mathbb{N}_k = \{ x \in \mathbb{N} \mid x < k\}$ for $k \in
  77. \mathbb{N}$.\pause{} For example:
  78. \begin{itemize}
  79. \item $\mathbb{N}_0 = \{\}$ (there are no natural numbers less than
  80. $0$)\pause{}
  81. \item $\mathbb{N}_1 = \{0\}$\pause{}
  82. \item $\mathbb{N}_5 = \{0, 1, 2, 3, 4\}$\pause{}
  83. \end{itemize}
  84. \begin{definition}
  85. Let $A$ be a set. The {\em powerset of $A$\/} is the set \[ P(A) = \{ x \mid x
  86. \subseteq A\}\]
  87. \end{definition}\pause{}
  88. For example, $P(\mathbb{N}_3) = \{\{\}, \{0\}, \{1\}, \{2\}, \{0,
  89. 1\}, \{0, 2\}, \{1, 2\}, \{0, 1, 2\}\}$.
  90. \end{frame}
  91. \begin{frame}[c]
  92. \frametitle{Task list}
  93. \begin{definition}
  94. A {\em $k$-task list\/} is a function $T_k : \mathbb{N}_k \rightarrow
  95. P(\mathbb{N}_k)$. We call each $t \in \mathbb{N}_k$ a {\em task of $T_k$}.
  96. \end{definition}\pause{}
  97. Essentially, we number all of the tasks sequentially, and associate them with
  98. those tasks that need to be done immediately before them.\pause{} For example,
  99. if our tasks were:
  100. \begin{itemize}
  101. \item Get up (numbered $0$)
  102. \item Take a dump (numbered $1$)
  103. \item Brush teeth (numbered $2$)
  104. \end{itemize}\pause{}
  105. A possible $3$-task list for that would be $\{(0, \{\}), (1,
  106. \{0\}), (2, \{0\})\}$.
  107. \end{frame}
  108. \begin{frame}[c]
  109. \frametitle{Dependencies}
  110. \pause{}
  111. Throughout, let $T_k$ be a $k$-task list, and let $x, y \in \mathbb{N}_k$.\pause{}
  112. \begin{definition}
  113. {\em $x$ is a (direct) dependency of $y$ in $T_k$\/} if $x \in T_k(y)$.
  114. \end{definition}\pause{}
  115. \begin{definition}
  116. Let $d_0, d_1, \ldots, d_n \in \mathbb{N}_k$. $(d_0, d_1, \ldots, d_n)$ is a
  117. {\em dependency chain in $T_k$\/} if for any $0 \leq i < n$, $d_i$ is a
  118. dependency of $d_{i+1}$ in $T_k$.
  119. \end{definition}\pause{}
  120. \begin{definition}
  121. {\em $x$ is a transitive dependency of $y$ in $T_k$\/} if there exists a
  122. dependency chain in $T_k$ whose first element is $x$ and whose last element
  123. is $y$.
  124. \end{definition}\pause{}
  125. \begin{definition}
  126. We have a {\em cyclic dependency between $x$ and $y$ in $T_k$\/} if $x$ is a
  127. transitive dependency of $y$ and $y$ is a transitive dependency of $x$.
  128. \end{definition}\pause{}
  129. Where clear, we will drop references to $T_k$ from now on.
  130. \end{frame}
  131. \begin{frame}[c]
  132. \frametitle{More on task lists}
  133. \begin{observation}
  134. A $k$-task list with a cyclic dependency is impossible to complete.
  135. \end{observation}\pause{}
  136. \medskip
  137. Let $T_k$ be a $k$-task list without any cyclic dependencies.\pause{}
  138. \begin{definition}
  139. The {\em maximum parallel width of $T_k$\/} is the size of the largest set
  140. of tasks where no two members are connected by a dependency chain.\pause{}
  141. The {\em minimum parallel width of $T_k$\/} is the size of the smallest
  142. {\em maximal\/} such set (maximality matters here: without it, a single
  143. task would trivially qualify).
  144. \end{definition}\pause{}
  144. \begin{definition}
  145. The {\em critical path of $T_k$\/} is the longest dependency chain.
  146. \end{definition}
  147. \end{frame}
  148. \begin{frame}[c]
  149. \frametitle{What we want to know about task lists}
  150. \pause{}
  151. \begin{enumerate}
  152. \item Are there any cyclic dependencies?\pause{}
  153. \item What are the minimum and maximum parallel widths?\pause{}
  154. \item What is the length of the critical path?\pause{}
  155. \item How can we compute all of these things?\pause{}
  156. \item How efficiently can we compute all of these things?
  157. \end{enumerate}
  158. \end{frame}
  159. \section{Dependency graphs}
  160. \begin{frame}[c,fragile]
  161. \frametitle{Graphs}
  162. \begin{definition}
  163. A {\em (directed) graph\/} $G = (V, E)$ is a tuple, where $V$ is a set of {\em
  164. vertices}, and $E \subseteq V \times V$ is a set of {\em edges}.
  165. \end{definition}\pause{}
  166. Our graphs will always have $V = \mathbb{N}_k$.\pause{}
  167. \begin{center}
  168. \includegraphics[scale=0.5]{img/graph.pdf}
  169. \end{center}
  170. This is a visual representation of the graph $(\mathbb{N}_4, \{(0,1), (0,2),
  171. (1,2)\})$.
  172. \end{frame}
  173. \begin{frame}[c]
  174. \frametitle{Paths and cycles}
  175. \begin{definition}
  176. A {\em path\/} in a graph $G = (V, E)$ is a tuple $(p_0, p_1, \ldots, p_n)$,
  177. such that:
  178. \begin{itemize}
  179. \item Each $p_i \in V$; and
  180. \item For any $0 \leq i < n$, $(p_i, p_{i+1}) \in E$.
  181. \end{itemize}
  182. \end{definition}\pause{}
  183. \begin{definition}
  184. A {\em cycle\/} is a path whose first and last element are equal.\pause{} We
  185. say a graph is {\em cyclic\/} if it contains any cycles, and {\em acyclic\/}
  186. otherwise.
  187. \end{definition}
  188. \end{frame}
  189. \begin{frame}
  190. \frametitle{Sources and neighbourhoods}
  191. \begin{definition}
  192. For any edge $e = (a, b)$, we call $a$ the {\em tail of $e$}, and $b$ the
  193. {\em head of $e$} (the arrowhead is drawn at $b$).
  194. \end{definition}\pause{}
  195. \begin{definition}
  196. A vertex $u$ is a {\em source\/} if it is not the head of any edge.\pause{} A
  197. vertex $u$ is a {\em sink\/} if it is not the tail of any edge.
  198. \end{definition}\pause{}
  199. \begin{definition}
  200. Let $G = (V, E)$ be a graph and $u \in V$. The {\em neighbourhood of
  201. $u$\/} $N(u) = \{ v \in V \mid (u, v) \in E\}$.
  202. \end{definition}
  203. \end{frame}
  204. \begin{frame}
  205. \frametitle{Connecting $k$-task lists and graphs}
  206. Let $T_k$ be a $k$-task list. We can represent it as a graph, which we call
  207. the {\em dependency graph of $T_k$}.\pause{} More specifically:\pause{}
  208. \begin{definition}
  209. The {\em dependency graph of $T_k$\/} is $D(T_k) = (V, E)$, such
  210. that:
  211. \begin{itemize}
  212. \item $V = \mathbb{N}_k$; and
  213. \item $E = \{(u, v) \in \mathbb{N}_k^2 \mid u \in T_k(v)\}$
  214. \end{itemize}
  215. \end{definition}\pause{}
  216. Any $k$-task list has a unique dependency graph.\pause{} This means that we
  217. can solve problems for $k$-task lists by solving (related) problems on their
  218. dependency graphs.
  219. \end{frame}
  220. \begin{frame}[c, fragile]
  221. \frametitle{Representing dependency graphs}
  222. To compute with dependency graphs, we need a way to store them on the
  223. computer:\pause{}
  224. \begin{definition}
  225. Let $G = (\mathbb{N}_k,E)$ be a graph. The {\em adjacency matrix of $G$\/}
  226. is a $k \times k$ matrix $M(G)$, such that
  227. \[
  228. M(G)[u][v] = \begin{cases}
  229. 1 & \text{ if } (u,v) \in E\\
  230. 0 & \text{ otherwise }\\
  231. \end{cases}
  232. \]
  233. \end{definition}\pause{}
  234. Consider the previous example graph and its adjacency matrix:\pause{}
  235. \begin{columns}[c]
  236. \column{0.5\textwidth}
  237. \begin{center}
  238. \includegraphics[scale=0.35]{img/graph.pdf}
  239. \end{center}
  240. \column{0.5\textwidth}
  241. \[
  242. \begin{bmatrix}
  243. 0 & 1 & 1 & 0\\
  244. 0 & 0 & 1 & 0\\
  245. 0 & 0 & 0 & 0\\
  246. 0 & 0 & 0 & 0\\
  247. \end{bmatrix}
  248. \]
  249. \end{columns}
  250. \end{frame}
  251. \section{Working with dependency graphs}
  252. \begin{frame}[c]
  253. \frametitle{Testing for cyclic dependencies}
  254. \begin{observation}
  255. If a $k$-task list has a cyclic dependency, its dependency graph will be
  256. cyclic.
  257. \end{observation}\pause{}
  258. \begin{lemma}
  259. If a graph has no source vertices, then it is cyclic.
  260. \end{lemma}\pause{}
  261. Thus, we need a way of determining what our source vertices are.
  262. \end{frame}
  263. \begin{frame}[c]
  264. \frametitle{Finding source vertices}
  265. We can check what the source vertices of a graph $G$ are as follows:\pause{}
  266. \begin{enumerate}
  267. \item Initially, assume all vertices are sources.\pause{}
  268. \item Scan every entry of $M(G)$: if the entry in some row and column $i$
  269. is $1$ (i.e.\ vertex $i$ has an incoming edge), then mark $i$ as not being
  270. a source.\pause{}
  270. \item Return the set of all unmarked vertices.
  271. \end{enumerate}\pause{}
  272. If $G$ has $n$ vertices, this procedure takes $O(n^2)$ time, as we potentially
  273. have to inspect every cell of the adjacency matrix.\pause{}
  274. \medskip
  275. However, this is not a complete test --- a graph with source vertices may still
  276. be cyclic!
  277. \end{frame}
  278. \begin{frame}[c]
  279. \frametitle{A more thorough cycle test}
  280. \begin{observation}
  281. \begin{itemize}
  282. \item A source cannot be part of any cycle
  283. \item A cycle must eventually re-visit at least one node
  284. \end{itemize}
  285. \end{observation}\pause{}
  286. We can use this to design a more thorough method:\pause{}
  287. \begin{enumerate}
  288. \item Let $S = \emptyset$ and $C$ be the set of sources\pause{}
  289. \item Repeat until $C$ is empty:\pause{}
  290. \begin{enumerate}
  291. \item Let $C^{\prime} = \emptyset$
  292. \item For every $u \in C$:\pause{}
  293. \begin{enumerate}
  294. \item Add $u$ to $S$\pause{}
  295. \item If any $v \in N(u)$ is in $S$, declare we've found a cycle and
  296. stop\pause{}
  297. \item Otherwise, add every $v \in N(u)$ to $C^{\prime}$\pause{}
  298. \end{enumerate}
  299. \item Set $C = C^{\prime}$
  300. \end{enumerate}
  301. \item Declare that there are no cycles and stop.
  302. \end{enumerate}
  303. This process is also $O(n^2)$ --- we have to make as many checks as there are
  304. edges, and there could be as many as $n^2$.
  305. \end{frame}
  306. \begin{frame}
  307. \frametitle{Finding parallel widths}
  308. When we search for cycles, at the start of each `outer' loop body, $C$ will
  309. contain a set of vertices which are not connected by any dependency
  310. chain.\pause{} Thus, the algorithm `slices' the dependency graph into
  311. parallelizable chunks.\pause{} The size of the biggest chunk will be the
  312. maximum parallel width; similarly, the size of the smallest will be the
  313. minimum parallel width.\pause{}
  314. \medskip
  315. We can determine both by simply setting each of these values as equal to the
  316. size of the set of sources, then updating it as we go through.\pause{} If we
  317. find a cycle, these are both $0$ (as we can't do anything).\pause{}
  318. \medskip
  319. Thus, we can find both the minimum and maximum parallel width in $O(n^2)$ time
  320. as well.
  321. \end{frame}
  322. \begin{frame}
  323. \frametitle{Finding the length of the critical path}
  324. The number of times that the outer loop runs must be the length of the
  325. critical path --- otherwise, we would have to do at least one more iteration
  326. before we exhaust every vertex.\pause{} Thus, we can just count the
  327. iterations, and report them at the end.\pause{}
  328. \medskip
  329. If we have a cycle, the length of the critical path is $\infty$ (as we can't
  330. ever actually finish the tasks).\pause{}
  331. \medskip
  332. Thus, we can find the length of the critical path in $O(n^2)$ time as
  333. well!\pause{} We can potentially combine all of these together to avoid
  334. traversing the graph more than once.
  335. \end{frame}
  336. \begin{frame}
  337. \frametitle{What does this all mean?}
  338. \begin{itemize}
  339. \item Solving all of these problems requires quadratic time {\em and\/}
  340. space, which means that it's inherently not easy\pause{}
  341. \item It is possible to be (asymptotically) more space-efficient and
  342. (practically) more time-efficient, but this doesn't happen often\pause{}
  343. \item Shows that more general scheduling (which has {\em additional\/}
  344. requirements) is going to be {\em at least\/} $O(n^2)$ (and is actually
  345. much worse)\pause{}
  346. \item Determining the critical path itself is harder\pause{}
  347. \end{itemize}
  348. \medskip
  349. However, despite this, we can still use these algorithms in many cases, as
  350. long as the number of tasks we're dealing with isn't {\em too\/} large.
  351. \end{frame}
  352. \section{Questions}
  353. \begin{frame}[fragile, c]
  354. \frametitle{Questions?}
  355. \begin{center}
  356. \includegraphics[scale=0.27]{img/questions.pdf}
  357. \end{center}
  358. \end{frame}
  359. \end{document}