% talk.tex
  1. % Copyright (C) 2017 Koz Ross <koz.ross@retro-freedom.nz>
  2. % This work is licensed under the Creative Commons Attribution-ShareAlike 4.0
  3. % International License. To view a copy of this license, visit
  4. % http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative
  5. % Commons, PO Box 1866, Mountain View, CA 94042, USA.
  6. \documentclass{beamer}
  7. % \usepackage[utf8]{inputenc}% removed: this file uses fontspec (line below), which requires XeLaTeX/LuaLaTeX; inputenc is unnecessary there and must not be loaded alongside fontspec
  8. \usecolortheme{seahorse}
  9. \usefonttheme{professionalfonts}
  10. \usepackage{fontspec}
  11. \setmainfont{DejaVu Sans}
  12. \setbeamertemplate{navigation symbols}{}
  13. \usepackage{caption}
  14. \usepackage[font=itshape, begintext=``, endtext='']{quoting}
  15. \usepackage{fancyvrb}
  16. \usepackage{color}
  17. \usepackage[noend]{algpseudocode}
  18. \usepackage{amsmath}
  19. \newtheorem{observation}{Observation}
  20. \title{Dependency graphs}
  21. \subtitle{Or: why scheduling is {\em hard\/}}
  22. \titlegraphic{\includegraphics[scale=0.8]{img/logo.pdf}}
  23. \author{Koz Ross}
  24. \date{19th October, 2017}
  25. \begin{document}
  26. \begin{frame}[c]
  27. \titlepage{}
  28. \end{frame}
  29. \begin{frame}[c]
  30. \frametitle{Outline}
  31. \tableofcontents
  32. \end{frame}
  33. \section{Introduction}
  34. \begin{frame}[c]
  35. \frametitle{A simple recipe: tomato scrambled eggs}
  36. \pause{}
  37. \begin{enumerate}
  38. \item Chop tomatoes into small pieces.
  39. \item Break the eggs into a bowl.
  40. \item Beat the eggs.
  41. \item Mix the tomatoes into the eggs.
  42. \item Heat up a frying pan to medium heat.
  43. \item Dump the egg-and-tomato mixture into the frying pan.
  44. \item Cook the mixture until desired consistency.
  45. \item Season with salt and pepper.
  46. \end{enumerate}
  47. \pause{}
  48. \begin{itemize}
  49. \item Some tasks can be done simultaneously (e.g.\@ $1$ and
  50. $3$)\pause{}
  51. \item Other tasks {\em must\/} be done in a certain order (e.g.\@ $2$ must
  52. happen before $3$)\pause{}
  53. \item These together determine how (and how fast!) we can finish all the tasks
  54. \end{itemize}
  55. \end{frame}
  56. \begin{frame}[c]
  57. \frametitle{Why does it matter?}
  58. \begin{itemize}
  59. \item Knowing how many tasks (and which ones) can be done simultaneously
  60. helps us finish faster\pause{}
  61. \item Knowing how many additional {\em processes\/} we can benefit from
  62. (and how long they'll spend idle) lets us use our resources more
  63. efficiently\pause{}
  64. \item Finding the {\em critical path\/} gives us a hard bound on how quickly
  65. we can finish at all\pause{}
  66. \item {\em Especially\/} important for computers (parallel processing is how
  67. we get all of our speed gains these days)\pause{}
  68. \item A special (and limited) case of {\em scheduling\/} (this acts as a
  69. `difficulty floor' for the more general version)
  70. \end{itemize}
  71. \end{frame}
  72. \section{Preliminaries}
  73. \begin{frame}[c]
  74. \frametitle{The basics}
  75. Let $\mathbb{N} = \{0, 1, 2, \ldots\}$ be the set of {\em natural numbers}.
  76. We use $\mathbb{N}_k = \{ x \in \mathbb{N} \mid x < k\}$ for $k \in
  77. \mathbb{N}$.\pause{} For example:
  78. \begin{itemize}
  79. \item $\mathbb{N}_0 = \{\}$ (there are no natural numbers less than
  80. $0$)\pause{}
  81. \item $\mathbb{N}_1 = \{0\}$\pause{}
  82. \item $\mathbb{N}_5 = \{0, 1, 2, 3, 4\}$\pause{}
  83. \end{itemize}
  84. \begin{definition}
  85. Let $A$ be a set. The {\em powerset of $A$\/} is the set \[ P(A) = \{ x \mid x
  86. \subseteq A\}\]
  87. \end{definition}\pause{}
  88. For example, $P(\mathbb{N}_3) = \{\{\}, \{0\}, \{1\}, \{2\}, \{0,
  89. 1\}, \{0, 2\}, \{1, 2\}, \{0, 1, 2\}\}$.
  90. \end{frame}
  91. \begin{frame}[c]
  92. \frametitle{Task list}
  93. \begin{definition}
  94. A {\em $k$-task list\/} is a function $T_k : \mathbb{N}_k \rightarrow
  95. P(\mathbb{N}_k)$. We call each $t \in \mathbb{N}_k$ a {\em task of $T_k$}.
  96. \end{definition}\pause{}
  97. Essentially, we number all of the tasks sequentially, and associate them with
  98. those tasks that need to be done immediately before them.\pause{} For example,
  99. if our tasks were:
  100. \begin{itemize}
  101. \item Get up (numbered $0$)
  102. \item Take a dump (numbered $1$)
  103. \item Brush teeth (numbered $2$)
  104. \end{itemize}\pause{}
  105. A possible $3$-task list for that would be $\{(0, \{\}), (1,
  106. \{0\}), (2, \{0\})\}$.
  107. \end{frame}
  108. \begin{frame}[c]
  109. \frametitle{Dependencies}
  110. \pause{}
  111. Throughout, let $T_k$ be a $k$-task list, and let $x, y \in \mathbb{N}_k$.\pause{}
  112. \begin{definition}
  113. {\em $x$ is a (direct) dependency of $y$ in $T_k$\/} if $x \in T_k(y)$.
  114. \end{definition}\pause{}
  115. \begin{definition}
  116. Let $d_0, d_1, \ldots, d_n \in \mathbb{N}_k$. $(d_0, d_1, \ldots, d_n)$ is a
  117. {\em dependency chain in $T_k$\/} if for any $0 \leq i < n$, $d_i$ is a
  118. dependency of $d_{i+1}$ in $T_k$.
  119. \end{definition}\pause{}
  120. \begin{definition}
  121. {\em $x$ is a transitive dependency of $y$ in $T_k$\/} if there exists a
  122. dependency chain in $T_k$ whose first element is $x$ and whose last element
  123. is $y$.
  124. \end{definition}\pause{}
  125. \begin{definition}
  126. We have a {\em cyclic dependency between $x$ and $y$ in $T_k$\/} if $x$ is a
  127. transitive dependency of $y$ and $y$ is a transitive dependency of $x$.
  128. \end{definition}\pause{}
  129. Where clear, we will drop references to $T_k$ from now on.
  130. \end{frame}
  131. \begin{frame}[c]
  132. \frametitle{More on task lists}
  133. \begin{observation}
  134. A $k$-task list with a cyclic dependency is impossible to complete.
  135. \end{observation}\pause{}
  136. \medskip
  137. Let $T_k$ be a $k$-task list without any cyclic dependencies.\pause{}
  138. \begin{definition}
  139. The {\em maximum parallel width of $T_k$\/} is the size of the largest set
  140. of tasks where no two members are connected by a dependency chain.\pause{}
  141. The {\em minimum parallel width of $T_k$\/} is the size of the smallest
  142. {\em maximal\/} such set (maximality matters here: without it, a single
  143. task would trivially qualify).
  144. \end{definition}\pause{}
  144. \begin{definition}
  145. The {\em critical path of $T_k$\/} is the longest dependency chain.
  146. \end{definition}
  147. \end{frame}
  148. \begin{frame}[c]
  149. \frametitle{What we want to know about task lists}
  150. \pause{}
  151. \begin{enumerate}
  152. \item Are there any cyclic dependencies?\pause{}
  153. \item What are the minimum and maximum parallel widths?\pause{}
  154. \item What is the length of the critical path?\pause{}
  155. \item How can we compute all of these things?\pause{}
  156. \item How efficiently can we compute all of these things?
  157. \end{enumerate}
  158. \end{frame}
  159. \section{Dependency graphs}
  160. \begin{frame}[c,fragile]
  161. \frametitle{Graphs}
  162. \begin{definition}
  163. A {\em (directed) graph\/} $G = (V, E)$ is a tuple, where $V$ is a set of {\em
  164. vertices}, and $E \subseteq V \times V$ is a set of {\em edges}.
  165. \end{definition}\pause{}
  166. Our graphs will always have $V = \mathbb{N}_k$.\pause{}
  167. \begin{center}
  168. \includegraphics[scale=0.5]{img/graph.pdf}
  169. \end{center}
  170. This is a visual representation of the graph $(\mathbb{N}_4, \{(0,1), (0,2),
  171. (1,2)\})$.
  172. \end{frame}
  173. \begin{frame}[c]
  174. \frametitle{Paths and cycles}
  175. \begin{definition}
  176. A {\em path\/} in a graph $G = (V, E)$ is a tuple $(p_0, p_1, \ldots, p_n)$,
  177. such that:
  178. \begin{itemize}
  179. \item Each $p_i \in V$; and
  180. \item For any $0 \leq i < n$, $(p_i, p_{i+1}) \in E$.
  181. \end{itemize}
  182. \end{definition}\pause{}
  183. \begin{definition}
  184. A {\em cycle\/} is a path whose first and last element are equal.\pause{} We
  185. say a graph is {\em cyclic\/} if it contains any cycles, and {\em acyclic\/}
  186. otherwise.
  187. \end{definition}
  188. \end{frame}
  189. \begin{frame}
  190. \frametitle{Sources and neighbourhoods}
  191. \begin{definition}
  192. For any edge $e = (a, b)$, we call $a$ the {\em tail of $e$}, and $b$ the
  193. {\em head of $e$} (the arrowhead is drawn at $b$).
  194. \end{definition}\pause{}
  195. \begin{definition}
  196. A vertex $u$ is a {\em source\/} if it is not the head of any edge.\pause{} A
  197. vertex $u$ is a {\em sink\/} if it is not the tail of any edge.
  198. \end{definition}\pause{}
  199. \begin{definition}
  200. Let $G = (V, E)$ be a graph and $u \in V$. The {\em neighbourhood of
  201. $u$\/} $N(u) = \{ v \in V \mid (u, v) \in E\}$.
  202. \end{definition}
  203. \end{frame}
  204. \begin{frame}
  205. \frametitle{Connecting $k$-task lists and graphs}
  206. Let $T_k$ be a $k$-task list. We can represent it as a graph, which we call
  207. the {\em dependency graph of $T_k$}.\pause{} More specifically:\pause{}
  208. \begin{definition}
  209. The {\em dependency graph of $T_k$\/} is $D(T_k) = (V, E)$, such
  210. that:
  211. \begin{itemize}
  212. \item $V = \mathbb{N}_k$; and
  213. \item $E = \{(u, v) \in \mathbb{N}_k^2 \mid u \in T_k(v)\}$
  214. \end{itemize}
  215. \end{definition}\pause{}
  216. Any $k$-task list has a unique dependency graph.\pause{} This means that we
  217. can solve problems for $k$-task lists by solving (related) problems on their
  218. dependency graphs.
  219. \end{frame}
  220. \begin{frame}[c, fragile]
  221. \frametitle{Representing dependency graphs}
  222. To compute with dependency graphs, we need a way to store them on the
  223. computer:\pause{}
  224. \begin{definition}
  225. Let $G = (\mathbb{N}_k,E)$ be a graph. The {\em adjacency matrix of $G$\/}
  226. is a $k \times k$ matrix $M(G)$, such that
  227. \[
  228. M(G)[u][v] = \begin{cases}
  229. 1 & \text{ if } (u,v) \in E\\
  230. 0 & \text{ otherwise }\\
  231. \end{cases}
  232. \]
  233. \end{definition}\pause{}
  234. Consider the previous example graph and its adjacency matrix:\pause{}
  235. \begin{columns}[c]
  236. \column{0.5\textwidth}
  237. \begin{center}
  238. \includegraphics[scale=0.35]{img/graph.pdf}
  239. \end{center}
  240. \column{0.5\textwidth}
  241. \[
  242. \begin{bmatrix}
  243. 0 & 1 & 1 & 0\\
  244. 0 & 0 & 1 & 0\\
  245. 0 & 0 & 0 & 0\\
  246. 0 & 0 & 0 & 0\\
  247. \end{bmatrix}
  248. \]
  249. \end{columns}
  250. \end{frame}
  251. \section{Working with dependency graphs}
  252. \begin{frame}[c]
  253. \frametitle{Testing for cyclic dependencies}
  254. \begin{observation}
  255. If a $k$-task list has a cyclic dependency, its dependency graph will be
  256. cyclic.
  257. \end{observation}\pause{}
  258. \begin{lemma}
  259. If a graph has no source vertices, then it is cyclic.
  260. \end{lemma}\pause{}
  261. Thus, we need a way of determining what our source vertices are.
  262. \end{frame}
  263. \begin{frame}[c]
  264. \frametitle{Finding source vertices}
  265. We can check what the source vertices of a graph $G$ are as follows:\pause{}
  266. \begin{enumerate}
  267. \item Initially, assume all vertices are sources.\pause{}
  268. \item Scan every entry of $M(G)$: if the entry in some row and column $i$
  269. is $1$ (i.e.\ vertex $i$ has an incoming edge), then mark $i$ as not being
  270. a source.\pause{}
  270. \item Return the set of all unmarked vertices.
  271. \end{enumerate}\pause{}
  272. If $G$ has $n$ vertices, this procedure takes $O(n^2)$ time, as we potentially
  273. have to inspect every cell of the adjacency matrix.\pause{}
  274. \medskip
  275. However, this is not a complete test --- a graph with source vertices may still
  276. be cyclic!
  277. \end{frame}
  278. \begin{frame}[c]
  279. \frametitle{A more thorough cycle test}
  280. \begin{observation}
  281. \begin{itemize}
  282. \item A source cannot be part of any cycle
  283. \item A cycle must eventually re-visit at least one node
  284. \end{itemize}
  285. \end{observation}\pause{}
  286. We can use this to design a more thorough method:\pause{}
  287. \begin{enumerate}
  288. \item Let $S = \emptyset$ and $C$ be the set of sources\pause{}
  289. \item Repeat until $C$ is empty:\pause{}
  290. \begin{enumerate}
  291. \item Let $C^{\prime} = \emptyset$
  292. \item For every $u \in C$:\pause{}
  293. \begin{enumerate}
  294. \item Add $u$ to $S$\pause{}
  295. \item If any $v \in N(u)$ is in $S$, declare we've found a cycle and
  296. stop\pause{}
  297. \item Otherwise, add every $v \in N(u)$ to $C^{\prime}$\pause{}
  298. \end{enumerate}
  299. \item Set $C = C^{\prime}$
  300. \end{enumerate}
  301. \item Declare that there are no cycles and stop.
  302. \end{enumerate}
  303. This process is also $O(n^2)$ --- we have to make as many checks as there are
  304. edges, and there could be as many as $n^2$.
  305. \end{frame}
  306. \begin{frame}
  307. \frametitle{Finding parallel widths}
  308. When we search for cycles, at the start of each `outer' loop body, $C$ will
  309. contain a set of vertices which are not connected by any dependency
  310. chain.\pause{} Thus, the algorithm `slices' the dependency graph into
  311. parallelizable chunks.\pause{} The size of the biggest chunk will be the
  312. maximum parallel width; similarly, the size of the smallest will be the
  313. minimum parallel width.\pause{}
  314. \medskip
  315. We can determine both by simply setting each of these values as equal to the
  316. size of the set of sources, then updating it as we go through.\pause{} If we
  317. find a cycle, these are both $0$ (as we can't do anything).\pause{}
  318. \medskip
  319. Thus, we can find both the minimum and maximum parallel width in $O(n^2)$ time
  320. as well.
  321. \end{frame}
  322. \begin{frame}
  323. \frametitle{Finding the length of the critical path}
  324. The number of times that the outer loop runs must be the length of the
  325. critical path --- otherwise, we would have to do at least one more iteration
  326. before we exhaust every vertex.\pause{} Thus, we can just count the
  327. iterations, and report them at the end.\pause{}
  328. \medskip
  329. If we have a cycle, the length of the critical path is $\infty$ (as we can't
  330. ever actually finish the tasks).\pause{}
  331. \medskip
  332. Thus, we can find the length of the critical path in $O(n^2)$ time as
  333. well!\pause{} We can potentially combine all of these together to avoid
  334. traversing the graph more than once.
  335. \end{frame}
  336. \begin{frame}
  337. \frametitle{What does this all mean?}
  338. \begin{itemize}
  339. \item Solving all of these problems requires quadratic time {\em and\/}
  340. space, which means that it's inherently not easy\pause{}
  341. \item It is possible to be (asymptotically) more space-efficient and
  342. (practically) more time-efficient, but this doesn't happen often\pause{}
  343. \item Shows that more general scheduling (which has {\em additional\/}
  344. requirements) is going to be {\em at least\/} $O(n^2)$ (and is actually
  345. much worse)\pause{}
  346. \item Determining the critical path itself is harder\pause{}
  347. \end{itemize}
  348. \medskip
  349. However, despite this, we can still use these algorithms in many cases, as
  350. long as the number of tasks we're dealing with isn't {\em too\/} large.
  351. \end{frame}
  352. \section{Questions}
  353. \begin{frame}[fragile, c]
  354. \frametitle{Questions?}
  355. \begin{center}
  356. \includegraphics[scale=0.27]{img/questions.pdf}
  357. \end{center}
  358. \end{frame}
  359. \end{document}