datastructures.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. # Copyright 2013 The Distro Tracker Developers
  2. # See the COPYRIGHT file at the top-level directory of this distribution and
  3. # at http://deb.li/DTAuthors
  4. #
  5. # This file is part of Distro Tracker. It is subject to the license terms
  6. # in the LICENSE file found in the top-level directory of this
  7. # distribution and at http://deb.li/DTLicense. No part of Distro Tracker,
  8. # including this file, may be copied, modified, propagated, or distributed
  9. # except according to the terms contained in the LICENSE file.
  10. """Utility data structures for Distro Tracker."""
  11. from __future__ import unicode_literals
  12. from collections import deque
  13. from copy import deepcopy
  14. class InvalidDAGException(Exception):
  15. pass
  16. class DAG(object):
  17. """
  18. A class representing a Directed Acyclic Graph.
  19. Allows clients to build up a DAG where the nodes of the graph are any type
  20. of object which can be placed in a dictionary.
  21. """
  22. class Node(object):
  23. def __init__(self, id, original):
  24. self.id = id
  25. self.original = original
  26. def __hash__(self):
  27. return self.id.__hash__()
  28. def __eq__(self, other):
  29. return self.id == other.id
  30. def __init__(self):
  31. """
  32. Instantiates an empty DAG.
  33. """
  34. #: Maps original node objects to their internal representation
  35. self.nodes_map = {}
  36. #: Represents the graph structure of the DAG as an adjacency list
  37. self.graph = {}
  38. #: Holds the in-degree of each node to allow constant-time lookups
  39. #: instead of iterating through all nodes in the graph.
  40. self.in_degree = {}
  41. #: Represents the last id given to an inserted node.
  42. self._last_id = 0
  43. def _next_id(self):
  44. """
  45. A private helper method which returns the next ID which can be given to
  46. a node being inserted in the DAG.
  47. """
  48. # NOTE: Not thread safe.
  49. self._last_id += 1
  50. return self._last_id
  51. @property
  52. def all_nodes(self):
  53. """
  54. Returns a list of all nodes in the DAG.
  55. """
  56. return list(self.nodes_map.keys())
  57. def add_node(self, node):
  58. """
  59. Adds a new node to the graph.
  60. """
  61. dag_node = DAG.Node(self._next_id(), node)
  62. self.nodes_map[node] = dag_node
  63. self.in_degree[dag_node.id] = 0
  64. self.graph[dag_node.id] = []
  65. def replace_node(self, original_node, replacement_node):
  66. """
  67. Replaces a node already present in the graph ``original_node`` by a new
  68. object.
  69. The internal representation of the DAG remains the same, except the new
  70. object now takes the place of the original one.
  71. """
  72. node = self.nodes_map[original_node]
  73. del self.nodes_map[original_node]
  74. node.original = replacement_node
  75. self.nodes_map[replacement_node] = node
  76. def remove_node(self, node):
  77. """
  78. Removes a given node from the graph.
  79. The ``node`` parameter can be either the internal Node type or the
  80. node as the client sees them.
  81. """
  82. if not isinstance(node, DAG.Node):
  83. # Try to map it to a DAG Node
  84. node = self.nodes_map[node]
  85. node_to_remove = node
  86. # Update the in degrees of its dependents
  87. for node in self.graph[node_to_remove.id]:
  88. self.in_degree[node] -= 1
  89. # Finally remove it:
  90. # From node mappings
  91. del self.nodes_map[node_to_remove.original]
  92. # From the graph
  93. for dependent_nodes in self.graph.values():
  94. if node_to_remove.id in dependent_nodes:
  95. dependent_nodes.remove(node_to_remove.id)
  96. del self.graph[node_to_remove.id]
  97. # And the in-degree counter
  98. del self.in_degree[node_to_remove.id]
  99. def add_edge(self, node1, node2):
  100. """
  101. Adds an edge between two nodes.
  102. :raises InvalidDAGException: If the edge would introduce a cycle in the
  103. graph structure.
  104. """
  105. # Check for a cycle
  106. if node1 in self.nodes_reachable_from(node2):
  107. raise InvalidDAGException(
  108. "Cannot add edge since it would create a cycle in the graph.")
  109. # When everything is ok, create the new link
  110. node1 = self.nodes_map[node1]
  111. node2 = self.nodes_map[node2]
  112. # If an edge already exists, adding it again does nothing
  113. if node2.id not in self.graph[node1.id]:
  114. self.graph[node1.id].append(node2.id)
  115. self.in_degree[node2.id] += 1
  116. def _get_node_with_no_dependencies(self):
  117. """
  118. Returns an internal node which has no dependencies, i.e. that has an
  119. in-degree of 0.
  120. If there are multiple such nodes, it is not defined which one of them
  121. them is returned.
  122. """
  123. for node in self.nodes_map.values():
  124. if self.in_degree[node.id] == 0:
  125. return node
  126. # If no node with a zero in-degree can be found, the graph is not a DAG
  127. # NOTE: If edges are always added using the `add_edge` method, this
  128. # will never happen since the cycle would be caught at that point
  129. raise InvalidDAGException("The graph contains a cycle.")
  130. def dependent_nodes(self, node):
  131. """
  132. Returns all nodes which are directly dependent on the given node, i.e.
  133. returns a set of all nodes N where there exists an edge(node, N) in the
  134. DAG.
  135. """
  136. node = self.nodes_map[node]
  137. id_to_node_map = {
  138. node.id: node
  139. for node in self.nodes_map.values()
  140. }
  141. return [
  142. id_to_node_map[dependent_node_id].original
  143. for dependent_node_id in self.graph[node.id]
  144. ]
  145. def topsort_nodes(self):
  146. """
  147. Generator which returns DAG nodes in toplogical sort order.
  148. """
  149. # Save the original nodes structure
  150. original_nodes_map = deepcopy(self.nodes_map)
  151. original_in_degree = deepcopy(self.in_degree)
  152. original_graph = deepcopy(self.graph)
  153. while len(self.nodes_map):
  154. # Find a node with a 0 in degree
  155. node = self._get_node_with_no_dependencies()
  156. # We yield instances of the original node added to the graph, not
  157. # DAG.Node as that is what clients expect.
  158. yield node.original
  159. # Remove this node from the graph and update the in-degrees
  160. self.remove_node(node)
  161. # Restore the original nodes structure so that a top sort is not a
  162. # destructive operation
  163. self.nodes_map = original_nodes_map
  164. self.in_degree = original_in_degree
  165. self.graph = original_graph
  166. def nodes_reachable_from(self, node):
  167. """
  168. Returns a set of all nodes reachable from the given node.
  169. """
  170. node = self.nodes_map[node]
  171. # Implemented in terms of a BFS to avoid recursion
  172. queue = deque([node])
  173. reachable_nodes = []
  174. visited = set()
  175. visited.add(node.id)
  176. id_to_node_map = {
  177. node.id: node
  178. for node in self.nodes_map.values()
  179. }
  180. while len(queue):
  181. current_node = queue.popleft()
  182. for successor_id in self.graph[current_node.id]:
  183. if successor_id not in visited:
  184. visited.add(successor_id)
  185. successor = id_to_node_map[successor_id]
  186. queue.append(successor)
  187. reachable_nodes.append(successor.original)
  188. return set(reachable_nodes)