viz_nx.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
"""
To scale the run_deposit.py script, we had to forgo creating the
NetworkX graph in memory there. This script is where that in-memory
graph is still built (and then drawn to an image). The motivation
is to isolate the high-memory parts in a single file.
"""
  6. import os
  7. import numpy as np
  8. import pandas as pd
  9. import networkx as nx
  10. import matplotlib.pyplot as plt
  11. from typing import Any, List, Tuple
  12. def main(args: Any):
  13. data: pd.DataFrame = pd.read_csv(args.data_file)
  14. print('making user graph...', end = '', flush=True)
  15. graph: nx.DiGraph = make_graph(data.user, data.deposit)
  16. if not os.path.isdir(args.save_dir):
  17. os.makedirs(args.save_dir)
  18. plt.figure(num=None, figsize=(20, 20), dpi=80)
  19. plt.axis('off')
  20. pos = nx.spring_layout(graph)
  21. nx.draw_networkx_nodes(graph, pos)
  22. nx.draw_networkx_edges(graph, pos)
  23. plt.savefig(
  24. os.path.join(args.save_path, 'fig.png'),
  25. bbox_inches="tight",
  26. )
  27. def make_graph(node_a: pd.Series, node_b: pd.Series) -> nx.DiGraph:
  28. """
  29. DEPRECATED: This assumes we can store all connections in memory.
  30. Make a directed graph connecting each row of node_a to the
  31. corresponding row of node_b.
  32. """
  33. assert node_a.size == node_b.size, "Dataframes are uneven sizes."
  34. graph = nx.DiGraph()
  35. nodes: np.array = np.concatenate([node_a.unique(), node_b.unique()])
  36. edges: List[Tuple[str, str]] = list(
  37. zip(node_a.to_numpy(), node_b.to_numpy())
  38. )
  39. graph.add_nodes_from(nodes)
  40. graph.add_edges_from(edges)
  41. return graph
  42. if __name__ == "__main__":
  43. import argparse
  44. parser = argparse.ArgumentParser()
  45. parser.add_argument('data_file', type=str, help='path to cached out of deposit.py')
  46. parser.add_argument('save_dir', type=str, help='where to save files.')
  47. args: Any = parser.parse_args()
  48. main(args)