make_graph.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. """
  2. Need to convert the transactions dataframe to a smaller dataframe
  3. with the columns: `Address A | Address B | # of interactions`
  4. """
  5. import pandas as pd
  6. from typing import Any, Iterable
  7. def yield_transactions(
  8. transactions_csv: str, chunk_size: int = 10000) -> Iterable[pd.DataFrame]:
  9. """
  10. Load a segment at a time (otherwise too large).
  11. """
  12. for chunk in pd.read_csv(transactions_csv, chunksize = chunk_size):
  13. yield chunk
  14. def make_graph_dataframe(
  15. transactions_csv: str, out_csv: str, chunk_size: int = 10000) -> pd.DataFrame:
  16. count: int = 0
  17. print('processing txs', end = '', flush=True)
  18. for chunk in yield_transactions(transactions_csv, chunk_size):
  19. chunk: pd.DataFrame = \
  20. chunk.groupby(['from_address', 'to_address'], as_index=False).size()
  21. if count == 0:
  22. chunk.to_csv(out_csv, index=False)
  23. else:
  24. chunk.to_csv(out_csv, mode='a', header=False, index=False)
  25. del chunk # wipe memory
  26. print('.', end = '', flush=True)
  27. count += 1
  28. def main(args: Any):
  29. make_graph_dataframe(
  30. args.transactions_csv, args.save_csv, args.chunk_size)
  31. if __name__ == "__main__":
  32. from argparse import ArgumentParser
  33. parser: ArgumentParser = ArgumentParser()
  34. parser.add_argument('transactions_csv', type=str, help='path to transaction data')
  35. parser.add_argument('save_csv', type=str, help='path to save data')
  36. parser.add_argument('--chunk-size', type=int, default=1000000,
  37. help='Chunk size (default: 1000000)')
  38. args: Any = parser.parse_args()
  39. main(args)