json_to_sql_format.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. """
  2. Convert same-gas-price transaction clusters from JSON to a CSV table (same table format).
  3. """
  4. import json
  5. import pandas as pd
  6. from tqdm import tqdm
  7. from datetime import datetime
  8. from typing import Any, Dict, List, Set, Tuple
  9. def from_json(path):
  10. with open(path, 'r') as fp:
  11. return json.load(fp)
  12. def main(args: Any):
  13. clusters: List[Set[str]] = from_json(args.clusters_file)
  14. tx2addr: Dict[str, str] = from_json(args.tx2addr_file)
  15. tx2block: Dict[str, int] = from_json(args.tx2block_file)
  16. tx2ts: Dict[str, Any] = from_json(args.tx2ts_file)
  17. transactions, tx2cluster = get_transactions(clusters)
  18. transactions: List[str] = list(transactions)
  19. addresses: List[str] = [tx2addr[tx] for tx in transactions]
  20. block_numbers: List[int] = [tx2block[tx] for tx in transactions]
  21. block_timestamps: List[Any] = [tx2ts[tx] for tx in transactions]
  22. block_timestamps: List[datetime] = [
  23. datetime.strptime(ts, '%Y-%m-%d %H:%M:%S') for ts in block_timestamps]
  24. clusters: List[int] = [tx2cluster[tx] for tx in transactions]
  25. meta_datas: List[str] = [json.dumps({}) for _ in transactions]
  26. dataset: Dict[str, List[Any]] = {
  27. 'address': addresses,
  28. 'transaction': transactions,
  29. 'block_number': block_numbers,
  30. 'block_ts': block_timestamps,
  31. 'meta_data': meta_datas,
  32. 'cluster': clusters,
  33. }
  34. df: pd.DataFrame = pd.DataFrame.from_dict(dataset)
  35. df.to_csv(args.out_file, index=False)
  36. def get_transactions(
  37. clusters: List[Set[str]],
  38. ) -> Tuple[Set[str], Dict[str, int]]:
  39. transactions: Set[str] = set()
  40. tx2cluster: Dict[str, int] = {}
  41. pbar = tqdm(total=len(clusters))
  42. for c, cluster in enumerate(clusters):
  43. transactions = transactions.union(cluster)
  44. for tx in cluster: # put all
  45. tx2cluster[tx] = c
  46. pbar.update()
  47. pbar.close()
  48. return transactions, tx2cluster
  49. if __name__ == "__main__":
  50. import argparse
  51. parser = argparse.ArgumentParser()
  52. parser.add_argument('clusters_file', type=str)
  53. parser.add_argument('tx2addr_file', type=str)
  54. parser.add_argument('tx2block_file', type=str)
  55. parser.add_argument('tx2ts_file', type=str)
  56. parser.add_argument('out_file', type=str)
  57. args = parser.parse_args()
  58. main(args)