heuristic_metadata.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536
"""
We made separate metadata for the heuristics and need to add it to the
`metadata-pruned.csv` file. The joined result is saved as
`metadata-joined.csv`.
"""
  5. import pandas as pd
  6. from tqdm import tqdm
  7. from typing import Any
  8. def main(args: Any):
  9. dar_metadata: pd.DataFrame = pd.read_csv(args.metadata_pruned)
  10. if 'cluster_type' in dar_metadata.columns:
  11. dar_metadata.rename(columns={'cluster_type': 'heuristic'}, inplace=True)
  12. dar_metadata['heuristic'] = 0
  13. if 'metadata' in dar_metadata.columns:
  14. dar_metadata.rename(columns={'metadata': 'meta_data'}, inplace=True)
  15. gas_metadata: pd.DataFrame = pd.read_csv(args.gas_price_metadata)
  16. same_metadata: pd.DataFrame = pd.read_csv(args.same_num_tx_metadata)
  17. metadata: pd.DataFrame = pd.concat([dar_metadata, gas_metadata, same_metadata])
  18. metadata: pd.DataFrame = metadata.loc[metadata.groupby('address')['conf'].idxmax()]
  19. metadata.to_csv(args.out_csv, index=False)
  20. if __name__ == "__main__":
  21. import argparse
  22. parser = argparse.ArgumentParser()
  23. parser.add_argument('metadata_pruned', type=str)
  24. parser.add_argument('gas_price_metadata', type=str)
  25. parser.add_argument('same_num_tx_metadata', type=str)
  26. parser.add_argument('out_csv', type=str)
  27. args = parser.parse_args()
  28. main(args)