get_dataset.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. """
Create a dataset of complete Tornado Cash (Tcash) transactions.
Reference notebook: https://github.com/lambdaclass/tornado_cash_anonymity_tool/blob/main/notebooks/complete_dataset.ipynb
  4. """
  5. import os
  6. import pandas as pd
  7. from typing import Any
  8. from src.tcash.data import decode_transactions
  9. def main(args: Any):
  10. trace_df: pd.DataFrame = pd.read_csv(
  11. os.path.join(args.data_dir, 'tornado_traces.csv'), low_memory=False)
  12. transaction_df: pd.DataFrame = pd.read_csv(
  13. os.path.join(args.data_dir, 'tornado_transactions.csv'))
  14. address_df: pd.DataFrame = pd.read_csv(
  15. os.path.join(args.contract_dir, 'tornado_contract_abi.csv'),
  16. names=['address', 'token', 'value', 'name','abi'],
  17. sep='|')
  18. proxy_df = pd.read_csv(
  19. os.path.join(args.contract_dir, 'tornado_proxy_abi.csv'),
  20. names=['address', 'abi'],
  21. sep='|')
  22. deposit_df, withdraw_df = decode_transactions(
  23. address_df, proxy_df, transaction_df, trace_df)
  24. withdraw_df.to_csv(
  25. os.path.join(args.data_dir, 'complete_withdraw_txs.csv'), index=False)
  26. deposit_df.to_csv(
  27. os.path.join(args.data_dir, 'complete_deposit_txs.csv'), index=False)
  28. if __name__ == "__main__":
  29. from argparse import ArgumentParser
  30. parser: ArgumentParser = ArgumentParser()
  31. parser.add_argument('data_dir', type=str, help='path to trace and transaction data')
  32. parser.add_argument('contract_dir', type=str, help='path to contract data')
  33. args: Any = parser.parse_args()
  34. main(args)