dl_bucket.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. """
  2. Download Google BigQuery code from bucket.
  3. ethereum-block-data
  4. ethereum-contract-data
  5. ethereum-token-data
  6. ethereum-transaction-data
  7. We can download these locally if needed.
  8. """
  9. import os
  10. import subprocess
  11. from tqdm import tqdm
  12. from typing import Any, List
  13. from google.cloud.storage import Client
  14. from google.cloud.storage.bucket import Bucket
  15. from google.cloud.storage.blob import Blob
  16. def main(args: Any):
  17. root: str = os.path.join(args.root, args.bucket)
  18. if not os.path.isdir(root): os.makedirs(root)
  19. if args.gsutil:
  20. cmd: str = f'gsutil -m cp -r gs://{args.bucket} {root}'
  21. subprocess.call(cmd, shell=True)
  22. else:
  23. client: Client = Client()
  24. assert len(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) > 0, \
  25. "Set GOOGLE_APPLICATION_CREDENTIALS prior to use."
  26. bucket: Bucket = client.get_bucket(args.bucket)
  27. blobs: List[Blob] = list(bucket.list_blobs())
  28. print(f'Found {len(blobs)} in bucket: {args.bucket}.')
  29. for i in tqdm(range(len(blobs))):
  30. blob: Blob = blobs[i]
  31. blob.download_to_filename(os.path.join(root, blob.name))
  32. if __name__ == "__main__":
  33. import argparse
  34. parser = argparse.ArgumentParser()
  35. parser.add_argument('--root', type=str, default='./data/bigquery',
  36. help='path to data root (default: ./data/bigquery)')
  37. parser.add_argument('--bucket', type=str, default='ethereum-block-data',
  38. choices=['ethereum-block-data',
  39. 'ethereum-contract-data',
  40. 'ethereum-token-data',
  41. 'ethereum-transaction-1week-data',
  42. 'ethereum-transaction-1month-data',
  43. 'ethereum-transaction-1year-data',
  44. 'ethereum-transaction-data',
  45. 'ethereum-transaction-data2',
  46. 'ethereum-transaction-data3',
  47. 'tornado-trace',
  48. 'tornado-transaction'])
  49. parser.add_argument('--gsutil', action='store_true', default=False)
  50. args: Any = parser.parse_args()
  51. main(args)