#!/usr/bin/env python
# Converts OONI JSON indexes into CSV. JSON indexes come from
# https://measurements.ooni.torproject.org/api/.
#
# Usage: ./json2csv < ooni.json > ooni.csv
#
# Output columns, in order (no header row is written):
#   date, cc, asn, test_name

import csv
import datetime
import json
import re
import sys

# Regex to extract date, country code, AS number, and test name from a report
# URL. Sample URL:
# https://s3.amazonaws.com/ooni-public/sanitised/2017-04-14/20170414T002940Z-GB-AS786-meek_fronted_requests_test-20170414T002941Z_AS786_TBh4y19qx6OuNTMVAZppdGiKUuqTDgKmBAOBr7z2Gszn3h6iO9-0.2.0-probe.json
URL_REGEX = re.compile(r'/(\d{8}T\d{6}Z)-(\w+)-AS(\d+)-([\w_]+)-')

# Format of the "test_start_time" field in the input, and the format used for
# the date column of the output (the literal "T" and "Z" are dropped).
INPUT_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
OUTPUT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def entry_to_row(entry):
    """Return the CSV row for one element of the index's "results" list.

    entry must be a mapping with a "test_start_time" string in
    INPUT_DATE_FORMAT and a "download_url" string that URL_REGEX can match.

    Returns a (date, cc, asn, test_name) tuple of strings. The date comes
    from "test_start_time"; the other three fields are parsed out of
    "download_url".

    Raises KeyError if a required key is missing, and ValueError if the start
    time or the URL does not have the expected format.
    """
    date = datetime.datetime.strptime(entry["test_start_time"],
                                      INPUT_DATE_FORMAT)
    url = entry["download_url"]
    m = URL_REGEX.search(url)
    if m is None:
        # An explicit exception rather than an assert: asserts are stripped
        # under "python -O", which would turn a bad URL into an opaque
        # AttributeError on the next line.
        raise ValueError("cannot parse report URL: %r" % (url,))
    # The URL also carries a timestamp, but the date column is taken from
    # "test_start_time", so the first group is discarded.
    _, cc, asn, test_name = m.groups()
    return (date.strftime(OUTPUT_DATE_FORMAT), cc, asn, test_name)


def convert(infile, outfile):
    """Read a JSON index from infile and write one CSV row per result.

    infile is a readable file object holding a JSON object with a "results"
    list; outfile is a writable file object. Rows are written as they are
    produced, so rows preceding a malformed entry are still emitted before
    the exception from entry_to_row propagates.
    """
    w = csv.writer(outfile)
    data = json.load(infile)
    for entry in data["results"]:
        w.writerow(entry_to_row(entry))


def main():
    """Convert the JSON index on standard input to CSV on standard output."""
    convert(sys.stdin, sys.stdout)


if __name__ == "__main__":
    main()