#!/usr/bin/env python
# Converts OONI JSON indexes into CSV. JSON indexes come from
# https://measurements.ooni.torproject.org/api/.
#
# Usage: ./json2csv < ooni.json > ooni.csv
#
# Output columns (no header row is written): date, cc, asn, test_name

import csv
import datetime
import json
import re
import sys

# Regex to extract date, country code, AS number, and test name from a report
# URL. Sample URL:
# https://s3.amazonaws.com/ooni-public/sanitised/2017-04-14/20170414T002940Z-GB-AS786-meek_fronted_requests_test-20170414T002941Z_AS786_TBh4y19qx6OuNTMVAZppdGiKUuqTDgKmBAOBr7z2Gszn3h6iO9-0.2.0-probe.json
URL_REGEX = re.compile(r'/(\d{8}T\d{6}Z)-(\w+)-AS(\d+)-([\w_]+)-')
INPUT_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
OUTPUT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def entry_to_row(entry):
    """Convert one entry of the index's "results" list into a CSV row.

    The date comes from the entry's "test_start_time" field; the country
    code, AS number and test name are parsed out of its "download_url".

    Returns a (date, cc, asn, test_name) tuple of strings.

    Raises KeyError if a required field is missing, and ValueError if the
    timestamp does not match INPUT_DATE_FORMAT or the URL does not match
    URL_REGEX.
    """
    date = datetime.datetime.strptime(entry["test_start_time"],
                                      INPUT_DATE_FORMAT)
    url = entry["download_url"]
    m = URL_REGEX.search(url)
    if m is None:
        # Raise explicitly rather than assert: asserts are stripped under
        # "python -O", which would turn bad input into an opaque
        # AttributeError on m.groups() below.
        raise ValueError("unrecognized report URL: {!r}".format(url))
    # The first group is the timestamp embedded in the URL; it is ignored in
    # favour of the entry's test_start_time field.
    _, cc, asn, test_name = m.groups()
    return (date.strftime(OUTPUT_DATE_FORMAT), cc, asn, test_name)


def convert(infile, outfile):
    """Read a JSON index from infile and write one CSV row per result.

    infile is a readable file object holding a JSON object with a "results"
    list; outfile is a writable file object that receives the CSV rows.
    """
    writer = csv.writer(outfile)
    data = json.load(infile)
    for entry in data["results"]:
        writer.writerow(entry_to_row(entry))


def main():
    """Script entry point: convert stdin (JSON) to stdout (CSV)."""
    convert(sys.stdin, sys.stdout)


if __name__ == "__main__":
    main()