123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- library(ggplot2)
- library(plyr)
- library(scales)
- ooni.index <- read.csv("ooni-index.csv.xz",
- colClasses=c(date="POSIXct", cc="character", asn="integer", test_name="character"),
- na.strings="--" # "NA" is Namibia.
- )
- country.codes <- read.csv("country-codes.csv",
- stringsAsFactors=F,
- na.strings="--" # "NA" is North America.
- )
- hdi <- read.csv("hdi.csv",
- stringsAsFactors=F
- )
- # We need to merge the country.codes and hdi data sets. country.codes has nice
- # ISO-3166 2-letter codes alongside spelled-out country names, but hdi only has
- # the spelled-out names, and the spelled-out names are not always exactly the
- # same across the two data sets. We need to build a mapping from hdi's
- # spelled-out country names to ISO-3166 2-letter code. Most of the cases are
- # handled by matching hdi$country with country.codes$official_name_en; then we
- # manually fill out the countries that are still missing.
- hdi.country.to.code <- country.info <- merge(hdi, country.codes, by.x=c("country"), by.y=c("official_name_en"))[, c("country", "ISO3166.1.Alpha.2")]
- hdi.country.to.code <- rbind(hdi.country.to.code,
- c("Congo (Democratic Republic of the)", "CD"),
- c("Czech Republic", "CZ"),
- c("Hong Kong, China (SAR)", "HK"),
- c("Korea (Republic of)", "KR"),
- c("Moldova (Republic of)", "MD"),
- c("Palestine, State of", "PS"),
- c("Tanzania (United Republic of)", "TZ"),
- c("United Kingdom", "GB"),
- c("United States", "US")
- )
- hdi <- merge(hdi, hdi.country.to.code, by=c("country"), all.x=T)
- country.info <- merge(country.codes, hdi, by=c("ISO3166.1.Alpha.2"), all=T)
- data <- merge(ooni.index, country.info, by.x=c("cc"), by.y=c("ISO3166.1.Alpha.2"), all.x=T)
- data$Continent <- factor(data$Continent,
- levels=c("AF", "NA", "SA", "AN", "AS", "EU", "OC", NA),
- labels=c("Africa", "N.America", "S.America", "Antarctica", "Asia", "Europe", "Oceania", "??"),
- exclude=NULL
- )
- # Check if anything is missing an HDI because of the uncertain mapping of spelled-out country names.
- options(width=120)
- print("Countries missing an HDI:")
- print(count(data[is.na(data$hdi), c("cc", "name", "official_name_en")]))
- plot_num_reports_by_date <- function(data, title) {
- p <- ggplot(data)
- p <- p + geom_bar(aes(as.Date(date)))
- p <- p + scale_x_date(date_breaks="1 year", date_minor_breaks="1 month")
- p <- p + labs(title="Number of OONI reports per day", x="date", y="number of reports")
- p <- p + theme_bw()
- p
- }
- plot_num_reports_by_date_and_continent <- function(data, title) {
- p <- ggplot(data)
- p <- p + geom_bar(aes(as.Date(date)))
- p <- p + scale_x_date(date_breaks="1 year", date_minor_breaks="1 month")
- p <- p + facet_grid(Continent ~ .)
- p <- p + labs(title=title, x="date", y="number of reports")
- p <- p + theme_bw()
- p
- }
- plot_num_reports_by_test_name <- function(data, title) {
- p <- ggplot(data)
- p <- p + geom_bar(aes(reorder(test_name, test_name, function(x) length(x))))
- p <- p + coord_flip()
- p <- p + labs(title=title, x=NULL, y="number of reports")
- p <- p + theme_bw()
- p
- }
- plot_num_reports_by_country <- function(data, title) {
- p <- ggplot(data)
- p <- p + geom_text(aes(x=Continent, group=cc, label=cc), stat="count", alpha=0.8, size=2.5, position=position_jitter(width=0.25))
- p <- p + scale_y_continuous(trans="log10", breaks=trans_breaks("log10", function(x) 10^x), labels=comma)
- p <- p + labs(title=title, x="continent", y="number of reports")
- p <- p + theme_bw()
- p
- }
- plot_num_reports_by_hdi <- function(data, title) {
- p <- ggplot(data)
- p <- p + geom_text(aes(x=hdi, group=cc, label=cc, color=Continent), stat="count", size=2.5)
- p <- p + scale_y_continuous(trans="log10", breaks=trans_breaks("log10", function(x) 10^x), labels=comma)
- p <- p + scale_color_brewer(palette="Set2")
- p <- p + labs(title=title, x="Human Development Index 2015", y="number of reports")
- p <- p + guides(color=guide_legend(override.aes=list(alpha=1, size=4)))
- p <- p + theme_bw()
- p
- }
- plot_num_reports_by_date(data[data$date >= as.POSIXct("2015-08-28"), ], "Number of OONI reports per day")
- plot_num_reports_by_date_and_continent(data[data$date >= as.POSIXct("2015-08-28"), ], "Number of OONI reports per day, by continent") + coord_cartesian(ylim=c(0, 1250))
- plot_num_reports_by_test_name(data, "Number of OONI reports by test name")
- plot_num_reports_by_test_name(data[data$date >= as.POSIXct("2018-01-01"), ], "Number of OONI reports by test name (since 2018-01-01)")
- plot_num_reports_by_country(data, "Number of OONI reports by country and continent")
- plot_num_reports_by_country(data[data$date >= as.POSIXct("2018-01-01"), ], "Number of OONI reports by country and continent (since 2018-01-01)")
- plot_num_reports_by_hdi(data, "Number of OONI reports by Human Development Index")
- plot_num_reports_by_hdi(data[data$date >= as.POSIXct("2018-01-01"), ], "Number of OONI reports by Human Development Index (since 2018-01-01)")
|