# Display names of countries and territories, keyed by lower-case
# two-letter country code. Names that need a definite article when used in
# a sentence (e.g. "the United States") already carry a leading "the", so a
# value can be appended directly after "... in " or "... from ".
countrylist <- list(
  "ad" = "Andorra",
  "ae" = "the United Arab Emirates",
  "af" = "Afghanistan",
  "ag" = "Antigua and Barbuda",
  "ai" = "Anguilla",
  "al" = "Albania",
  "am" = "Armenia",
  "an" = "the Netherlands Antilles",
  "ao" = "Angola",
  "aq" = "Antarctica",
  "ar" = "Argentina",
  "as" = "American Samoa",
  "at" = "Austria",
  "au" = "Australia",
  "aw" = "Aruba",
  "ax" = "the Aland Islands",
  "az" = "Azerbaijan",
  "ba" = "Bosnia and Herzegovina",
  "bb" = "Barbados",
  "bd" = "Bangladesh",
  "be" = "Belgium",
  "bf" = "Burkina Faso",
  "bg" = "Bulgaria",
  "bh" = "Bahrain",
  "bi" = "Burundi",
  "bj" = "Benin",
  # Spelling corrected from "Saint Bartelemey".
  "bl" = "Saint Barthélemy",
  "bm" = "Bermuda",
  "bn" = "Brunei",
  "bo" = "Bolivia",
  "bq" = "Bonaire, Sint Eustatius and Saba",
  "br" = "Brazil",
  "bs" = "the Bahamas",
  "bt" = "Bhutan",
  "bv" = "the Bouvet Island",
  "bw" = "Botswana",
  "by" = "Belarus",
  "bz" = "Belize",
  "ca" = "Canada",
  "cc" = "the Cocos (Keeling) Islands",
  "cd" = "the Democratic Republic of the Congo",
  "cf" = "Central African Republic",
  "cg" = "Congo",
  "ch" = "Switzerland",
  "ci" = "Côte d'Ivoire",
  "ck" = "the Cook Islands",
  "cl" = "Chile",
  "cm" = "Cameroon",
  "cn" = "China",
  "co" = "Colombia",
  "cr" = "Costa Rica",
  "cu" = "Cuba",
  "cv" = "Cape Verde",
  "cw" = "Curaçao",
  "cx" = "the Christmas Island",
  "cy" = "Cyprus",
  "cz" = "the Czech Republic",
  "de" = "Germany",
  "dj" = "Djibouti",
  "dk" = "Denmark",
  "dm" = "Dominica",
  "do" = "the Dominican Republic",
  "dz" = "Algeria",
  "ec" = "Ecuador",
  "ee" = "Estonia",
  "eg" = "Egypt",
  "eh" = "the Western Sahara",
  "er" = "Eritrea",
  "es" = "Spain",
  "et" = "Ethiopia",
  "fi" = "Finland",
  "fj" = "Fiji",
  "fk" = "the Falkland Islands (Malvinas)",
  "fm" = "the Federated States of Micronesia",
  "fo" = "the Faroe Islands",
  "fr" = "France",
  "ga" = "Gabon",
  "gb" = "the United Kingdom",
  "gd" = "Grenada",
  "ge" = "Georgia",
  "gf" = "French Guiana",
  "gg" = "Guernsey",
  "gh" = "Ghana",
  "gi" = "Gibraltar",
  "gl" = "Greenland",
  "gm" = "Gambia",
  "gn" = "Guinea",
  "gp" = "Guadeloupe",
  "gq" = "Equatorial Guinea",
  "gr" = "Greece",
  "gs" = "South Georgia and the South Sandwich Islands",
  "gt" = "Guatemala",
  "gu" = "Guam",
  "gw" = "Guinea-Bissau",
  "gy" = "Guyana",
  "hk" = "Hong Kong",
  "hm" = "Heard Island and McDonald Islands",
  "hn" = "Honduras",
  "hr" = "Croatia",
  "ht" = "Haiti",
  "hu" = "Hungary",
  "id" = "Indonesia",
  "ie" = "Ireland",
  "il" = "Israel",
  "im" = "the Isle of Man",
  "in" = "India",
  "io" = "the British Indian Ocean Territory",
  "iq" = "Iraq",
  "ir" = "Iran",
  "is" = "Iceland",
  "it" = "Italy",
  "je" = "Jersey",
  "jm" = "Jamaica",
  "jo" = "Jordan",
  "jp" = "Japan",
  "ke" = "Kenya",
  "kg" = "Kyrgyzstan",
  "kh" = "Cambodia",
  "ki" = "Kiribati",
  "km" = "Comoros",
  "kn" = "Saint Kitts and Nevis",
  "kp" = "North Korea",
  "kr" = "the Republic of Korea",
  "kw" = "Kuwait",
  "ky" = "the Cayman Islands",
  "kz" = "Kazakhstan",
  "la" = "Laos",
  "lb" = "Lebanon",
  "lc" = "Saint Lucia",
  "li" = "Liechtenstein",
  "lk" = "Sri Lanka",
  "lr" = "Liberia",
  "ls" = "Lesotho",
  "lt" = "Lithuania",
  "lu" = "Luxembourg",
  "lv" = "Latvia",
  "ly" = "Libya",
  "ma" = "Morocco",
  "mc" = "Monaco",
  "md" = "the Republic of Moldova",
  "me" = "Montenegro",
  "mf" = "Saint Martin",
  "mg" = "Madagascar",
  "mh" = "the Marshall Islands",
  "mk" = "Macedonia",
  "ml" = "Mali",
  "mm" = "Burma",
  "mn" = "Mongolia",
  "mo" = "Macau",
  "mp" = "the Northern Mariana Islands",
  "mq" = "Martinique",
  "mr" = "Mauritania",
  "ms" = "Montserrat",
  "mt" = "Malta",
  "mu" = "Mauritius",
  "mv" = "the Maldives",
  "mw" = "Malawi",
  "mx" = "Mexico",
  "my" = "Malaysia",
  "mz" = "Mozambique",
  "na" = "Namibia",
  "nc" = "New Caledonia",
  "ne" = "Niger",
  "nf" = "Norfolk Island",
  "ng" = "Nigeria",
  "ni" = "Nicaragua",
  "nl" = "the Netherlands",
  "no" = "Norway",
  "np" = "Nepal",
  "nr" = "Nauru",
  "nu" = "Niue",
  "nz" = "New Zealand",
  "om" = "Oman",
  "pa" = "Panama",
  "pe" = "Peru",
  "pf" = "French Polynesia",
  "pg" = "Papua New Guinea",
  "ph" = "the Philippines",
  "pk" = "Pakistan",
  "pl" = "Poland",
  "pm" = "Saint Pierre and Miquelon",
  "pn" = "the Pitcairn Islands",
  "pr" = "Puerto Rico",
  "ps" = "the Palestinian Territory",
  "pt" = "Portugal",
  "pw" = "Palau",
  "py" = "Paraguay",
  "qa" = "Qatar",
  "re" = "Reunion",
  "ro" = "Romania",
  "rs" = "Serbia",
  "ru" = "Russia",
  "rw" = "Rwanda",
  "sa" = "Saudi Arabia",
  "sb" = "the Solomon Islands",
  "sc" = "the Seychelles",
  "sd" = "Sudan",
  "se" = "Sweden",
  "sg" = "Singapore",
  "sh" = "Saint Helena",
  "si" = "Slovenia",
  "sj" = "Svalbard and Jan Mayen",
  "sk" = "Slovakia",
  "sl" = "Sierra Leone",
  "sm" = "San Marino",
  "sn" = "Senegal",
  "so" = "Somalia",
  "sr" = "Suriname",
  "ss" = "South Sudan",
  "st" = "São Tomé and Príncipe",
  "sv" = "El Salvador",
  "sx" = "Sint Maarten",
  "sy" = "the Syrian Arab Republic",
  "sz" = "Swaziland",
  "tc" = "Turks and Caicos Islands",
  "td" = "Chad",
  "tf" = "the French Southern Territories",
  "tg" = "Togo",
  "th" = "Thailand",
  "tj" = "Tajikistan",
  "tk" = "Tokelau",
  "tl" = "East Timor",
  "tm" = "Turkmenistan",
  "tn" = "Tunisia",
  "to" = "Tonga",
  "tr" = "Turkey",
  "tt" = "Trinidad and Tobago",
  "tv" = "Tuvalu",
  "tw" = "Taiwan",
  "tz" = "the United Republic of Tanzania",
  "ua" = "Ukraine",
  "ug" = "Uganda",
  "um" = "the United States Minor Outlying Islands",
  "us" = "the United States",
  "uy" = "Uruguay",
  "uz" = "Uzbekistan",
  "va" = "Vatican City",
  "vc" = "Saint Vincent and the Grenadines",
  "ve" = "Venezuela",
  "vg" = "the British Virgin Islands",
  "vi" = "the United States Virgin Islands",
  "vn" = "Vietnam",
  "vu" = "Vanuatu",
  "wf" = "Wallis and Futuna",
  "ws" = "Samoa",
  "xk" = "Kosovo",
  "ye" = "Yemen",
  "yt" = "Mayotte",
  "za" = "South Africa",
  "zm" = "Zambia",
  "zw" = "Zimbabwe")

# Look up the display name for a two-letter country code in `countrylist`.
#
# country: a single country code string (the list keys are lower-case).
#
# Returns the matching name, or "no-man's-land" when the code has no entry.
countryname <- function(country) {
  name <- countrylist[[country]]
  if (is.null(name)) "no-man's-land" else name
}

# Choose x-axis break intervals and a date label format for a plot that
# covers `days` days.
#
# The span is sorted into one of seven right-closed intervals:
# (-1, 7], (7, 12], (12, 56], (56, 180], (180, 600], (600, 5000],
# (5000, Inf]. Longer spans get coarser breaks and labels.
#
# Returns a list with elements `major` and `minor` (break interval
# specifications such as "weeks" or "3 months") and `format` (a strftime
# pattern). Values that fall in none of the intervals yield NA entries.
date_breaks <- function(days) {
  span_class <- cut(days, breaks = c(-1, 7, 12, 56, 180, 600, 5000, Inf),
                    labels = FALSE)
  major_steps <- c("days", "2 days", "weeks", "months", "3 months",
                   "years", "5 years")
  minor_steps <- c("days", "days", "days", "weeks", "months", "months",
                   "years")
  label_formats <- c("%d-%b", "%d-%b", "%d-%b", "%b-%Y", "%b-%Y", "%Y",
                     "%Y")
  list(major = major_steps[span_class],
       minor = minor_steps[span_class],
       format = label_formats[span_class])
}

# Format numbers without scientific notation, using a space as the
# thousands separator. Further arguments are forwarded to format().
formatter <- function(x, ...) {
  formatted <- format(x, ..., big.mark = " ", scientific = FALSE)
  formatted
}

# Adjust the active ggplot2 theme for every plot created after this point:
# horizontally centre plot titles and use a right-hand plot margin twice as
# large as the other three.
theme_update(
  plot.margin = margin(t = 5.5, r = 11, b = 5.5, l = 5.5),
  plot.title = element_text(hjust = 0.5)
)

# Plot the number of relays and bridges per day and save the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at two days before the current date.
# path:       file name passed to ggsave().
#
# Uses only the rows of servers.csv whose flag, country, version, platform
# and ec2bridge columns are all empty. Days in the interval that have no
# such row are added with NA counts.
plot_networksize <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  servers <- read.csv(
    paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
           "servers.csv"),
    stringsAsFactors = FALSE)
  in_interval <- servers$date >= start & servers$date <= end
  unqualified <- servers$flag == "" & servers$country == "" &
    servers$version == "" & servers$platform == "" &
    servers$ec2bridge == ""
  servers <- servers[in_interval & unqualified, ]
  counts <- data.frame(date = as.Date(servers$date, "%Y-%m-%d"),
                       relays = servers$relays,
                       bridges = servers$bridges)
  all_days <- seq(from = as.Date(start, "%Y-%m-%d"),
                  to = as.Date(end, "%Y-%m-%d"), by = "1 day")
  # setdiff() drops the Date class, hence the conversion back below.
  absent <- setdiff(all_days, as.Date(counts$date, origin = "1970-01-01"))
  if (length(absent) > 0) {
    counts <- rbind(counts, data.frame(
      date = as.Date(absent, origin = "1970-01-01"),
      relays = NA, bridges = NA))
  }
  networksize <- melt(counts, id.vars = "date")
  plotted_days <- as.Date(networksize$date, "%Y-%m-%d")
  axis_breaks <- date_breaks(
    as.numeric(max(plotted_days) - min(plotted_days)))
  x_title <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  y_limits <- c(0, max(networksize$value, na.rm = TRUE))
  chart <- ggplot(networksize,
                  aes(x = as.Date(date, "%Y-%m-%d"), y = value,
                      colour = variable)) +
    geom_line(size = 1) +
    scale_x_date(name = x_title,
                 labels = date_format(axis_breaks$format),
                 date_breaks = axis_breaks$major,
                 date_minor_breaks = axis_breaks$minor) +
    scale_y_continuous(name = "", limits = y_limits) +
    scale_colour_hue("", breaks = c("relays", "bridges"),
                     labels = c("Relays", "Bridges")) +
    ggtitle("Number of relays\n")
  ggsave(filename = path, plot = chart, width = 8, height = 5, dpi = 150)
}

# Plot the number of relays per day, either for one country or for all
# countries, and save the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at two days before the current date.
# country:    a country code as used in the country column of servers.csv,
#             or "all" to select the rows with an empty country column.
# path:       file name passed to ggsave().
#
# Days in the interval without a matching row are added with an NA count.
plot_relaycountries <- function(start, end, country, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  s <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/",
                "servers.csv", sep = ""), stringsAsFactors = FALSE)
  # `country` is a single value, so plain if/else is enough here.
  country_filter <- if (country == "all") "" else country
  s <- s[s$date >= start & s$date <= end & s$flag == '' &
         s$country == country_filter &
         s$version == '' & s$platform == '' & s$ec2bridge == '', ]
  s <- data.frame(date = as.Date(s$date, "%Y-%m-%d"), relays = s$relays)
  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
      to = as.Date(end, "%Y-%m-%d"), by = "1 day")
  # setdiff() drops the Date class, hence the conversion back below.
  missing <- setdiff(dates, s$date)
  if (length(missing) > 0) {
    s <- rbind(s,
        data.frame(date = as.Date(missing, origin = "1970-01-01"),
        relays = NA))
  }
  title <- if (country == "all") {
    "Number of relays in all countries\n"
  } else {
    paste("Number of relays in ", countryname(country), "\n", sep = "")
  }
  # Local label function for the y axis: plain numbers, no scientific
  # notation and no thousands separator.
  formatter <- function(x, ...) {
    format(x, scientific = FALSE, ...)
  }
  date_breaks <- date_breaks(
    as.numeric(max(as.Date(s$date, "%Y-%m-%d")) -
    min(as.Date(s$date, "%Y-%m-%d"))))
  relays_plot <- ggplot(s, aes(x = as.Date(date, "%Y-%m-%d"),
                               y = relays)) +
    geom_line(size = 1) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor) +
    # The label function goes in `labels`; scale_y_continuous() has no
    # `formatter` argument in the ggplot2 versions that provide the
    # date_breaks/date_minor_breaks arguments used above.
    scale_y_continuous(name = "", limits = c(0, max(s$relays,
        na.rm = TRUE)), labels = formatter) +
    ggtitle(title)
  ggsave(filename = path, plot = relays_plot, width = 8, height = 5,
         dpi = 150)
}

# Plot the number of relays per day by Tor version and save the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at two days before the current date.
# path:       file name passed to ggsave().
#
# Uses the rows of servers.csv that have a non-empty version column and
# empty flag, country, platform and ec2bridge columns. Only versions listed
# in `known_versions` are plotted.
plot_versions <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  s <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/",
                "servers.csv", sep = ""), stringsAsFactors = FALSE)
  s <- s[s$date >= start & s$date <= end & s$flag == '' &
         s$country == '' & s$version != '' & s$platform == '' &
         s$ec2bridge == '', ]
  s <- data.frame(date = as.Date(s$date, "%Y-%m-%d"), version = s$version,
                  relays = s$relays)
  known_versions <- c("Other", "0.1.0", "0.1.1", "0.1.2", "0.2.0",
        "0.2.1", "0.2.2", "0.2.3", "0.2.4", "0.2.5", "0.2.6", "0.2.7",
        "0.2.8", "0.2.9", "0.3.0", "0.3.1", "0.3.2", "0.3.3")
  # One colour per known version. At most 12 colours are requested from the
  # "Paired" palette, so they are recycled when there are more versions.
  colours <- data.frame(breaks = known_versions,
    values = rep(brewer.pal(min(12, length(known_versions)), "Paired"),
                 length.out = length(known_versions)),
    stringsAsFactors = FALSE)
  versions <- s[s$version %in% known_versions, ]
  visible_versions <- sort(unique(versions$version))
  date_breaks <- date_breaks(
    as.numeric(max(as.Date(versions$date, "%Y-%m-%d")) -
    min(as.Date(versions$date, "%Y-%m-%d"))))
  versions_plot <- ggplot(versions, aes(x = as.Date(date, "%Y-%m-%d"),
      y = relays, colour = version)) +
    geom_line(size = 1) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor) +
    scale_y_continuous(name = "",
      limits = c(0, max(versions$relays, na.rm = TRUE))) +
    # Only the colours of versions that actually occur in the data are
    # handed to the scale.
    scale_colour_manual(name = "Tor version",
      values = colours[colours$breaks %in% visible_versions, 2],
      breaks = visible_versions) +
    ggtitle("Relay versions\n")
  ggsave(filename = path, plot = versions_plot, width = 8, height = 5,
         dpi = 150)
}

# Plot the number of relays per day by platform and save the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at two days before the current date.
# path:       file name passed to ggsave().
#
# Uses the rows of servers.csv that have a non-empty platform column and
# empty flag, country, version and ec2bridge columns.
plot_platforms <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  servers <- read.csv(
    paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
           "servers.csv"),
    stringsAsFactors = FALSE)
  in_interval <- servers$date >= start & servers$date <= end
  platform_rows <- servers$flag == "" & servers$country == "" &
    servers$version == "" & servers$platform != "" &
    servers$ec2bridge == ""
  servers <- servers[in_interval & platform_rows, ]
  platforms <- data.frame(date = as.Date(servers$date, "%Y-%m-%d"),
                          variable = servers$platform,
                          value = servers$relays)
  plotted_days <- as.Date(platforms$date, "%Y-%m-%d")
  axis_breaks <- date_breaks(
    as.numeric(max(plotted_days) - min(plotted_days)))
  x_title <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  y_limits <- c(0, max(platforms$value, na.rm = TRUE))
  chart <- ggplot(platforms,
                  aes(x = as.Date(date, "%Y-%m-%d"), y = value,
                      colour = variable)) +
    geom_line(size = 1) +
    scale_x_date(name = x_title,
                 labels = date_format(axis_breaks$format),
                 date_breaks = axis_breaks$major,
                 date_minor_breaks = axis_breaks$minor) +
    scale_y_continuous(name = "", limits = y_limits) +
    scale_colour_manual(
      name = "Platform",
      breaks = c("Linux", "Darwin", "BSD", "Windows", "Other"),
      values = c("#E69F00", "#56B4E9", "#009E73", "#0072B2", "#333333")) +
    ggtitle("Relay platforms\n")
  ggsave(filename = path, plot = chart, width = 8, height = 5, dpi = 150)
}

# Plot advertised bandwidth and bandwidth history per day and save the
# graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at four days before the current date.
# path:       file name passed to ggsave().
#
# Uses the rows of bandwidth.csv with empty isexit and isguard columns.
# Bandwidth history is the mean of the bwread and bwwrite columns. Values
# are multiplied by 8 / 1e9 for the "Bandwidth (Gbit/s)" axis.
plot_bandwidth <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 4))
  raw <- read.csv(
    paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
           "bandwidth.csv"),
    stringsAsFactors = FALSE)
  in_interval <- raw$date >= start & raw$date <= end
  unflagged <- raw$isexit == "" & raw$isguard == ""
  raw <- raw[in_interval & unflagged, ]
  wide <- data.frame(date = as.Date(raw$date, "%Y-%m-%d"),
                     bwadv = raw$advbw,
                     bwhist = (raw$bwread + raw$bwwrite) / 2)
  bandwidth <- melt(wide, id.vars = "date")
  plotted_days <- as.Date(bandwidth$date, "%Y-%m-%d")
  axis_breaks <- date_breaks(
    as.numeric(max(plotted_days) - min(plotted_days)))
  x_title <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  y_limits <- c(0, max(bandwidth$value, na.rm = TRUE) * 8 / 1e9)
  chart <- ggplot(bandwidth,
                  aes(x = as.Date(date, "%Y-%m-%d"),
                      y = value * 8 / 1e9, colour = variable)) +
    geom_line(size = 1) +
    scale_x_date(name = x_title,
                 labels = date_format(axis_breaks$format),
                 date_breaks = axis_breaks$major,
                 date_minor_breaks = axis_breaks$minor) +
    scale_y_continuous(name = "Bandwidth (Gbit/s)", limits = y_limits) +
    scale_colour_hue(
      name = "", h.start = 90,
      breaks = c("bwadv", "bwhist"),
      labels = c("Advertised bandwidth", "Bandwidth history")) +
    ggtitle("Total relay bandwidth") +
    theme(legend.position = "top")
  ggsave(filename = path, plot = chart, width = 8, height = 5, dpi = 150)
}

# Plot bandwidth history per day, split by the combination of Exit and
# Guard flags, and save the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at four days before the current date.
# path:       file name passed to ggsave().
#
# Uses the rows of bandwidth.csv with non-empty isexit and isguard columns;
# a value of 't' counts as the flag being set. Days in the interval without
# any row get one NA row per flag combination. The plotted value is the
# mean of read and written bytes, multiplied by 8 / 1e9.
plot_bwhist_flags <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 4))
  raw <- read.csv(
    paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
           "bandwidth.csv"),
    stringsAsFactors = FALSE)
  in_interval <- raw$date >= start & raw$date <= end
  flagged <- raw$isexit != "" & raw$isguard != ""
  raw <- raw[in_interval & flagged, ]
  bw <- data.frame(date = as.Date(raw$date, "%Y-%m-%d"),
                   isexit = raw$isexit == "t",
                   isguard = raw$isguard == "t",
                   read = raw$bwread, written = raw$bwwrite)
  all_days <- seq(from = as.Date(start, "%Y-%m-%d"),
                  to = as.Date(end, "%Y-%m-%d"), by = "1 day")
  # setdiff() drops the Date class, hence the conversion back below.
  absent <- setdiff(all_days, as.Date(bw$date, origin = "1970-01-01"))
  if (length(absent) > 0) {
    absent_days <- as.Date(absent, origin = "1970-01-01")
    # NA rows for the absent days with the given flag combination.
    na_rows <- function(exit_flag, guard_flag) {
      data.frame(date = absent_days, isexit = exit_flag,
                 isguard = guard_flag, read = NA, written = NA)
    }
    bw <- rbind(bw,
                na_rows(FALSE, FALSE),
                na_rows(FALSE, TRUE),
                na_rows(TRUE, FALSE),
                na_rows(TRUE, TRUE))
  }
  flag_label <- ifelse(
    bw$isexit,
    ifelse(bw$isguard, "Guard & Exit", "Exit only"),
    ifelse(bw$isguard, "Guard only", "Middle only"))
  bw <- data.frame(date = bw$date, variable = flag_label,
                   value = (bw$read + bw$written) / 2)
  plotted_days <- as.Date(bw$date, "%Y-%m-%d")
  axis_breaks <- date_breaks(
    as.numeric(max(plotted_days) - min(plotted_days)))
  x_title <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  y_limits <- c(0, max(bw$value, na.rm = TRUE) * 8 / 1e9)
  chart <- ggplot(bw,
                  aes(x = as.Date(date, "%Y-%m-%d"), y = value * 8 / 1e9,
                      colour = variable)) +
    geom_line(size = 1) +
    scale_x_date(name = x_title,
                 labels = date_format(axis_breaks$format),
                 date_breaks = axis_breaks$major,
                 date_minor_breaks = axis_breaks$minor) +
    scale_y_continuous(name = "Bandwidth (Gbit/s)", limits = y_limits) +
    scale_colour_manual(
      name = "",
      values = c("#E69F00", "#56B4E9", "#009E73", "#0072B2")) +
    ggtitle("Bandwidth history by relay flags") +
    theme(legend.position = "top")
  ggsave(filename = path, plot = chart, width = 8, height = 5, dpi = 150)
}

# Plot the bytes read and written for directory requests per day and save
# the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at four days before the current date.
# path:       file name passed to ggsave().
#
# Uses the dirread and dirwrite columns of the bandwidth.csv rows with
# empty isexit and isguard columns. Values are multiplied by 8 / 1e9 for
# the "Bandwidth (Gbit/s)" axis.
plot_dirbytes <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 4))
  raw <- read.csv(
    paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
           "bandwidth.csv"),
    stringsAsFactors = FALSE)
  in_interval <- raw$date >= start & raw$date <= end
  unflagged <- raw$isexit == "" & raw$isguard == ""
  raw <- raw[in_interval & unflagged, ]
  wide <- data.frame(date = as.Date(raw$date, "%Y-%m-%d"),
                     dirread = raw$dirread, dirwrite = raw$dirwrite)
  dir <- melt(wide, id.vars = "date")
  plotted_days <- as.Date(dir$date, "%Y-%m-%d")
  axis_breaks <- date_breaks(
    as.numeric(max(plotted_days) - min(plotted_days)))
  x_title <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  y_limits <- c(0, max(dir$value, na.rm = TRUE) * 8 / 1e9)
  chart <- ggplot(dir,
                  aes(x = as.Date(date, "%Y-%m-%d"), y = value * 8 / 1e9,
                      colour = variable)) +
    geom_line(size = 1) +
    scale_x_date(name = x_title,
                 labels = date_format(axis_breaks$format),
                 date_breaks = axis_breaks$major,
                 date_minor_breaks = axis_breaks$minor) +
    scale_y_continuous(name = "Bandwidth (Gbit/s)", limits = y_limits) +
    scale_colour_hue(
      name = "",
      breaks = c("dirwrite", "dirread"),
      labels = c("Written dir bytes", "Read dir bytes")) +
    ggtitle("Number of bytes spent on answering directory requests") +
    theme(legend.position = "top")
  ggsave(filename = path, plot = chart, width = 8, height = 5, dpi = 150)
}

# Plot the number of relays per day for the requested relay flags and save
# the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at two days before the current date.
# flags:      character vector of flags to plot; "Running" stands for the
#             rows with an empty flag column.
# path:       file name passed to ggsave().
#
# Uses the rows of servers.csv with empty country, version, platform and
# ec2bridge columns.
plot_relayflags <- function(start, end, flags, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  s <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/",
                "servers.csv", sep = ""), stringsAsFactors = FALSE)
  s <- s[s$date >= start & s$date <= end & s$country == '' &
         s$version == '' & s$platform == '' & s$ec2bridge == '', ]
  s <- data.frame(date = as.Date(s$date, "%Y-%m-%d"),
                  variable = ifelse(s$flag == '', 'Running', s$flag),
                  value = s$relays)
  all_flags <- c("Running", "Exit", "Guard", "Fast", "Stable", "HSDir")
  networksize <- s[s$variable %in% flags, ]
  # One NA row per flag, dated the day after `end` and therefore outside
  # the x-axis limits set below, so all six flags are always present in
  # the data handed to the six-colour manual scale.
  networksize <- rbind(data.frame(
    date = as.Date(end) + 1,
    variable = all_flags,
    value = NA), networksize)
  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
      to = as.Date(end, "%Y-%m-%d"), by = "1 day")
  # setdiff() drops the Date class, hence the conversion back below.
  missing <- setdiff(dates, networksize$date)
  if (length(missing) > 0) {
    # Add an NA row for every (missing day, flag) pair so each line has a
    # gap on that day. The days are repeated with `each` so that they line
    # up with the repeated flag vector; repeating the whole day vector six
    # times paired each day with only some of the flags.
    networksize <- rbind(data.frame(
      date = as.Date(rep(missing, each = length(all_flags)),
                     origin = "1970-01-01"),
      variable = rep(all_flags, times = length(missing)),
      value = NA), networksize)
  }
  date_breaks <- date_breaks(
    as.numeric(max(as.Date(end, "%Y-%m-%d")) -
    min(as.Date(networksize$date, "%Y-%m-%d"))))
  flags_plot <- ggplot(networksize, aes(x = as.Date(date, "%Y-%m-%d"),
      y = value, colour = as.factor(variable))) +
    geom_line(size = 1) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor,
        limits = as.Date(c(start, end))) +
    scale_y_continuous(name = "", limits = c(0, max(networksize$value,
        na.rm = TRUE))) +
    scale_colour_manual(name = "Relay flags", values = c("#E69F00",
        "#56B4E9", "#009E73", "#EE6A50", "#000000", "#0072B2"),
        breaks = flags, labels = flags) +
    ggtitle("Number of relays with relay flags assigned\n")
  ggsave(filename = path, plot = flags_plot, width = 8, height = 5,
         dpi = 150)
}

# Plot the median and interquartile range of download times per day and
# save the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at two days before the current date.
# source:     value of the source column to plot, or "all" to select the
#             rows with an empty source column.
# server:     value of the server column to plot.
# filesize:   "50kb", "1mb" or "5mb"; any other value is treated as "5mb"
#             when filtering.
# path:       file name passed to ggsave().
#
# The q1, md and q3 columns are divided by 1e3 for the plot, whose title
# states seconds. Days in the interval without a matching row are added
# with NA values.
plot_torperf <- function(start, end, source, server, filesize, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  filesizeVal <- ifelse(filesize == '50kb', 50 * 1024,
          ifelse(filesize == '1mb', 1024 * 1024, 5 * 1024 * 1024))
  t <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/",
                "torperf-1.1.csv", sep = ""), stringsAsFactors = FALSE)
  known_sources <- c("all", unique(t[t$source != "", "source"]))
  # brewer.pal() only provides 3 to 12 colours for "Paired". Clamp the
  # request to that range and cut or recycle the result so that there is
  # exactly one colour per source.
  source_palette <- brewer.pal(max(3, min(12, length(known_sources))),
                               "Paired")
  colours <- data.frame(source = known_sources,
      colour = rep(source_palette, length.out = length(known_sources)),
      stringsAsFactors = FALSE)
  t <- t[t$date >= start & t$date <= end & t$filesize == filesizeVal &
         t$source == ifelse(source == 'all', '', source) &
         t$server == server, ]
  torperf <- data.frame(date = as.Date(t$date, "%Y-%m-%d"),
                        q1 = t$q1, md = t$md, q3 = t$q3)
  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
      to = as.Date(end, "%Y-%m-%d"), by = "1 day")
  # setdiff() drops the Date class, hence the conversion back below.
  missing <- setdiff(dates, torperf$date)
  if (length(missing) > 0) {
    torperf <- rbind(torperf,
        data.frame(date = as.Date(missing, origin = "1970-01-01"),
        q1 = NA, md = NA, q3 = NA))
  }
  colour <- colours[colours$source == source, "colour"]
  filesizes <- data.frame(filesizes = c("5mb", "1mb", "50kb"),
      label = c("5 MiB", "1 MiB", "50 KiB"), stringsAsFactors = FALSE)
  # Use the column's full name instead of relying on `$` partial matching.
  filesizeStr <- filesizes[filesizes$filesizes == filesize, "label"]
  date_breaks <- date_breaks(
    as.numeric(max(as.Date(torperf$date, "%Y-%m-%d")) -
    min(as.Date(torperf$date, "%Y-%m-%d"))))
  torperf_plot <- ggplot(torperf, aes(x = as.Date(date, "%Y-%m-%d"),
      y = md/1e3, fill = "line")) +
    geom_line(colour = colour, size = 0.75) +
    geom_ribbon(data = torperf, aes(x = date, ymin = q1/1e3,
      ymax = q3/1e3, fill = "ribbon")) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor) +
    scale_y_continuous(name = "") +
    expand_limits(y = 0) +
    # The ribbon uses the line colour with "66" appended as an alpha
    # channel.
    scale_fill_manual(name = paste("Measured times on",
        ifelse(source == "all", "all sources", source), "per day"),
      breaks = c("line", "ribbon"),
      labels = c("Median", "1st to 3rd quartile"),
      values = paste(colour, c("", "66"), sep = "")) +
    ggtitle(paste("Time in seconds to complete", filesizeStr,
        "request to", server, "server")) +
    theme(legend.position = "top")
  ggsave(filename = path, plot = torperf_plot, width = 8, height = 5,
         dpi = 150)
}

# Plot the daily fractions of timed-out and failed requests and save the
# graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at two days before the current date.
# source:     value of the source column to plot, or "all" to select the
#             rows with an empty source column.
# server:     value of the server column to plot.
# filesize:   "50kb", "1mb" or "5mb"; any other value is treated as "5mb"
#             when filtering.
# path:       file name passed to ggsave().
#
# Fractions are timeouts / requests and failures / requests, or 0 for days
# with zero requests. Days in the interval without a matching row are
# added with NA values.
plot_torperf_failures <- function(start, end, source, server, filesize, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  filesizeVal <- ifelse(filesize == '50kb', 50 * 1024,
          ifelse(filesize == '1mb', 1024 * 1024, 5 * 1024 * 1024))
  t <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/",
                "torperf-1.1.csv", sep = ""), stringsAsFactors = FALSE)
  t <- t[t$date >= start & t$date <= end & t$filesize == filesizeVal &
         t$source == ifelse(source == 'all', '', source) &
         t$server == server, ]
  torperf <- data.frame(date = as.Date(t$date, "%Y-%m-%d"),
                        timeouts = t$timeouts, failures = t$failures,
                        requests = t$requests)
  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
      to = as.Date(end, "%Y-%m-%d"), by = "1 day")
  # setdiff() drops the Date class, hence the conversion back below.
  missing <- setdiff(dates, torperf$date)
  if (length(missing) > 0) {
    torperf <- rbind(torperf,
        data.frame(date = as.Date(missing, origin = "1970-01-01"),
        timeouts = NA, failures = NA, requests = NA))
  }
  filesizes <- data.frame(filesizes = c("5mb", "1mb", "50kb"),
      label = c("5 MiB", "1 MiB", "50 KiB"), stringsAsFactors = FALSE)
  # Use the column's full name instead of relying on `$` partial matching.
  filesizeStr <- filesizes[filesizes$filesizes == filesize, "label"]
  # Long format: one row per day and problem type.
  torperf <- rbind(data.frame(date = torperf$date,
      value = ifelse(torperf$requests > 0,
                     torperf$timeouts / torperf$requests, 0),
      variable = "timeouts"),
    data.frame(date = torperf$date,
      value = ifelse(torperf$requests > 0,
                     torperf$failures / torperf$requests, 0),
      variable = "failures"))
  date_breaks <- date_breaks(
    as.numeric(max(as.Date(torperf$date, "%Y-%m-%d")) -
    min(as.Date(torperf$date, "%Y-%m-%d"))))
  failures_plot <- ggplot(torperf, aes(x = as.Date(date, "%Y-%m-%d"),
      y = value, colour = variable)) +
    geom_point(size = 2) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor) +
    scale_y_continuous(name = "", labels = percent) +
    scale_colour_hue(name = paste("Problems encountered on",
        ifelse(source == "all", "all sources", source)),
        h.start = 45, breaks = c("timeouts", "failures"),
        labels = c("Timeouts", "Failures")) +
    ggtitle(paste("Timeouts and failures of", filesizeStr,
        "requests to", server, "server")) +
    theme(legend.position = "top")
  ggsave(filename = path, plot = failures_plot, width = 8, height = 5,
         dpi = 150)
}

# Plot the fraction of connections used uni- or bidirectionally per day
# and save the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at two days before the current date.
# path:       file name passed to ggsave().
#
# connbidirect2.csv is reshaped so that each quantile becomes a column
# named "X<quantile>". The line shows column X0.5 and the ribbon spans
# X0.25 to X0.75, per direction ("both", "write", "read"). The fraction
# column is divided by 100 before plotting on a percent axis.
plot_connbidirect <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  conn <- read.csv(
    paste("/srv/metrics.torproject.org/metrics/shared/stats/",
          "connbidirect2.csv", sep = ""), stringsAsFactors = FALSE)
  conn <- conn[conn$date >= start & conn$date <= end, ]
  conn <- data.frame(date = as.Date(conn$date),
                     direction = factor(conn$direction,
                                 levels = c("both", "write", "read")),
                     quantile = paste("X", conn$quantile, sep = ""),
                     fraction = conn$fraction / 100)
  conn <- cast(conn, date + direction ~ quantile, value = "fraction")
  date_breaks <- date_breaks(
    as.numeric(max(as.Date(conn$date, "%Y-%m-%d")) -
    min(as.Date(conn$date, "%Y-%m-%d"))))
  conn_plot <- ggplot(conn, aes(x = date, y = X0.5,
                                colour = direction)) +
    geom_line(size = 0.75) +
    # `show.legend` replaces the deprecated `show_guide` argument.
    geom_ribbon(aes(x = date, ymin = X0.25, ymax = X0.75,
                fill = direction), alpha = 0.5, show.legend = FALSE) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor) +
    scale_y_continuous(name = "", labels = percent) +
    scale_colour_hue(name = "Medians and interquartile ranges",
                     breaks = c("both", "write", "read"),
        labels = c("Both reading and writing", "Mostly writing",
                   "Mostly reading")) +
    scale_fill_hue(name = "Medians and interquartile ranges",
                   breaks = c("both", "write", "read"),
        labels = c("Both reading and writing", "Mostly writing",
                   "Mostly reading")) +
    ggtitle("Fraction of connections used uni-/bidirectionally\n") +
    theme(legend.position = "top")
  ggsave(filename = path, plot = conn_plot, width = 8, height = 5,
         dpi = 150)
}

# Plot advertised bandwidth and bandwidth history per day for relays with
# the Guard flag and for relays with the Exit flag, and save the graph.
#
# start, end: first and last date to plot, as "YYYY-MM-DD" strings; `end`
#             is capped at four days before the current date.
# path:       file name passed to ggsave().
#
# Uses the rows of bandwidth.csv with non-empty isexit and isguard columns;
# a value of 't' counts as the flag being set. A row with both flags set
# contributes to both the Guard and the Exit sums. Sums that are not
# greater than zero are dropped. Days in the interval that are then absent
# get NA rows for all four series.
plot_bandwidth_flags <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 4))
  raw <- read.csv(
    paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
           "bandwidth.csv"),
    stringsAsFactors = FALSE)
  in_interval <- raw$date >= start & raw$date <= end
  flagged <- raw$isexit != "" & raw$isguard != ""
  raw <- raw[in_interval & flagged, ]
  per_combo <- data.frame(
    date = as.Date(raw$date, "%Y-%m-%d"),
    isexit = raw$isexit == "t", isguard = raw$isguard == "t",
    advbw = raw$advbw,
    bwhist = floor((raw$bwread + raw$bwwrite) / 2))
  guards <- data.frame(per_combo[per_combo$isguard == TRUE, ],
                       flag = "Guard")
  exits <- data.frame(per_combo[per_combo$isexit == TRUE, ],
                      flag = "Exit")
  per_flag <- rbind(guards, exits)
  # Sum both measures per day and flag.
  sums <- aggregate(
    list(advbw = per_flag$advbw, bwhist = per_flag$bwhist),
    by = list(date = per_flag$date, flag = per_flag$flag),
    FUN = sum, na.rm = TRUE, na.action = NULL)
  long <- melt(sums, id.vars = c("date", "flag"))
  long <- data.frame(
    date = long$date,
    type = ifelse(long$variable == "advbw", "advertised bandwidth",
                  "bandwidth history"),
    flag = long$flag, value = long$value)
  bandwidth <- long[long$value > 0, ]
  # The axis breaks are derived from the dates present before the NA
  # filler rows are added below.
  plotted_days <- as.Date(bandwidth$date, "%Y-%m-%d")
  axis_breaks <- date_breaks(
    as.numeric(max(plotted_days) - min(plotted_days)))
  all_days <- seq(from = as.Date(start, "%Y-%m-%d"),
                  to = as.Date(end, "%Y-%m-%d"), by = "1 day")
  # setdiff() drops the Date class, hence the conversion back below.
  absent <- setdiff(all_days,
                    as.Date(bandwidth$date, origin = "1970-01-01"))
  if (length(absent) > 0) {
    absent_days <- as.Date(absent, origin = "1970-01-01")
    # NA rows for the absent days in the given series.
    na_rows <- function(series_type, series_flag) {
      data.frame(date = absent_days, type = series_type,
                 flag = series_flag, value = NA)
    }
    bandwidth <- rbind(bandwidth,
                       na_rows("advertised bandwidth", "Exit"),
                       na_rows("bandwidth history", "Exit"),
                       na_rows("advertised bandwidth", "Guard"),
                       na_rows("bandwidth history", "Guard"))
  }
  series <- as.factor(paste0(bandwidth$flag, ", ", bandwidth$type))
  bandwidth <- data.frame(date = bandwidth$date, variable = series,
                          value = bandwidth$value)
  # Move the third and fourth factor levels in front of the first two.
  bandwidth$variable <- factor(
    bandwidth$variable,
    levels = levels(bandwidth$variable)[c(3, 4, 1, 2)])
  x_title <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  y_limits <- c(0, max(bandwidth$value, na.rm = TRUE) * 8 / 1e9)
  chart <- ggplot(bandwidth,
                  aes(x = as.Date(date, "%Y-%m-%d"),
                      y = value * 8 / 1e9, colour = variable)) +
    geom_line(size = 1) +
    scale_x_date(name = x_title,
                 labels = date_format(axis_breaks$format),
                 date_breaks = axis_breaks$major,
                 date_minor_breaks = axis_breaks$minor) +
    scale_y_continuous(name = "Bandwidth (Gbit/s)", limits = y_limits) +
    scale_colour_manual(
      name = "",
      values = c("#E69F00", "#D6C827", "#009E73", "#00C34F")) +
    ggtitle(paste("Advertised bandwidth and bandwidth history by",
                  "relay flags")) +
    theme(legend.position = "top")
  ggsave(filename = path, plot = chart, width = 8, height = 5, dpi = 150)
}

# Plots estimated user numbers and saves the graph to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the date column of
#     the loaded data; end is capped at two days before today.
#   node: inserted into the RData file name ("clients-<node>.RData"); the
#     value 'relay' selects the "Directly connecting users" branch, every
#     other value is handled by the bridge branches below.
#   variable: for non-relay nodes, 'transport' or 'version' select the
#     respective breakdown; anything else is treated as a country breakdown.
#   value: the country code, transport name(s) or IP version to plot; 'all'
#     selects the overall total in the country branches.  Only the transport
#     branch supports more than one value.
#   events: "off" disables event markers; any other value marks days whose
#     user count lies outside [lower, upper], and "on" additionally draws
#     that range as a gray ribbon.  Only applied to single-country graphs.
#   path: output file handed to ggsave().
#
# The loaded RData file is expected to define a data frame named `data`
# with columns date, country, transport, version, lower, upper and clients.
plot_userstats <- function(start, end, node, variable, value, events,
                           path) {
  end <- min(end, as.character(Sys.Date() - 2))
  load(paste("/srv/metrics.torproject.org/metrics/shared/RData/clients-",
             node, ".RData", sep = ""))
  # Subset `data` directly instead of copying it into a local called `c`,
  # which would mask base::c() for readers of the code below.
  u <- data[data$date >= start & data$date <= end, ]
  # Append an all-zero row at the start date so that the selection below
  # is not empty even if no data matches.
  u <- rbind(u, data.frame(date = start,
      country = ifelse(variable == 'country' & value != 'all', value, ''),
      transport = ifelse(variable == 'transport', value, ''),
      version = ifelse(variable == 'version', value, ''),
      lower = 0, upper = 0, clients = 0))
  if (node == 'relay') {
    if (value != 'all') {
      u <- u[u$country == value, ]
      title <- paste("Directly connecting users from ",
                     countryname(value), "\n", sep = "")
    } else {
      u <- u[u$country == '', ]
      title <- "Directly connecting users\n"
    }
    u <- aggregate(list(lower = u$lower, upper = u$upper,
                        users = u$clients),
                   by = list(date = as.Date(u$date, "%Y-%m-%d"),
                             value = u$country),
                   FUN = sum)
  } else if (variable == 'transport') {
    if ('!<OR>' %in% value) {
      # Synthesize the pseudo-transport '!<OR>' as the per-day sum over all
      # transports other than '<OR>'.
      n <- u[u$transport != '' & u$transport != '<OR>', ]
      n <- aggregate(list(lower = n$lower, upper = n$upper,
                          clients = n$clients),
                     by = list(date = n$date),
                     FUN = sum)
      u <- rbind(u, data.frame(date = n$date,
                               country = '', transport = '!<OR>',
                               version = '', lower = n$lower,
                               upper = n$upper, clients = n$clients))
    }
    if (length(value) > 1) {
      u <- u[u$transport %in% value, ]
      u <- aggregate(list(lower = u$lower, upper = u$upper,
                          users = u$clients),
                     by = list(date = as.Date(u$date, "%Y-%m-%d"),
                               value = u$transport),
                     FUN = sum)
      title <- paste("Bridge users by transport\n")
    } else {
      u <- u[u$transport == value, ]
      u <- aggregate(list(lower = u$lower, upper = u$upper,
                          users = u$clients),
                     by = list(date = as.Date(u$date, "%Y-%m-%d"),
                               value = u$transport),
                     FUN = sum)
      title <- paste("Bridge users using ",
               ifelse(value == '<??>', 'unknown pluggable transport(s)',
               ifelse(value == '<OR>', 'default OR protocol',
               ifelse(value == '!<OR>', 'any pluggable transport',
               ifelse(value == 'fte', 'FTE',
               ifelse(value == 'websocket', 'Flash proxy/websocket',
               paste('transport', value)))))), "\n", sep = "")
    }
  } else if (variable == 'version') {
    u <- u[u$version == value, ]
    title <- paste("Bridge users using IP", value, "\n", sep = "")
    u <- aggregate(list(lower = u$lower, upper = u$upper,
                        users = u$clients),
                   by = list(date = as.Date(u$date, "%Y-%m-%d"),
                             value = u$version),
                   FUN = sum)
  } else {
    if (value != 'all') {
      u <- u[u$country == value, ]
      title <- paste("Bridge users from ", countryname(value),
                     "\n", sep = "")
    } else {
      u <- u[u$country == '' & u$transport == '' & u$version == '', ]
      title <- "Bridge users\n"
    }
    u <- aggregate(list(lower = u$lower, upper = u$upper,
                        users = u$clients),
                   by = list(date = as.Date(u$date, "%Y-%m-%d"),
                             value = u$country),
                   FUN = sum)
  }
  # Right-join against the full date x value grid so that days without
  # data show up as NA rows (gaps in the line) rather than being skipped.
  u <- merge(x = u, all.y = TRUE, y = data.frame(expand.grid(
             date = seq(from = as.Date(start, "%Y-%m-%d"),
             to = as.Date(end, "%Y-%m-%d"), by="1 day"),
             value = ifelse(value == 'all', '', value))))
  date_breaks <- date_breaks(
    as.numeric(max(u$date) - min(u$date)))
  if (length(value) > 1) {
    plot <- ggplot(u, aes(x = date, y = users, colour = value))
  } else {
    plot <- ggplot(u, aes(x = date, y = users))
  }
  # Scalar condition: use short-circuiting && throughout so that
  # value != "all" is only evaluated once value is known to be length 1.
  if (length(na.omit(u$users)) > 0 && events != "off" &&
      variable == 'country' && length(value) == 1 && value != "all") {
    # which() drops comparisons that are NA (days without data); indexing
    # with the raw logical vector would add all-NA rows to both subsets.
    upturns <- u[which(u$users > u$upper), c("date", "users")]
    downturns <- u[which(u$users < u$lower), c("date", "users")]
    if (events == "on") {
      # Clamp the lower bound at zero before drawing the expected range.
      u[!is.na(u$lower) & u$lower < 0, "lower"] <- 0
      plot <- plot +
        geom_ribbon(data = u, aes(ymin = lower, ymax = upper), fill = "gray")
    }
    if (length(upturns$date) > 0)
      plot <- plot +
          geom_point(data = upturns, aes(x = date, y = users), size = 5,
          colour = "dodgerblue2")
    if (length(downturns$date) > 0)
      plot <- plot +
          geom_point(data = downturns, aes(x = date, y = users), size = 5,
          colour = "firebrick2")
  }
  plot <- plot +
    geom_line(size = 1) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor) +
    scale_y_continuous(name = "", labels = formatter) +
    expand_limits(y = 0) +
    ggtitle(title)
  if (length(value) > 1) {
    plot <- plot +
      scale_colour_hue(name = "", breaks = value,
            labels = ifelse(value == '<??>', 'Unknown PT',
                     ifelse(value == '<OR>', 'Default OR protocol',
                     ifelse(value == '!<OR>', 'Any PT',
                     ifelse(value == 'fte', 'FTE',
                     ifelse(value == 'websocket', 'Flash proxy/websocket',
                     value))))))
  }
  # ggsave() is called without a plot argument, so it saves the most
  # recently created or modified ggplot, i.e. `plot` as built above.
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots directly connecting users for one country (or 'all') by delegating
# to plot_userstats() with node 'relay'; events is passed through unchanged.
plot_userstats_relay_country <- function(start, end, country, events,
    path) {
  plot_userstats(start = start, end = end, node = 'relay',
                 variable = 'country', value = country, events = events,
                 path = path)
}

# Plots bridge users for one country (or 'all') by delegating to
# plot_userstats() with node 'bridge' and event markers switched off.
plot_userstats_bridge_country <- function(start, end, country, path) {
  plot_userstats(start = start, end = end, node = 'bridge',
                 variable = 'country', value = country, events = 'off',
                 path = path)
}

# Plots bridge users by transport by delegating to plot_userstats() with
# node 'bridge' and event markers switched off.  transport is handed over
# as the value argument.
plot_userstats_bridge_transport <- function(start, end, transport, path) {
  plot_userstats(start = start, end = end, node = 'bridge',
                 variable = 'transport', value = transport, events = 'off',
                 path = path)
}

# Plots bridge users by IP version by delegating to plot_userstats() with
# node 'bridge' and event markers switched off.
plot_userstats_bridge_version <- function(start, end, version, path) {
  plot_userstats(start = start, end = end, node = 'bridge',
                 variable = 'version', value = version, events = 'off',
                 path = path)
}

# Plots bridge users from one country broken down by its most-used
# transports, as low/high ribbons, and saves the graph to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the date column of
#     the loaded data; end is capped at two days before today.
#   country: country code to plot.  For "all" the call is forwarded to
#     plot_userstats_bridge_country() instead.
#   path: output file handed to ggsave().
#
# The loaded RData file is expected to define a data frame named `data`
# with columns date, country, transport, low and high.
plot_userstats_bridge_combined <- function(start, end, country, path) {
  if (country == "all") {
    plot_userstats_bridge_country(start, end, country, path)
  } else {
    top <- 3
    # country is known not to be "all" here, so no NA placeholder for
    # "all countries" is needed and the filter can compare directly.
    end <- min(end, as.character(Sys.Date() - 2))
    load(paste("/srv/metrics.torproject.org/metrics/shared/RData/",
               "userstats-bridge-combined.RData", sep = ""))
    u <- data
    u <- u[u$date >= start & u$date <= end & u$country == country, ]
    # Rank transports by the summed midpoint of their low/high estimates.
    a <- aggregate(list(mid = (u$high + u$low) / 2),
                   by = list(transport = u$transport), FUN = sum)
    # head() keeps at most `top` positions; indexing with 1:top would
    # produce NA rows whenever fewer than `top` transports are present.
    a <- a[head(order(a$mid, decreasing = TRUE), top), ]
    u <- u[u$transport %in% a$transport, ]
    max_y <- ifelse(length(na.omit(u$high)) == 0, 0,
        max(u$high, na.rm = TRUE))
    title <- paste("Bridge users by transport from ",
                   countryname(country), sep = "")
    date_breaks <- date_breaks(
      as.numeric(max(as.Date(u$date, "%Y-%m-%d")) -
      min(as.Date(u$date, "%Y-%m-%d"))))
    ggplot(u, aes(x = as.Date(date), ymin = low, ymax = high,
                colour = transport, fill = transport)) +
    geom_ribbon(alpha = 0.5, size = 0.5) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor) +
    scale_y_continuous(name = "", limits = c(0, max_y),
        labels = formatter) +
    scale_colour_hue(paste("Top-", top, " transports", sep = "")) +
    scale_fill_hue(paste("Top-", top, " transports", sep = "")) +
    ggtitle(title) +
    theme(legend.position = "top")
    ggsave(filename = path, width = 8, height = 5, dpi = 150)
  }
}

# Plots selected percentiles of the advertised bandwidth distribution over
# time, faceted into "All relays" and "Exits only", and saves it to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the CSV's date
#     column; end is capped at two days before today.
#   p: percentiles to include; coerced with as.numeric() before matching.
#   path: output file handed to ggsave().
plot_advbwdist_perc <- function(start, end, p, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  csv_file <- paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
                     "advbwdist.csv")
  raw <- read.csv(csv_file, stringsAsFactors = FALSE)
  wanted <- raw$date >= start & raw$date <= end &
    raw$percentile %in% as.numeric(p)
  raw <- raw[wanted, ]
  # Scale by 8 / 1e9 to match the Gbit/s axis label (presumably the CSV
  # holds bytes per second -- confirm) and label rows by the isexit flag.
  bw <- data.frame(date = raw$date, advbw = raw$advbw * 8 / 1e9,
                   variable = ifelse(raw$isexit != "t", "All relays",
                                     "Exits only"),
                   percentile = as.factor(raw$percentile))
  days <- as.Date(bw$date, "%Y-%m-%d")
  breaks <- date_breaks(as.numeric(max(days) - min(days)))
  x_label <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  y_top <- max(bw$advbw, na.rm = TRUE)
  ggplot(bw, aes(x = as.Date(date), y = advbw, colour = percentile)) +
    facet_grid(variable ~ .) +
    geom_line(size = 0.75) +
    scale_x_date(name = x_label,
                 labels = date_format(breaks$format),
                 date_breaks = breaks$major,
                 date_minor_breaks = breaks$minor) +
    scale_y_continuous(name = "Advertised bandwidth in Gbit/s\n",
                       limits = c(0, y_top)) +
    # Legend entries are listed in reverse level order.
    scale_colour_hue(name = "Percentile",
                     breaks = rev(levels(bw$percentile))) +
    ggtitle("Advertised bandwidth distribution\n")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots the advertised bandwidth of the n-th fastest relays over time,
# faceted into "All relays" and "Exits only", and saves it to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the CSV's date
#     column; end is capped at two days before today.
#   n: relay ranks to include; coerced with as.numeric() before matching
#     against the CSV's relay column.
#   path: output file handed to ggsave().
plot_advbwdist_relay <- function(start, end, n, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  csv_file <- paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
                     "advbwdist.csv")
  raw <- read.csv(csv_file, stringsAsFactors = FALSE)
  wanted <- raw$date >= start & raw$date <= end &
    raw$relay %in% as.numeric(n)
  raw <- raw[wanted, ]
  # Scale by 8 / 1e9 to match the Gbit/s axis label (presumably the CSV
  # holds bytes per second -- confirm) and label rows by the isexit flag.
  bw <- data.frame(date = raw$date, advbw = raw$advbw * 8 / 1e9,
                   variable = ifelse(raw$isexit != "t", "All relays",
                                     "Exits only"),
                   relay = as.factor(raw$relay))
  days <- as.Date(bw$date, "%Y-%m-%d")
  breaks <- date_breaks(as.numeric(max(days) - min(days)))
  x_label <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  y_top <- max(bw$advbw, na.rm = TRUE)
  ggplot(bw, aes(x = as.Date(date), y = advbw, colour = relay)) +
    facet_grid(variable ~ .) +
    geom_line(size = 0.75) +
    scale_x_date(name = x_label,
                 labels = date_format(breaks$format),
                 date_breaks = breaks$major,
                 date_minor_breaks = breaks$minor) +
    scale_y_continuous(name = "Advertised bandwidth in Gbit/s\n",
                       limits = c(0, y_top)) +
    scale_colour_hue(name = "n", breaks = levels(bw$relay)) +
    ggtitle("Advertised bandwidth of n-th fastest relays\n")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots the "dir-onions-seen" statistic (unique .onion addresses) over time
# and saves the graph to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the CSV's date
#     column; end is capped at two days before today.
#   path: output file handed to ggsave().
plot_hidserv_dir_onions_seen <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  csv_file <- paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
                     "hidserv.csv")
  stats <- read.csv(csv_file, stringsAsFactors = FALSE)
  in_range <- stats$date >= start & stats$date <= end
  stats <- stats[in_range & stats$type == "dir-onions-seen", ]
  # Dateless zero row placed first; presumably it keeps zero inside the
  # y-axis range -- confirm.
  anchor <- data.frame(date = NA, wiqm = 0)
  # Values are blanked out on days where frac is below 0.01.
  observed <- data.frame(date = as.Date(stats$date, "%Y-%m-%d"),
                         wiqm = ifelse(stats$frac >= 0.01, stats$wiqm, NA))
  h <- rbind(anchor, observed)
  span <- as.numeric(max(h$date, na.rm = TRUE)
                     - min(h$date, na.rm = TRUE))
  breaks <- date_breaks(span)
  x_label <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  # The date column is converted back to Date with an explicit origin
  # inside aes().
  ggplot(h, aes(x = as.Date(date, origin = "1970-01-01"), y = wiqm)) +
    geom_line(size = 0.75) +
    scale_x_date(name = x_label,
                 labels = date_format(breaks$format),
                 date_breaks = breaks$major,
                 date_minor_breaks = breaks$minor) +
    scale_y_continuous(name = "") +
    ggtitle("Unique .onion addresses\n")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots the "rend-relayed-cells" statistic as onion-service traffic in
# Mbit/s over time and saves the graph to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the CSV's date
#     column; end is capped at two days before today.
#   path: output file handed to ggsave().
plot_hidserv_rend_relayed_cells <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  csv_file <- paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
                     "hidserv.csv")
  stats <- read.csv(csv_file, stringsAsFactors = FALSE)
  in_range <- stats$date >= start & stats$date <= end
  stats <- stats[in_range & stats$type == "rend-relayed-cells", ]
  # Dateless zero row placed first; presumably it keeps zero inside the
  # y-axis range -- confirm.
  anchor <- data.frame(date = NA, wiqm = 0)
  # Values are blanked out on days where frac is below 0.01.
  observed <- data.frame(date = as.Date(stats$date, "%Y-%m-%d"),
                         wiqm = ifelse(stats$frac >= 0.01, stats$wiqm, NA))
  h <- rbind(anchor, observed)
  span <- as.numeric(max(h$date, na.rm = TRUE)
                     - min(h$date, na.rm = TRUE))
  breaks <- date_breaks(span)
  x_label <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  # y scaling: 512 * 8 per cell, divided by 86400 and by 1e6, to match the
  # Mbit/s title (presumably 512-byte cells counted per day -- confirm).
  ggplot(h, aes(x = as.Date(date, origin = "1970-01-01"),
      y = wiqm * 8 * 512 / (86400 * 1e6))) +
    geom_line(size = 0.75) +
    scale_x_date(name = x_label,
                 labels = date_format(breaks$format),
                 date_breaks = breaks$major,
                 date_minor_breaks = breaks$minor) +
    scale_y_continuous(name = "") +
    ggtitle("Onion-service traffic in Mbit/s\n")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots, per statistic type, the fraction of relays reporting onion-service
# statistics over time, with a dashed reference line at 0.01, and saves the
# graph to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the CSV's date
#     column; end is capped at two days before today.
#   path: output file handed to ggsave().
plot_hidserv_frac_reporting <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  csv_file <- paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
                     "hidserv.csv")
  stats <- read.csv(csv_file, stringsAsFactors = FALSE)
  stats <- stats[stats$date >= start & stats$date <= end, ]
  # One dateless zero row per type, placed first; presumably these keep
  # zero inside the y-axis range and both types in the legend -- confirm.
  anchors <- data.frame(date = NA, frac = 0,
                        type = c("rend-relayed-cells",
                                 "dir-onions-seen"))
  observed <- data.frame(date = as.Date(stats$date, "%Y-%m-%d"),
                         frac = stats$frac, type = stats$type)
  h <- rbind(anchors, observed)
  span <- as.numeric(max(h$date, na.rm = TRUE)
                     - min(h$date, na.rm = TRUE))
  breaks <- date_breaks(span)
  x_label <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  ggplot(h, aes(x = as.Date(date, origin = "1970-01-01"), y = frac,
      colour = type)) +
    geom_line(size = 0.75) +
    geom_hline(yintercept = 0.01, linetype = 2) +
    scale_x_date(name = x_label,
                 labels = date_format(breaks$format),
                 date_breaks = breaks$major,
                 date_minor_breaks = breaks$minor) +
    scale_y_continuous(name = "", labels = percent) +
    scale_colour_hue(name = "",
                     breaks = c("rend-relayed-cells", "dir-onions-seen"),
                     labels = c("Onion-service traffic",
                                "Unique .onion addresses")) +
    ggtitle(paste("Fraction of relays reporting onion-service",
                       "statistics")) +
    theme(legend.position = "top")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots daily Tor Browser request counts, one facet per request type, and
# saves the graph to path.
#
# Arguments:
#   start, end: date bounds compared against the log_date column; end is
#     capped at two days before today.
#   path: output file handed to ggsave().
#
# The loaded RData file is expected to define a data frame named `data`
# with columns log_date, request_type and count.
plot_webstats_tb <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  load("/srv/metrics.torproject.org/metrics/shared/RData/webstats-tb.RData")
  requests <- data
  in_range <- requests$log_date >= start & requests$log_date <= end
  requests <- requests[in_range, ]
  span <- as.numeric(max(requests$log_date) - min(requests$log_date))
  breaks <- date_breaks(span)
  # Replace the request type codes with human-readable facet labels, in
  # the order the facets should appear.
  requests$request_type <- factor(requests$request_type)
  levels(requests$request_type) <- list(
      'Initial downloads' = 'tbid',
      'Signature downloads' = 'tbsd',
      'Update pings' = 'tbup',
      'Update requests' = 'tbur')
  x_label <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  facet_theme <- theme(
      strip.text.y = element_text(angle = 0, hjust = 0, size = rel(1.5)),
      strip.background = element_rect(fill = NA))
  ggplot(requests, aes(x = log_date, y = count)) +
    geom_point() +
    geom_line() +
    expand_limits(y = 0) +
    facet_grid(request_type ~ ., scales = "free_y") +
    scale_x_date(name = x_label,
                 labels = date_format(breaks$format),
                 date_breaks = breaks$major,
                 date_minor_breaks = breaks$minor) +
    scale_y_continuous(name = 'Requests per day\n', labels = formatter) +
    facet_theme +
    ggtitle("Tor Browser downloads and updates\n")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots daily Tor Browser initial downloads (request type 'tbid') broken
# down by platform and saves the graph to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the CSV's
#     log_date column; end is capped at two days before today.
#   path: output file handed to ggsave().
plot_webstats_tb_platform <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  csv_file <- paste0("/srv/metrics.torproject.org/metrics/shared/stats/",
                     "webstats.csv")
  logs <- read.csv(csv_file, stringsAsFactors = FALSE)
  wanted <- logs$log_date >= start & logs$log_date <= end &
    logs$request_type == 'tbid'
  logs <- logs[wanted, ]
  # Sum request counts per day and platform.
  daily <- aggregate(list(count = logs$count),
                     by = list(log_date = as.Date(logs$log_date),
                               platform = logs$platform),
                     FUN = sum)
  span <- as.numeric(max(daily$log_date) - min(daily$log_date))
  breaks <- date_breaks(span)
  x_label <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  strip_theme <- theme(
      strip.text.y = element_text(angle = 0, hjust = 0, size = rel(1.5)),
      strip.background = element_rect(fill = NA))
  ggplot(daily, aes(x = log_date, y = count, colour = platform)) +
    geom_point() +
    geom_line() +
    expand_limits(y = 0) +
    scale_x_date(name = x_label,
                 labels = date_format(breaks$format),
                 date_breaks = breaks$major,
                 date_minor_breaks = breaks$minor) +
    scale_y_continuous(name = 'Requests per day\n', labels = formatter) +
    # Map the single-letter platform codes to legend labels.
    scale_colour_hue(name = "Platform",
        breaks = c("w", "m", "l", "o", ""),
        labels = c("Windows", "Mac", "Linux", "Other", "Unknown")) +
    strip_theme +
    ggtitle("Tor Browser downloads by platform\n")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots daily Tor Browser initial downloads (request type 'tbid') for the
# five most requested locales, with all remaining locales combined into
# "Other", and saves the graph to path.
#
# Arguments:
#   start, end: date bounds, compared as strings against the CSV's
#     log_date column; end is capped at two days before today.
#   path: output file handed to ggsave().
plot_webstats_tb_locale <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  d <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/",
                "webstats.csv", sep = ""), stringsAsFactors = FALSE)
  d <- d[d$log_date >= start & d$log_date <= end & d$request_type == 'tbid', ]
  # Rank locales by their total count over the whole interval.
  e <- aggregate(list(count = d$count), by = list(locale = d$locale),
                 FUN = sum)
  e <- e[order(e$count, decreasing = TRUE), ]
  # head() keeps at most five rows; indexing with 1:5 would append NA rows
  # (and thus NA legend breaks and labels) when fewer locales are present.
  e <- head(e, 5)
  d <- aggregate(list(count = d$count),
                 by = list(log_date = as.Date(d$log_date),
                           locale = ifelse(d$locale %in% e$locale,
                                           d$locale, '(other)')),
                 FUN = sum)
  date_breaks <- date_breaks(as.numeric(max(d$log_date) - min(d$log_date)))
  ggplot(d, aes(x = log_date, y = count, colour = locale)) +
    geom_point() +
    geom_line() +
    expand_limits(y = 0) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        labels = date_format(date_breaks$format),
        date_breaks = date_breaks$major,
        date_minor_breaks = date_breaks$minor) +
    scale_y_continuous(name = 'Requests per day\n', labels = formatter) +
    scale_colour_hue(name = "Locale",
        breaks = c(e$locale, "(other)"),
        labels = c(e$locale, "Other")) +
    theme(strip.text.y = element_text(angle = 0, hjust = 0, size = rel(1.5)),
          strip.background = element_rect(fill = NA)) +
    ggtitle("Tor Browser downloads by locale\n")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

# Plots daily Tor Messenger request counts, one facet per request type,
# and saves the graph to path.
#
# Arguments:
#   start, end: date bounds compared against the log_date column; end is
#     capped at two days before today.
#   path: output file handed to ggsave().
#
# The loaded RData file is expected to define a data frame named `data`
# with columns log_date, request_type and count.
plot_webstats_tm <- function(start, end, path) {
  end <- min(end, as.character(Sys.Date() - 2))
  load("/srv/metrics.torproject.org/metrics/shared/RData/webstats-tm.RData")
  requests <- data
  in_range <- requests$log_date >= start & requests$log_date <= end
  requests <- requests[in_range, ]
  span <- as.numeric(max(requests$log_date) - min(requests$log_date))
  breaks <- date_breaks(span)
  # Replace the request type codes with human-readable facet labels, in
  # the order the facets should appear.
  requests$request_type <- factor(requests$request_type)
  levels(requests$request_type) <- list(
      'Initial downloads' = 'tmid',
      'Update pings' = 'tmup')
  x_label <- paste0("\nThe Tor Project - ",
                    "https://metrics.torproject.org/")
  facet_theme <- theme(
      strip.text.y = element_text(angle = 0, hjust = 0, size = rel(1.5)),
      strip.background = element_rect(fill = NA))
  ggplot(requests, aes(x = log_date, y = count)) +
    geom_point() +
    geom_line() +
    expand_limits(y = 0) +
    facet_grid(request_type ~ ., scales = "free_y") +
    scale_x_date(name = x_label,
                 labels = date_format(breaks$format),
                 date_breaks = breaks$major,
                 date_minor_breaks = breaks$minor) +
    scale_y_continuous(name = 'Requests per day\n', labels = formatter) +
    facet_theme +
    ggtitle("Tor Messenger downloads and updates\n")
  ggsave(filename = path, width = 8, height = 5, dpi = 150)
}

