Commit 59b800a4 authored by Karsten Loesing
Browse files

Remove bandwidth and bwhist-flags graphs.

The "Advertised and consumed bandwidth by relay flags" graph now
contains everything that's contained in the "Total relay bandwidth"
and the "Consumed bandwidth by Exit/Guard flag combination" graphs.
Removing these two graphs as obsolete.

Also update documentation for the newly deployed "Advertised and
consumed bandwidth by relay flags" graph.

Part of #28353.
parent 6b5f7599
Loading
Loading
Loading
Loading
+0 −79
Original line number Diff line number Diff line
@@ -448,85 +448,6 @@ write_platforms <- function(start_p = NULL, end_p = NULL, path_p) {
    write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}

# Build the data behind the "Total relay bandwidth" graph.
#
# Reads "advbw.csv" and "bandwidth.csv" from stats_dir (defined elsewhere in
# this file; the path is concatenated without a separator, so stats_dir is
# presumably expected to end in "/" -- TODO confirm).
#
# start_p, end_p: optional inclusive date bounds, anything as.Date() accepts;
#   NULL disables the respective bound.
#
# Returns one row per date with one column per series ("advbw" and "bwhist").
# Values are scaled by 8 / 1e9; the plot labels them "Gbit/s", so the inputs
# are presumably bytes per second -- verify against the CSV producer.
prepare_bandwidth <- function(start_p, end_p) {
  advertised <- paste0(stats_dir, "advbw.csv") %>%
    read.csv(colClasses = c("date" = "Date")) %>%
    transmute(date, variable = "advbw", value = advbw * 8 / 1e9)
  # Consumed bandwidth is the mean of read and written bytes, hence the
  # extra factor of 2 in the divisor.
  consumed <- paste0(stats_dir, "bandwidth.csv") %>%
    read.csv(colClasses = c("date" = "Date")) %>%
    transmute(date, variable = "bwhist", value = (bwread + bwwrite) * 8 / 2e9)

  combined <- rbind(advertised, consumed)
  if (!is.null(start_p)) {
    combined <- filter(combined, date >= as.Date(start_p))
  }
  if (!is.null(end_p)) {
    combined <- filter(combined, date <= as.Date(end_p))
  }

  # Sum all rows of the same series on the same day, then go wide so that
  # each series becomes its own column.
  combined %>%
    filter(!is.na(value)) %>%
    group_by(date, variable) %>%
    summarize(value = sum(value)) %>%
    spread(variable, value)
}

# Render the "Total relay bandwidth" graph and save it to path_p.
#
# start_p, end_p: optional inclusive date bounds passed on to
#   prepare_bandwidth(); NULL disables the respective bound.
# path_p: output file name handed to ggsave(), which picks the graphics
#   device from the file extension.
#
# Axis break/label helpers and copyright_notice are defined elsewhere in this
# file.
plot_bandwidth <- function(start_p, end_p, path_p) {
  # Keep the plot in a variable and hand it to ggsave() explicitly instead of
  # relying on ggplot2's global last_plot() state.
  bandwidth_plot <- prepare_bandwidth(start_p, end_p) %>%
    gather(variable, value, -date) %>%
    ggplot(aes(x = date, y = value, colour = variable)) +
    geom_line() +
    scale_x_date(name = "", breaks = custom_breaks,
      labels = custom_labels, minor_breaks = custom_minor_breaks) +
    scale_y_continuous(name = "", labels = unit_format(unit = "Gbit/s"),
      limits = c(0, NA)) +
    scale_colour_hue(name = "", h.start = 90,
        breaks = c("advbw", "bwhist"),
        labels = c("Advertised bandwidth", "Bandwidth history")) +
    ggtitle("Total relay bandwidth") +
    labs(caption = copyright_notice) +
    theme(legend.position = "top")
  ggsave(filename = path_p, plot = bandwidth_plot, width = 8, height = 5,
    dpi = 150)
}

# Export the data behind the "Total relay bandwidth" graph as CSV.
#
# start_p, end_p: optional inclusive date bounds (default NULL = unbounded).
# path_p: destination file. Missing values are written as empty fields, and
#   neither quotes nor row names are emitted.
write_bandwidth <- function(start_p = NULL, end_p = NULL, path_p) {
  bandwidth <- prepare_bandwidth(start_p, end_p)
  write.csv(bandwidth, path_p, quote = FALSE, row.names = FALSE, na = "")
}

# Build the data behind the "Bandwidth history by relay flags" graph.
#
# Reads "bandwidth.csv" from stats_dir (defined elsewhere in this file) and
# keeps only rows where both the isexit and isguard fields are non-empty.
#
# start_p, end_p: optional inclusive date bounds, anything as.Date() accepts;
#   NULL disables the respective bound.
#
# Returns a long-format table with columns date, variable and value, where
# variable is one of "guard_and_exit", "exit_only", "guard_only" or
# "middle_only", and value is the mean of read and written bytes scaled by
# 8 / 1e9.
prepare_bwhist_flags <- function(start_p, end_p) {
  history <- read.csv(paste0(stats_dir, "bandwidth.csv"),
    colClasses = c("date" = "Date"))
  if (!is.null(start_p)) {
    history <- filter(history, date >= as.Date(start_p))
  }
  if (!is.null(end_p)) {
    history <- filter(history, date <= as.Date(end_p))
  }

  history %>%
    filter(isexit != "", isguard != "") %>%
    transmute(date,
      # Flags are encoded as "t" for true; anything else counts as not set.
      variable = ifelse(isexit == "t",
        ifelse(isguard == "t", "guard_and_exit", "exit_only"),
        ifelse(isguard == "t", "guard_only", "middle_only")),
      value = (bwread + bwwrite) * 8 / 2e9)
}

# Render the "Bandwidth history by relay flags" graph and save it to path_p.
#
# start_p, end_p: optional inclusive date bounds passed on to
#   prepare_bwhist_flags(); NULL disables the respective bound.
# path_p: output file name handed to ggsave(), which picks the graphics
#   device from the file extension.
#
# Axis break/label helpers and copyright_notice are defined elsewhere in this
# file.
plot_bwhist_flags <- function(start_p, end_p, path_p) {
  # Keep the plot in a variable and hand it to ggsave() explicitly instead of
  # relying on ggplot2's global last_plot() state.
  flags_plot <- prepare_bwhist_flags(start_p, end_p) %>%
    # Add a row for every day in the covered range and every series, with a
    # missing value where there was no data -- presumably so that such days
    # show up as gaps in the lines rather than being bridged.
    complete(date = full_seq(date, period = 1),
      variable = unique(variable)) %>%
    ggplot(aes(x = date, y = value, colour = variable)) +
    geom_line() +
    scale_x_date(name = "", breaks = custom_breaks,
      labels = custom_labels, minor_breaks = custom_minor_breaks) +
    scale_y_continuous(name = "", labels = unit_format(unit = "Gbit/s"),
      limits = c(0, NA)) +
    scale_colour_manual(name = "",
      breaks = c("exit_only", "guard_and_exit", "guard_only", "middle_only"),
      labels = c("Exit only", "Guard & Exit", "Guard only", "Middle only"),
      values = c("#E69F00", "#56B4E9", "#009E73", "#0072B2")) +
    ggtitle("Bandwidth history by relay flags") +
    labs(caption = copyright_notice) +
    theme(legend.position = "top")
  ggsave(filename = path_p, plot = flags_plot, width = 8, height = 5,
    dpi = 150)
}

# Export the data behind the "Bandwidth history by relay flags" graph as CSV.
#
# start_p, end_p: optional inclusive date bounds (default NULL = unbounded).
# path_p: destination file. The long-format data is spread so that each flag
#   combination becomes its own column; missing values are written as empty
#   fields, and neither quotes nor row names are emitted.
write_bwhist_flags <- function(start_p = NULL, end_p = NULL, path_p) {
  by_flags <- spread(prepare_bwhist_flags(start_p, end_p), variable, value)
  write.csv(by_flags, path_p, quote = FALSE, row.names = FALSE, na = "")
}

prepare_dirbytes <- function(start_p, end_p, path_p) {
  read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
    colClasses = c("date" = "Date")) %>%
+0 −8
Original line number Diff line number Diff line
@@ -30,8 +30,6 @@
    <url-pattern>/relayflags.html</url-pattern>
    <url-pattern>/versions.html</url-pattern>
    <url-pattern>/platforms.html</url-pattern>
    <url-pattern>/bandwidth.html</url-pattern>
    <url-pattern>/bwhist-flags.html</url-pattern>
    <url-pattern>/bandwidth-flags.html</url-pattern>
    <url-pattern>/dirbytes.html</url-pattern>
    <url-pattern>/advbwdist-perc.html</url-pattern>
@@ -119,12 +117,6 @@
    <url-pattern>/platforms.png</url-pattern>
    <url-pattern>/platforms.pdf</url-pattern>
    <url-pattern>/platforms.csv</url-pattern>
    <url-pattern>/bandwidth.png</url-pattern>
    <url-pattern>/bandwidth.pdf</url-pattern>
    <url-pattern>/bandwidth.csv</url-pattern>
    <url-pattern>/bwhist-flags.png</url-pattern>
    <url-pattern>/bwhist-flags.pdf</url-pattern>
    <url-pattern>/bwhist-flags.csv</url-pattern>
    <url-pattern>/bandwidth-flags.png</url-pattern>
    <url-pattern>/bandwidth-flags.pdf</url-pattern>
    <url-pattern>/bandwidth-flags.csv</url-pattern>
+0 −2
Original line number Diff line number Diff line
@@ -43,12 +43,10 @@
    "summary": "How much traffic the Tor network can handle and how much traffic there is.",
    "description": "We measure total available bandwidth and current capacity by aggregating what relays and bridges report to directory authorities.",
    "metrics": [
      "bandwidth",
      "bandwidth-flags",
      "advbw-ipv6",
      "advbwdist-perc",
      "advbwdist-relay",
      "bwhist-flags",
      "dirbytes",
      "connbidirect",
      "uncharted-data-flow"
+0 −22
Original line number Diff line number Diff line
@@ -66,28 +66,6 @@
      "end"
    ]
  },
  {
    "id": "bandwidth",
    "title": "Total relay bandwidth (deprecated)",
    "type": "Graph",
    "description": "<p>This graph shows the total <a href=\"glossary.html#advertised-bandwidth\">advertised</a> and <a href=\"glossary.html#bandwidth-history\">consumed bandwidth</a> of all <a href=\"glossary.html#relay\">relays</a> in the network. <strong>This graph will disappear by December 20, 2018, because it won't contain anything new compared to the soon-to-be tweaked <a href=\"/bandwidth-flags.html\">Advertised and consumed bandwidth by relay flags</a> graph.</strong></p>",
    "function": "bandwidth",
    "parameters": [
      "start",
      "end"
    ]
  },
  {
    "id": "bwhist-flags",
    "title": "Consumed bandwidth by Exit/Guard flag combination (deprecated)",
    "type": "Graph",
    "description": "<p>This graph shows the <a href=\"glossary.html#bandwidth-history\">consumed bandwidth</a> reported by relays, subdivided into four distinct subsets by assigned \"Exit\" and/or \"Guard\" <a href=\"glossary.html#relay-flag\">flags</a>. <strong>This graph will disappear by December 20, 2018, because it won't contain anything new compared to the soon-to-be tweaked <a href=\"/bandwidth-flags.html\">Advertised and consumed bandwidth by relay flags</a> graph.</strong></p>",
    "function": "bwhist_flags",
    "parameters": [
      "start",
      "end"
    ]
  },
  {
    "id": "bandwidth-flags",
    "title": "Advertised and consumed bandwidth by relay flags",
+7 −10
Original line number Diff line number Diff line
@@ -431,8 +431,7 @@ Relays self-report their advertised bandwidth in their server descriptors which
<p>The following description applies to the following graphs:</p>

<ul>
<li>Total relay bandwidth (just the advertised bandwidth part; for the consumed bandwidth part <a href="#consumed-bandwidth">see below</a>) <a href="/bandwidth.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Advertised and consumed bandwidth by relay flag (just the advertised bandwidth part; for the consumed bandwidth part <a href="#consumed-bandwidth">see below</a>) <a href="/bandwidth-flags.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Advertised and consumed bandwidth by relay flags (just the advertised bandwidth part; for the consumed bandwidth part <a href="#consumed-bandwidth">see below</a>) <a href="/bandwidth-flags.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Advertised bandwidth by IP version <a href="/advbw-ipv6.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Advertised bandwidth distribution <a href="/advbwdist-perc.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Advertised bandwidth of n-th fastest relays <a href="/advbwdist-relay.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
@@ -469,7 +468,7 @@ We consider a relay with the <code>"Guard"</code> flag as guard and a relay with

<h4>Step 3: Compute daily averages</h4>

<p>The first three graphs described here, namely <a href="/bandwidth.html">Total relay bandwidth</a>, <a href="/bandwidth-flags.html">Advertised and consumed bandwidth by relay flag</a>, and <a href="/advbw-ipv6.html">Advertised bandwidth by IP version</a>, have in common that they show daily averages of advertised bandwidth.</p>
<p>The first two graphs described here, namely <a href="/bandwidth-flags.html">Advertised and consumed bandwidth by relay flags</a> and <a href="/advbw-ipv6.html">Advertised bandwidth by IP version</a>, have in common that they show daily averages of advertised bandwidth.</p>

<p>In order to compute these averages, first match consensus entries with server descriptors by SHA-1 digest.
Every consensus entry references exactly one server descriptor, and a server descriptor may be referenced from an arbitrary number of consensus entries.
@@ -514,9 +513,7 @@ Relays self-report bandwidth histories as part of their extra-info descriptors,
<p>The following description applies to the following graphs:</p>

<ul>
<li>Total relay bandwidth (just the consumed bandwidth part; for the advertised bandwidth part <a href="#advertised-bandwidth">see above</a>) <a href="/bandwidth.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Advertised and consumed bandwidth by relay flag (just the consumed bandwidth part; for the advertised bandwidth part <a href="#advertised-bandwidth">see above</a>) <a href="/bandwidth-flags.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Consumed bandwidth by Exit/Guard flag combination <a href="/bwhist-flags.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Advertised and consumed bandwidth by relay flags (just the consumed bandwidth part; for the advertised bandwidth part <a href="#advertised-bandwidth">see above</a>) <a href="/bandwidth-flags.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
<li>Bandwidth spent on answering directory requests <a href="/dirbytes.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
</ul>

@@ -552,13 +549,13 @@ We consider a relay with the <code>"Guard"</code> flag as guard and a relay with

<h4>Step 3: Compute daily totals</h4>

<p>The first three graphs described here, namely <a href="/bandwidth.html">Total relay bandwidth</a>, <a href="/bandwidth-flags.html">Advertised and consumed bandwidth by relay flag</a>, and <a href="/bwhist-flags.html">Consumed bandwidth by Exit/Guard flag combination</a>, show daily totals of all bytes written or read by relays.
For all three graphs, we sum up all read and written bytes on a given day and divide the result by 2.
<p>The first graph described here, <a href="/bandwidth-flags.html">Advertised and consumed bandwidth by relay flags</a>, shows daily totals of all bytes written or read by relays.
For this graph, we sum up all read and written bytes on a given day and divide the result by 2.
However, we only include bandwidth histories for a given day if a relay was listed as running in a consensus at least once on that day.
We attribute bandwidth to guards and/or exits if a relay was a guard and/or exit at least in one consensus on a day.</p>

<p>The fourth graph, <a href="/dirbytes.html">Bandwidth spent on answering directory requests</a>, shows bytes spent by <a href="/glossary.html#directory-mirror">directory mirrors</a> on answering directory requests.
As opposed to the first three graphs, all bandwidth histories are included, regardless of whether a relay was listed as running in a consensus.
<p>The second graph, <a href="/dirbytes.html">Bandwidth spent on answering directory requests</a>, shows bytes spent by <a href="/glossary.html#directory-mirror">directory mirrors</a> on answering directory requests.
As opposed to the first graph, all bandwidth histories are included, regardless of whether a relay was listed as running in a consensus.
Also, we compute total read directory and total written directory bytes for this second graph, not an average of the two.</p>

<h3 id="connbidirect" class="hover">Connection usage
Loading