Commit 1f90b723 authored by Karsten Loesing
Browse files

Make user graphs even faster.

In a91f2dc9 we split up clients.csv to make most client graphs faster
(except for clients by country and transport).  Turns out we can do
even better by using R's save() and load() functions.  Performance
gain as compared to pre-a91f2dc9:

 - userstats-relay-country: 3.0 seconds

 - userstats-bridge-country: 3.5 seconds

 - userstats-bridge-transport: 3.5 seconds

 - userstats-bridge-version: 3.5 seconds

 - userstats-bridge-combined: 2.7 seconds
parent eac64e3b
Loading
Loading
Loading
Loading
+11 −5
Original line number Diff line number Diff line
# Prepare per-node extracts of the client statistics for the graphing code.
#
# NOTE(review): this listing appears to interleave two versions of the script
# (a CSV-splitting part followed by an .RData-saving part, both reading
# clients.csv) -- confirm which lines belong to the current file.

# Read the full clients table; keep strings as character vectors.
u <- read.csv("clients.csv", stringsAsFactors = FALSE)
# Write the relay rows and the bridge rows to separate CSV files, dropping the
# now-constant "node" column.  Values are written unquoted, without row names,
# and with NA rendered as an empty field.
write.csv(u[u$node == 'relay', names(u) != "node"], 'clients-relay.csv',
  quote = FALSE, row.names = FALSE, na = '')
write.csv(u[u$node == 'bridge', names(u) != "node"], 'clients-bridge.csv',
  quote = FALSE, row.names = FALSE, na = '')
# Create the output directory for the .RData files; showWarnings = FALSE keeps
# dir.create() quiet when the directory already exists.
dir.create("RData", showWarnings = FALSE)

# Read the clients table again.  The variable is named `c`, which shadows
# base::c as a variable; the calls c("node", "frac") below still reach the
# function because R looks for a function when a name is used in call position.
c <- read.csv("clients.csv", stringsAsFactors = FALSE)
# Keep only relay rows and drop the "node" and "frac" columns, then serialize
# the data frame.  save() stores the object under its variable name, so a
# later load() of this file recreates a variable called `data`.
data <- c[c$node == 'relay', !(names(c) %in% c("node", "frac"))]
save(data, file = "RData/clients-relay.RData")
# Same for bridge rows; `data` is reused so both files restore the same name.
data <- c[c$node == 'bridge', !(names(c) %in% c("node", "frac"))]
save(data, file = "RData/clients-bridge.RData")

# Combined bridge user statistics: keep all rows, drop the "node", "version"
# and "frac" columns (names not present are simply ignored by %in%), and save
# the result, again under the object name `data`.
u <- read.csv("userstats-combined.csv", stringsAsFactors = FALSE)
data <- u[, !(names(u) %in% c("node", "version", "frac"))]
save(data, file = "RData/userstats-bridge-combined.RData")
+4 −4
Original line number Diff line number Diff line
@@ -21,13 +21,13 @@ python detector.py

echo `date` "Merging censorship detector results."
R --slave -f merge-clients.R > /dev/null 2>&1

echo `date` "Splitting results file."
R --slave -f split-clients.R > /dev/null 2>&1
mkdir -p stats/
cp clients*.csv stats/
cp clients.csv stats/
cp userstats-combined.csv stats/

echo `date` "Saving results as .RData files."
R --slave -f split-clients.R > /dev/null 2>&1

echo `date` "Terminating."

cd ../../
+3 −0
Original line number Diff line number Diff line
@@ -7,3 +7,6 @@ cp -a modules/hidserv/stats/hidserv.csv shared/stats/
cp -a modules/clients/stats/clients*.csv shared/stats/
cp -a modules/clients/stats/userstats-combined.csv shared/stats/

mkdir -p shared/RData
cp -a modules/clients/RData/*.RData shared/RData/
+8 −9
Original line number Diff line number Diff line
@@ -765,15 +765,15 @@ plot_bandwidth_flags <- function(start, end, path) {
plot_userstats <- function(start, end, node, variable, value, events,
                           path) {
  end <- min(end, as.character(Sys.Date() - 2))
  c <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/",
                "clients-", node, ".csv", sep = ""),
                stringsAsFactors = FALSE)
  load(paste("/srv/metrics.torproject.org/metrics/shared/RData/clients-",
             node, ".RData", sep = ""))
  c <- data
  u <- c[c$date >= start & c$date <= end, ]
  u <- rbind(u, data.frame(date = start,
      country = ifelse(variable == 'country' & value != 'all', value, ''),
      transport = ifelse(variable == 'transport', value, ''),
      version = ifelse(variable == 'version', value, ''),
      lower = 0, upper = 0, clients = 0, frac = 0))
      lower = 0, upper = 0, clients = 0))
  if (node == 'relay') {
    if (value != 'all') {
      u <- u[u$country == value, ]
@@ -798,8 +798,7 @@ plot_userstats <- function(start, end, node, variable, value, events,
      u <- rbind(u, data.frame(date = n$date,
                               country = '', transport = '!<OR>',
                               version = '', lower = n$lower,
                               upper = n$upper, clients = n$clients,
                               frac = NA))
                               upper = n$upper, clients = n$clients))
    }
    if (length(value) > 1) {
      u <- u[u$transport %in% value, ]
@@ -926,9 +925,9 @@ plot_userstats_bridge_combined <- function(start, end, country, path) {
    top <- 3
    country <- ifelse(country == "all", NA, country)
    end <- min(end, as.character(Sys.Date() - 2))
    u <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/",
                        "stats/userstats-combined.csv", sep = ""),
                  stringsAsFactors = FALSE)
    load(paste("/srv/metrics.torproject.org/metrics/shared/RData/",
               "userstats-bridge-combined.RData", sep = ""))
    u <- data
    u <- u[u$date >= start & u$date <= end
           & (is.na(country) | u$country == country), ]
    a <- aggregate(list(mid = (u$high + u$low) / 2),