Commit 4165e3bf authored by Ana Custura
Browse files

Track which Guard is used for measurements.

Adds code to track guards on a per-circuit basis.
Adds country information per fingerprint.
Removes guard-related visualisations, to be
rewritten as per tpo/metrics/onionperf#40024.
Includes all Guard information in CSV file.
This has been rebased on the latest develop branch.

Implements tpo/metrics/onionperf#33421
parent 615fd5bf
......@@ -2,6 +2,12 @@
- Avoid tracebacks when visualizing analysis files containing only
unsuccessful measurements. Fixes #40012.
- Add support for processing GUARD events and including guard-related
information in the analysis. Bump the analysis version to 5.0 to
include optional guard metadata defined in a 'guards' field and
optional metadata on current guards defined in a 'current\_guards'
field. Add several new fields to the CSV output: guard\_country\_codes,
guards, guard, uses\_guard, guard\_index. Implements #33421.
# Changes in version 0.8 - 2020-09-16
......
......@@ -24,7 +24,7 @@ class OPAnalysis(Analysis):
def __init__(self, nickname=None, ip_address=None):
super().__init__(nickname, ip_address)
self.json_db = {'type': 'onionperf', 'version': '4.0', 'data': {}}
self.json_db = {'type': 'onionperf', 'version': '5.0', 'data': {}}
self.torctl_filepaths = []
def add_torctl_file(self, filepath):
......@@ -139,7 +139,7 @@ class OPAnalysis(Analysis):
if 'type' not in db or 'version' not in db:
logging.warning("'type' or 'version' not present in database")
return None
elif db['type'] != 'onionperf' or str(db['version']) >= '5.':
elif db['type'] != 'onionperf' or str(db['version']) >= '6.':
logging.warning("type or version not supported (type={0}, version={1})".format(db['type'], db['version']))
return None
else:
......@@ -229,12 +229,35 @@ class TorCircuit(object):
self.buildtime_seconds = None
self.build_timeout = None
self.build_quantile = None
self.current_guards = None
self.elapsed_seconds = []
self.path = []
def add_event(self, event, arrived_at):
    """Record an event and the time at which it was observed.

    The event is converted to its string form, paired with ``arrived_at``
    and appended to ``self.elapsed_seconds``.
    """
    entry = [str(event), arrived_at]
    self.elapsed_seconds.append(entry)
def add_current_guards(self, guards):
    """Store the exported data of the given guards on this object.

    ``get_data()`` is called on every element of ``guards`` and the results
    are kept, in the same order, as a list in ``self.current_guards``.
    """
    self.current_guards = [entry.get_data() for entry in guards]
def add_hop(self, hop, arrived_at):
    """Append a hop to ``self.path`` along with its arrival time.

    The first two items of ``hop`` are rendered as ``$<first>~<second>``
    and stored together with ``arrived_at``.
    """
    first, second = hop[0], hop[1]
    label = "${0}~{1}".format(first, second)
    self.path.append([label, arrived_at])
# Record launch information: the start timestamp is only written while
# self.unix_ts_start is still None, so an already recorded start is kept.
# NOTE(review): indentation is lost in this diff rendering, so it is unclear
# whether build_timeout / build_quantile are also guarded by the `is None`
# check or are overwritten on every call — confirm against the repository.
def set_launched(self, unix_ts, build_timeout, build_quantile):
if self.unix_ts_start is None:
self.unix_ts_start = unix_ts
self.build_timeout = build_timeout
self.build_quantile = build_quantile
def set_end_time(self, unix_ts):
    """Store ``unix_ts`` as this object's end timestamp (``unix_ts_end``)."""
    self.unix_ts_end = unix_ts
def set_local_failure(self, reason):
    """Store ``reason`` in ``failure_reason_local``."""
    self.failure_reason_local = reason
def set_remote_failure(self, reason):
    """Store ``reason`` in ``failure_reason_remote``."""
    self.failure_reason_remote = reason
def add_hop(self, hop, arrived_at):
    """Add one hop, with its arrival time, to the end of ``self.path``.

    ``hop[0]`` and ``hop[1]`` are combined into a ``$<hop[0]>~<hop[1]>``
    string; that string and ``arrived_at`` form the stored entry.
    """
    hop_name = "${0}~{1}".format(hop[0], hop[1])
    path_entry = [hop_name, arrived_at]
    self.path.append(path_entry)
......@@ -270,10 +293,7 @@ class TorCircuit(object):
else:
d['buildtime_seconds'] = self.buildtime_seconds - self.unix_ts_start
if len(d['path']) == 0: del(d['path'])
if d['failure_reason_local'] is None: del(d['failure_reason_local'])
if d['failure_reason_remote'] is None: del(d['failure_reason_remote'])
if d['build_timeout'] is None: del(d['build_timeout'])
if d['build_quantile'] is None: del(d['build_quantile'])
d = {k: v for k, v in d.items() if v is not None}
return d
def __str__(self):
......@@ -282,9 +302,10 @@ class TorCircuit(object):
sorted(self.elapsed_seconds, key=lambda item: item[1])])))
class TorGuard(object):
def __init__(self, fingerprint, nickname):
def __init__(self, fingerprint, nickname, country=None):
self.fingerprint = fingerprint
self.nickname = nickname
self.country = country
self.new_ts = None
self.up_ts = None
self.down_ts = None
......@@ -292,10 +313,7 @@ class TorGuard(object):
def get_data(self):
d = self.__dict__
if d['new_ts'] is None: del(d['new_ts'])
if d['up_ts'] is None: del(d['up_ts'])
if d['down_ts'] is None: del(d['down_ts'])
if d['dropped_ts'] is None: del(d['dropped_ts'])
d = {k: v for k, v in d.items() if v is not None}
return d
class TorCtlParser(Parser):
......@@ -344,6 +362,12 @@ class TorCtlParser(Parser):
circ.add_event(key, arrival_dt)
if event.status == CircStatus.CLOSED or event.status == CircStatus.FAILED:
current_guards = []
for g in self.guards:
if g.up_ts and circ.unix_ts_start >= g.up_ts and (not g.dropped_ts or circ.unix_ts_start < g.dropped_ts) and (not g.down_ts or circ.unix_ts_start < g.down_ts):
current_guards.append(g)
if current_guards:
circ.add_current_guards(current_guards)
circ.set_end_time(arrival_dt)
started, built, ended = circ.unix_ts_start, circ.buildtime_seconds, circ.unix_ts_end
......@@ -406,13 +430,16 @@ class TorCtlParser(Parser):
guard = g
break
if guard is None or guard.dropped_ts is not None:
guard = TorGuard(fingerprint=fingerprint, nickname=nickname)
try:
country = util.get_country_by_fingerprint(fingerprint)
except IndexError:
pass
guard = TorGuard(fingerprint=fingerprint, nickname=nickname, country=country)
self.guards.append(guard)
if event.status == GuardStatus.NEW and guard.new_ts is None:
guard.new_ts = arrival_dt
elif event.status == GuardStatus.UP and guard.up_ts is None:
guard.up_ts = arrival_dt
# TODO maybe this should be a list?
elif event.status == GuardStatus.DOWN and guard.down_ts is None:
guard.down_ts = arrival_dt
elif event.status == GuardStatus.DROPPED and guard.dropped_ts is None:
......
......@@ -5,13 +5,17 @@
See LICENSE for licensing information
'''
import sys, os, socket, logging, random, re, shutil, datetime, urllib.request, urllib.parse, urllib.error, gzip, lzma
import sys, os, socket, logging, random, re, shutil, datetime, urllib.request, urllib.parse, urllib.error, gzip, lzma, requests
from threading import Lock
from io import StringIO
from abc import ABCMeta, abstractmethod
LINEFORMATS = "k-,r-,b-,g-,c-,m-,y-,k--,r--,b--,g--,c--,m--,y--,k:,r:,b:,g:,c:,m:,y:,k-.,r-.,b-.,g-.,c-.,m-.,y-."
def get_country_by_fingerprint(fingerprint, timeout=30):
    """Look up a relay's country via the Onionoo details endpoint.

    :param fingerprint: relay fingerprint, sent as the ``search`` term
    :param timeout: seconds to wait for the HTTP request before giving up
    :returns: the ``country`` value of the first relay in the response
    :raises IndexError: if the response lists no relay for the fingerprint
    :raises KeyError: if the response or the relay entry lacks the expected key
    :raises requests.RequestException: on timeout, connection or HTTP errors
    """
    # Let requests build the query string so the search term is URL-encoded.
    # The timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get("https://onionoo.torproject.org/details",
                            params={"search": fingerprint},
                            timeout=timeout)
    # Surface HTTP errors as such instead of failing later on the JSON body.
    response.raise_for_status()
    relays = response.json()["relays"]
    if not relays:
        # Keep IndexError, the exception raised by indexing an empty list,
        # so existing ``except IndexError`` handlers continue to work.
        raise IndexError("no relay found for fingerprint {0}".format(fingerprint))
    return relays[0]["country"]
def make_dir_path(path):
p = os.path.abspath(os.path.expanduser(path))
if not os.path.exists(p):
......
......@@ -49,9 +49,6 @@ class TGenVisualization(Visualization):
self.__plot_downloads_count()
self.__plot_errors_count()
self.__plot_errors_time()
self.__plot_guards_time()
self.__plot_uses_guards_time()
self.__plot_guard_index_time()
self.page.close()
def __extract_data_frame(self):
......@@ -144,18 +141,16 @@ class TGenVisualization(Visualization):
if "unix_ts_start" in transfer_data:
stream["start"] = datetime.datetime.utcfromtimestamp(transfer_data["unix_ts_start"])
tor_circuit = None
if source_port and unix_ts_end:
circuit_id = None
if source_port and source_port in tor_streams_by_source_port and unix_ts_end:
for tor_stream in tor_streams_by_source_port[source_port]:
if abs(unix_ts_end - tor_stream["unix_ts_end"]) < 150.0:
circuit_id = tor_stream["circuit_id"]
if circuit_id and str(circuit_id) in tor_circuits:
tor_circuit = tor_circuits[circuit_id]
if client in tor_guards_by_client:
guards = []
for guard in tor_guards_by_client[client]:
if "up_ts" in guard and tor_circuit["unix_ts_start"] >= guard["up_ts"] and \
("dropped_ts" not in guard or tor_circuit["unix_ts_start"] < guard["dropped_ts"]):
guards.append(guard["fingerprint"])
if client in tor_guards_by_client and "current_guards" in tor_circuit:
stream["guard_country_codes"] = [d["country"] if "country" in d else "N/A" for d in tor_circuit["current_guards"]]
guards = [d["fingerprint"] for d in tor_circuit["current_guards"]]
stream["guards"] = int(len(guards))
path = tor_circuit["path"]
if path:
......@@ -174,11 +169,13 @@ class TGenVisualization(Visualization):
error_code_parts = ["TOR"]
else:
error_code_parts = ["TGEN", error_code]
if tor_stream:
if "failure_reason_local" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_local"])
if "failure_reason_remote" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_remote"])
if source_port and source_port in tor_streams_by_source_port and unix_ts_end:
for tor_stream in tor_streams_by_source_port[source_port]:
if abs(unix_ts_end - tor_stream["unix_ts_end"]) < 150.0:
if "failure_reason_local" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_local"])
if "failure_reason_remote" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_remote"])
stream["error_code"] = "/".join(error_code_parts)
if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]:
......@@ -266,27 +263,6 @@ class TGenVisualization(Visualization):
xlabel="Download start time", ylabel="Error code",
title="Downloads failed over time from {0} service".format(server))
def __plot_guards_time(self):
if self.data["guards"].count() > 0:
self.__draw_timeplot(x="start", y="guards", hue="label", hue_name="Data set",
data=self.data,
xlabel="Download start time", ylabel="Guards",
title="Number of guards over time")
def __plot_guard_index_time(self):
if self.data["guard_index"].count() > 0:
self.__draw_timeplot(x="start", y="guard_index", hue="label", hue_name="Data set",
data=self.data,
xlabel="Download start time", ylabel="Guard index",
title="Guard index over time")
def __plot_uses_guards_time(self):
if self.data["uses_guard"].count() > 0:
self.__draw_timeplot(x="start", y="uses_guard", hue="label", hue_name="Data set",
data=self.data,
xlabel="Download start time", ylabel="Guard usage",
title="Guard usage over time")
def __draw_ecdf(self, x, hue, hue_name, data, title, xlabel, ylabel):
data = data.dropna(subset=[x])
if data.empty:
......@@ -320,8 +296,8 @@ class TGenVisualization(Visualization):
data = data.rename(columns={hue: hue_name})
xmin = data[x].min()
xmax = data[x].max()
ymin = data[y].min()
ymax = data[y].max()
ymin = float(data[y].min())
ymax = float(data[y].max())
g = sns.scatterplot(data=data, x=x, y=y, hue=hue_name, alpha=0.5)
g.set(title=title, xlabel=xlabel, ylabel=ylabel,
xlim=(xmin - 0.03 * (xmax - xmin), xmax + 0.03 * (xmax - xmin)),
......@@ -360,7 +336,9 @@ class TGenVisualization(Visualization):
plt.figure()
if hue is not None:
data = data.rename(columns={hue: hue_name})
g = sns.countplot(data=data, x=x, hue=hue_name)
if data.empty:
return
g = sns.countplot(data=data.dropna(subset=[x]), x=x, hue=hue_name)
g.set(xlabel=xlabel, ylabel=ylabel, title=title)
sns.despine()
self.page.savefig()
......@@ -372,6 +350,8 @@ class TGenVisualization(Visualization):
return
plt.figure()
data = data.rename(columns={hue: hue_name})
if data.empty:
return
xmin = data[x].min()
xmax = data[x].max()
g = sns.stripplot(data=data, x=x, y=y, hue=hue_name)
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment