Commit e24155e1 authored by Ana Custura
Browse files

Merge branch 'task-33421' into 'develop'

Task 33421

See merge request tpo/metrics/onionperf!10
parents 15ff2bee b5a7dad4
......@@ -2,6 +2,12 @@
- Avoid tracebacks when visualizing analysis files containing only
unsuccessful measurements. Fixes #40012.
- Add support for processing GUARD events and including guard-related
information in the analysis. Bump the analysis version to 5.0 to
include optional guard metadata defined in a 'guards' field and
optional metadata on current guards defined in a 'current\_guards'
field. Add several new fields to the CSV output: guard\_country\_codes,
guards, guard, uses\_guard, guard\_index. Implements #33421.
# Changes in version 0.8 - 2020-09-16
......
......@@ -10,8 +10,8 @@ import os, re, json, datetime, logging
from abc import ABCMeta, abstractmethod
# stem imports
from stem import CircEvent, CircStatus, CircPurpose, StreamStatus
from stem.response.events import CircuitEvent, CircMinorEvent, StreamEvent, BuildTimeoutSetEvent
from stem import CircEvent, CircStatus, CircPurpose, StreamStatus, GuardStatus, GuardType
from stem.response.events import CircuitEvent, CircMinorEvent, StreamEvent, BuildTimeoutSetEvent, GuardEvent
from stem.response import ControlMessage, convert
# tgentools imports
......@@ -24,7 +24,7 @@ class OPAnalysis(Analysis):
# Initialise the analysis: forward nickname/ip_address to the parent class,
# create an empty 'onionperf' JSON database and an empty list of torctl paths.
def __init__(self, nickname=None, ip_address=None):
super().__init__(nickname, ip_address)
# NOTE(review): json_db is assigned twice below, differing only in 'version'
# ('4.0' then '5.0'). This looks like the old and new sides of a diff shown
# together; as written, the later '5.0' assignment is the one that sticks.
self.json_db = {'type': 'onionperf', 'version': '4.0', 'data': {}}
self.json_db = {'type': 'onionperf', 'version': '5.0', 'data': {}}
self.torctl_filepaths = []
def add_torctl_file(self, filepath):
......@@ -97,6 +97,12 @@ class OPAnalysis(Analysis):
except:
return None
def get_tor_guards(self, node):
    """Return the guard records stored for ``node``, or None if unavailable.

    Reads ``self.json_db['data'][node]['tor']['guards']``. A missing key, or
    an intermediate value that cannot be subscripted (for example ``None``),
    yields ``None`` instead of raising.
    """
    try:
        return self.json_db['data'][node]['tor']['guards']
    except (LookupError, TypeError):
        # Only lookup failures mean "no guard data"; a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        return None
def get_tor_circuits(self, node):
try:
return self.json_db['data'][node]['tor']['circuits']
......@@ -133,7 +139,7 @@ class OPAnalysis(Analysis):
if 'type' not in db or 'version' not in db:
logging.warning("'type' or 'version' not present in database")
return None
elif db['type'] != 'onionperf' or str(db['version']) >= '5.':
elif db['type'] != 'onionperf' or str(db['version']) >= '6.':
logging.warning("type or version not supported (type={0}, version={1})".format(db['type'], db['version']))
return None
else:
......@@ -223,12 +229,35 @@ class TorCircuit(object):
self.buildtime_seconds = None
self.build_timeout = None
self.build_quantile = None
self.current_guards = None
self.elapsed_seconds = []
self.path = []
def add_event(self, event, arrived_at):
    """Append a ``[str(event), arrived_at]`` pair to ``elapsed_seconds``."""
    entry = [str(event), arrived_at]
    self.elapsed_seconds.append(entry)
def add_current_guards(self, guards):
    """Store the ``get_data()`` result of every guard as ``current_guards``."""
    snapshots = []
    for guard in guards:
        snapshots.append(guard.get_data())
    self.current_guards = snapshots
def add_hop(self, hop, arrived_at):
    """Append a ``["$<hop[0]>~<hop[1]>", arrived_at]`` entry to ``path``."""
    first, second = hop[0], hop[1]
    long_name = "${0}~{1}".format(first, second)
    self.path.append([long_name, arrived_at])
# Record launch metadata: the start timestamp is assigned under the
# `unix_ts_start is None` check; build_timeout and build_quantile are stored too.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the timeout/quantile assignments also sit inside the `if` — confirm.
def set_launched(self, unix_ts, build_timeout, build_quantile):
if self.unix_ts_start is None:
self.unix_ts_start = unix_ts
self.build_timeout = build_timeout
self.build_quantile = build_quantile
# Store unix_ts as this object's end timestamp (unix_ts_end), overwriting any
# previous value.
def set_end_time(self, unix_ts):
self.unix_ts_end = unix_ts
# Store reason as failure_reason_local, overwriting any previous value.
def set_local_failure(self, reason):
self.failure_reason_local = reason
# Store reason as failure_reason_remote, overwriting any previous value.
def set_remote_failure(self, reason):
self.failure_reason_remote = reason
# Append a ["$<hop[0]>~<hop[1]>", arrived_at] entry to self.path.
# NOTE(review): a definition with the same name and body also appears a few
# lines earlier in this view; this is presumably the pre-move copy shown by
# the diff — confirm only one definition remains in the actual file.
def add_hop(self, hop, arrived_at):
self.path.append(["${0}~{1}".format(hop[0], hop[1]), arrived_at])
......@@ -264,10 +293,7 @@ class TorCircuit(object):
else:
d['buildtime_seconds'] = self.buildtime_seconds - self.unix_ts_start
if len(d['path']) == 0: del(d['path'])
if d['failure_reason_local'] is None: del(d['failure_reason_local'])
if d['failure_reason_remote'] is None: del(d['failure_reason_remote'])
if d['build_timeout'] is None: del(d['build_timeout'])
if d['build_quantile'] is None: del(d['build_quantile'])
d = {k: v for k, v in d.items() if v is not None}
return d
def __str__(self):
......@@ -275,6 +301,21 @@ class TorCircuit(object):
(event, arrived_at) for (event, arrived_at) in
sorted(self.elapsed_seconds, key=lambda item: item[1])])))
class TorGuard(object):
    """Record of one guard relay and the times its status changed.

    ``fingerprint``, ``nickname`` and the optional ``country`` come from the
    caller; the four ``*_ts`` attributes start out as ``None``.
    """

    def __init__(self, fingerprint, nickname, country=None):
        self.fingerprint = fingerprint
        self.nickname = nickname
        self.country = country
        # Timestamp slots, all unset initially. Chained assignment binds the
        # targets left to right, so attribute order is unchanged.
        self.new_ts = self.up_ts = self.down_ts = self.dropped_ts = None

    def get_data(self):
        """Return a new dict holding only the attributes that are not None."""
        return {name: value for name, value in vars(self).items() if value is not None}
class TorCtlParser(Parser):
def __init__(self, date_filter=None):
......@@ -283,6 +324,7 @@ class TorCtlParser(Parser):
self.circuits = {}
self.streams_state = {}
self.streams = {}
self.guards = []
self.name = None
self.boot_succeeded = False
self.build_timeout_last = None
......@@ -320,6 +362,12 @@ class TorCtlParser(Parser):
circ.add_event(key, arrival_dt)
if event.status == CircStatus.CLOSED or event.status == CircStatus.FAILED:
current_guards = []
for g in self.guards:
if g.up_ts and circ.unix_ts_start >= g.up_ts and (not g.dropped_ts or circ.unix_ts_start < g.dropped_ts) and (not g.down_ts or circ.unix_ts_start < g.down_ts):
current_guards.append(g)
if current_guards:
circ.add_current_guards(current_guards)
circ.set_end_time(arrival_dt)
started, built, ended = circ.unix_ts_start, circ.buildtime_seconds, circ.unix_ts_end
......@@ -371,6 +419,32 @@ class TorCtlParser(Parser):
self.build_timeout_last = event.timeout
self.build_quantile_last = event.quantile
def __handle_guard(self, event, arrival_dt):
    """Track status changes of entry guards reported by a GUARD event.

    Events whose ``guard_type`` is not ``GuardType.ENTRY`` are ignored.
    The most recently added guard record with the event's fingerprint is
    reused unless it has already been dropped; otherwise a new ``TorGuard``
    is created and appended to ``self.guards``. The first arrival time of
    each status (NEW, UP, DOWN, DROPPED) is stored on the record; later
    events with the same status do not overwrite it.
    """
    if event.guard_type != GuardType.ENTRY:
        return

    fingerprint = event.endpoint_fingerprint
    nickname = event.endpoint_nickname

    # Search newest-first so a re-added guard shadows its dropped predecessor.
    guard = None
    for g in reversed(self.guards):
        if g.fingerprint == fingerprint:
            guard = g
            break

    if guard is None or guard.dropped_ts is not None:
        # The country lookup is best-effort. Default to None so a relay that
        # is unknown to the lookup service (IndexError) or has no country
        # entry (KeyError) still gets a guard record, instead of failing on
        # an unbound ``country`` variable.
        country = None
        try:
            country = util.get_country_by_fingerprint(fingerprint)
        except (IndexError, KeyError):
            logging.debug("no country found for guard {0}".format(fingerprint))
        guard = TorGuard(fingerprint=fingerprint, nickname=nickname, country=country)
        self.guards.append(guard)

    if event.status == GuardStatus.NEW and guard.new_ts is None:
        guard.new_ts = arrival_dt
    elif event.status == GuardStatus.UP and guard.up_ts is None:
        guard.up_ts = arrival_dt
    elif event.status == GuardStatus.DOWN and guard.down_ts is None:
        guard.down_ts = arrival_dt
    elif event.status == GuardStatus.DROPPED and guard.dropped_ts is None:
        guard.dropped_ts = arrival_dt
def __handle_event(self, event, arrival_dt):
if isinstance(event, (CircuitEvent, CircMinorEvent)):
self.__handle_circuit(event, arrival_dt)
......@@ -378,6 +452,8 @@ class TorCtlParser(Parser):
self.__handle_stream(event, arrival_dt)
elif isinstance(event, BuildTimeoutSetEvent):
self.__handle_buildtimeout(event, arrival_dt)
elif isinstance(event, GuardEvent):
self.__handle_guard(event, arrival_dt)
def __is_date_valid(self, date_to_check):
if self.date_filter is None:
......@@ -433,7 +509,8 @@ class TorCtlParser(Parser):
source.close()
# Return the parsed results as a dict.
# NOTE(review): two return statements appear back to back here. The first has
# only 'circuits' and 'streams'; the second also adds a 'guards' list built
# from each guard's get_data(). This looks like the old and new sides of a
# diff — confirm only the guards-including return remains in the actual file.
def get_data(self):
return {'circuits': self.circuits, 'streams': self.streams}
return {'circuits': self.circuits, 'streams': self.streams,
'guards': [guard.get_data() for guard in self.guards]}
# Return the value currently stored in self.name.
def get_name(self):
return self.name
......@@ -5,13 +5,17 @@
See LICENSE for licensing information
'''
import sys, os, socket, logging, random, re, shutil, datetime, urllib.request, urllib.parse, urllib.error, gzip, lzma
import sys, os, socket, logging, random, re, shutil, datetime, urllib.request, urllib.parse, urllib.error, gzip, lzma, requests
from threading import Lock
from io import StringIO
from abc import ABCMeta, abstractmethod
LINEFORMATS = "k-,r-,b-,g-,c-,m-,y-,k--,r--,b--,g--,c--,m--,y--,k:,r:,b:,g:,c:,m:,y:,k-.,r-.,b-.,g-.,c-.,m-.,y-."
def get_country_by_fingerprint(fingerprint):
    """Return the ``country`` of the first relay Onionoo lists for ``fingerprint``.

    Sends a GET request to the Onionoo ``details`` endpoint, using the
    fingerprint as the search term. Raises ``IndexError`` when the response
    contains no relays.
    """
    url = f"https://onionoo.torproject.org/details?search={fingerprint}"
    response = requests.get(url)
    relays = response.json()["relays"]
    return relays[0]["country"]
def make_dir_path(path):
p = os.path.abspath(os.path.expanduser(path))
if not os.path.exists(p):
......
......@@ -7,7 +7,7 @@
import matplotlib; matplotlib.use('Agg') # for systems without X11
from matplotlib.backends.backend_pdf import PdfPages
import time
import time, re
from abc import abstractmethod, ABCMeta
import matplotlib.pyplot as plt
import pandas as pd
......@@ -54,10 +54,26 @@ class TGenVisualization(Visualization):
def __extract_data_frame(self):
streams = []
for (analyses, label) in self.datasets:
tor_guards_by_client = {}
for analysis in analyses:
for client in analysis.get_nodes():
known_guards = tor_guards_by_client.setdefault(client, [])
for guard in analysis.get_tor_guards(client):
if "new_ts" not in guard:
_guard = None
for g in reversed(known_guards):
if g["fingerprint"] == guard["fingerprint"]:
_guard = g
break
if _guard and "dropped_ts" not in _guard:
_guard["dropped_ts"] = guard["dropped_ts"]
continue
known_guards.append(guard)
for analysis in analyses:
for client in analysis.get_nodes():
tor_streams_by_source_port = {}
tor_streams = analysis.get_tor_streams(client)
fingerprint_pattern = re.compile("\$?([0-9a-fA-F]{40})")
for tor_stream in tor_streams.values():
if "source" in tor_stream and ":" in tor_stream["source"]:
source_port = tor_stream["source"].split(":")[1]
......@@ -124,27 +140,43 @@ class TGenVisualization(Visualization):
unix_ts_end = transfer_data["unix_ts_end"]
if "unix_ts_start" in transfer_data:
stream["start"] = datetime.datetime.utcfromtimestamp(transfer_data["unix_ts_start"])
tor_stream = None
tor_circuit = None
circuit_id = None
if source_port and source_port in tor_streams_by_source_port and unix_ts_end:
for s in tor_streams_by_source_port[source_port]:
if abs(unix_ts_end - s["unix_ts_end"]) < 150.0:
tor_stream = s
break
if tor_stream and "circuit_id" in tor_stream:
circuit_id = tor_stream["circuit_id"]
if str(circuit_id) in tor_circuits:
tor_circuit = tor_circuits[circuit_id]
for tor_stream in tor_streams_by_source_port[source_port]:
if abs(unix_ts_end - tor_stream["unix_ts_end"]) < 150.0:
circuit_id = tor_stream["circuit_id"]
if circuit_id and str(circuit_id) in tor_circuits:
tor_circuit = tor_circuits[circuit_id]
guards = []
if client in tor_guards_by_client and "current_guards" in tor_circuit:
stream["guard_country_codes"] = [d["country"] if "country" in d else "N/A" for d in tor_circuit["current_guards"]]
guards = [d["fingerprint"] for d in tor_circuit["current_guards"]]
stream["guards"] = int(len(guards))
path = tor_circuit["path"]
if path:
long_name, _ = path[0]
fingerprint_match = fingerprint_pattern.match(long_name)
if fingerprint_match:
fingerprint = fingerprint_match.group(1).upper()
stream["guard"] = fingerprint
stream["uses_guard"] = fingerprint in guards
try:
stream["guard_index"] = guards.index(fingerprint)
except:
stream["guard_index"] = -1
if error_code:
if error_code == "PROXY":
error_code_parts = ["TOR"]
else:
error_code_parts = ["TGEN", error_code]
if tor_stream:
if "failure_reason_local" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_local"])
if "failure_reason_remote" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_remote"])
if source_port and source_port in tor_streams_by_source_port and unix_ts_end:
for tor_stream in tor_streams_by_source_port[source_port]:
if abs(unix_ts_end - tor_stream["unix_ts_end"]) < 150.0:
if "failure_reason_local" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_local"])
if "failure_reason_remote" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_remote"])
stream["error_code"] = "/".join(error_code_parts)
if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]:
......@@ -265,11 +297,12 @@ class TGenVisualization(Visualization):
data = data.rename(columns={hue: hue_name})
xmin = data[x].min()
xmax = data[x].max()
ymax = data[y].max()
ymin = float(data[y].min())
ymax = float(data[y].max())
g = sns.scatterplot(data=data, x=x, y=y, hue=hue_name, alpha=0.5)
g.set(title=title, xlabel=xlabel, ylabel=ylabel,
xlim=(xmin - 0.03 * (xmax - xmin), xmax + 0.03 * (xmax - xmin)),
ylim=(-0.05 * ymax, ymax * 1.05))
ylim=(ymin - 0.05 * (ymax - ymin), ymax + 0.05 * (ymax - ymin)))
plt.xticks(rotation=10)
sns.despine()
self.page.savefig()
......@@ -304,7 +337,9 @@ class TGenVisualization(Visualization):
plt.figure()
if hue is not None:
data = data.rename(columns={hue: hue_name})
g = sns.countplot(data=data, x=x, hue=hue_name)
if data.empty:
return
g = sns.countplot(data=data.dropna(subset=[x]), x=x, hue=hue_name)
g.set(xlabel=xlabel, ylabel=ylabel, title=title)
sns.despine()
self.page.savefig()
......@@ -316,6 +351,8 @@ class TGenVisualization(Visualization):
return
plt.figure()
data = data.rename(columns={hue: hue_name})
if data.empty:
return
xmin = data[x].min()
xmax = data[x].max()
g = sns.stripplot(data=data, x=x, y=y, hue=hue_name)
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment