GitLab is used only for code review, issue tracking and project management. Canonical locations for source code are still https://gitweb.torproject.org/, https://git.torproject.org/ and git-rw.torproject.org.

Commit 95b749a8 authored by Ana Custura

Move filters and filter metadata to analysis files

parent 9d0c8056
......@@ -7,6 +7,7 @@
import re
from onionperf.analysis import OPAnalysis
from collections import defaultdict
class Filtering(object):
......@@ -14,9 +15,11 @@ class Filtering(object):
self.fingerprints_to_include = None
self.fingerprints_to_exclude = None
self.fingerprint_pattern = re.compile("\$?([0-9a-fA-F]{40})")
self.filters = defaultdict(list)
def include_fingerprints(self, path):
self.fingerprints_to_include = []
self.fingerprints_to_include_path = path
with open(path, 'rt') as f:
for line in f:
fingerprint_match = self.fingerprint_pattern.match(line)
......@@ -26,6 +29,7 @@ class Filtering(object):
def exclude_fingerprints(self, path):
self.fingerprints_to_exclude = []
self.fingerprints_to_exclude_path = path
with open(path, 'rt') as f:
for line in f:
fingerprint_match = self.fingerprint_pattern.match(line)
......@@ -33,12 +37,16 @@ class Filtering(object):
fingerprint =
def apply_filters(self, input_path, output_dir, output_file):
self.analysis = OPAnalysis.load(filename=input_path)
def filter_tor_circuits(self, analysis):
if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None:
for source in self.analysis.get_nodes():
tor_circuits = self.analysis.get_tor_circuits(source)
self.filters["tor/circuits"] = []
if self.fingerprints_to_include:
self.filters["tor/circuits"].append({"name": "include_fingerprints", "filepath": self.fingerprints_to_include_path })
if self.fingerprints_to_exclude:
self.filters["tor/circuits"].append({"name": "exclude_fingerprints", "filepath": self.fingerprints_to_exclude_path })
for source in analysis.get_nodes():
tor_circuits = analysis.get_tor_circuits(source)
filtered_circuit_ids = []
for circuit_id, tor_circuit in tor_circuits.items():
keep = False
......@@ -56,8 +64,11 @@ class Filtering(object):
keep = False
if not keep:
for circuit_id in filtered_circuit_ids:
tor_circuits[circuit_id]["filtered"] = True
def apply_filters(self, input_path, output_dir, output_file):
self.analysis = OPAnalysis.load(filename=input_path)
self.analysis.json_db["filters"] = self.filters, output_prefix=output_dir, sort_keys=False)
......@@ -342,13 +342,6 @@ files generated by this script will be written""",
action=PathStringArgsAction, dest="datasets")
help="""Include measurements without an existing mapping between TGen
transfers/streams and Tor streams/circuits, which is the
equivalent of an outer join in the database sense""",
action="store_true", dest="outer_join",
visualize_parser.add_argument('-p', '--prefix',
help="a STRING filename prefix for graphs we generate",
metavar="STRING", type=str,
......@@ -489,7 +482,7 @@ def visualize(args):
if analysis is not None:
tgen_viz.add_dataset(analyses, label)
tgen_viz.plot_all(args.prefix, outer_join=args.outer_join)
def type_nonnegative_integer(value):
i = int(value)
......@@ -31,11 +31,11 @@ class Visualization(object, metaclass=ABCMeta):
class TGenVisualization(Visualization):
def plot_all(self, output_prefix, outer_join=False):
def plot_all(self, output_prefix):
if len(self.datasets) > 0:
prefix = output_prefix + '.' if output_prefix is not None else ''
ts = time.strftime("%Y-%m-%d_%H:%M:%S")
self.__extract_data_frame()"{0}onionperf.viz.{1}.csv".format(prefix, ts))
sns.set_context("paper") = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts))
......@@ -51,7 +51,7 @@ class TGenVisualization(Visualization):
def __extract_data_frame(self, outer_join=False):
def __extract_data_frame(self):
streams = []
for (analyses, label) in self.datasets:
for analysis in analyses:
......@@ -145,7 +145,11 @@ class TGenVisualization(Visualization):
if "failure_reason_remote" in tor_stream:
stream["error_code"] = "/".join(error_code_parts)
if tor_circuit or outer_join:
if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]:
if tor_circuit and "filtered" not in tor_circuit.keys():
streams.append(stream) = pd.DataFrame.from_records(streams, index="id")
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment