Commit 95b749a8 authored by Ana Custura

Move filters and filter metadata to analysis files

parent 9d0c8056
......@@ -7,6 +7,7 @@
import re
from onionperf.analysis import OPAnalysis
from collections import defaultdict
class Filtering(object):
......@@ -14,9 +15,11 @@ class Filtering(object):
self.fingerprints_to_include = None
self.fingerprints_to_exclude = None
self.fingerprint_pattern = re.compile("\$?([0-9a-fA-F]{40})")
self.filters = defaultdict(list)
def include_fingerprints(self, path):
self.fingerprints_to_include = []
self.fingerprints_to_include_path = path
with open(path, 'rt') as f:
for line in f:
fingerprint_match = self.fingerprint_pattern.match(line)
......@@ -26,6 +29,7 @@ class Filtering(object):
def exclude_fingerprints(self, path):
self.fingerprints_to_exclude = []
self.fingerprints_to_exclude_path = path
with open(path, 'rt') as f:
for line in f:
fingerprint_match = self.fingerprint_pattern.match(line)
......@@ -33,12 +37,16 @@ class Filtering(object):
fingerprint =
def apply_filters(self, input_path, output_dir, output_file):
self.analysis = OPAnalysis.load(filename=input_path)
def filter_tor_circuits(self, analysis):
if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None:
for source in self.analysis.get_nodes():
tor_circuits = self.analysis.get_tor_circuits(source)
self.filters["tor/circuits"] = []
if self.fingerprints_to_include:
self.filters["tor/circuits"].append({"name": "include_fingerprints", "filepath": self.fingerprints_to_include_path })
if self.fingerprints_to_exclude:
self.filters["tor/circuits"].append({"name": "exclude_fingerprints", "filepath": self.fingerprints_to_exclude_path })
for source in analysis.get_nodes():
tor_circuits = analysis.get_tor_circuits(source)
filtered_circuit_ids = []
for circuit_id, tor_circuit in tor_circuits.items():
keep = False
......@@ -56,8 +64,11 @@ class Filtering(object):
keep = False
if not keep:
for circuit_id in filtered_circuit_ids:
tor_circuits[circuit_id]["filtered"] = True
def apply_filters(self, input_path, output_dir, output_file):
self.analysis = OPAnalysis.load(filename=input_path)
self.analysis.json_db["filters"] = self.filters, output_prefix=output_dir, sort_keys=False)
......@@ -342,13 +342,6 @@ files generated by this script will be written""",
action=PathStringArgsAction, dest="datasets")
help="""Include measurements without an existing mapping between TGen
transfers/streams and Tor streams/circuits, which is the
equivalent of an outer join in the database sense""",
action="store_true", dest="outer_join",
visualize_parser.add_argument('-p', '--prefix',
help="a STRING filename prefix for graphs we generate",
metavar="STRING", type=str,
......@@ -489,7 +482,7 @@ def visualize(args):
if analysis is not None:
tgen_viz.add_dataset(analyses, label)
tgen_viz.plot_all(args.prefix, outer_join=args.outer_join)
def type_nonnegative_integer(value):
i = int(value)
......@@ -31,11 +31,11 @@ class Visualization(object, metaclass=ABCMeta):
class TGenVisualization(Visualization):
def plot_all(self, output_prefix, outer_join=False):
def plot_all(self, output_prefix):
if len(self.datasets) > 0:
prefix = output_prefix + '.' if output_prefix is not None else ''
ts = time.strftime("%Y-%m-%d_%H:%M:%S")
self.__extract_data_frame()"{0}onionperf.viz.{1}.csv".format(prefix, ts))
sns.set_context("paper") = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts))
......@@ -51,7 +51,7 @@ class TGenVisualization(Visualization):
def __extract_data_frame(self, outer_join=False):
def __extract_data_frame(self):
streams = []
for (analyses, label) in self.datasets:
for analysis in analyses:
......@@ -145,7 +145,11 @@ class TGenVisualization(Visualization):
if "failure_reason_remote" in tor_stream:
stream["error_code"] = "/".join(error_code_parts)
if tor_circuit or outer_join:
if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]:
if tor_circuit and "filtered" not in tor_circuit.keys():
streams.append(stream) = pd.DataFrame.from_records(streams, index="id")
