Commit 95b749a8 authored by Ana Custura
Browse files

Move filters and filter metadata to analysis files

parent 9d0c8056
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
import re import re
from onionperf.analysis import OPAnalysis from onionperf.analysis import OPAnalysis
from collections import defaultdict
class Filtering(object): class Filtering(object):
...@@ -14,9 +15,11 @@ class Filtering(object): ...@@ -14,9 +15,11 @@ class Filtering(object):
self.fingerprints_to_include = None self.fingerprints_to_include = None
self.fingerprints_to_exclude = None self.fingerprints_to_exclude = None
self.fingerprint_pattern = re.compile("\$?([0-9a-fA-F]{40})") self.fingerprint_pattern = re.compile("\$?([0-9a-fA-F]{40})")
self.filters = defaultdict(list)
def include_fingerprints(self, path): def include_fingerprints(self, path):
self.fingerprints_to_include = [] self.fingerprints_to_include = []
self.fingerprints_to_include_path = path
with open(path, 'rt') as f: with open(path, 'rt') as f:
for line in f: for line in f:
fingerprint_match = self.fingerprint_pattern.match(line) fingerprint_match = self.fingerprint_pattern.match(line)
...@@ -26,6 +29,7 @@ class Filtering(object): ...@@ -26,6 +29,7 @@ class Filtering(object):
def exclude_fingerprints(self, path): def exclude_fingerprints(self, path):
self.fingerprints_to_exclude = [] self.fingerprints_to_exclude = []
self.fingerprints_to_exclude_path = path
with open(path, 'rt') as f: with open(path, 'rt') as f:
for line in f: for line in f:
fingerprint_match = self.fingerprint_pattern.match(line) fingerprint_match = self.fingerprint_pattern.match(line)
...@@ -33,12 +37,16 @@ class Filtering(object): ...@@ -33,12 +37,16 @@ class Filtering(object):
fingerprint = fingerprint_match.group(1).upper() fingerprint = fingerprint_match.group(1).upper()
self.fingerprints_to_exclude.append(fingerprint) self.fingerprints_to_exclude.append(fingerprint)
def apply_filters(self, input_path, output_dir, output_file): def filter_tor_circuits(self, analysis):
self.analysis = OPAnalysis.load(filename=input_path)
if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None: if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None:
return return
for source in self.analysis.get_nodes(): self.filters["tor/circuits"] = []
tor_circuits = self.analysis.get_tor_circuits(source) if self.fingerprints_to_include:
self.filters["tor/circuits"].append({"name": "include_fingerprints", "filepath": self.fingerprints_to_include_path })
if self.fingerprints_to_exclude:
self.filters["tor/circuits"].append({"name": "exclude_fingerprints", "filepath": self.fingerprints_to_exclude_path })
for source in analysis.get_nodes():
tor_circuits = analysis.get_tor_circuits(source)
filtered_circuit_ids = [] filtered_circuit_ids = []
for circuit_id, tor_circuit in tor_circuits.items(): for circuit_id, tor_circuit in tor_circuits.items():
keep = False keep = False
...@@ -56,8 +64,11 @@ class Filtering(object): ...@@ -56,8 +64,11 @@ class Filtering(object):
keep = False keep = False
break break
if not keep: if not keep:
filtered_circuit_ids.append(circuit_id) tor_circuits[circuit_id]["filtered"] = True
for circuit_id in filtered_circuit_ids:
del(tor_circuits[circuit_id]) def apply_filters(self, input_path, output_dir, output_file):
self.analysis = OPAnalysis.load(filename=input_path)
self.filter_tor_circuits(self.analysis)
self.analysis.json_db["filters"] = self.filters
self.analysis.save(filename=output_file, output_prefix=output_dir, sort_keys=False) self.analysis.save(filename=output_file, output_prefix=output_dir, sort_keys=False)
...@@ -342,13 +342,6 @@ files generated by this script will be written""", ...@@ -342,13 +342,6 @@ files generated by this script will be written""",
required="True", required="True",
action=PathStringArgsAction, dest="datasets") action=PathStringArgsAction, dest="datasets")
visualize_parser.add_argument('--outer-join',
help="""Include measurements without an existing mapping between TGen
transfers/streams and Tor streams/circuits, which is the
equivalent of an outer join in the database sense""",
action="store_true", dest="outer_join",
default=False)
visualize_parser.add_argument('-p', '--prefix', visualize_parser.add_argument('-p', '--prefix',
help="a STRING filename prefix for graphs we generate", help="a STRING filename prefix for graphs we generate",
metavar="STRING", type=str, metavar="STRING", type=str,
...@@ -489,7 +482,7 @@ def visualize(args): ...@@ -489,7 +482,7 @@ def visualize(args):
if analysis is not None: if analysis is not None:
analyses.append(analysis) analyses.append(analysis)
tgen_viz.add_dataset(analyses, label) tgen_viz.add_dataset(analyses, label)
tgen_viz.plot_all(args.prefix, outer_join=args.outer_join) tgen_viz.plot_all(args.prefix)
def type_nonnegative_integer(value): def type_nonnegative_integer(value):
i = int(value) i = int(value)
......
...@@ -31,11 +31,11 @@ class Visualization(object, metaclass=ABCMeta): ...@@ -31,11 +31,11 @@ class Visualization(object, metaclass=ABCMeta):
class TGenVisualization(Visualization): class TGenVisualization(Visualization):
def plot_all(self, output_prefix, outer_join=False): def plot_all(self, output_prefix):
if len(self.datasets) > 0: if len(self.datasets) > 0:
prefix = output_prefix + '.' if output_prefix is not None else '' prefix = output_prefix + '.' if output_prefix is not None else ''
ts = time.strftime("%Y-%m-%d_%H:%M:%S") ts = time.strftime("%Y-%m-%d_%H:%M:%S")
self.__extract_data_frame(outer_join) self.__extract_data_frame()
self.data.to_csv("{0}onionperf.viz.{1}.csv".format(prefix, ts)) self.data.to_csv("{0}onionperf.viz.{1}.csv".format(prefix, ts))
sns.set_context("paper") sns.set_context("paper")
self.page = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts)) self.page = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts))
...@@ -51,7 +51,7 @@ class TGenVisualization(Visualization): ...@@ -51,7 +51,7 @@ class TGenVisualization(Visualization):
self.__plot_errors_time() self.__plot_errors_time()
self.page.close() self.page.close()
def __extract_data_frame(self, outer_join=False): def __extract_data_frame(self):
streams = [] streams = []
for (analyses, label) in self.datasets: for (analyses, label) in self.datasets:
for analysis in analyses: for analysis in analyses:
...@@ -145,8 +145,12 @@ class TGenVisualization(Visualization): ...@@ -145,8 +145,12 @@ class TGenVisualization(Visualization):
if "failure_reason_remote" in tor_stream: if "failure_reason_remote" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_remote"]) error_code_parts.append(tor_stream["failure_reason_remote"])
stream["error_code"] = "/".join(error_code_parts) stream["error_code"] = "/".join(error_code_parts)
if tor_circuit or outer_join:
streams.append(stream) if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]:
if tor_circuit and "filtered" not in tor_circuit.keys():
streams.append(stream)
else:
streams.append(stream)
self.data = pd.DataFrame.from_records(streams, index="id") self.data = pd.DataFrame.from_records(streams, index="id")
def __plot_firstbyte_ecdf(self): def __plot_firstbyte_ecdf(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment