GitLab is used only for code review, issue tracking, and project management. Canonical locations for source code are still https://gitweb.torproject.org/, https://git.torproject.org/, and git-rw.torproject.org.

Commit 95b749a8 authored by Ana Custura

Move filters and filter metadata to analysis files

parent 9d0c8056
......@@ -7,6 +7,7 @@
import re
from onionperf.analysis import OPAnalysis
from collections import defaultdict
class Filtering(object):
......@@ -14,9 +15,11 @@ class Filtering(object):
self.fingerprints_to_include = None
self.fingerprints_to_exclude = None
self.fingerprint_pattern = re.compile("\$?([0-9a-fA-F]{40})")
self.filters = defaultdict(list)
def include_fingerprints(self, path):
self.fingerprints_to_include = []
self.fingerprints_to_include_path = path
with open(path, 'rt') as f:
for line in f:
fingerprint_match = self.fingerprint_pattern.match(line)
......@@ -26,6 +29,7 @@ class Filtering(object):
def exclude_fingerprints(self, path):
self.fingerprints_to_exclude = []
self.fingerprints_to_exclude_path = path
with open(path, 'rt') as f:
for line in f:
fingerprint_match = self.fingerprint_pattern.match(line)
......@@ -33,12 +37,16 @@ class Filtering(object):
fingerprint = fingerprint_match.group(1).upper()
self.fingerprints_to_exclude.append(fingerprint)
def apply_filters(self, input_path, output_dir, output_file):
self.analysis = OPAnalysis.load(filename=input_path)
def filter_tor_circuits(self, analysis):
if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None:
return
for source in self.analysis.get_nodes():
tor_circuits = self.analysis.get_tor_circuits(source)
self.filters["tor/circuits"] = []
if self.fingerprints_to_include:
self.filters["tor/circuits"].append({"name": "include_fingerprints", "filepath": self.fingerprints_to_include_path })
if self.fingerprints_to_exclude:
self.filters["tor/circuits"].append({"name": "exclude_fingerprints", "filepath": self.fingerprints_to_exclude_path })
for source in analysis.get_nodes():
tor_circuits = analysis.get_tor_circuits(source)
filtered_circuit_ids = []
for circuit_id, tor_circuit in tor_circuits.items():
keep = False
......@@ -56,8 +64,11 @@ class Filtering(object):
keep = False
break
if not keep:
filtered_circuit_ids.append(circuit_id)
for circuit_id in filtered_circuit_ids:
del(tor_circuits[circuit_id])
tor_circuits[circuit_id]["filtered"] = True
def apply_filters(self, input_path, output_dir, output_file):
    """Load an analysis file, filter its Tor circuits, and save the result.

    The analysis is read from ``input_path`` via ``OPAnalysis.load`` and
    kept on ``self.analysis``. After ``filter_tor_circuits`` has run, the
    filter metadata collected in ``self.filters`` is stored under the
    ``"filters"`` key of the analysis' ``json_db``, and the analysis is
    written out with ``output_file`` as the filename and ``output_dir`` as
    the output prefix.
    """
    loaded = OPAnalysis.load(filename=input_path)
    self.analysis = loaded

    # Filter first, so the recorded metadata reflects what was applied.
    self.filter_tor_circuits(loaded)
    loaded.json_db["filters"] = self.filters

    loaded.save(
        filename=output_file,
        output_prefix=output_dir,
        sort_keys=False,
    )
......@@ -342,13 +342,6 @@ files generated by this script will be written""",
required="True",
action=PathStringArgsAction, dest="datasets")
visualize_parser.add_argument('--outer-join',
help="""Include measurements without an existing mapping between TGen
transfers/streams and Tor streams/circuits, which is the
equivalent of an outer join in the database sense""",
action="store_true", dest="outer_join",
default=False)
visualize_parser.add_argument('-p', '--prefix',
help="a STRING filename prefix for graphs we generate",
metavar="STRING", type=str,
......@@ -489,7 +482,7 @@ def visualize(args):
if analysis is not None:
analyses.append(analysis)
tgen_viz.add_dataset(analyses, label)
tgen_viz.plot_all(args.prefix, outer_join=args.outer_join)
tgen_viz.plot_all(args.prefix)
def type_nonnegative_integer(value):
i = int(value)
......
......@@ -31,11 +31,11 @@ class Visualization(object, metaclass=ABCMeta):
class TGenVisualization(Visualization):
def plot_all(self, output_prefix, outer_join=False):
def plot_all(self, output_prefix):
if len(self.datasets) > 0:
prefix = output_prefix + '.' if output_prefix is not None else ''
ts = time.strftime("%Y-%m-%d_%H:%M:%S")
self.__extract_data_frame(outer_join)
self.__extract_data_frame()
self.data.to_csv("{0}onionperf.viz.{1}.csv".format(prefix, ts))
sns.set_context("paper")
self.page = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts))
......@@ -51,7 +51,7 @@ class TGenVisualization(Visualization):
self.__plot_errors_time()
self.page.close()
def __extract_data_frame(self, outer_join=False):
def __extract_data_frame(self):
streams = []
for (analyses, label) in self.datasets:
for analysis in analyses:
......@@ -145,8 +145,12 @@ class TGenVisualization(Visualization):
if "failure_reason_remote" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_remote"])
stream["error_code"] = "/".join(error_code_parts)
if tor_circuit or outer_join:
streams.append(stream)
if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]:
if tor_circuit and "filtered" not in tor_circuit.keys():
streams.append(stream)
else:
streams.append(stream)
self.data = pd.DataFrame.from_records(streams, index="id")
def __plot_firstbyte_ecdf(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment