GitLab is used only for code review, issue tracking and project management. Canonical locations for source code are still https://gitweb.torproject.org/, https://git.torproject.org/, and git-rw.torproject.org.

filtering.py 3.26 KB
Newer Older
1 2 3 4 5 6 7 8 9
'''
  OnionPerf
  Authored by Rob Jansen, 2015
  Copyright 2015-2020 The Tor Project
  See LICENSE for licensing information
'''

import re
from onionperf.analysis import OPAnalysis
10
from collections import defaultdict
11 12 13 14 15 16 17

class Filtering(object):
    """Mark Tor circuits in an analysis as filtered, based on fingerprints.

    Fingerprints are 40 hexadecimal characters, optionally prefixed with
    ``$``, and are compared in upper case.  An include list and/or an
    exclude list can be loaded from text files; ``filter_tor_circuits``
    then sets ``"filtered": True`` on every circuit that does not pass.
    """

    def __init__(self):
        # None means "this filter was never configured".  A list, even an
        # empty one, means the corresponding file has been loaded and the
        # filter is active.
        self.fingerprints_to_include = None
        self.fingerprints_to_exclude = None
        # Paths of the loaded files, recorded in the filter metadata.
        self.fingerprints_to_include_path = None
        self.fingerprints_to_exclude_path = None
        # Raw string: "\$" in a normal string literal is an invalid escape.
        self.fingerprint_pattern = re.compile(r"\$?([0-9a-fA-F]{40})")
        self.filters = defaultdict(list)

    def _read_fingerprints(self, path):
        """Return the upper-cased fingerprints found at the start of each line of *path*.

        Lines that do not begin with a fingerprint are skipped.  Errors
        raised by ``open`` propagate to the caller.
        """
        fingerprints = []
        with open(path, 'rt') as f:
            for line in f:
                fingerprint_match = self.fingerprint_pattern.match(line)
                if fingerprint_match:
                    fingerprints.append(fingerprint_match.group(1).upper())
        return fingerprints

    def include_fingerprints(self, path):
        """Load the list of fingerprints to include from the file at *path*."""
        # Read first, assign afterwards: a failed read must not leave an
        # empty, active include list behind, as that rejects every circuit.
        fingerprints = self._read_fingerprints(path)
        self.fingerprints_to_include = fingerprints
        self.fingerprints_to_include_path = path

    def exclude_fingerprints(self, path):
        """Load the list of fingerprints to exclude from the file at *path*."""
        fingerprints = self._read_fingerprints(path)
        self.fingerprints_to_exclude = fingerprints
        self.fingerprints_to_exclude_path = path

    def _keep_circuit(self, tor_circuit, included, excluded):
        """Return True if *tor_circuit* passes the include/exclude filters.

        *included* and *excluded* are sets of upper-case fingerprints, or
        None when the respective filter is not configured.
        """
        if "path" not in tor_circuit:
            # Circuits without a path cannot be checked and are not kept.
            return False
        for long_name, _ in tor_circuit["path"]:
            fingerprint_match = self.fingerprint_pattern.match(long_name)
            if not fingerprint_match:
                # Entries that do not start with a fingerprint are ignored.
                continue
            fingerprint = fingerprint_match.group(1).upper()
            if included is not None and fingerprint not in included:
                return False
            if excluded is not None and fingerprint in excluded:
                return False
        return True

    def filter_tor_circuits(self, analysis):
        """Set ``"filtered": True`` on circuits that do not pass the filters.

        Does nothing when neither list has been loaded.  Otherwise the
        applied filters are recorded under ``self.filters["tor/circuits"]``
        and, for every node returned by ``analysis.get_nodes()``, the
        circuit dicts returned by ``analysis.get_tor_circuits(node)`` are
        modified in place.
        """
        if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None:
            return

        # Record every filter that is actually applied below, including one
        # whose file contained no fingerprints.
        applied = []
        if self.fingerprints_to_include is not None:
            applied.append({"name": "include_fingerprints", "filepath": self.fingerprints_to_include_path})
        if self.fingerprints_to_exclude is not None:
            applied.append({"name": "exclude_fingerprints", "filepath": self.fingerprints_to_exclude_path})
        self.filters["tor/circuits"] = applied

        # Sets give constant-time membership tests inside the nested loops;
        # the public attributes remain lists.
        included = None
        if self.fingerprints_to_include is not None:
            included = set(self.fingerprints_to_include)
        excluded = None
        if self.fingerprints_to_exclude is not None:
            excluded = set(self.fingerprints_to_exclude)

        for source in analysis.get_nodes():
            tor_circuits = analysis.get_tor_circuits(source)
            for tor_circuit in tor_circuits.values():
                if not self._keep_circuit(tor_circuit, included, excluded):
                    tor_circuit["filtered"] = True

    def apply_filters(self, input_path, output_dir, output_file):
        """Load an analysis, filter its Tor circuits and save the result.

        The analysis is loaded from *input_path* with ``OPAnalysis.load``
        and kept in ``self.analysis``.  The recorded filter metadata is
        stored under the ``"filters"`` key of its ``json_db`` before it is
        saved with *output_file* as filename and *output_dir* as prefix.
        """
        self.analysis = OPAnalysis.load(filename=input_path)
        self.filter_tor_circuits(self.analysis)
        self.analysis.json_db["filters"] = self.filters
        self.analysis.save(filename=output_file, output_prefix=output_dir, sort_keys=False)
74