Add scripts for comparing v3bw files

0d6ec2d9 · Matt Traudt · f8b895ca · 0d6ec2d9 · 0d6ec2d9
Commit 0d6ec2d9 authored 7 years ago by Matt Traudt
--- a/scripts/tools/plot-v3bw-xy.py
+++ b/scripts/tools/plot-v3bw-xy.py
+#!/usr/bin/env python3
+# File: plot-v3bw-xy.py
+# Author: Matt Traudt
+# License: CC0
+#
+# Requires matplotlib; pip install matplotlib
+from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
+import matplotlib; matplotlib.use('Agg')  # noqa; for systems without X11
+from matplotlib.backends.backend_pdf import PdfPages
+import pylab as plt
+
+# Turn the grid on for every axes created by this script.
+plt.rcParams['axes.grid'] = True
+
+
+def get_all_values_from_fd(fd):
+    """Parse "fingerprint bandwidth" lines from an iterable of text lines.
+
+    A usable line holds exactly two whitespace-separated fields: a
+    40-character fingerprint and a number. Any other line (wrong field
+    count, non-numeric bandwidth, wrong fingerprint length) is reported on
+    stdout and skipped.
+
+    Returns a list of (fingerprint, bandwidth) tuples in input order, with
+    the bandwidth converted to float.
+    """
+    values = []
+    for line in fd:
+        try:
+            fingerprint, bandwidth = line.strip().split()
+            bandwidth = float(bandwidth)
+            # Raise ValueError rather than assert so that a fingerprint of
+            # the wrong length is skipped like every other malformed line
+            # instead of aborting the run with an uncaught AssertionError.
+            if len(fingerprint) != 40:
+                raise ValueError('fingerprint is not 40 characters long')
+        except ValueError:
+            print('ignoring', line)
+            continue
+        values.append((fingerprint, bandwidth))
+    return values
+
+
+def common_elements(l1, l2):
+    """Return the set of items of l1 that are also contained in l2."""
+    return {candidate for candidate in l1 if candidate in l2}
+
+
+def main(args, pdf):
+    """Scatter-plot the bandwidths of the relays present in every input.
+
+    args is the parsed command line. This function reads args.input (a list
+    of FNAME, LABEL pairs), args.size, args.xlabel, args.ylabel and
+    args.title. pdf must provide savefig(); the finished figure is written
+    through it.
+
+    Only fingerprints found in all inputs are plotted. Points are ordered by
+    the first input's value, highest first, so a relay gets the same X
+    position for every label.
+
+    Returns 1 without plotting when no input was given, otherwise None.
+    Raises ValueError when a plotted fingerprint occurs more than once in a
+    single input.
+    """
+    if not args.input:
+        # argparse leaves args.input as None when -i is never passed.
+        print('no input given; use -i FNAME LABEL at least once')
+        return 1
+    plt.figure()
+    # Read all data in as {label: {fingerprint: [value, ...]}} so that later
+    # lookups by fingerprint do not rescan every point.
+    data = {}
+    all_labels = []
+    for fname, label in args.input:
+        with open(fname, 'rt') as fd:
+            points = get_all_values_from_fd(fd)
+        by_fp = {}
+        for fp, value in points:
+            by_fp.setdefault(fp, []).append(value)
+        data[label] = by_fp
+        all_labels.append(label)
+    # Determine what relay fingerprints have data from all input sources
+    common_fingerprints = set.intersection(
+        *[set(by_fp) for by_fp in data.values()])
+    # Build one {label: value} dict per common fingerprint, e.g.
+    # [
+    #    {'label1': 10, 'label2': 30},
+    #    {'label1': 20, 'label2': 15},
+    # ]
+    # Going through the fingerprints in sorted order keeps the position of
+    # relays with equal values the same from run to run.
+    rows = []
+    for fp in sorted(common_fingerprints):
+        row = {}
+        for label in data:
+            values = data[label][fp]
+            # Several values for one relay in one input cannot be drawn as
+            # a single point.
+            if len(values) != 1:
+                raise ValueError(
+                    'fingerprint %s appears %d times in input %s' %
+                    (fp, len(values), label))
+            row[label] = values[0]
+        rows.append(row)
+    # Sort the data points such that sort_label's highest value is first.
+    sort_label = all_labels[0]
+    rows.sort(key=lambda row: row[sort_label], reverse=True)
+    # Plot data
+    x = list(range(len(rows)))
+    for label in all_labels:
+        y = [row[label] / 1000 for row in rows]
+        plt.scatter(x, y, s=args.size, label=label)
+    plt.legend(loc='upper right')
+    plt.xlabel(args.xlabel)
+    plt.ylabel(args.ylabel)
+    plt.title(args.title)
+    pdf.savefig()
+
+
+if __name__ == '__main__':
+    # Command line entry point: parse the options, then write the plot
+    # produced by main() into the requested PDF file.
+    d = 'Takes one or more lists of (fingerprint, bandwidth) points, 1 per '\
+        'line, and plots a scatter plot of them. Data points are sorted by '\
+        'the first input\'s bandwidth values, thus this script can be used '\
+        'to visually determine how similar the results are from various '\
+        'instances of a bandwidth scanner, or even across different '\
+        'bandwidth scanning tools.'
+    parser = ArgumentParser(
+        formatter_class=ArgumentDefaultsHelpFormatter, description=d)
+    # At least one input is required: without it there is nothing to plot
+    # and args.input would be None.
+    parser.add_argument(
+        '-i', '--input', nargs=2, metavar=('FNAME', 'LABEL'),
+        action='append', required=True,
+        help='Specify a file to read values from and what '
+        'to label its points in the PDF. Can be given more than once.')
+    parser.add_argument('-o', '--output', default='temp.pdf')
+    parser.add_argument('-x', '--xlabel', type=str, default='Relay #',
+                        help='What to label the X axis in the PDF')
+    parser.add_argument('-y', '--ylabel', type=str,
+                        default='"Bandwidth" units (thousands)',
+                        help='What to label the Y axis in the PDF')
+    parser.add_argument('-t', '--title', type=str,
+                        default='Correlation of various bwscanning systems',
+                        help='What to title the plot in the PDF')
+    parser.add_argument('-s', '--size', type=float, default=1,
+                        help='Size of scatter plot points')
+    args = parser.parse_args()
+    with PdfPages(args.output) as pdf:
+        # SystemExit carries main()'s return value as the exit status; it
+        # does not depend on the exit() helper installed by the site module.
+        raise SystemExit(main(args, pdf))
--- a/scripts/tools/v3bw-into-xy.sh
+++ b/scripts/tools/v3bw-into-xy.sh
+#!/usr/bin/env bash
+# File: v3bw-into-xy.sh
+# Author: Matt Traudt
+# License: CC0
+#
+# Takes one or more v3bw files as arguments.
+#
+# Looks for lines that contain actual data. That means most of them, since most
+# of them start with "node_id=" and those are the ones that are interesting.
+#
+# Extract the fingerprint and bandwidth values for each of those lines and put
+# them on stdout, one per line. Effectively, after ignoring other lines, this:
+#     node_id=$AAAA...AAAA bw=12345
+# becomes this:
+#     AAAA...AAAA 12345
+#
+# NOTE: If you specify more than one v3bw file, this will do NOTHING to tell
+# you when the output from one file stops and the next begins.
+set -e
+# Work through the arguments one at a time; stop at the first empty one.
+while [ -n "$1" ]
+do
+    # Keep only the relay lines, reduce each to "fingerprint bandwidth",
+    # then strip every "$" from the result.
+    grep '^node_id=' "$1" |
+        sed -r -e 's|^node_id=([$A-Z0-9]+) bw=([0-9]+).*$|\1 \2|' \
+               -e 's|\$||g'
+    shift
+done