Commit a9ad5ba0 authored by Damian Johnson's avatar Damian Johnson
Browse files

Parse BandwidthFile body

Huh. That was easy. Our spec is pretty sparse on what our body includes,
mandating that each line is a series of key=value pairs and includes a
'node_id' but not much beyond that.

Minimal specificity limits what our parser can provide, but also grants sbws
flexibility and makes my work dead easy. Body content is vended to users as a
mapping of relay fingerprints to measurement metadata without any additional
processing (no mandatory fields, type casting, etc).
parent 3dcc573e
Loading
Loading
Loading
Loading
+42 −5
Original line number Diff line number Diff line
@@ -19,7 +19,10 @@ import time

import stem.util.str_tools

from stem.descriptor import Descriptor
from stem.descriptor import (
  _mappings_for,
  Descriptor,
)

HEADER_DIV = '====='

@@ -122,10 +125,46 @@ def _parse_timestamp(descriptor, entries):
    raise ValueError("First line should be a unix timestamp, but was '%s'" % first_line)


def _parse_body(descriptor, entries):
  """
  Parses the measurement lines that follow our header, setting the
  descriptor's **measurements** attribute to a mapping of relay fingerprints
  (without their '$' prefix) to that line's key/value attributes. Values are
  stored as provided by **_mappings_for**, without any further processing.

  :param descriptor: descriptor whose string content is read and whose
    **measurements** attribute gets set
  :param entries: unused by this parser

  :raises: **ValueError** if a measurement line lacks a 'node_id'
  """

  # In version 1.0.0 the body is everything after the first line. Otherwise
  # it's everything after the header's divider.

  div = '\n' if descriptor.version == '1.0.0' else HEADER_DIV
  content = str(descriptor)

  body = content.split(div, 1)[1].strip() if div in content else ''
  measurements = {}

  # Guard against an empty body since ''.split('\n') would otherwise give us
  # a single blank line that lacks a 'node_id'.

  if body:
    for line in body.split('\n'):
      attr = dict(_mappings_for('measurement', line))

      if 'node_id' not in attr:
        raise ValueError("Every meaurement must include 'node_id': %s" % line)

      # Normalize before checking for duplicates. Our mapping is keyed by the
      # fingerprint *without* its '$' prefix, so checking the raw node_id
      # would never match a relay we've already recorded.

      fingerprint = attr['node_id'].lstrip('$')  # bwauths prefix fingerprints with '$'

      if fingerprint in measurements:
        # Relay is listed multiple times. This is a bug for the bandwidth
        # authority that made this descriptor, but according to the spec
        # should be ignored by parsers.

        continue

      measurements[fingerprint] = attr

  descriptor.measurements = measurements


class BandwidthFile(Descriptor):
  """
  Tor bandwidth authority measurements.

  :var dict measurements: **\*** mapping of relay fingerprints to their
    bandwidth measurement metadata

  :var dict header: **\*** header metadata
  :var datetime timestamp: **\*** time when these metrics were published
  :var str version: **\*** document format version

@@ -143,8 +182,6 @@ class BandwidthFile(Descriptor):
  :var int min_count: minimum eligible relays for results to be provided
  :var int min_percent: minimum measured percentage of the consensus

  :var dict header: **\*** header metadata

  **\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined
  """
@@ -154,6 +191,7 @@ class BandwidthFile(Descriptor):
  ATTRIBUTES = {
    'timestamp': (None, _parse_timestamp),
    'header': ({}, _parse_header),
    'measurements': ({}, _parse_body),
  }

  ATTRIBUTES.update(dict([(k, (None, _parse_header)) for k in HEADER_ATTR.keys()]))
@@ -211,8 +249,7 @@ class BandwidthFile(Descriptor):
  def __init__(self, raw_content, validate = False):
    super(BandwidthFile, self).__init__(raw_content, lazy_load = not validate)

    self.content = []  # TODO: implement

    if validate:
      _parse_timestamp(self, None)
      _parse_header(self, None)
      _parse_body(self, None)
+36 −0
Original line number Diff line number Diff line
@@ -16,6 +16,36 @@ try:
except ImportError:
  from mock import Mock, patch

# Attributes our tests expect to be parsed for relay
# D8B9CAA5B818DEFE80857F83FDABBB6429DCFCA0. Measurements are looked up by
# fingerprint without the '$' prefix, but the 'node_id' value within the entry
# retains it. All values are strings.

EXPECTED_MEASUREMENT_1 = {
  'scanner': '/scanner.1/scan-data/bws-0.0:0.8-done-2019-01-13-22:55:22',
  'measured_at': '1547441722',
  'pid_delta': '1.07534299311',
  'updated_at': '1547441722',
  'pid_error_sum': '3.23746667827',
  'nick': 'baldr',
  'node_id': '$D8B9CAA5B818DEFE80857F83FDABBB6429DCFCA0',
  'pid_bw': '47625769',
  'bw': '47600',
  'pid_error': '3.23746667827',
  'circ_fail': '0.0',
}

# Attributes our version 1.2 test expects to be parsed for relay
# 9C7E1AFDACC53228F6FB57B3A08C7D36240B8F6F. As above, the lookup key lacks
# the '$' prefix while the entry's 'node_id' keeps it, and all values are
# strings.

EXPECTED_MEASUREMENT_2 = {
  'desc_bw_obs_last': '473188',
  'success': '13',
  'desc_bw_obs_mean': '581671',
  'bw_median': '202438',
  'nick': 'Teinetteiine',
  'bw': '1',
  'desc_bw_avg': '1024000',
  'time': '2019-01-13T12:21:29',
  'bw_mean': '184647',
  'error_circ': '0',
  'error_stream': '0',
  'node_id': '$9C7E1AFDACC53228F6FB57B3A08C7D36240B8F6F',
  'error_misc': '0',
}

EXPECTED_NEW_HEADER_CONTENT = """
1410723598
version=1.1.0
@@ -49,6 +79,9 @@ class TestBandwidthFile(unittest.TestCase):
    self.assertEqual(None, desc.min_count)
    self.assertEqual(None, desc.min_percent)

    self.assertEqual(94, len(desc.measurements))
    self.assertEqual(EXPECTED_MEASUREMENT_1, desc.measurements['D8B9CAA5B818DEFE80857F83FDABBB6429DCFCA0'])

  def test_format_v1_2(self):
    """
    Parse version 1.2 formatted files.
@@ -73,6 +106,9 @@ class TestBandwidthFile(unittest.TestCase):
    self.assertEqual(3908, desc.min_count)
    self.assertEqual(60, desc.min_percent)

    self.assertEqual(81, len(desc.measurements))
    self.assertEqual(EXPECTED_MEASUREMENT_2, desc.measurements['9C7E1AFDACC53228F6FB57B3A08C7D36240B8F6F'])

  @patch('time.time', Mock(return_value = 1410723598.276578))
  def test_minimal_bandwidth_file(self):
    """