Implement method to scale as Torflow

090507ee · juga · 5a9be7c5 · 090507ee
Commit 090507ee authored 6 years ago by juga
--- a/sbws/lib/v3bwfile.py
+++ b/sbws/lib/v3bwfile.py
@@ -481,9 +481,196 @@ class V3BWFile(object):
                        'allowed', (1 - accuracy_ratio) * 100, margin * 100)

    @staticmethod
-    def bw_lines_torflow(bw_lines, desc_obs_bws=TORFLOW_OBS_LAST,
+    def bw_torflow_scale(bw_lines, desc_obs_bws=TORFLOW_OBS_LAST,
                         cap=TORFLOW_BW_MARGIN, reverse=False):
-        pass
+        """
+        Obtain final bandwidth measurements applying Torflow's scaling
+        method.
+
+        From Torflow's README.spec.txt (section 2.2)::
+
+            In this way, the resulting network status consensus bandwidth values  # NOQA
+            are effectively re-weighted proportional to how much faster the node  # NOQA
+            was as compared to the rest of the network.
+
+        The variables and steps used in Torflow:
+
+        **strm_bw**::
+
+            The strm_bw field is the average (mean) of all the streams for the relay  # NOQA
+            identified by the fingerprint field.
+            strm_bw = sum(bw stream x)/|n stream|
+
+        **filt_bw**::
+
+            The filt_bw field is computed similarly, but only the streams equal to  # NOQA
+            or greater than the strm_bw are counted in order to filter very slow  # NOQA
+            streams due to slow node pairings.
+
+        **filt_sbw and strm_sbw**::
+
+            for rs in RouterStats.query.filter(stats_clause).\
+                  options(eagerload_all('router.streams.circuit.routers')).all():  # NOQA
+              tot_sbw = 0
+              sbw_cnt = 0
+              for s in rs.router.streams:
+                if isinstance(s, ClosedStream):
+                  skip = False
+                  #for br in badrouters:
+                  #  if br != rs:
+                  #    if br.router in s.circuit.routers:
+                  #      skip = True
+                  if not skip:
+                    # Throw out outliers < mean
+                    # (too much variance for stddev to filter much)
+                    if rs.strm_closed == 1 or s.bandwidth() >= rs.sbw:
+                      tot_sbw += s.bandwidth()
+                      sbw_cnt += 1
+
+            if sbw_cnt: rs.filt_sbw = tot_sbw/sbw_cnt
+            else: rs.filt_sbw = None
+
+        **filt_avg, and strm_avg**::
+
+            Once we have determined the most recent measurements for each node, we  # NOQA
+            compute an average of the filt_bw fields over all nodes we have measured.  # NOQA
+
+        ::
+
+            filt_avg = sum(map(lambda n: n.filt_bw, nodes.itervalues()))/float(len(nodes))  # NOQA
+            strm_avg = sum(map(lambda n: n.strm_bw, nodes.itervalues()))/float(len(nodes))  # NOQA
+
+        **true_filt_avg and true_strm_avg**::
+
+            for cl in ["Guard+Exit", "Guard", "Exit", "Middle"]:
+                true_filt_avg[cl] = filt_avg
+                true_strm_avg[cl] = strm_avg
+
+        In the non-pid case, all types of nodes get the same avg
+
+        **n.fbw_ratio and n.fsw_ratio**::
+
+            for n in nodes.itervalues():
+                n.fbw_ratio = n.filt_bw/true_filt_avg[n.node_class()]
+                n.sbw_ratio = n.strm_bw/true_strm_avg[n.node_class()]
+
+        **n.ratio**::
+
+            These averages are used to produce ratios for each node by dividing the  # NOQA
+            measured value for that node by the network average.
+
+        ::
+
+            # Choose the larger between sbw and fbw
+              if n.sbw_ratio > n.fbw_ratio:
+                n.ratio = n.sbw_ratio
+              else:
+                n.ratio = n.fbw_ratio
+
+        **desc_bw**:
+
+        It is the ``observed bandwidth`` in the descriptor, NOT the ``average
+        bandwidth``::
+
+            return Router(ns.idhex, ns.nickname, bw_observed, dead, exitpolicy,
+            ns.flags, ip, version, os, uptime, published, contact, rate_limited,  # NOQA
+            ns.orhash, ns.bandwidth, extra_info_digest, ns.unmeasured)
+            self.desc_bw = max(bw,1) # Avoid div by 0
+
+        **new_bw**::
+
+            These ratios are then multiplied by the most recent observed descriptor  # NOQA
+            bandwidth we have available for each node, to produce a new value for  # NOQA
+            the network status consensus process.
+
+        ::
+
+            n.new_bw = n.desc_bw*n.ratio
+
+        The descriptor observed bandwidth is multiplied by the ratio.
+        With empirical results this ratio is ~[0.9, 8.9]
+
+        **Limit the bandwidth to a maximum**::
+
+            NODE_CAP = 0.05
+
+        ::
+
+            if n.new_bw > tot_net_bw*NODE_CAP:
+              plog("INFO", "Clipping extremely fast "+n.node_class()+" node "+n.idhex+"="+n.nick+  # NOQA
+                   " at "+str(100*NODE_CAP)+"% of network capacity ("+
+                   str(n.new_bw)+"->"+str(int(tot_net_bw*NODE_CAP))+") "+
+                   " pid_error="+str(n.pid_error)+
+                   " pid_error_sum="+str(n.pid_error_sum))
+              n.new_bw = int(tot_net_bw*NODE_CAP)
+
+        However, tot_net_bw does not seems to be updated when not using pid.
+        This clipping would make faster relays to all have the same value.
+
+        All of that can be expressed as:
+
+        .. math::
+
+                bwnew_i &=
+                    max\\left(
+                        \\frac{bw_i}{\\mu},
+                        min \\left(
+                            bw_i,
+                            bw_i \\times \\mu
+                            \\right)
+                                \\times
+                                \\frac{bw}{\\sum_{i=1}^{n}
+                                min \\left(bw_i,
+                                    bw_i \\times \\mu
+                            \\right)}
+                        \\right)
+                    \\times bwdescobs_i \\
+
+                &=
+                    max\\left(
+                        \\frac{bw_i}{\\frac{\\sum_{i=1}^{n}bw_i}{n}},
+                        min \\left(
+                            bw_i,
+                            bw_i \\times \\frac{\\sum_{i=1}^{n}bw_i}{n}
+                            \\right)
+                                \\times
+                                \\frac{bw}{\\sum_{i=1}^{n}
+                                min \\left(bw_i,
+                                    bw_i \\times \\frac{\\sum_{i=1}^{n}bw_i}{n}
+                            \\right)}
+                        \\right)
+                    \\times bwdescobs_i
+
+        """
+        log.info("Calculating relays' bandwidth using Torflow method.")
+        bw_lines_tf = copy.deepcopy(bw_lines)
+        # mean (Torflow's strm_avg)
+        mu = mean([l.bw_bs_mean for l in bw_lines])
+        # filtered mean (Torflow's filt_avg)
+        muf = mean([min(l.bw_bs_mean, mu) for l in bw_lines])
+        # bw sum (Torflow's tot_net_bw or tot_sbw)
+        sum_bw = sum([l.bw_bs_mean for l in bw_lines])
+        # Torflow's clipping, not being applied
+        # hlimit = sum_bw * TORFLOW_BW_MARGIN
+        log.debug('sum %s', sum_bw)
+        log.debug('mu %s', mu)
+        log.debug('muf %s', muf)
+        # log.debug('hlimit %s', hlimit)
+        for l in bw_lines_tf:
+            if desc_obs_bws == TORFLOW_OBS_LAST:
+                desc_obs_bw = l.desc_obs_bw_bs_last
+            elif desc_obs_bws == TORFLOW_OBS_MEAN:
+                desc_obs_bw = l.desc_obs_bw_bs_mean
+            # just applying the formula above:
+            l.bw = max(round(
+                max(
+                    # ratio
+                    l.bw_bs_mean / mu,
+                    # ratio filtered
+                    min(l.bw_bs_mean, mu) / muf
+                    ) * desc_obs_bw
+                / 1000), 1)
+        return sorted(bw_lines_tf, key=lambda x: x.bw, reverse=reverse)

    @property
    def sum_bw(self):