Commit 11910cf5 authored by Mike Perry's avatar Mike Perry
Browse files

Do away with the complexity of the network liveness detection.

We really should ignore any timeouts that have *no* network activity for their
entire measured lifetime, now that we have the 95th percentile measurement
changes. Usually this is up to a minute, even on fast connections.
parent 0744a175
Loading
Loading
Loading
Loading
+21 −50
Original line number Diff line number Diff line
@@ -306,6 +306,7 @@ circuit_build_times_init(circuit_build_times_t *cbt)
  control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
}

#if 0
/**
 * Rewind our build time history by n positions.
 */
@@ -332,6 +333,7 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
          "Rewound history by %d places. Current index: %d. "
          "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
}
#endif

/**
 * Add a new build time value <b>time</b> to the set of build times. Time
@@ -941,8 +943,16 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
/**
 * Record that network activity was just observed for <b>cbt</b>.
 *
 * If any circuit timeouts were counted as non-live since the previous
 * activity, log how long the network appeared to be down and how many
 * circuit attempts were affected. Afterwards, stamp the current time as the
 * last moment of liveness and reset the non-live bookkeeping.
 */
void
circuit_build_times_network_is_live(circuit_build_times_t *cbt)
{
  time_t now = approx_time();

  if (cbt->liveness.nonlive_timeouts > 0) {
    /* The outage length must be computed before network_last_live is
     * overwritten below, otherwise it is always reported as ~0 seconds.
     * Cast the whole difference (not just <b>now</b>) so the argument
     * really is a long even where time_t is wider than long. */
    log_notice(LD_CIRC,
               "Tor now sees network activity. Restoring circuit build "
               "timeout recording. Network was down for %ld seconds "
               "during %d circuit attempts.",
               (long int)(now - cbt->liveness.network_last_live),
               cbt->liveness.nonlive_timeouts);
  }

  cbt->liveness.network_last_live = now;
  cbt->liveness.nonlive_timeouts = 0;
  cbt->liveness.nonlive_discarded = 0;
}

@@ -1002,11 +1012,18 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
               now_buf);
    }
    cbt->liveness.nonlive_timeouts++;
    if (cbt->liveness.nonlive_timeouts == 1) {
      log_notice(LD_CIRC,
                 "Tor has not observed any network activity for the past %ld "
                 "seconds. Disabling circuit build timeout code.",
                 (long int)now - cbt->liveness.network_last_live);
    } else {
      log_info(LD_CIRC,
             "Got non-live timeout. Current count is: %d",
             cbt->liveness.nonlive_timeouts);
    }
  }
}

/**
 * Returns false if the network has not received a cell or tls handshake
@@ -1018,54 +1035,8 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
int
circuit_build_times_network_check_live(circuit_build_times_t *cbt)
{
  time_t now = approx_time();
  if (cbt->liveness.nonlive_timeouts >= CBT_NETWORK_NONLIVE_DISCARD_COUNT) {
    if (!cbt->liveness.nonlive_discarded) {
      cbt->liveness.nonlive_discarded = 1;
      log_notice(LD_CIRC, "Network is no longer live (too many recent "
                "circuit timeouts). Dead for %ld seconds.",
                (long int)(now - cbt->liveness.network_last_live));
      /* Only discard NETWORK_NONLIVE_TIMEOUT_COUNT-1 because we stopped
       * counting after that */
      circuit_build_times_rewind_history(cbt,
                     CBT_NETWORK_NONLIVE_TIMEOUT_COUNT-1);
      control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_DISCARD);
    }
  if (cbt->liveness.nonlive_timeouts > 0) {
    return 0;
  } else if (cbt->liveness.nonlive_timeouts >=
                CBT_NETWORK_NONLIVE_TIMEOUT_COUNT) {
    if (cbt->liveness.suspended_timeout <= 0) {
      cbt->liveness.suspended_timeout = cbt->timeout_ms;
      cbt->liveness.suspended_close_timeout = cbt->close_ms;

      if (cbt->timeout_ms < circuit_build_times_get_initial_timeout())
        cbt->timeout_ms = circuit_build_times_get_initial_timeout();
      else
        cbt->timeout_ms *= 2;

      if (cbt->close_ms < circuit_build_times_get_initial_timeout())
        cbt->close_ms = circuit_build_times_get_initial_timeout();
      else
        cbt->close_ms *= 2;

      log_notice(LD_CIRC,
                "Network is flaky. No activity for %ld seconds. "
                "Temporarily raising timeout to %lds.",
                (long int)(now - cbt->liveness.network_last_live),
                tor_lround(cbt->timeout_ms/1000));
      control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_SUSPENDED);
    }

    return 0;
  } else if (cbt->liveness.suspended_timeout > 0) {
    log_notice(LD_CIRC,
              "Network activity has resumed. "
              "Resuming circuit timeout calculations.");
    cbt->timeout_ms = cbt->liveness.suspended_timeout;
    cbt->close_ms = cbt->liveness.suspended_close_timeout;
    cbt->liveness.suspended_timeout = 0;
    cbt->liveness.suspended_close_timeout = 0;
    control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESUME);
  }

  return 1;
+1 −31
Original line number Diff line number Diff line
@@ -2961,26 +2961,6 @@ typedef uint32_t build_time_t;
/** Save state every 10 circuits */
#define CBT_SAVE_STATE_EVERY 10

/* Circuit Build Timeout network liveness constants */

/**
 * Have we received a cell in the last N circ attempts?
 *
 * Once liveness.nonlive_timeouts reaches this count,
 * circuit_build_times_network_check_live() reports the network as not live
 * and temporarily raises the build timeout (to the initial timeout if the
 * current value is below it, otherwise doubling it) until we start getting
 * cells, at which point we switch back to computing the timeout from
 * our saved history.
 *
 * Must be at least 1; a preprocessor check enforces this.
 */
#define CBT_NETWORK_NONLIVE_TIMEOUT_COUNT 3

/**
 * This tells us when to toss out the last streak of N timeouts.
 *
 * Once liveness.nonlive_timeouts reaches this count (twice
 * CBT_NETWORK_NONLIVE_TIMEOUT_COUNT), the build time history is rewound a
 * single time to discard the recent streak of timeouts.
 *
 * If instead we start getting cells, we switch back to computing the timeout
 * from our saved history.
 */
#define CBT_NETWORK_NONLIVE_DISCARD_COUNT (CBT_NETWORK_NONLIVE_TIMEOUT_COUNT*2)

/* Circuit build times consensus parameters */

/**
@@ -3021,9 +3001,7 @@ double circuit_build_times_quantile_cutoff(void);
#define CBT_DEFAULT_TIMEOUT_INITIAL_VALUE (60*1000)
int32_t circuit_build_times_initial_timeout(void);

#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1 || \
    CBT_NETWORK_NONLIVE_DISCARD_COUNT < 1 || \
    CBT_NETWORK_NONLIVE_TIMEOUT_COUNT < 1
#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1
#error "RECENT_CIRCUITS is set too low."
#endif

@@ -3033,8 +3011,6 @@ typedef struct {
  time_t network_last_live;
  /** If the network is not live, how many timeouts has this caused? */
  int nonlive_timeouts;
  /** If the network is not live, have we yet discarded our history? */
  int nonlive_discarded;
  /** Circular array of circuits that have made it to the first hop. Slot is
   * 1 if circuit timed out, 0 if circuit succeeded */
  int8_t *timeouts_after_firsthop;
@@ -3042,12 +3018,6 @@ typedef struct {
  int num_recent_circs;
  /** Index into circular array. */
  int after_firsthop_idx;
  /** Timeout gathering is suspended if non-zero. The old timeout value
    * is stored here in that case. */
  double suspended_timeout;
  /** Timeout gathering is suspended if non-zero. The old close value
    * is stored here in that case. */
  double suspended_close_timeout;
} network_liveness_t;

/** Structure for circuit build times history */
+6 −20
Original line number Diff line number Diff line
@@ -499,7 +499,7 @@ test_circuit_timeout(void)

    build_times_idx = estimate.build_times_idx;
    total_build_times = estimate.total_build_times;
    for (i = 0; i < CBT_NETWORK_NONLIVE_TIMEOUT_COUNT; i++) {

    test_assert(circuit_build_times_network_check_live(&estimate));
    test_assert(circuit_build_times_network_check_live(&final));

@@ -507,20 +507,6 @@ test_circuit_timeout(void)
            (time_t)(approx_time()-estimate.close_ms/1000.0-1));
    circuit_build_times_count_close(&final, 0,
            (time_t)(approx_time()-final.close_ms/1000.0-1));
    }

    test_assert(!circuit_build_times_network_check_live(&estimate));
    test_assert(!circuit_build_times_network_check_live(&final));

    for ( ; i < CBT_NETWORK_NONLIVE_DISCARD_COUNT; i++) {
      circuit_build_times_count_close(&estimate, 0,
                (time_t)(approx_time()-estimate.close_ms/1000.0-1));

      if (i < CBT_NETWORK_NONLIVE_DISCARD_COUNT-1) {
        circuit_build_times_count_close(&final, 0,
                (time_t)(approx_time()-final.close_ms/1000.0-1));
      }
    }

    test_assert(!circuit_build_times_network_check_live(&estimate));
    test_assert(!circuit_build_times_network_check_live(&final));