Commit 451a53a5 authored by Nick Mathewson's avatar Nick Mathewson 🤹
Browse files

circmgr: use AllGuardsDown to retry better

If all guards are down and they won't be retriable for a while, try
waiting that long to get whichever guard _is_ retriable.

Additionally, if we are making multiple circuit plans in parallel,
only report our planning as having failed if we failed at making
_all_ the plans.  Previously we treated any failure as fatal for the
other plans, which could lead to trouble in the case when guards
were all down or pending.

Part of #407.
parent eed1f066
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -146,7 +146,7 @@ impl HasKind for Error {
            E::Protocol(e) => e.kind(),
            E::State(e) => e.kind(),
            E::GuardMgr(e) => e.kind(),
            E::Guard(_) => EK::NoPath,
            E::Guard(e) => e.kind(),
            E::ExpiredConsensus => EK::DirectoryExpired,
            E::Spawn { cause, .. } => cause.kind(),
        }
+35 −7
Original line number Diff line number Diff line
@@ -780,11 +780,27 @@ impl<B: AbstractCircBuilder + 'static, R: Runtime> AbstractCircMgr<B, R> {
                    }
                }
                Err(e) => {
                    // We couldn't pick the action! This is unusual; wait
                    // a little while before we try again.
                    // We couldn't pick the action!
                    info!("Couldn't pick action for circuit attempt {}: {}", n, &e);
                    let small_delay = Duration::from_millis(50);
                    let wait_for_action = match &e {
                        Error::Guard(tor_guardmgr::PickGuardError::AllGuardsDown {
                            retry_at: Some(instant),
                        }) => {
                            // If we failed because all guards are down, that's fine: we just wait until
                            // the next guard is retriable.
                            instant.saturating_duration_since(self.runtime.now()) + small_delay
                        }
                        _ => {
                            // Any other errors are pretty unusual; wait a little while, then try again.
                            small_delay
                        }
                    };
                    retry_err.push(e);
                    let wait_for_action = Duration::from_millis(50);
                    if remaining < wait_for_action {
                        // We can't wait long enough.  Call this failed now.
                        break;
                    }
                    self.runtime
                        .sleep(std::cmp::min(remaining, wait_for_action))
                        .await;
@@ -869,12 +885,24 @@ impl<B: AbstractCircBuilder + 'static, R: Runtime> AbstractCircMgr<B, R> {
        // Okay, we need to launch circuits here.
        let parallelism = std::cmp::max(1, self.builder.launch_parallelism(usage));
        let mut plans = Vec::new();
        let mut errs = Vec::new();
        for _ in 0..parallelism {
            let (pending, plan) = self.plan_by_usage(dir, usage)?;
            match self.plan_by_usage(dir, usage) {
                Ok((pending, plan)) => {
                    list.add_pending_circ(pending);
                    plans.push(plan);
                }
                Err(e) => errs.push(e),
            }
        }
        if !plans.is_empty() {
            Ok(Action::Build(plans))
        } else if let Some(last_err) = errs.pop() {
            Err(last_err)
        } else {
            // we didn't even try to plan anything!
            Err(internal!("no plans were built, but no errors were found").into())
        }
    }

    /// Execute an action returned by pick-action, and return the