GuardMgr: revise handling of "all guards are down". (eed1f066) · Commits · The Tor Project / Core / Arti

crates/tor-guardmgr/src/guard.rs

+7 −0

Original line number	Diff line number	Diff line
		@@ -230,6 +230,13 @@ impl Guard {
		self.reachable
		}

		/// Return the next time at which this guard will be retriable.
		///
		/// (Return None if we think this guard might be reachable right now.)
		pub(crate) fn next_retry(&self) -> Option<Instant> {
		self.retry_at
		}

		/// Return true if this guard is listed in the latest NetDir, and hasn't
		/// been turned off for some other reason.
		pub(crate) fn usable(&self) -> bool {

crates/tor-guardmgr/src/lib.rs

+12 −8

Original line number	Diff line number	Diff line
		@@ -341,6 +341,13 @@ impl<R: Runtime> GuardMgr<R> {
		== 0
		}

		/// Mark every guard as potentially retriable, regardless of how recently we
		/// failed to connect to it.
		pub fn mark_all_guards_retriable(&self) {
		let mut inner = self.inner.lock().expect("Poisoned lock");
		inner.guards.active_guards_mut().mark_all_guards_retriable();
		}

		/// Update the state of this [`GuardMgr`] based on a new or modified
		/// [`NetDir`] object.
		///
		@@ -773,7 +780,8 @@ impl GuardMgrInner {
		now: SystemTime,
		) -> Result<(sample::ListKind, GuardId), PickGuardError> {
		// Try to find a guard.
		if let Ok(s) = self.guards.active_guards().pick_guard(usage, &self.params) {
		let res1 = self.guards.active_guards().pick_guard(usage, &self.params);
		if let Ok(s) = res1 {
		return Ok(s);
		}

		@@ -789,16 +797,12 @@ impl GuardMgrInner {
		self.guards
		.active_guards_mut()
		.select_primary_guards(&self.params);
		if let Ok(s) = self.guards.active_guards().pick_guard(usage, &self.params) {
		return Ok(s);
		}
		return self.guards.active_guards().pick_guard(usage, &self.params);
		}
		}

		// That didn't work either. Mark everybody as potentially retriable.
		info!("All guards seem down. Marking them retriable and trying again.");
		self.guards.active_guards_mut().mark_all_guards_retriable();
		self.guards.active_guards().pick_guard(usage, &self.params)
		// Couldn't extend the sample; return the original error.
		res1
		}
		}

crates/tor-guardmgr/src/sample.rs

+43 −13

Original line number	Diff line number	Diff line
		@@ -524,6 +524,11 @@ impl GuardSet {
		}
		}

		/// Return the earliest time at which any guard will be retriable.
		pub(crate) fn next_retry(&self) -> Option<Instant> {
		self.guards.values().filter_map(Guard::next_retry).min()
		}

		/// Mark every `Unreachable` primary guard as `Unknown`.
		pub(crate) fn mark_primary_guards_retriable(&mut self) {
		for id in &self.primary {
		@@ -690,13 +695,16 @@ impl GuardSet {
		GuardUsageKind::Data => params.data_parallelism,
		};

		// count the number of running options, distinct from the total options.
		let mut n_running: usize = 0;

		let mut options: Vec<_> = self
		.preference_order()
		.filter(\|(_, g)\| g.usable() && g.reachable() != Reachable::Unreachable)
		.inspect(\|_\| n_running += 1)
		.filter(\|(_, g)\| {
		g.usable()
		!g.exploratory_circ_pending()
		&& self.active_filter.permits(*g)
		&& g.reachable() != Reachable::Unreachable
		&& !g.exploratory_circ_pending()
		&& g.conforms_to_usage(usage)
		})
		.take(n_options)
		@@ -712,7 +720,14 @@ impl GuardSet {

		match options.choose(&mut rand::thread_rng()) {
		Some((src, g)) => Ok((*src, g.guard_id().clone())),
		None => Err(PickGuardError::EveryoneIsDown),
		None => {
		if n_running == 0 {
		let retry_at = self.next_retry();
		Err(PickGuardError::AllGuardsDown { retry_at })
		} else {
		Err(PickGuardError::NoGuardsUsable)
		}
		}
		}
		}
		}
		@@ -757,14 +772,29 @@ impl<'a> From<GuardSample<'a>> for GuardSet {
		#[derive(Clone, Debug, thiserror::Error)]
		#[non_exhaustive]
		pub enum PickGuardError {
		/// All members of the current sample were down, or waiting for
		/// other circuits to finish.
		#[error("Everybody is either down or pending")]
		EveryoneIsDown,
		/// All members of the current sample were down.
		#[error("All guards are down")]
		AllGuardsDown {
		/// The next time at which any guard will be retriable.
		retry_at: Option<Instant>,
		},

		/// Some guards were running, but all of them were either blocked on pending
		/// circuits at other guards, unusable for the provided purpose, or filtered
		/// out.
		#[error("No running guards were usable for the selected purpose")]
		NoGuardsUsable,
		}

		/// We had no members in the current sample.
		#[error("The current sample is empty")]
		SampleIsEmpty,
		impl tor_error::HasKind for PickGuardError {
		fn kind(&self) -> tor_error::ErrorKind {
		use tor_error::ErrorKind as EK;
		use PickGuardError as E;
		match self {
		E::AllGuardsDown { .. } => EK::TorAccessFailed,
		E::NoGuardsUsable => EK::NoPath,
		}
		}
		}

		#[cfg(test)]
		@@ -1121,7 +1151,7 @@ mod test {
		}

		let e = guards.pick_guard(&usage, &params);
		assert!(matches!(e, Err(PickGuardError::EveryoneIsDown)));
		assert!(matches!(e, Err(PickGuardError::AllGuardsDown { .. })));

		// Now in theory we should re-grow when we extend.
		guards.extend_sample_as_needed(st, &params, &netdir);