diff --git a/crates/tor-guardmgr/src/guard.rs b/crates/tor-guardmgr/src/guard.rs index 12d63c5644225097a8553271d9d92d8909a9e0db..e147fc5d1ce1bf6e1afae574b281071ef1619fde 100644 --- a/crates/tor-guardmgr/src/guard.rs +++ b/crates/tor-guardmgr/src/guard.rs @@ -230,6 +230,13 @@ impl Guard { self.reachable } + /// Return the next time at which this guard will be retriable. + /// + /// (Return None if we think this guard might be reachable right now.) + pub(crate) fn next_retry(&self) -> Option<Instant> { + self.retry_at + } + /// Return true if this guard is listed in the latest NetDir, and hasn't /// been turned off for some other reason. pub(crate) fn usable(&self) -> bool { diff --git a/crates/tor-guardmgr/src/lib.rs b/crates/tor-guardmgr/src/lib.rs index e14bc57a136fd13ed0eaf06db9fdac93965cf8a7..e7f9a498a1e76a516b21e48b0fbba427a581588a 100644 --- a/crates/tor-guardmgr/src/lib.rs +++ b/crates/tor-guardmgr/src/lib.rs @@ -341,6 +341,13 @@ impl<R: Runtime> GuardMgr<R> { == 0 } + /// Mark every guard as potentially retriable, regardless of how recently we + /// failed to connect to it. + pub fn mark_all_guards_retriable(&self) { + let mut inner = self.inner.lock().expect("Poisoned lock"); + inner.guards.active_guards_mut().mark_all_guards_retriable(); + } + /// Update the state of this [`GuardMgr`] based on a new or modified /// [`NetDir`] object. /// @@ -773,7 +780,8 @@ impl GuardMgrInner { now: SystemTime, ) -> Result<(sample::ListKind, GuardId), PickGuardError> { // Try to find a guard. - if let Ok(s) = self.guards.active_guards().pick_guard(usage, &self.params) { + let res1 = self.guards.active_guards().pick_guard(usage, &self.params); + if let Ok(s) = res1 { return Ok(s); } @@ -789,16 +797,12 @@ impl GuardMgrInner { self.guards .active_guards_mut() .select_primary_guards(&self.params); - if let Ok(s) = self.guards.active_guards().pick_guard(usage, &self.params) { - return Ok(s); - } + return self.guards.active_guards().pick_guard(usage, &self.params); } } - // That didn't work either. Mark everybody as potentially retriable. - info!("All guards seem down. Marking them retriable and trying again."); - self.guards.active_guards_mut().mark_all_guards_retriable(); - self.guards.active_guards().pick_guard(usage, &self.params) + // Couldn't extend the sample; return the original error. + res1 } } diff --git a/crates/tor-guardmgr/src/sample.rs b/crates/tor-guardmgr/src/sample.rs index 27f5dc2b4f8722d997826601086ada933a834e3d..d62c312a79de86d2299be38c896c685f846b7342 100644 --- a/crates/tor-guardmgr/src/sample.rs +++ b/crates/tor-guardmgr/src/sample.rs @@ -524,6 +524,11 @@ impl GuardSet { } } + /// Return the earliest time at which any guard will be retriable. + pub(crate) fn next_retry(&self) -> Option<Instant> { + self.guards.values().filter_map(Guard::next_retry).min() + } + /// Mark every `Unreachable` primary guard as `Unknown`. pub(crate) fn mark_primary_guards_retriable(&mut self) { for id in &self.primary { @@ -690,13 +695,16 @@ impl GuardSet { GuardUsageKind::Data => params.data_parallelism, }; + // count the number of running options, distinct from the total options. + let mut n_running: usize = 0; + let mut options: Vec<_> = self .preference_order() + .filter(|(_, g)| g.usable() && g.reachable() != Reachable::Unreachable) + .inspect(|_| n_running += 1) .filter(|(_, g)| { - g.usable() + !g.exploratory_circ_pending() && self.active_filter.permits(*g) - && g.reachable() != Reachable::Unreachable - && !g.exploratory_circ_pending() && g.conforms_to_usage(usage) }) .take(n_options) @@ -712,7 +720,14 @@ impl GuardSet { match options.choose(&mut rand::thread_rng()) { Some((src, g)) => Ok((*src, g.guard_id().clone())), - None => Err(PickGuardError::EveryoneIsDown), + None => { + if n_running == 0 { + let retry_at = self.next_retry(); + Err(PickGuardError::AllGuardsDown { retry_at }) + } else { + Err(PickGuardError::NoGuardsUsable) + } + } } } } @@ -757,14 +772,29 @@ impl<'a> From<GuardSample<'a>> for GuardSet { #[derive(Clone, Debug, thiserror::Error)] #[non_exhaustive] pub enum PickGuardError { - /// All members of the current sample were down, or waiting for - /// other circuits to finish. - #[error("Everybody is either down or pending")] - EveryoneIsDown, - - /// We had no members in the current sample. - #[error("The current sample is empty")] - SampleIsEmpty, + /// All members of the current sample were down. + #[error("All guards are down")] + AllGuardsDown { + /// The next time at which any guard will be retriable. + retry_at: Option<Instant>, + }, + + /// Some guards were running, but all of them were either blocked on pending + /// circuits at other guards, unusable for the provided purpose, or filtered + /// out. + #[error("No running guards were usable for the selected purpose")] + NoGuardsUsable, +} + +impl tor_error::HasKind for PickGuardError { + fn kind(&self) -> tor_error::ErrorKind { + use tor_error::ErrorKind as EK; + use PickGuardError as E; + match self { + E::AllGuardsDown { .. } => EK::TorAccessFailed, + E::NoGuardsUsable => EK::NoPath, + } + } } #[cfg(test)] @@ -1121,7 +1151,7 @@ mod test { } let e = guards.pick_guard(&usage, ¶ms); - assert!(matches!(e, Err(PickGuardError::EveryoneIsDown))); + assert!(matches!(e, Err(PickGuardError::AllGuardsDown { .. }))); // Now in theory we should re-grow when we extend. guards.extend_sample_as_needed(st, ¶ms, &netdir);