diff --git a/crates/arti-config/src/arti_defaults.toml b/crates/arti-config/src/arti_defaults.toml index bab80e4cba44e88d5fe33be64f95cc3c9e726731..ba4fff4d63070f76f6eeab6846f1189ce39c7513 100644 --- a/crates/arti-config/src/arti_defaults.toml +++ b/crates/arti-config/src/arti_defaults.toml @@ -145,7 +145,7 @@ request_timeout = "60 sec" # When a circuit is requested, we make up to this many attempts to build # circuits for it before the request gives up. -request_max_retries = 32 +request_max_retries = 16 # If a circuit is finished that would satisfy a pending request, but the # request is still waiting for its own circuits to complete, the request diff --git a/crates/tor-circmgr/src/config.rs b/crates/tor-circmgr/src/config.rs index 2d50d0fc891df1426030b1ea82855e9b94a54bc7..ddce6a6fbd7d121eac65815985128f9c1d15cb8b 100644 --- a/crates/tor-circmgr/src/config.rs +++ b/crates/tor-circmgr/src/config.rs @@ -210,7 +210,7 @@ fn default_request_timeout() -> Duration { /// Return the default value for `request_max_retries`. fn default_request_max_retries() -> u32 { - 32 + 16 } /// Return the default request loyalty timeout. diff --git a/crates/tor-circmgr/src/mgr.rs b/crates/tor-circmgr/src/mgr.rs index 10e60614e0d22870a8a878789447c38ba8cd8daf..f67099fca03055aba1f4955936fe38169ee1bc55 100644 --- a/crates/tor-circmgr/src/mgr.rs +++ b/crates/tor-circmgr/src/mgr.rs @@ -742,14 +742,34 @@ impl<B: AbstractCircBuilder + 'static, R: Runtime> AbstractCircMgr<B, R> { usage: &<B::Spec as AbstractSpec>::Usage, dir: DirInfo<'_>, ) -> Result<B::Circ> { + /// Return CEIL(a/b). + /// + /// Requires that a+b is less than usize::MAX. + /// + /// This can be removed once usize::div_ceil is stable. + /// + /// # Panics + /// + /// Panics if b is 0. + fn div_ceil(a: usize, b: usize) -> usize { + (a + b - 1) / b + } + let circuit_timing = self.circuit_timing(); let wait_for_circ = circuit_timing.request_timeout; let timeout_at = self.runtime.now() + wait_for_circ; let max_tries = circuit_timing.request_max_retries; + // We compute the maximum number of times through this loop by dividing + // the maximum number of circuits to attempt by the number that will be + // launched in parallel for each iteration. + let max_iterations = div_ceil( + max_tries as usize, + std::cmp::max(1, self.builder.launch_parallelism(usage)), + ); let mut retry_err = RetryError::<Box<Error>>::in_attempt_to("find or build a circuit"); - for n in 1..(max_tries + 1) { + for n in 1..(max_iterations + 1) { // How much time is remaining? let remaining = match timeout_at.checked_duration_since(self.runtime.now()) { None => { diff --git a/crates/tor-dirmgr/src/bootstrap.rs b/crates/tor-dirmgr/src/bootstrap.rs index 7c0d7f316a86d211d0557c66a8b118c3c113f5ef..3c0c09e7d2dd27f65fa4d5caabc57f0050633a16 100644 --- a/crates/tor-dirmgr/src/bootstrap.rs +++ b/crates/tor-dirmgr/src/bootstrap.rs @@ -18,7 +18,7 @@ use futures::FutureExt; use futures::StreamExt; use tor_dirclient::DirResponse; use tor_rtcompat::{Runtime, SleepProviderExt}; -use tracing::{info, trace, warn}; +use tracing::{debug, info, trace, warn}; #[cfg(test)] use once_cell::sync::Lazy; @@ -278,12 +278,32 @@ pub(crate) async fn download<R: Runtime>( return Ok((state, None)); } + let reset_time = no_more_than_a_week_from(runtime.wallclock(), state.reset_time()); + let mut reset_timeout_future = runtime.sleep_until_wallclock(reset_time).fuse(); + let mut retry = retry_config.schedule(); + let mut delay = None; // Make several attempts to fetch whatever we're missing, // until either we can advance, or we've got a complete // document, or we run out of tries, or we run out of time. 'next_attempt: for attempt in retry_config.attempts() { + // We wait at the start of this loop, on all attempts but the first. + // This ensures that we always wait between attempts, but not after + // the final attempt. + let next_delay = retry.next_delay(&mut rand::thread_rng()); + if let Some(delay) = delay.replace(next_delay) { + debug!("Waiting {:?} for next download attempt...", delay); + futures::select_biased! { + _ = reset_timeout_future => { + info!("Download attempt timed out completely; resetting download state."); + state = state.reset()?; + continue 'next_state; + } + _ = FutureExt::fuse(runtime.sleep(delay)) => {} + }; + } + info!("{}: {}", attempt + 1, state.describe()); let reset_time = no_more_than_a_week_from(now, state.reset_time()); @@ -329,18 +349,6 @@ pub(crate) async fn download<R: Runtime>( // We have enough info to advance to another state. state = state.advance()?; continue 'next_state; - } else { - // We should wait a bit, and then retry. - // TODO: we shouldn't wait on the final attempt. - let reset_time = no_more_than_a_week_from(now, state.reset_time()); - let delay = retry.next_delay(&mut rand::thread_rng()); - futures::select_biased! { - _ = runtime.sleep_until_wallclock(reset_time).fuse() => { - state = state.reset()?; - continue 'next_state; - } - _ = FutureExt::fuse(runtime.sleep(delay)) => {} - }; } }