Skip to content
Snippets Groups Projects
Commit a461ddc9 authored by Nick Mathewson's avatar Nick Mathewson :game_die:
Browse files

Merge branch 'retry_tuning_1' into 'main'

Minor retry tuning to improve behavior under failing conditions.

See merge request !439
parents e6cfbf58 c5e5fc15
No related branches found
No related tags found
No related merge requests found
......@@ -145,7 +145,7 @@ request_timeout = "60 sec"
# When a circuit is requested, we make up to this many attempts to build
# circuits for it before the request gives up.
request_max_retries = 32
request_max_retries = 16
# If a circuit is finished that would satisfy a pending request, but the
# request is still waiting for its own circuits to complete, the request
......
......@@ -210,7 +210,7 @@ fn default_request_timeout() -> Duration {
/// Return the default value for `request_max_retries`.
fn default_request_max_retries() -> u32 {
32
16
}
/// Return the default request loyalty timeout.
......
......@@ -742,14 +742,34 @@ impl<B: AbstractCircBuilder + 'static, R: Runtime> AbstractCircMgr<B, R> {
usage: &<B::Spec as AbstractSpec>::Usage,
dir: DirInfo<'_>,
) -> Result<B::Circ> {
/// Return CEIL(a/b).
///
/// Requires that a+b is less than usize::MAX.
///
/// This can be removed once usize::div_ceil is stable.
///
/// # Panics
///
/// Panics if b is 0.
fn div_ceil(a: usize, b: usize) -> usize {
(a + b - 1) / b
}
let circuit_timing = self.circuit_timing();
let wait_for_circ = circuit_timing.request_timeout;
let timeout_at = self.runtime.now() + wait_for_circ;
let max_tries = circuit_timing.request_max_retries;
// We compute the maximum number of times through this loop by dividing
// the maximum number of circuits to attempt by the number that will be
// launched in parallel for each iteration.
let max_iterations = div_ceil(
max_tries as usize,
std::cmp::max(1, self.builder.launch_parallelism(usage)),
);
let mut retry_err = RetryError::<Box<Error>>::in_attempt_to("find or build a circuit");
for n in 1..(max_tries + 1) {
for n in 1..(max_iterations + 1) {
// How much time is remaining?
let remaining = match timeout_at.checked_duration_since(self.runtime.now()) {
None => {
......
......@@ -18,7 +18,7 @@ use futures::FutureExt;
use futures::StreamExt;
use tor_dirclient::DirResponse;
use tor_rtcompat::{Runtime, SleepProviderExt};
use tracing::{info, trace, warn};
use tracing::{debug, info, trace, warn};
#[cfg(test)]
use once_cell::sync::Lazy;
......@@ -278,12 +278,32 @@ pub(crate) async fn download<R: Runtime>(
return Ok((state, None));
}
let reset_time = no_more_than_a_week_from(runtime.wallclock(), state.reset_time());
let mut reset_timeout_future = runtime.sleep_until_wallclock(reset_time).fuse();
let mut retry = retry_config.schedule();
let mut delay = None;
// Make several attempts to fetch whatever we're missing,
// until either we can advance, or we've got a complete
// document, or we run out of tries, or we run out of time.
'next_attempt: for attempt in retry_config.attempts() {
// We wait at the start of this loop, on all attempts but the first.
// This ensures that we always wait between attempts, but not after
// the final attempt.
let next_delay = retry.next_delay(&mut rand::thread_rng());
if let Some(delay) = delay.replace(next_delay) {
debug!("Waiting {:?} for next download attempt...", delay);
futures::select_biased! {
_ = reset_timeout_future => {
info!("Download attempt timed out completely; resetting download state.");
state = state.reset()?;
continue 'next_state;
}
_ = FutureExt::fuse(runtime.sleep(delay)) => {}
};
}
info!("{}: {}", attempt + 1, state.describe());
let reset_time = no_more_than_a_week_from(now, state.reset_time());
......@@ -329,18 +349,6 @@ pub(crate) async fn download<R: Runtime>(
// We have enough info to advance to another state.
state = state.advance()?;
continue 'next_state;
} else {
// We should wait a bit, and then retry.
// TODO: we shouldn't wait on the final attempt.
let reset_time = no_more_than_a_week_from(now, state.reset_time());
let delay = retry.next_delay(&mut rand::thread_rng());
futures::select_biased! {
_ = runtime.sleep_until_wallclock(reset_time).fuse() => {
state = state.reset()?;
continue 'next_state;
}
_ = FutureExt::fuse(runtime.sleep(delay)) => {}
};
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment