GitLab is used only for code review, issue tracking and project management. Canonical locations for source code are still https://gitweb.torproject.org/ https://git.torproject.org/ and git-rw.torproject.org.

Commit e740cd57 authored by Mike Perry's avatar Mike Perry
Browse files

Merge branch 'bug40168+34088-035-v2' into bug40168+34088-v2

parents 94fc207d 2558933e
Pipeline #1782 failed with stage
in 44 minutes and 51 seconds
o Minor bugfixes (circuit build timeout):
- Remove max_time calculation and associated warn from circuit build
timeout 'alpha' parameter estimation, as this is no longer needed
by our new estimator from 40168. Fixes bug 34088; bugfix on 0.2.2.9-alpha.
o Minor bugfixes (circuit build timeout):
- Improve the accuracy of our circuit build timeout calculation for 60%,
70%, and 80% build rates for various guard choices. We now use a maximum
likelihood estimator for Pareto parameters of the circuit build time
distribution, instead of a "right-censored estimator". This causes
clients to ignore circuits that never finish building in their timeout
calculations. Previously, clients were counting such unfinished circuits
as having the highest possible build time value, when in reality these
circuits most likely just contain relays that are offline. We also now
wait a bit longer to let circuits complete for measurement purposes,
lower the minimum possible effective timeout from 1.5 seconds to 10ms,
and increase the resolution of the circuit build time histrogram from
50ms bin widths to 10ms bin widths. Additionally, we alter our estimate
Xm by taking the maximum of the top 10 most common build time values
of the 10ms histogram, and compute Xm as the average of these.
Fixes bug 40168; bugfix on 0.2.2.14-alpha.
......@@ -858,51 +858,41 @@ circuit_build_times_create_histogram(const circuit_build_times_t *cbt,
static build_time_t
circuit_build_times_get_xm(circuit_build_times_t *cbt)
{
build_time_t i, nbins;
build_time_t nbins = 0;
build_time_t *nth_max_bin;
int32_t bin_counts=0;
build_time_t ret = 0;
uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins);
int n=0;
build_time_t xm_total = 0;
build_time_t Xm = 0;
int32_t xm_counts=0;
int num_modes = circuit_build_times_default_num_xm_modes();
uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins);
tor_assert(nbins > 0);
tor_assert(num_modes > 0);
// Only use one mode if < 1000 buildtimes. Not enough data
// for multiple.
if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE)
num_modes = 1;
nth_max_bin = tor_calloc(num_modes, sizeof(build_time_t));
/* Determine the N most common build times */
for (i = 0; i < nbins; i++) {
if (histogram[i] >= histogram[nth_max_bin[0]]) {
nth_max_bin[0] = i;
}
for (n = 1; n < num_modes; n++) {
if (histogram[i] >= histogram[nth_max_bin[n]] &&
(!histogram[nth_max_bin[n-1]]
|| histogram[i] < histogram[nth_max_bin[n-1]])) {
/* Determine the N most common build times, by selecting the
* nth largest mode, counting it, and removing it from the histogram. */
for (int n = 0; n < num_modes; n++) {
/* Get nth mode */
for (build_time_t i = 0; i < nbins; i++) {
if (histogram[i] > histogram[nth_max_bin[n]]) {
nth_max_bin[n] = i;
}
}
}
for (n = 0; n < num_modes; n++) {
bin_counts += histogram[nth_max_bin[n]];
ret += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]];
log_info(LD_CIRC, "Xm mode #%d: %u %u", n, CBT_BIN_TO_MS(nth_max_bin[n]),
histogram[nth_max_bin[n]]);
/* Update average */
xm_counts += histogram[nth_max_bin[n]];
xm_total += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]];
/* Prevent from re-counting this value */
histogram[nth_max_bin[n]] = 0;
}
/* bin_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE
/* xm_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE
* circuits were abandoned before they completed. This shouldn't happen,
* though. We should have reset/re-learned a lower timeout first. */
if (bin_counts == 0) {
ret = 0;
if (xm_counts == 0) {
log_warn(LD_CIRC,
"No valid circuit build time data out of %d times, %u modes, "
"have_timeout=%d, %lfms", cbt->total_build_times, num_modes,
......@@ -910,15 +900,13 @@ circuit_build_times_get_xm(circuit_build_times_t *cbt)
goto done;
}
tor_assert(bin_counts > 0);
ret /= bin_counts;
Xm = xm_total / xm_counts;
done:
tor_free(histogram);
tor_free(nth_max_bin);
return ret;
return Xm;
}
/**
......@@ -1007,43 +995,6 @@ circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt,
}
}
/**
* Filter old synthetic timeouts that were created before the
* new right-censored Pareto calculation was deployed.
*
* Once all clients before 0.2.1.13-alpha are gone, this code
* will be unused.
*/
static int
circuit_build_times_filter_timeouts(circuit_build_times_t *cbt)
{
int num_filtered=0, i=0;
double timeout_rate = 0;
build_time_t max_timeout = 0;
timeout_rate = circuit_build_times_timeout_rate(cbt);
max_timeout = (build_time_t)cbt->close_ms;
for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
if (cbt->circuit_build_times[i] > max_timeout) {
build_time_t replaced = cbt->circuit_build_times[i];
num_filtered++;
cbt->circuit_build_times[i] = CBT_BUILD_ABANDONED;
log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced,
cbt->circuit_build_times[i]);
}
}
log_info(LD_CIRC,
"We had %d timeouts out of %d build times, "
"and filtered %d above the max of %u",
(int)(cbt->total_build_times*timeout_rate),
cbt->total_build_times, num_filtered, max_timeout);
return num_filtered;
}
/**
* Load histogram from <b>state</b>, shuffling the resulting array
* after we do so. Use this result to estimate parameters and
......@@ -1169,10 +1120,6 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
circuit_build_times_set_timeout(cbt);
if (!state->CircuitBuildAbandonedCount && cbt->total_build_times) {
circuit_build_times_filter_timeouts(cbt);
}
done:
tor_free(loaded_times);
return err ? -1 : 0;
......@@ -1193,7 +1140,6 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
build_time_t *x=cbt->circuit_build_times;
double a = 0;
int n=0,i=0,abandoned_count=0;
build_time_t max_time=0;
/* https://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
/* We sort of cheat here and make our samples slightly more pareto-like
......@@ -1213,14 +1159,13 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
if (x[i] < cbt->Xm) {
a += tor_mathlog(cbt->Xm);
n++;
} else if (x[i] == CBT_BUILD_ABANDONED) {
abandoned_count++;
} else {
a += tor_mathlog(x[i]);
if (x[i] > max_time)
max_time = x[i];
n++;
}
n++;
}
/*
......@@ -1229,30 +1174,23 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
* performs this same check, and resets state if it hits it. If we
* hit it at runtime, something serious has gone wrong.
*/
if (n!=cbt->total_build_times) {
if (n!=cbt->total_build_times-abandoned_count) {
log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
cbt->total_build_times);
}
tor_assert(n==cbt->total_build_times);
if (max_time <= 0) {
/* This can happen if Xm is actually the *maximum* value in the set.
* It can also happen if we've abandoned every single circuit somehow.
* In either case, tell the caller not to compute a new build timeout. */
log_warn(LD_BUG,
"Could not determine largest build time (%d). "
"Xm is %dms and we've abandoned %d out of %d circuits.", max_time,
cbt->Xm, abandoned_count, n);
return 0;
}
a += abandoned_count*tor_mathlog(max_time);
tor_assert(n==cbt->total_build_times-abandoned_count);
/* This is the "Maximum Likelihood Estimator" for parameter alpha of a Pareto
* Distribution. See:
* https://en.wikipedia.org/wiki/Pareto_distribution#Estimation_of_parameters
*
* The division in the estimator is done with subtraction outside the ln(),
* with the sum occurring in the for loop above.
*
* This done is to avoid the precision issues of logs of small values.
*/
a -= n*tor_mathlog(cbt->Xm);
// Estimator comes from Eq #4 in:
// "Bayesian estimation based on trimmed samples from Pareto populations"
// by Arturo J. Fernández. We are right-censored only.
a = (n-abandoned_count)/a;
a = n/a;
cbt->alpha = a;
......@@ -1661,9 +1599,8 @@ circuit_build_times_network_check_changed(circuit_build_times_t *cbt)
log_notice(LD_CIRC,
"Your network connection speed appears to have changed. Resetting "
"timeout to %lds after %d timeouts and %d buildtimes.",
tor_lround(cbt->timeout_ms/1000), timeout_count,
total_build_times);
"timeout to %ldms after %d timeouts and %d buildtimes.",
tor_lround(cbt->timeout_ms), timeout_count, total_build_times);
return 1;
}
......@@ -1829,7 +1766,7 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt)
return;
if (cbt->timeout_ms < circuit_build_times_min_timeout()) {
log_info(LD_CIRC, "Set buildtimeout to low value %fms. Setting to %dms",
log_notice(LD_CIRC, "Set buildtimeout to low value %fms. Setting to %dms",
cbt->timeout_ms, circuit_build_times_min_timeout());
cbt->timeout_ms = circuit_build_times_min_timeout();
if (cbt->close_ms < cbt->timeout_ms) {
......@@ -1847,9 +1784,9 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt)
log_info(LD_CIRC,
"Based on %d circuit times, it looks like we don't need to "
"wait so long for circuits to finish. We will now assume a "
"circuit is too slow to use after waiting %ld seconds.",
"circuit is too slow to use after waiting %ld milliseconds.",
cbt->total_build_times,
tor_lround(cbt->timeout_ms/1000));
tor_lround(cbt->timeout_ms));
log_info(LD_CIRC,
"Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
......@@ -1858,18 +1795,18 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt)
log_info(LD_CIRC,
"Based on %d circuit times, it looks like we need to wait "
"longer for circuits to finish. We will now assume a "
"circuit is too slow to use after waiting %ld seconds.",
"circuit is too slow to use after waiting %ld milliseconds.",
cbt->total_build_times,
tor_lround(cbt->timeout_ms/1000));
tor_lround(cbt->timeout_ms));
log_info(LD_CIRC,
"Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
timeout_rate);
} else {
log_info(LD_CIRC,
"Set circuit build timeout to %lds (%fms, %fms, Xm: %d, a: %f,"
"Set circuit build timeout to %ldms (%fms, %fms, Xm: %d, a: %f,"
" r: %f) based on %d circuit times",
tor_lround(cbt->timeout_ms/1000),
tor_lround(cbt->timeout_ms),
cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, timeout_rate,
cbt->total_build_times);
}
......
......@@ -56,10 +56,10 @@ void circuit_build_times_reset(circuit_build_times_t *cbt);
#define CBT_NCIRCUITS_TO_OBSERVE 1000
/** Width of the histogram bins in milliseconds */
#define CBT_BIN_WIDTH ((build_time_t)50)
#define CBT_BIN_WIDTH ((build_time_t)10)
/** Number of modes to use in the weighted-avg computation of Xm */
#define CBT_DEFAULT_NUM_XM_MODES 3
#define CBT_DEFAULT_NUM_XM_MODES 10
#define CBT_MIN_NUM_XM_MODES 1
#define CBT_MAX_NUM_XM_MODES 20
......@@ -79,7 +79,7 @@ void circuit_build_times_reset(circuit_build_times_t *cbt);
* How long to wait before actually closing circuits that take too long to
* build in terms of CDF quantile.
*/
#define CBT_DEFAULT_CLOSE_QUANTILE 95
#define CBT_DEFAULT_CLOSE_QUANTILE 99
#define CBT_MIN_CLOSE_QUANTILE CBT_MIN_QUANTILE_CUTOFF
#define CBT_MAX_CLOSE_QUANTILE CBT_MAX_QUANTILE_CUTOFF
......@@ -120,8 +120,8 @@ double circuit_build_times_quantile_cutoff(void);
#define CBT_MAX_TEST_FREQUENCY INT32_MAX
/** Lowest allowable value for CircuitBuildTimeout in milliseconds */
#define CBT_DEFAULT_TIMEOUT_MIN_VALUE (1500)
#define CBT_MIN_TIMEOUT_MIN_VALUE 500
#define CBT_DEFAULT_TIMEOUT_MIN_VALUE (CBT_BIN_WIDTH)
#define CBT_MIN_TIMEOUT_MIN_VALUE CBT_BIN_WIDTH
#define CBT_MAX_TIMEOUT_MIN_VALUE INT32_MAX
/** Initial circuit build timeout in milliseconds */
......
......@@ -401,11 +401,7 @@ test_circuit_timeout(void *arg)
for (i=0; i < CBT_DEFAULT_MIN_CIRCUITS_TO_OBSERVE; i++) {
build_time_t sample = circuit_build_times_generate_sample(&initial,0,1);
if (sample > close_ms) {
circuit_build_times_add_time(&estimate, CBT_BUILD_ABANDONED);
} else {
circuit_build_times_add_time(&estimate, sample);
}
circuit_build_times_add_time(&estimate, sample);
}
circuit_build_times_update_alpha(&estimate);
timeout1 = circuit_build_times_calculate_timeout(&estimate,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment