Commit f6600377 authored by Alexander Færøy's avatar Alexander Færøy 🍍
Browse files

Merge remote-tracking branch 'tor-gitlab/mr/474' into main

parents b109161c 7084ec87
o Major bugfixes (bridges):
- Make Tor work reliably again when you have multiple bridges
configured and one or more of them are unreachable. The problem
came because we require that we have bridge descriptors for both
of our first two bridges (else we refuse to try to connect), but
in some cases we would wait three hours before trying to fetch
these missing descriptors, and/or never recover when we do try
to fetch them. Fixes bugs 40396 and 40495; bugfix on 0.3.0.5-rc
and 0.3.2.1-alpha.
o Minor bugfixes (logging):
- When we no longer have enough directory information to use the
network, we would log a notice-level message -- but we would not
reliably log a message when we recovered and resumed using the
network. Now make sure there is always a corresponding message
about recovering. Fixes bug 40496; bugfix on 0.3.5.1-alpha.
o Minor bugfixes (bridges):
- When we don't yet have a descriptor for one of our bridges, disable
the entry guard retry schedule on that bridge. The entry guard retry
schedule and the bridge descriptor retry schedule can conflict,
e.g. where we mark a bridge as "maybe up" yet we don't try to fetch
its descriptor yet, leading Tor to wait (refusing to do anything)
until it becomes time to fetch the descriptor. Fixes bug 40497;
bugfix on 0.3.0.3-alpha.
......@@ -3576,14 +3576,15 @@ The following options are used for running a testing Tor network.
[[TestingAuthKeySlop]] **TestingAuthKeySlop** __N__ **seconds**|**minutes**|**hours** +
[[TestingBridgeBootstrapDownloadInitialDelay]] **TestingBridgeBootstrapDownloadInitialDelay** __N__::
Initial delay in seconds for when clients should download each bridge descriptor when they
have just started, or when they can not contact any of their bridges.
Initial delay in seconds for how long clients should wait before
downloading a bridge descriptor for a new bridge.
Changing this requires that **TestingTorNetwork** is set. (Default: 0)
[[TestingBridgeDownloadInitialDelay]] **TestingBridgeDownloadInitialDelay** __N__::
Initial delay in seconds for when clients should download each bridge descriptor when they
know that one or more of their configured bridges are running. Changing
this requires that **TestingTorNetwork** is set. (Default: 10800)
How long to wait (in seconds) once clients have successfully
downloaded a bridge descriptor, before trying another download for
that same bridge. Changing this requires that **TestingTorNetwork**
is set. (Default: 10800)
[[TestingClientConsensusDownloadInitialDelay]] **TestingClientConsensusDownloadInitialDelay** __N__::
Initial delay in seconds for when clients should download consensuses. Changing this
......
......@@ -943,9 +943,17 @@ rewrite_node_address_for_bridge(const bridge_info_t *bridge, node_t *node)
}
/** We just learned a descriptor for a bridge. See if that
* digest is in our entry guard list, and add it if not. */
* digest is in our entry guard list, and add it if not. Schedule the
* next fetch for a long time from now, and initiate any follow-up
* activities like continuing to bootstrap.
*
* <b>from_cache</b> * tells us whether we fetched it from disk (else
* the network)
*
* <b>desc_is_new</b> tells us if we preferred it to the old version we
* had, if any. */
void
learned_bridge_descriptor(routerinfo_t *ri, int from_cache)
learned_bridge_descriptor(routerinfo_t *ri, int from_cache, int desc_is_new)
{
tor_assert(ri);
tor_assert(ri->purpose == ROUTER_PURPOSE_BRIDGE);
......@@ -961,12 +969,14 @@ learned_bridge_descriptor(routerinfo_t *ri, int from_cache)
if (bridge) { /* if we actually want to use this one */
node_t *node;
/* it's here; schedule its re-fetch for a long time from now. */
if (!from_cache) {
/* This schedules the re-fetch at a constant interval, which produces
* a pattern of bridge traffic. But it's better than trying all
* configured bridges several times in the first few minutes. */
download_status_reset(&bridge->fetch_status);
/* it's here; schedule its re-fetch for a long time from now. */
bridge->fetch_status.next_attempt_at +=
get_options()->TestingBridgeDownloadInitialDelay;
}
node = node_get_mutable_by_id(ri->cache_info.identity_digest);
......@@ -982,8 +992,10 @@ learned_bridge_descriptor(routerinfo_t *ri, int from_cache)
entry_guard_learned_bridge_identity(&bridge->addrport_configured,
(const uint8_t*)ri->cache_info.identity_digest);
log_notice(LD_DIR, "new bridge descriptor '%s' (%s): %s", ri->nickname,
from_cache ? "cached" : "fresh", router_describe(ri));
if (desc_is_new)
log_notice(LD_DIR, "new bridge descriptor '%s' (%s): %s",
ri->nickname,
from_cache ? "cached" : "fresh", router_describe(ri));
/* If we didn't have a reachable bridge before this one, try directory
* documents again. */
if (first) {
......
......@@ -46,7 +46,8 @@ void learned_router_identity(const tor_addr_t *addr, uint16_t port,
void bridge_add_from_config(struct bridge_line_t *bridge_line);
void retry_bridge_descriptor_fetch_directly(const char *digest);
void fetch_bridge_descriptors(const or_options_t *options, time_t now);
void learned_bridge_descriptor(routerinfo_t *ri, int from_cache);
void learned_bridge_descriptor(routerinfo_t *ri,
int from_cache, int desc_is_new);
const smartlist_t *get_socks_args_by_bridge_addrport(const tor_addr_t *addr,
uint16_t port);
......
......@@ -132,6 +132,7 @@
#include "feature/client/entrynodes.h"
#include "feature/client/transports.h"
#include "feature/control/control_events.h"
#include "feature/dirclient/dlstatus.h"
#include "feature/dircommon/directory.h"
#include "feature/nodelist/describe.h"
#include "feature/nodelist/microdesc.h"
......@@ -576,6 +577,18 @@ mark_guard_maybe_reachable(entry_guard_t *guard)
guard->is_reachable = GUARD_REACHABLE_MAYBE;
if (guard->is_filtered_guard)
guard->is_usable_filtered_guard = 1;
/* Check if it is a bridge and we don't have its descriptor yet */
if (guard->bridge_addr && !guard_has_descriptor(guard)) {
/* Reset the descriptor fetch retry schedule, so it gives it another
* go soon. It's important to keep any "REACHABLE_MAYBE" bridges in
* sync with the descriptor fetch schedule, since we will refuse to
* use the network until our first primary bridges are either
* known-usable or known-unusable. See bug 40396. */
download_status_t *dl = get_bridge_dl_status_by_id(guard->identity);
if (dl)
download_status_reset(dl);
}
}
/**
......@@ -2046,6 +2059,14 @@ entry_guard_consider_retry(entry_guard_t *guard)
get_retry_schedule(guard->failing_since, now, guard->is_primary);
const time_t last_attempt = guard->last_tried_to_connect;
/* Check if it is a bridge and we don't have its descriptor yet */
if (guard->bridge_addr && !guard_has_descriptor(guard)) {
/* We want to leave the retry schedule to fetch_bridge_descriptors(),
* so we don't have two retry schedules clobbering each other. See
* bugs 40396 and 40497 for details of why we need this exception. */
return;
}
if (BUG(last_attempt == 0) ||
now >= last_attempt + delay) {
/* We should mark this retriable. */
......@@ -2271,6 +2292,13 @@ entry_guards_note_guard_failure(guard_selection_t *gs,
guard->is_primary?"primary ":"",
guard->confirmed_idx>=0?"confirmed ":"",
entry_guard_describe(guard));
/* Schedule a re-assessment of whether we have enough dir info to
* use the network. Counterintuitively, *losing* a bridge might actually
* be just what we need to *resume* using the network, if we had it in
* state GUARD_REACHABLE_MAYBE and we were stalling to learn this
* outcome. See bug 40396 for more details. */
router_dir_info_changed();
}
/**
......@@ -2295,6 +2323,12 @@ entry_guards_note_guard_success(guard_selection_t *gs,
/* If guard was not already marked as reachable, send a GUARD UP signal */
if (guard->is_reachable != GUARD_REACHABLE_YES) {
control_event_guard(guard->nickname, guard->identity, "UP");
/* Schedule a re-assessment of whether we have enough dir info to
* use the network. One of our guards has just moved to
* GUARD_REACHABLE_YES, so maybe we can resume using the network
* now. */
router_dir_info_changed();
}
guard->is_reachable = GUARD_REACHABLE_YES;
......@@ -3538,6 +3572,11 @@ entry_guards_changed_for_guard_selection(guard_selection_t *gs)
entry_guards_update_guards_in_state()
*/
or_state_mark_dirty(get_or_state(), when);
/* Schedule a re-assessment of whether we have enough dir info to
* use the network. When we add or remove or disable or enable a
* guard, the decision could shift. */
router_dir_info_changed();
}
/** Our list of entry guards has changed for the default guard selection
......
......@@ -73,15 +73,14 @@ find_dl_min_delay(const download_status_t *dls, const or_options_t *options)
}
}
case DL_SCHED_BRIDGE:
if (options->UseBridges && num_bridges_usable(0) > 0) {
/* A bridge client that is sure that one or more of its bridges are
* running can afford to wait longer to update bridge descriptors. */
return options->TestingBridgeDownloadInitialDelay;
} else {
/* A bridge client which might have no running bridges, must try to
* get bridge descriptors straight away. */
return options->TestingBridgeBootstrapDownloadInitialDelay;
}
/* Be conservative here: always return the 'during bootstrap' delay
* value, so we never delay while trying to fetch descriptors
* for new bridges. Once we do succeed at fetching a descriptor
* for our bridge, we will adjust its next_attempt_at based on
* the longer "TestingBridgeDownloadInitialDelay" value. See
* learned_bridge_descriptor() for details.
*/
return options->TestingBridgeBootstrapDownloadInitialDelay;
default:
tor_assert(0);
}
......
......@@ -2820,6 +2820,7 @@ update_router_have_minimum_dir_info(void)
const networkstatus_t *consensus =
networkstatus_get_reasonably_live_consensus(now,usable_consensus_flavor());
int using_md;
static int be_loud_when_things_work_again = 0;
if (!consensus) {
if (!networkstatus_get_latest_consensus())
......@@ -2875,8 +2876,9 @@ update_router_have_minimum_dir_info(void)
if (res && !have_min_dir_info) {
control_event_client_status(LOG_NOTICE, "ENOUGH_DIR_INFO");
control_event_boot_dir(BOOTSTRAP_STATUS_ENOUGH_DIRINFO, 0);
log_info(LD_DIR,
"We now have enough directory information to build circuits.");
tor_log(be_loud_when_things_work_again ? LOG_NOTICE : LOG_INFO, LD_DIR,
"We now have enough directory information to build circuits.");
be_loud_when_things_work_again = 0;
}
/* If paths have just become unavailable in this update. */
......@@ -2885,6 +2887,10 @@ update_router_have_minimum_dir_info(void)
tor_log(quiet ? LOG_INFO : LOG_NOTICE, LD_DIR,
"Our directory information is no longer up-to-date "
"enough to build circuits: %s", dir_info_status);
if (!quiet) {
/* remember to do a notice-level log when things come back */
be_loud_when_things_work_again = 1;
}
/* a) make us log when we next complete a circuit, so we know when Tor
* is back up and usable, and b) disable some activities that Tor
......
......@@ -1617,6 +1617,13 @@ router_add_to_routerlist(routerinfo_t *router, const char **msg,
"descriptor for router %s",
router_describe(router));
} else {
if (router->purpose == ROUTER_PURPOSE_BRIDGE) {
/* Even if we're not going to keep this descriptor, we need to
* let the bridge descriptor fetch subsystem know that we
* succeeded at getting it -- so we can adjust the retry schedule
* to stop trying for a while. */
learned_bridge_descriptor(router, from_cache, 0);
}
log_info(LD_DIR,
"Dropping descriptor that we already have for router %s",
router_describe(router));
......@@ -2047,7 +2054,7 @@ routerlist_descriptors_added(smartlist_t *sl, int from_cache)
control_event_descriptors_changed(sl);
SMARTLIST_FOREACH_BEGIN(sl, routerinfo_t *, ri) {
if (ri->purpose == ROUTER_PURPOSE_BRIDGE)
learned_bridge_descriptor(ri, from_cache);
learned_bridge_descriptor(ri, from_cache, 1);
if (ri->needs_retest_if_added) {
ri->needs_retest_if_added = 0;
dirserv_single_reachability_test(approx_time(), ri);
......
......@@ -6652,13 +6652,7 @@ test_dir_find_dl_min_delay(void* data)
dls.schedule = DL_SCHED_BRIDGE;
/* client */
mock_options->ClientOnly = 1;
mock_options->UseBridges = 1;
if (num_bridges_usable(0) > 0) {
tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge);
} else {
tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge_bootstrap);
}
tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge_bootstrap);
done:
UNMOCK(networkstatus_consensus_is_bootstrapping);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment