From 16f1bda3241e9f92a41a187cbb06c1c0b659a67c Mon Sep 17 00:00:00 2001 From: Nick Mathewson <nickm@torproject.org> Date: Thu, 10 Mar 2022 11:17:32 -0500 Subject: [PATCH] Canonicalize and intern family representations to save memory. This should save 1-3 MB of ram on each running arti client. Closes #384. See also tor#27359 and proposal 298. --- crates/tor-netdoc/src/doc/microdesc.rs | 10 ++++-- crates/tor-netdoc/src/doc/microdesc/build.rs | 2 +- crates/tor-netdoc/src/doc/routerdesc.rs | 20 +++++++++--- crates/tor-netdoc/src/types/family.rs | 34 +++++++++++++++++++- 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/crates/tor-netdoc/src/doc/microdesc.rs b/crates/tor-netdoc/src/doc/microdesc.rs index 48131c7a4f..01947dad18 100644 --- a/crates/tor-netdoc/src/doc/microdesc.rs +++ b/crates/tor-netdoc/src/doc/microdesc.rs @@ -63,7 +63,7 @@ pub struct Microdesc { /// Public key used for the ntor circuit extension protocol. ntor_onion_key: curve25519::PublicKey, /// Declared family for this relay. - family: RelayFamily, + family: Arc<RelayFamily>, /// List of IPv4 ports to which this relay will exit ipv4_policy: Arc<PortPolicy>, /// List of IPv6 ports to which this relay will exit @@ -111,7 +111,7 @@ impl Microdesc { } /// Return the relay family for this microdesc pub fn family(&self) -> &RelayFamily { - &self.family + self.family.as_ref() } /// Return the ed25519 identity for this microdesc, if its /// Ed25519 identity is well-formed. @@ -277,10 +277,14 @@ impl Microdesc { .into(); // family + // + // (We don't need to add the relay's own ID to this family, as we do in + // RouterDescs: the authorities already took care of that for us.) let family = body .maybe(FAMILY) .parse_args_as_str::<RelayFamily>()? - .unwrap_or_else(RelayFamily::new); + .unwrap_or_else(RelayFamily::new) + .intern(); // exit policies. let ipv4_policy = body diff --git a/crates/tor-netdoc/src/doc/microdesc/build.rs b/crates/tor-netdoc/src/doc/microdesc/build.rs index 5aed2e2264..538a51943e 100644 --- a/crates/tor-netdoc/src/doc/microdesc/build.rs +++ b/crates/tor-netdoc/src/doc/microdesc/build.rs @@ -136,7 +136,7 @@ impl MicrodescBuilder { Ok(Microdesc { sha256, ntor_onion_key, - family: self.family.clone(), + family: self.family.clone().intern(), ipv4_policy: self.ipv4_policy.clone().intern(), ipv6_policy: self.ipv6_policy.clone().intern(), ed25519_id, diff --git a/crates/tor-netdoc/src/doc/routerdesc.rs b/crates/tor-netdoc/src/doc/routerdesc.rs index 84ee126177..4fc0f1817a 100644 --- a/crates/tor-netdoc/src/doc/routerdesc.rs +++ b/crates/tor-netdoc/src/doc/routerdesc.rs @@ -131,9 +131,7 @@ pub struct RouterDesc { is_extrainfo_cache: bool, /// Declared family members for this relay. If two relays are in the /// same family, they shouldn't be used in the same circuit. - // TODO: these families can get bulky. Perhaps we should de-duplicate - // them in a cache, like Tor does. - family: Option<RelayFamily>, + family: Arc<RelayFamily>, /// Software and version that this relay says it's running. platform: Option<RelayPlatform>, /// A complete address-level policy for which IPv4 addresses this relay @@ -568,7 +566,21 @@ impl RouterDesc { } // Family - let family = body.maybe(FAMILY).parse_args_as_str::<RelayFamily>()?; + let family = { + let mut family = body + .maybe(FAMILY) + .parse_args_as_str::<RelayFamily>()? + .unwrap_or_else(RelayFamily::new); + if !family.is_empty() { + // If this family is nonempty, we add our own RSA id to it, on + // the theory that doing so will improve the odds of having a + // canonical family shared by all of the members of this family. + // If the family is empty, there's no point in adding our own ID + // to it, and doing so would only waste memory. + family.push(rsa_identity.to_rsa_identity()); + } + family.intern() + }; // or-address // Extract at most one ipv6 address from the list. It's not great, diff --git a/crates/tor-netdoc/src/types/family.rs b/crates/tor-netdoc/src/types/family.rs index 0b25c5d123..f7b1609c01 100644 --- a/crates/tor-netdoc/src/types/family.rs +++ b/crates/tor-netdoc/src/types/family.rs @@ -3,7 +3,10 @@ //! Families are opt-in lists of relays with the same operators, //! used to avoid building insecure circuits. +use std::sync::Arc; + use crate::types::misc::LongIdent; +use crate::util::intern::InternCache; use crate::{Error, Result}; use tor_llcrypto::pk::rsa::RsaIdentity; @@ -19,15 +22,39 @@ use tor_llcrypto::pk::rsa::RsaIdentity; /// entries, including entries that are only nicknames. /// /// TODO: This type probably belongs in a different crate. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Hash, Eq, PartialEq)] pub struct RelayFamily(Vec<RsaIdentity>); +/// Cache of RelayFamily objects, for saving memory. +// +/// This only holds weak references to the policy objects, so we don't +/// need to worry about running out of space because of stale entries. +static FAMILY_CACHE: InternCache<RelayFamily> = InternCache::new(); + impl RelayFamily { /// Return a new empty RelayFamily. pub fn new() -> Self { RelayFamily::default() } + /// Add `rsa_id` to this family. + pub fn push(&mut self, rsa_id: RsaIdentity) { + self.0.push(rsa_id); + } + + /// Convert this family to a standard format (with all IDs sorted and de-duplicated). + fn normalize(&mut self) { + self.0.sort(); + self.0.dedup(); + } + + /// Consume this family, and return a new canonical interned representation + /// of the family. + pub fn intern(mut self) -> Arc<Self> { + self.normalize(); + FAMILY_CACHE.intern(self) + } + /// Does this family include the given relay? pub fn contains(&self, rsa_id: &RsaIdentity) -> bool { self.0.contains(rsa_id) @@ -38,6 +65,11 @@ impl RelayFamily { pub fn members(&self) -> impl Iterator<Item = &RsaIdentity> { self.0.iter() } + + /// Return true if this family has no members. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } } impl std::str::FromStr for RelayFamily { -- GitLab