tor_guardmgr/
lib.rs

1#![cfg_attr(docsrs, feature(doc_auto_cfg, doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_duration_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
45
46// TODO #1645 (either remove this, or decide to have it everywhere)
47#![cfg_attr(not(all(feature = "full", feature = "experimental")), allow(unused))]
48
49// Glossary:
50//     Primary guard
51//     Sample
52//     confirmed
53//     filtered
54
55use futures::channel::mpsc;
56use futures::task::SpawnExt;
57use serde::{Deserialize, Serialize};
58use std::collections::HashMap;
59use std::net::SocketAddr;
60use std::sync::{Arc, Mutex, Weak};
61use std::time::{Duration, Instant, SystemTime};
62#[cfg(feature = "bridge-client")]
63use tor_error::internal;
64use tor_linkspec::{OwnedChanTarget, OwnedCircTarget, RelayId, RelayIdSet};
65use tor_netdir::NetDirProvider;
66use tor_proto::ClockSkew;
67use tor_units::BoundedInt32;
68use tracing::{debug, info, trace, warn};
69
70use tor_config::{define_list_builder_accessors, define_list_builder_helper};
71use tor_config::{impl_not_auto_value, ReconfigureError};
72use tor_config::{impl_standard_builder, ExplicitOrAuto};
73use tor_netdir::{params::NetParameters, NetDir, Relay};
74use tor_persist::{DynStorageHandle, StateMgr};
75use tor_rtcompat::Runtime;
76
77#[cfg(feature = "bridge-client")]
78pub mod bridge;
79mod config;
80mod daemon;
81mod dirstatus;
82mod err;
83mod events;
84pub mod fallback;
85mod filter;
86mod guard;
87mod ids;
88mod pending;
89mod sample;
90mod skew;
91mod util;
92#[cfg(feature = "vanguards")]
93pub mod vanguards;
94
95#[cfg(not(feature = "bridge-client"))]
96#[path = "bridge_disabled.rs"]
97pub mod bridge;
98
99#[cfg(any(test, feature = "testing"))]
100pub use config::testing::TestConfig;
101
102#[cfg(test)]
103use oneshot_fused_workaround as oneshot;
104
105pub use config::GuardMgrConfig;
106pub use err::{GuardMgrConfigError, GuardMgrError, PickGuardError};
107pub use events::ClockSkewEvents;
108pub use filter::GuardFilter;
109pub use ids::FirstHopId;
110pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
111pub use skew::SkewEstimate;
112
113#[cfg(feature = "vanguards")]
114#[cfg_attr(docsrs, doc(cfg(feature = "vanguards")))]
115pub use vanguards::VanguardMgrError;
116
117use pending::{PendingRequest, RequestId};
118use sample::{GuardSet, Universe, UniverseRef};
119
120use crate::ids::{FirstHopIdInner, GuardId};
121
122use tor_config::ConfigBuildError;
123
/// A "guard manager" that selects and remembers a persistent set of
/// guard nodes.
///
/// This is a "handle"; clones of it share state: every clone holds an `Arc`
/// to the same mutex-protected inner state.
#[derive(Clone)]
pub struct GuardMgr<R: Runtime> {
    /// An asynchronous runtime object.
    ///
    /// GuardMgr uses this runtime for timing, timeouts, and spawning
    /// tasks.
    runtime: R,

    /// Internal state for the guard manager.
    ///
    /// Background tasks are handed `Weak` references to this same object.
    inner: Arc<Mutex<GuardMgrInner>>,
}
139
/// Helper type that holds the data used by a [`GuardMgr`].
///
/// This would just be a [`GuardMgr`], except that it needs to sit inside
/// a `Mutex` and get accessed by daemon tasks.
struct GuardMgrInner {
    /// The last time when we marked all of our primary guards as retriable.
    ///
    /// We keep track of this time so that we can rate-limit
    /// these attempts.
    last_primary_retry_time: Instant,

    /// Persistent guard manager state.
    ///
    /// This object remembers one or more persistent sets of guards that we can
    /// use, along with their relative priorities and statuses.
    guards: GuardSets,

    /// The current filter that we're using to decide which guards are
    /// supported.
    //
    // TODO: This field is duplicated in the current active [`GuardSet`]; we
    // should fix that.
    filter: GuardFilter,

    /// Configuration values derived from the consensus parameters.
    ///
    /// This is updated whenever the consensus parameters change.
    params: GuardParams,

    /// An mpsc channel, used to tell the task running in
    /// [`daemon::report_status_events`] about a new event to monitor.
    ///
    /// This uses an `UnboundedSender` so that we don't have to await
    /// while sending the message, which in turn allows the GuardMgr
    /// API to be simpler.  The risk, however, is that there's no
    /// backpressure in the event that the task running
    /// [`daemon::report_status_events`] fails to read from this
    /// channel.
    ctrl: mpsc::UnboundedSender<daemon::Msg>,

    /// Information about guards that we've given out, but where we have
    /// not yet heard whether the guard was successful.
    ///
    /// Upon learning whether the guard was successful, the pending
    /// requests in this map may be either moved to `waiting`, or
    /// discarded.
    ///
    /// There can be multiple pending requests corresponding to the
    /// same guard.
    pending: HashMap<RequestId, PendingRequest>,

    /// A list of pending requests for which we have heard that the
    /// guard was successful, but we have not yet decided whether the
    /// circuit may be used.
    ///
    /// There can be multiple waiting requests corresponding to the
    /// same guard.
    waiting: Vec<PendingRequest>,

    /// A list of fallback directories used to access the directory system
    /// when no other directory information is yet known.
    fallbacks: fallback::FallbackState,

    /// Location in which to store persistent state.
    storage: DynStorageHandle<GuardSets>,

    /// A sender object to publish changes in our estimated clock skew.
    send_skew: postage::watch::Sender<Option<SkewEstimate>>,

    /// A receiver object to hand out to observers who want to know about
    /// changes in our estimated clock skew.
    recv_skew: events::ClockSkewEvents,

    /// A netdir provider that we can use for adding new guards when
    /// insufficient guards are available.
    ///
    /// This has to be an Option so it can be initialized from None: at the
    /// time a GuardMgr is created, there is no NetDirProvider for it to use.
    ///
    /// Only a `Weak` reference is kept here, so this field alone does not
    /// keep the provider alive.
    netdir_provider: Option<Weak<dyn NetDirProvider>>,

    /// A bridge descriptor provider that we can use for discovering bridge
    /// descriptors.
    ///
    /// This has to be an Option so it can be initialized from None: at the time
    /// a GuardMgr is created, there is no BridgeDescProvider for it to use.
    ///
    /// As with `netdir_provider`, only a `Weak` reference is kept.
    #[cfg(feature = "bridge-client")]
    bridge_desc_provider: Option<Weak<dyn bridge::BridgeDescProvider>>,

    /// A list of the bridges that we are configured to use, or "None" if we are
    /// not configured to use bridges.
    #[cfg(feature = "bridge-client")]
    configured_bridges: Option<Arc<[bridge::BridgeConfig]>>,
}
232
/// A selector that tells us which [`GuardSet`] of several is currently in use.
///
/// Each variant names one of the guard sets stored in a [`GuardSets`].
/// The `strum::EnumIter` derive lets callers iterate over every variant.
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, strum::EnumIter)]
enum GuardSetSelector {
    /// The default guard set is currently in use: that's the one that we use
    /// when we have no filter installed, or the filter permits most of the
    /// guards on the network.
    ///
    /// This is also the value produced by `Default::default()`.
    #[default]
    Default,
    /// A "restrictive" guard set is currently in use: that's the one that we
    /// use when we have a filter that excludes a large fraction of the guards
    /// on the network.
    Restricted,
    /// The "bridges" guard set is currently in use: we are selecting our guards
    /// from among the universe of configured bridges.
    ///
    /// Only present when the `bridge-client` feature is enabled.
    #[cfg(feature = "bridge-client")]
    Bridges,
}
250
/// Describes the [`Universe`] that a guard sample should take its guards from.
///
/// See [`GuardSetSelector::universe_type`] for how a selector maps onto one
/// of these values.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum UniverseType {
    /// Take information from the network directory.
    NetDir,
    /// Take information from the configured bridges.
    ///
    /// Only present when the `bridge-client` feature is enabled.
    #[cfg(feature = "bridge-client")]
    BridgeSet,
}
260
261impl GuardSetSelector {
262    /// Return a description of which [`Universe`] this guard sample should take
263    /// its guards from.
264    fn universe_type(&self) -> UniverseType {
265        match self {
266            GuardSetSelector::Default | GuardSetSelector::Restricted => UniverseType::NetDir,
267            #[cfg(feature = "bridge-client")]
268            GuardSetSelector::Bridges => UniverseType::BridgeSet,
269        }
270    }
271}
272
/// Persistent state for a guard manager, as serialized to disk.
///
/// Holds one [`GuardSet`] per [`GuardSetSelector`] variant, plus a record of
/// which one is currently active.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct GuardSets {
    /// Which set of guards is currently in use?
    ///
    /// This field is skipped by serde, so it is never written to disk; a
    /// freshly deserialized value starts out with the default selector.
    #[serde(skip)]
    active_set: GuardSetSelector,

    /// The default set of guards to use.
    ///
    /// We use this one when there is no filter, or the filter permits most of the
    /// guards on the network.
    default: GuardSet,

    /// A guard set to use when we have a restrictive filter.
    ///
    /// Falls back to an empty (default) set if absent from the stored state.
    #[serde(default)]
    restricted: GuardSet,

    /// A guard set sampled from our configured bridges.
    ///
    /// Falls back to an empty (default) set if absent from the stored state.
    #[serde(default)]
    #[cfg(feature = "bridge-client")]
    bridges: GuardSet,

    /// Unrecognized fields, including (possibly) other guard sets.
    ///
    /// Collected via `serde(flatten)`, so any keys we don't recognize are
    /// retained here rather than dropped.
    #[serde(flatten)]
    remaining: HashMap<String, tor_persist::JsonValue>,
}
299
/// The key (filename) we use for storing our persistent guard state in the
/// `StateMgr`.
///
/// This is the key passed to `StateMgr::create_handle` when a [`GuardMgr`]
/// is constructed.
///
/// We used to store this in a different format in a filename called
/// "default_guards" (before Arti 0.1.0).
const STORAGE_KEY: &str = "guards";
306
/// A description of which circuits to retire because of a configuration change.
///
/// This type is `#[must_use]`: a caller that receives one is expected to act
/// on it (or explicitly discard it).
///
/// TODO(nickm): Eventually we will want to add a "Some" here, to support
/// removing only those circuits that correspond to no-longer-usable guards.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
#[non_exhaustive]
pub enum RetireCircuits {
    /// There's no need to retire any circuits.
    None,
    /// All circuits should be retired.
    All,
}
320
321impl<R: Runtime> GuardMgr<R> {
322    /// Create a new "empty" guard manager and launch its background tasks.
323    ///
324    /// It won't be able to hand out any guards until a [`NetDirProvider`] has
325    /// been installed.
326    pub fn new<S>(
327        runtime: R,
328        state_mgr: S,
329        config: &impl GuardMgrConfig,
330    ) -> Result<Self, GuardMgrError>
331    where
332        S: StateMgr + Send + Sync + 'static,
333    {
334        let (ctrl, rcv) = mpsc::unbounded();
335        let storage: DynStorageHandle<GuardSets> = state_mgr.create_handle(STORAGE_KEY);
336        // TODO(nickm): We should do something about the old state in
337        // `default_guards`.  Probably it would be best to delete it.  We could
338        // try to migrate it instead, but that's beyond the stability guarantee
339        // that we're getting at this stage of our (pre-0.1) development.
340        let state = storage.load()?.unwrap_or_default();
341
342        let (send_skew, recv_skew) = postage::watch::channel();
343        let recv_skew = ClockSkewEvents { inner: recv_skew };
344
345        let inner = Arc::new(Mutex::new(GuardMgrInner {
346            guards: state,
347            filter: GuardFilter::unfiltered(),
348            last_primary_retry_time: runtime.now(),
349            params: GuardParams::default(),
350            ctrl,
351            pending: HashMap::new(),
352            waiting: Vec::new(),
353            fallbacks: config.fallbacks().into(),
354            storage,
355            send_skew,
356            recv_skew,
357            netdir_provider: None,
358            #[cfg(feature = "bridge-client")]
359            bridge_desc_provider: None,
360            #[cfg(feature = "bridge-client")]
361            configured_bridges: None,
362        }));
363        #[cfg(feature = "bridge-client")]
364        {
365            let mut inner = inner.lock().expect("lock poisoned");
366            // TODO(nickm): This calls `GuardMgrInner::update`. Will we mind doing so before any
367            // providers are configured? I think not, but we should make sure.
368            let _: RetireCircuits =
369                inner.replace_bridge_config(config, runtime.wallclock(), runtime.now())?;
370        }
371        {
372            let weak_inner = Arc::downgrade(&inner);
373            let rt_clone = runtime.clone();
374            runtime
375                .spawn(daemon::report_status_events(rt_clone, weak_inner, rcv))
376                .map_err(|e| GuardMgrError::from_spawn("guard status event reporter", e))?;
377        }
378        {
379            let rt_clone = runtime.clone();
380            let weak_inner = Arc::downgrade(&inner);
381            runtime
382                .spawn(daemon::run_periodic(rt_clone, weak_inner))
383                .map_err(|e| GuardMgrError::from_spawn("periodic guard updater", e))?;
384        }
385        Ok(GuardMgr { runtime, inner })
386    }
387
388    /// Install a [`NetDirProvider`] for use by this guard manager.
389    ///
390    /// It will be used to keep the guards up-to-date with changes from the
391    /// network directory, and to find new guards when no NetDir is provided to
392    /// select_guard().
393    ///
394    /// TODO: we should eventually return some kind of a task handle from this
395    /// task, even though it is not strictly speaking periodic.
396    ///
397    /// The guardmgr retains only a `Weak` reference to `provider`,
398    /// `install_netdir_provider` downgrades it on entry,
399    // TODO add ref to document when https://gitlab.torproject.org/tpo/core/arti/-/issues/624
400    // is fixed.  Also, maybe take an owned `Weak` to start with.
401    //
402    /// # Panics
403    ///
404    /// Panics if a [`NetDirProvider`] is already installed.
405    pub fn install_netdir_provider(
406        &self,
407        provider: &Arc<dyn NetDirProvider>,
408    ) -> Result<(), GuardMgrError> {
409        let weak_provider = Arc::downgrade(provider);
410        {
411            let mut inner = self.inner.lock().expect("Poisoned lock");
412            assert!(inner.netdir_provider.is_none());
413            inner.netdir_provider = Some(weak_provider.clone());
414        }
415        let weak_inner = Arc::downgrade(&self.inner);
416        let rt_clone = self.runtime.clone();
417        self.runtime
418            .spawn(daemon::keep_netdir_updated(
419                rt_clone,
420                weak_inner,
421                weak_provider,
422            ))
423            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
424        Ok(())
425    }
426
427    /// Configure a new [`bridge::BridgeDescProvider`] for this [`GuardMgr`].
428    ///
429    /// It will be used to learn about changes in the set of available bridge
430    /// descriptors; we'll inform it whenever our desired set of bridge
431    /// descriptors changes.
432    ///
433    /// TODO: Same todo as in `install_netdir_provider` about task handles.
434    ///
435    /// # Panics
436    ///
437    /// Panics if a [`bridge::BridgeDescProvider`] is already installed.
438    #[cfg(feature = "bridge-client")]
439    pub fn install_bridge_desc_provider(
440        &self,
441        provider: &Arc<dyn bridge::BridgeDescProvider>,
442    ) -> Result<(), GuardMgrError> {
443        let weak_provider = Arc::downgrade(provider);
444        {
445            let mut inner = self.inner.lock().expect("Poisoned lock");
446            assert!(inner.bridge_desc_provider.is_none());
447            inner.bridge_desc_provider = Some(weak_provider.clone());
448        }
449
450        let weak_inner = Arc::downgrade(&self.inner);
451        let rt_clone = self.runtime.clone();
452        self.runtime
453            .spawn(daemon::keep_bridge_descs_updated(
454                rt_clone,
455                weak_inner,
456                weak_provider,
457            ))
458            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
459
460        Ok(())
461    }
462
463    /// Flush our current guard state to the state manager, if there
464    /// is any unsaved state.
465    pub fn store_persistent_state(&self) -> Result<(), GuardMgrError> {
466        let inner = self.inner.lock().expect("Poisoned lock");
467        trace!("Flushing guard state to disk.");
468        inner.storage.store(&inner.guards)?;
469        Ok(())
470    }
471
472    /// Reload state from the state manager.
473    ///
474    /// We only call this method if we _don't_ have the lock on the state
475    /// files.  If we have the lock, we only want to save.
476    pub fn reload_persistent_state(&self) -> Result<(), GuardMgrError> {
477        let mut inner = self.inner.lock().expect("Poisoned lock");
478        if let Some(new_guards) = inner.storage.load()? {
479            inner.replace_guards_with(new_guards, self.runtime.wallclock(), self.runtime.now());
480        }
481        Ok(())
482    }
483
484    /// Switch from having an unowned persistent state to having an owned one.
485    ///
486    /// Requires that we hold the lock on the state files.
487    pub fn upgrade_to_owned_persistent_state(&self) -> Result<(), GuardMgrError> {
488        let mut inner = self.inner.lock().expect("Poisoned lock");
489        debug_assert!(inner.storage.can_store());
490        let new_guards = inner.storage.load()?.unwrap_or_default();
491        let wallclock = self.runtime.wallclock();
492        let now = self.runtime.now();
493        inner.replace_guards_with(new_guards, wallclock, now);
494        Ok(())
495    }
496
497    /// Return true if `netdir` has enough information to safely become our new netdir.
498    pub fn netdir_is_sufficient(&self, netdir: &NetDir) -> bool {
499        let mut inner = self.inner.lock().expect("Poisoned lock");
500        if inner.guards.active_set.universe_type() != UniverseType::NetDir {
501            // If we aren't using the netdir, this isn't something we want to look at.
502            return true;
503        }
504        inner
505            .guards
506            .active_guards_mut()
507            .n_primary_without_id_info_in(netdir)
508            == 0
509    }
510
511    /// Mark every guard as potentially retriable, regardless of how recently we
512    /// failed to connect to it.
513    pub fn mark_all_guards_retriable(&self) {
514        let mut inner = self.inner.lock().expect("Poisoned lock");
515        inner.guards.active_guards_mut().mark_all_guards_retriable();
516    }
517
518    /// Configure this guardmgr to use a fixed [`NetDir`] instead of a provider.
519    ///
520    /// This function is for testing only, and is exclusive with
521    /// `install_netdir_provider`.
522    ///
523    /// # Panics
524    ///
525    /// Panics if any [`NetDirProvider`] has already been installed.
526    #[cfg(any(test, feature = "testing"))]
527    pub fn install_test_netdir(&self, netdir: &NetDir) {
528        use tor_netdir::testprovider::TestNetDirProvider;
529        let wallclock = self.runtime.wallclock();
530        let now = self.runtime.now();
531        let netdir_provider: Arc<dyn NetDirProvider> =
532            Arc::new(TestNetDirProvider::from(netdir.clone()));
533        self.install_netdir_provider(&netdir_provider)
534            .expect("Couldn't install testing network provider");
535
536        let mut inner = self.inner.lock().expect("Poisoned lock");
537        inner.update(wallclock, now);
538    }
539
540    /// Replace the configuration in this `GuardMgr` with `config`.
541    pub fn reconfigure(
542        &self,
543        config: &impl GuardMgrConfig,
544    ) -> Result<RetireCircuits, ReconfigureError> {
545        let mut inner = self.inner.lock().expect("Poisoned lock");
546        // Change the set of configured fallbacks.
547        {
548            let mut fallbacks: fallback::FallbackState = config.fallbacks().into();
549            std::mem::swap(&mut inner.fallbacks, &mut fallbacks);
550            inner.fallbacks.take_status_from(fallbacks);
551        }
552        // If we are built to use bridges, change the bridge configuration.
553        #[cfg(feature = "bridge-client")]
554        {
555            let wallclock = self.runtime.wallclock();
556            let now = self.runtime.now();
557            Ok(inner.replace_bridge_config(config, wallclock, now)?)
558        }
559        // If we are built to use bridges, change the bridge configuration.
560        #[cfg(not(feature = "bridge-client"))]
561        {
562            Ok(RetireCircuits::None)
563        }
564    }
565
566    /// Replace the current [`GuardFilter`] used by this `GuardMgr`.
567    // TODO should this be part of the config?
568    pub fn set_filter(&self, filter: GuardFilter) {
569        let wallclock = self.runtime.wallclock();
570        let now = self.runtime.now();
571        let mut inner = self.inner.lock().expect("Poisoned lock");
572        inner.set_filter(filter, wallclock, now);
573    }
574
575    /// Select a guard for a given [`GuardUsage`].
576    ///
577    /// On success, we return a [`FirstHop`] object to identify which
578    /// guard we have picked, a [`GuardMonitor`] object that the
579    /// caller can use to report whether its attempt to use the guard
580    /// succeeded or failed, and a [`GuardUsable`] future that the
581    /// caller can use to decide whether a circuit built through the
582    /// guard is actually safe to use.
583    ///
584    /// That last point is important: It's okay to build a circuit
585    /// through the guard returned by this function, but you can't
586    /// actually use it for traffic unless the [`GuardUsable`] future
587    /// yields "true".
588    pub fn select_guard(
589        &self,
590        usage: GuardUsage,
591    ) -> Result<(FirstHop, GuardMonitor, GuardUsable), PickGuardError> {
592        let now = self.runtime.now();
593        let wallclock = self.runtime.wallclock();
594
595        let mut inner = self.inner.lock().expect("Poisoned lock");
596
597        // (I am not 100% sure that we need to consider_all_retries here, but
598        // it should _probably_ not hurt.)
599        inner.guards.active_guards_mut().consider_all_retries(now);
600
601        let (origin, guard) = inner.select_guard_with_expand(&usage, now, wallclock)?;
602        trace!(?guard, ?usage, "Guard selected");
603
604        let (usable, usable_sender) = if origin.usable_immediately() {
605            (GuardUsable::new_usable_immediately(), None)
606        } else {
607            let (u, snd) = GuardUsable::new_uncertain();
608            (u, Some(snd))
609        };
610        let request_id = pending::RequestId::next();
611        let ctrl = inner.ctrl.clone();
612        let monitor = GuardMonitor::new(request_id, ctrl);
613
614        // Note that the network can be down even if all the primary guards
615        // are not yet marked as unreachable.  But according to guard-spec we
616        // don't want to acknowledge the net as down before that point, since
617        // we don't mark all the primary guards as retriable unless
618        // we've been forced to non-primary guards.
619        let net_has_been_down =
620            if let Some(duration) = tor_proto::time_since_last_incoming_traffic() {
621                inner
622                    .guards
623                    .active_guards_mut()
624                    .all_primary_guards_are_unreachable()
625                    && duration >= inner.params.internet_down_timeout
626            } else {
627                // TODO: Is this the correct behavior in this case?
628                false
629            };
630
631        let pending_request = pending::PendingRequest::new(
632            guard.first_hop_id(),
633            usage,
634            usable_sender,
635            net_has_been_down,
636        );
637        inner.pending.insert(request_id, pending_request);
638
639        match &guard.sample {
640            Some(sample) => {
641                let guard_id = GuardId::from_relay_ids(&guard);
642                inner
643                    .guards
644                    .guards_mut(sample)
645                    .record_attempt(&guard_id, now);
646            }
647            None => {
648                // We don't record attempts for fallbacks; we only care when
649                // they have failed.
650            }
651        }
652
653        Ok((guard, monitor, usable))
654    }
655
656    /// Record that _after_ we built a circuit with a guard, something described
657    /// in `external_failure` went wrong with it.
658    pub fn note_external_failure<T>(&self, identity: &T, external_failure: ExternalActivity)
659    where
660        T: tor_linkspec::HasRelayIds + ?Sized,
661    {
662        let now = self.runtime.now();
663        let mut inner = self.inner.lock().expect("Poisoned lock");
664        let ids = inner.lookup_ids(identity);
665        for id in ids {
666            match &id.0 {
667                FirstHopIdInner::Guard(sample, id) => {
668                    inner
669                        .guards
670                        .guards_mut(sample)
671                        .record_failure(id, Some(external_failure), now);
672                }
673                FirstHopIdInner::Fallback(id) => {
674                    if external_failure == ExternalActivity::DirCache {
675                        inner.fallbacks.note_failure(id, now);
676                    }
677                }
678            }
679        }
680    }
681
682    /// Record that _after_ we built a circuit with a guard, some activity
683    /// described in `external_activity` was successful with it.
684    pub fn note_external_success<T>(&self, identity: &T, external_activity: ExternalActivity)
685    where
686        T: tor_linkspec::HasRelayIds + ?Sized,
687    {
688        let mut inner = self.inner.lock().expect("Poisoned lock");
689
690        inner.record_external_success(identity, external_activity, self.runtime.wallclock());
691    }
692
693    /// Return a stream of events about our estimated clock skew; these events
694    /// are `None` when we don't have enough information to make an estimate,
695    /// and `Some(`[`SkewEstimate`]`)` otherwise.
696    ///
697    /// Note that this stream can be lossy: if the estimate changes more than
698    /// one before you read from the stream, you might only get the most recent
699    /// update.
700    pub fn skew_events(&self) -> ClockSkewEvents {
701        let inner = self.inner.lock().expect("Poisoned lock");
702        inner.recv_skew.clone()
703    }
704
705    /// Ensure that the message queue is flushed before proceeding to
706    /// the next step.  Used for testing.
707    #[cfg(test)]
708    async fn flush_msg_queue(&self) {
709        let (snd, rcv) = oneshot::channel();
710        let pingmsg = daemon::Msg::Ping(snd);
711        {
712            let inner = self.inner.lock().expect("Poisoned lock");
713            inner
714                .ctrl
715                .unbounded_send(pingmsg)
716                .expect("Guard observer task exited prematurely.");
717        }
718        let _ = rcv.await;
719    }
720}
721
/// An activity that can succeed or fail, and whose success or failure can be
/// attributed to a guard.
///
/// Values of this type are passed to [`GuardMgr::note_external_failure`] and
/// [`GuardMgr::note_external_success`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ExternalActivity {
    /// The activity of using the guard as a directory cache.
    DirCache,
}
730
731impl GuardSets {
732    /// Return a reference to the currently active set of guards.
733    ///
734    /// (That's easy enough for now, since there is never more than one set of
735    /// guards.  But eventually that will change, as we add support for more
736    /// complex filter types, and for bridge relays. Those will use separate
737    /// `GuardSet` instances, and this accessor will choose the right one.)
738    fn active_guards(&self) -> &GuardSet {
739        self.guards(&self.active_set)
740    }
741
742    /// Return the set of guards corresponding to the provided selector.
743    fn guards(&self, selector: &GuardSetSelector) -> &GuardSet {
744        match selector {
745            GuardSetSelector::Default => &self.default,
746            GuardSetSelector::Restricted => &self.restricted,
747            #[cfg(feature = "bridge-client")]
748            GuardSetSelector::Bridges => &self.bridges,
749        }
750    }
751
752    /// Return a mutable reference to the currently active set of guards.
753    fn active_guards_mut(&mut self) -> &mut GuardSet {
754        self.guards_mut(&self.active_set.clone())
755    }
756
757    /// Return a mutable reference to the set of guards corresponding to the
758    /// provided selector.
759    fn guards_mut(&mut self, selector: &GuardSetSelector) -> &mut GuardSet {
760        match selector {
761            GuardSetSelector::Default => &mut self.default,
762            GuardSetSelector::Restricted => &mut self.restricted,
763            #[cfg(feature = "bridge-client")]
764            GuardSetSelector::Bridges => &mut self.bridges,
765        }
766    }
767
768    /// Update all non-persistent state for the guards in this object with the
769    /// state in `other`.
770    fn copy_status_from(&mut self, mut other: GuardSets) {
771        use strum::IntoEnumIterator;
772        for sample in GuardSetSelector::iter() {
773            self.guards_mut(&sample)
774                .copy_ephemeral_status_into_newly_loaded_state(std::mem::take(
775                    other.guards_mut(&sample),
776                ));
777        }
778        self.active_set = other.active_set;
779    }
780}
781
782impl GuardMgrInner {
783    /// Look up the latest [`NetDir`] (if there is one) from our
784    /// [`NetDirProvider`] (if we have one).
785    fn timely_netdir(&self) -> Option<Arc<NetDir>> {
786        self.netdir_provider
787            .as_ref()
788            .and_then(Weak::upgrade)
789            .and_then(|np| np.timely_netdir().ok())
790    }
791
792    /// Look up the latest [`BridgeDescList`](bridge::BridgeDescList) (if there
793    /// is one) from our [`BridgeDescProvider`](bridge::BridgeDescProvider) (if
794    /// we have one).
795    #[cfg(feature = "bridge-client")]
796    fn latest_bridge_desc_list(&self) -> Option<Arc<bridge::BridgeDescList>> {
797        self.bridge_desc_provider
798            .as_ref()
799            .and_then(Weak::upgrade)
800            .map(|bp| bp.bridges())
801    }
802
803    /// Run a function that takes `&mut self` and an optional NetDir.
804    ///
805    /// We try to use the netdir from our [`NetDirProvider`] (if we have one).
806    /// Therefore, although its _parameters_ are suitable for every
807    /// [`GuardSet`], its _contents_ might not be. For those, call
808    /// [`with_opt_universe`](Self::with_opt_universe) instead.
809    //
810    // This function exists to handle the lifetime mess where sometimes the
811    // resulting NetDir will borrow from `netdir`, and sometimes it will borrow
812    // from an Arc returned by `self.latest_netdir()`.
813    fn with_opt_netdir<F, T>(&mut self, func: F) -> T
814    where
815        F: FnOnce(&mut Self, Option<&NetDir>) -> T,
816    {
817        if let Some(nd) = self.timely_netdir() {
818            func(self, Some(nd.as_ref()))
819        } else {
820            func(self, None)
821        }
822    }
823
824    /// Return the latest `BridgeSet` based on our `BridgeDescProvider` and our
825    /// configured bridges.
826    ///
827    /// Returns `None` if we are not configured to use bridges.
828    #[cfg(feature = "bridge-client")]
829    fn latest_bridge_set(&self) -> Option<bridge::BridgeSet> {
830        let bridge_config = self.configured_bridges.as_ref()?.clone();
831        let bridge_descs = self.latest_bridge_desc_list();
832        Some(bridge::BridgeSet::new(bridge_config, bridge_descs))
833    }
834
835    /// Run a function that takes `&mut self` and an optional [`UniverseRef`].
836    ///
837    /// We try to get a universe from the appropriate source for the current
838    /// active guard set.
839    fn with_opt_universe<F, T>(&mut self, func: F) -> T
840    where
841        F: FnOnce(&mut Self, Option<&UniverseRef>) -> T,
842    {
843        // TODO: it might be nice to make `func` take an GuardSet and a set of
844        // parameters, so we can't get the active set wrong. Doing that will
845        // require a fair amount of refactoring so that the borrow checker is
846        // happy, however.
847        match self.guards.active_set.universe_type() {
848            UniverseType::NetDir => {
849                if let Some(nd) = self.timely_netdir() {
850                    func(self, Some(&UniverseRef::NetDir(nd)))
851                } else {
852                    func(self, None)
853                }
854            }
855            #[cfg(feature = "bridge-client")]
856            UniverseType::BridgeSet => func(
857                self,
858                self.latest_bridge_set()
859                    .map(UniverseRef::BridgeSet)
860                    .as_ref(),
861            ),
862        }
863    }
864
865    /// Update the status of all guards in the active set, based on the passage
866    /// of time, our configuration, and the relevant Universe for our active
867    /// set.
868    fn update(&mut self, wallclock: SystemTime, now: Instant) {
869        self.with_opt_netdir(|this, netdir| {
870            // Here we update our parameters from the latest NetDir, and check
871            // whether we need to change to a (non)-restrictive GuardSet based
872            // on those parameters and our configured filter.
873            //
874            // This uses a NetDir unconditionally, since we always want to take
875            // the network parameters our parameters from the consensus even if
876            // the guards themselves are from a BridgeSet.
877            this.update_active_set_params_and_filter(netdir);
878        });
879        self.with_opt_universe(|this, univ| {
880            // Now we update the set of guards themselves based on the
881            // Universe, which is either the latest NetDir, or the latest
882            // BridgeSet—depending on what the GuardSet wants.
883            Self::update_guardset_internal(
884                &this.params,
885                wallclock,
886                this.guards.active_set.universe_type(),
887                this.guards.active_guards_mut(),
888                univ,
889            );
890            #[cfg(feature = "bridge-client")]
891            this.update_desired_descriptors(now);
892            #[cfg(not(feature = "bridge-client"))]
893            let _ = now;
894        });
895    }
896
    /// Replace our bridge configuration with the one from `new_config`.
    ///
    /// Returns `Ok(RetireCircuits::None)` when nothing had to change (bridges
    /// were and remain disabled, or the configured bridge list is unchanged),
    /// and `Ok(RetireCircuits::All)` after any actual change.
    ///
    /// # Errors
    ///
    /// Returns `GuardMgrConfigError::NoLock` if bridges are enabled in
    /// `new_config` but our storage reports that it cannot store.
    ///
    /// # Panics
    ///
    /// Panics if the active guard set's universe type is inconsistent with
    /// the previously configured bridges in one of the "nothing to do" cases.
    #[cfg(feature = "bridge-client")]
    fn replace_bridge_config(
        &mut self,
        new_config: &impl GuardMgrConfig,
        wallclock: SystemTime,
        now: Instant,
    ) -> Result<RetireCircuits, GuardMgrConfigError> {
        // Arms are tried in order; the guarded arms must come before the
        // catch-all `(_, true)` arm.
        match (&self.configured_bridges, new_config.bridges_enabled()) {
            (None, false) => {
                // Bridges were off and stay off.
                assert_ne!(
                    self.guards.active_set.universe_type(),
                    UniverseType::BridgeSet
                );
                return Ok(RetireCircuits::None); // nothing to do
            }
            (_, true) if !self.storage.can_store() => {
                // TODO: Ideally we would try to upgrade, obtaining an exclusive lock,
                // but `StorageHandle` currently lacks a method for that.
                return Err(GuardMgrConfigError::NoLock("bridges configured".into()));
            }
            (Some(current_bridges), true) if new_config.bridges() == current_bridges.as_ref() => {
                // Bridges were on and the list is the same as before.
                assert_eq!(
                    self.guards.active_set.universe_type(),
                    UniverseType::BridgeSet
                );
                return Ok(RetireCircuits::None); // nothing to do.
            }
            (_, true) => {
                // Bridges are (still or newly) enabled, with a new list.
                self.configured_bridges = Some(new_config.bridges().into());
                self.guards.active_set = GuardSetSelector::Bridges;
            }
            (_, false) => {
                // Bridges were enabled before, and are now disabled.
                self.configured_bridges = None;
                self.guards.active_set = GuardSetSelector::Default;
            }
        }

        // If we have gotten here, we have changed the set of bridges, changed
        // which set is active, or changed them both.  We need to make sure that
        // our `GuardSet` object is up-to-date with our configuration.
        self.update(wallclock, now);

        // We also need to tell the caller that its circuits are no good any
        // more.
        //
        // TODO(nickm): Someday we can do this more judiciously by returning
        // "Some" in the case where we're still using bridges but our new bridge
        // set contains different elements; see comment on RetireCircuits.
        //
        // TODO(nickm): We could also safely return RetireCircuits::None if we
        // are using bridges, and our new bridge list is a superset of the older
        // one.
        Ok(RetireCircuits::All)
    }
952
953    /// Update our parameters, our selection (based on network parameters and
954    /// configuration), and make sure the active GuardSet has the right
955    /// configuration itself.
956    ///
957    /// We should call this whenever the NetDir's parameters change, or whenever
958    /// our filter changes.  We do not need to call it for new elements arriving
959    /// in our Universe, since those do not affect anything here.
960    ///
961    /// We should also call this whenever a new GuardSet becomes active for any
962    /// reason _other_ than just having called this function.
963    ///
964    /// (This function is only invoked from `update`, which should be called
965    /// under the above circumstances.)
966    fn update_active_set_params_and_filter(&mut self, netdir: Option<&NetDir>) {
967        // Set the parameters.  These always come from the NetDir, even if this
968        // is a bridge set.
969        if let Some(netdir) = netdir {
970            match GuardParams::try_from(netdir.params()) {
971                Ok(params) => self.params = params,
972                Err(e) => warn!("Unusable guard parameters from consensus: {}", e),
973            }
974
975            self.select_guard_set_based_on_filter(netdir);
976        }
977
978        // Change the filter, if it doesn't match what the guards have.
979        //
980        // TODO(nickm): We could use a "dirty" flag or something to decide
981        // whether we need to call set_filter, if this comparison starts to show
982        // up in profiles.
983        if self.guards.active_guards().filter() != &self.filter {
984            let restrictive = self.guards.active_set == GuardSetSelector::Restricted;
985            self.guards
986                .active_guards_mut()
987                .set_filter(self.filter.clone(), restrictive);
988        }
989    }
990
991    /// Update the status of every guard in `active_guards`, and expand it as
992    /// needed.
993    ///
994    /// This function doesn't take `&self`, to make sure that we are only
995    /// affecting a single `GuardSet`, and to avoid confusing the borrow
996    /// checker.
997    ///
998    /// We should call this whenever the contents of the universe have changed.
999    ///
1000    /// We should also call this whenever a new GuardSet becomes active.
1001    fn update_guardset_internal<U: Universe>(
1002        params: &GuardParams,
1003        now: SystemTime,
1004        universe_type: UniverseType,
1005        active_guards: &mut GuardSet,
1006        universe: Option<&U>,
1007    ) -> ExtendedStatus {
1008        // Expire guards.  Do that early, in case doing so makes it clear that
1009        // we need to grab more guards or mark others as primary.
1010        active_guards.expire_old_guards(params, now);
1011
1012        let extended = if let Some(universe) = universe {
1013            // TODO: This check here may be completely unnecessary. I inserted
1014            // it back in 5ac0fcb7ef603e0d14 because I was originally concerned
1015            // it might be undesirable to list a primary guard as "missing dir
1016            // info" (and therefore unusable) if we were expecting to get its
1017            // microdescriptor "very soon."
1018            //
1019            // But due to the other check in `netdir_is_sufficient`, we
1020            // shouldn't be installing a netdir until it has microdescs for all
1021            // of the (non-bridge) primary guards that it lists. - nickm
1022            if active_guards.n_primary_without_id_info_in(universe) > 0
1023                && universe_type == UniverseType::NetDir
1024            {
1025                // We are missing the information from a NetDir needed to see
1026                // whether our primary guards are listed, so we shouldn't update
1027                // our guard status.
1028                //
1029                // We don't want to do this check if we are using bridges, since
1030                // a missing bridge descriptor is not guaranteed to temporary
1031                // problem in the same way that a missing microdescriptor is.
1032                // (When a bridge desc is missing, the bridge could be down or
1033                // unreachable, and nobody else can help us. But if a microdesc
1034                // is missing, we just need to find a cache that has it.)
1035                return ExtendedStatus::No;
1036            }
1037            active_guards.update_status_from_dir(universe);
1038            active_guards.extend_sample_as_needed(now, params, universe)
1039        } else {
1040            ExtendedStatus::No
1041        };
1042
1043        active_guards.select_primary_guards(params);
1044
1045        extended
1046    }
1047
1048    /// If using bridges, tell the BridgeDescProvider which descriptors we want.
1049    /// We need to check this *after* we select our primary guards.
1050    #[cfg(feature = "bridge-client")]
1051    fn update_desired_descriptors(&mut self, now: Instant) {
1052        if self.guards.active_set.universe_type() != UniverseType::BridgeSet {
1053            return;
1054        }
1055
1056        let provider = self.bridge_desc_provider.as_ref().and_then(Weak::upgrade);
1057        let bridge_set = self.latest_bridge_set();
1058        if let (Some(provider), Some(bridge_set)) = (provider, bridge_set) {
1059            let desired: Vec<_> = self
1060                .guards
1061                .active_guards()
1062                .descriptors_to_request(now, &self.params)
1063                .into_iter()
1064                .flat_map(|guard| bridge_set.bridge_by_guard(guard))
1065                .cloned()
1066                .collect();
1067
1068            provider.set_bridges(&desired);
1069        }
1070    }
1071
1072    /// Replace the active guard state with `new_state`, preserving
1073    /// non-persistent state for any guards that are retained.
1074    fn replace_guards_with(
1075        &mut self,
1076        mut new_guards: GuardSets,
1077        wallclock: SystemTime,
1078        now: Instant,
1079    ) {
1080        std::mem::swap(&mut self.guards, &mut new_guards);
1081        self.guards.copy_status_from(new_guards);
1082        self.update(wallclock, now);
1083    }
1084
1085    /// Update which guard set is active based on the current filter and the
1086    /// provided netdir.
1087    ///
1088    /// After calling this function, the new guard set's filter may be
1089    /// out-of-date: be sure to call `set_filter` as appropriate.
1090    fn select_guard_set_based_on_filter(&mut self, netdir: &NetDir) {
1091        // In general, we'd like to use the restricted set if we're under the
1092        // threshold, and the default set if we're over the threshold.  But if
1093        // we're sitting close to the threshold, we want to avoid flapping back
1094        // and forth, so we only change when we're more than 5% "off" from
1095        // whatever our current setting is.
1096        //
1097        // (See guard-spec section 2 for more information.)
1098        let offset = match self.guards.active_set {
1099            GuardSetSelector::Default => -0.05,
1100            GuardSetSelector::Restricted => 0.05,
1101            // If we're using bridges, then we don't switch between the other guard sets based on on the filter at all.
1102            #[cfg(feature = "bridge-client")]
1103            GuardSetSelector::Bridges => return,
1104        };
1105        let frac_permitted = self.filter.frac_bw_permitted(netdir);
1106        let threshold = self.params.filter_threshold + offset;
1107        let new_choice = if frac_permitted < threshold {
1108            GuardSetSelector::Restricted
1109        } else {
1110            GuardSetSelector::Default
1111        };
1112
1113        if new_choice != self.guards.active_set {
1114            info!(
1115                "Guard selection changed; we are now using the {:?} guard set",
1116                &new_choice
1117            );
1118
1119            self.guards.active_set = new_choice;
1120
1121            if frac_permitted < self.params.extreme_threshold {
1122                warn!(
1123                      "The number of guards permitted is smaller than the recommended minimum of {:.0}%.",
1124                      self.params.extreme_threshold * 100.0,
1125                );
1126            }
1127        }
1128    }
1129
1130    /// Mark all of our primary guards as retriable, if we haven't done
1131    /// so since long enough before `now`.
1132    ///
1133    /// We want to call this function whenever a guard attempt succeeds,
1134    /// if the internet seemed to be down when the guard attempt was
1135    /// first launched.
1136    fn maybe_retry_primary_guards(&mut self, now: Instant) {
1137        // We don't actually want to mark our primary guards as
1138        // retriable more than once per internet_down_timeout: after
1139        // the first time, we would just be noticing the same "coming
1140        // back online" event more than once.
1141        let interval = self.params.internet_down_timeout;
1142        if self.last_primary_retry_time + interval <= now {
1143            debug!("Successfully reached a guard after a while off the internet; marking all primary guards retriable.");
1144            self.guards
1145                .active_guards_mut()
1146                .mark_primary_guards_retriable();
1147            self.last_primary_retry_time = now;
1148        }
1149    }
1150
1151    /// Replace the current GuardFilter with `filter`.
1152    fn set_filter(&mut self, filter: GuardFilter, wallclock: SystemTime, now: Instant) {
1153        self.filter = filter;
1154        self.update(wallclock, now);
1155    }
1156
    /// Called when the circuit manager reports (via [`GuardMonitor`]) that
    /// a guard succeeded or failed.
    ///
    /// Changes the guard's status as appropriate, and updates the pending
    /// request as needed.
    ///
    /// `request_id` identifies the pending request being reported on; if no
    /// such request is pending, we only log a warning.  `status` is the
    /// reported outcome, `skew` is an optional clock-skew observation to
    /// record, and `runtime` supplies the current time.
    ///
    /// Regardless of whether the request was found, we afterwards re-select
    /// the primary guards of the active set and answer or expire any waiting
    /// requests.
    #[allow(clippy::cognitive_complexity)]
    pub(crate) fn handle_msg(
        &mut self,
        request_id: RequestId,
        status: GuardStatus,
        skew: Option<ClockSkew>,
        runtime: &impl tor_rtcompat::SleepProvider,
    ) {
        if let Some(mut pending) = self.pending.remove(&request_id) {
            // If there was a pending request matching this RequestId, great!
            let guard_id = pending.guard_id();
            trace!(?guard_id, ?status, "Received report of guard status");

            // First, handle the skew report (if any)
            if let Some(skew) = skew {
                let now = runtime.now();
                let observation = skew::SkewObservation { skew, when: now };

                // NOTE(review): the skew is recorded on the *active* guard
                // set, whereas the status handling below uses the sample
                // named in the guard ID.  Confirm this is intended if the
                // active set can change while a request is pending.
                match &guard_id.0 {
                    FirstHopIdInner::Guard(_, id) => {
                        self.guards.active_guards_mut().record_skew(id, observation);
                    }
                    FirstHopIdInner::Fallback(id) => {
                        self.fallbacks.note_skew(id, observation);
                    }
                }
                // TODO: We call this whenever we receive an observed clock
                // skew. That's not the perfect timing for two reasons.  First
                // off, it might be too frequent: it does an O(n) calculation,
                // which isn't ideal.  Second, it might be too infrequent: after
                // an hour has passed, a given observation won't be up-to-date
                // any more, and we might want to recalculate the skew
                // accordingly.
                self.update_skew(now);
            }

            // Next, act on the reported status, depending on whether the
            // first hop was a guard or a fallback.
            match (status, &guard_id.0) {
                (GuardStatus::Failure, FirstHopIdInner::Fallback(id)) => {
                    // We used a fallback, and we weren't able to build a circuit through it.
                    self.fallbacks.note_failure(id, runtime.now());
                }
                (_, FirstHopIdInner::Fallback(_)) => {
                    // We don't record any other kind of circuit activity if we
                    // took the entry from the fallback list.
                }
                (GuardStatus::Success, FirstHopIdInner::Guard(sample, id)) => {
                    // If we had gone too long without any net activity when we
                    // gave out this guard, and now we're seeing a circuit
                    // succeed, tell the primary guards that they might be
                    // retriable.
                    if pending.net_has_been_down() {
                        self.maybe_retry_primary_guards(runtime.now());
                    }

                    // The guard succeeded.  Tell the GuardSet.
                    self.guards.guards_mut(sample).record_success(
                        id,
                        &self.params,
                        None,
                        runtime.wallclock(),
                    );
                    // Either tell the request whether the guard is
                    // usable, or schedule it as a "waiting" request.
                    if let Some(usable) = self.guard_usability_status(&pending, runtime.now()) {
                        trace!(?guard_id, usable, "Known usability status");
                        pending.reply(usable);
                    } else {
                        // This is the one case where we can't use the
                        // guard yet.
                        trace!(?guard_id, "Not able to answer right now");
                        pending.mark_waiting(runtime.now());
                        self.waiting.push(pending);
                    }
                }
                (GuardStatus::Failure, FirstHopIdInner::Guard(sample, id)) => {
                    // Record the failure, and tell the requester not to use
                    // the circuit.
                    self.guards
                        .guards_mut(sample)
                        .record_failure(id, None, runtime.now());
                    pending.reply(false);
                }
                (GuardStatus::AttemptAbandoned, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards.guards_mut(sample).record_attempt_abandoned(id);
                    pending.reply(false);
                }
                (GuardStatus::Indeterminate, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards
                        .guards_mut(sample)
                        .record_indeterminate_result(id);
                    pending.reply(false);
                }
            };
        } else {
            warn!(
                "Got a status {:?} for a request {:?} that wasn't pending",
                status, request_id
            );
        }

        // We might need to update the primary guards based on changes in the
        // status of guards above.
        self.guards
            .active_guards_mut()
            .select_primary_guards(&self.params);

        // Some waiting request may just have become ready (usable or
        // not); we need to give them the information they're waiting
        // for.
        self.expire_and_answer_pending_requests(runtime.now());
    }
1271
1272    /// Helper to implement `GuardMgr::note_external_success()`.
1273    ///
1274    /// (This has to be a separate function so that we can borrow params while
1275    /// we have `mut self` borrowed.)
1276    fn record_external_success<T>(
1277        &mut self,
1278        identity: &T,
1279        external_activity: ExternalActivity,
1280        now: SystemTime,
1281    ) where
1282        T: tor_linkspec::HasRelayIds + ?Sized,
1283    {
1284        for id in self.lookup_ids(identity) {
1285            match &id.0 {
1286                FirstHopIdInner::Guard(sample, id) => {
1287                    self.guards.guards_mut(sample).record_success(
1288                        id,
1289                        &self.params,
1290                        Some(external_activity),
1291                        now,
1292                    );
1293                }
1294                FirstHopIdInner::Fallback(id) => {
1295                    if external_activity == ExternalActivity::DirCache {
1296                        self.fallbacks.note_success(id);
1297                    }
1298                }
1299            }
1300        }
1301    }
1302
1303    /// Return an iterator over all of the clock skew observations we've made
1304    /// for guards or fallbacks.
1305    fn skew_observations(&self) -> impl Iterator<Item = &skew::SkewObservation> {
1306        self.fallbacks
1307            .skew_observations()
1308            .chain(self.guards.active_guards().skew_observations())
1309    }
1310
1311    /// Recalculate our estimated clock skew, and publish it to anybody who
1312    /// cares.
1313    fn update_skew(&mut self, now: Instant) {
1314        let estimate = skew::SkewEstimate::estimate_skew(self.skew_observations(), now);
1315        // TODO: we might want to do this only conditionally, when the skew
1316        // estimate changes.
1317        *self.send_skew.borrow_mut() = estimate;
1318    }
1319
1320    /// If the circuit built because of a given [`PendingRequest`] may
1321    /// now be used (or discarded), return `Some(true)` or
1322    /// `Some(false)` respectively.
1323    ///
1324    /// Return None if we can't yet give an answer about whether such
1325    /// a circuit is usable.
1326    fn guard_usability_status(&self, pending: &PendingRequest, now: Instant) -> Option<bool> {
1327        match &pending.guard_id().0 {
1328            FirstHopIdInner::Guard(sample, id) => self.guards.guards(sample).circ_usability_status(
1329                id,
1330                pending.usage(),
1331                &self.params,
1332                now,
1333            ),
1334            // Fallback circuits are usable immediately, since we don't have to wait to
1335            // see whether any _other_ circuit succeeds or fails.
1336            FirstHopIdInner::Fallback(_) => Some(true),
1337        }
1338    }
1339
1340    /// For requests that have been "waiting" for an answer for too long,
1341    /// expire them and tell the circuit manager that their circuits
1342    /// are unusable.
1343    fn expire_and_answer_pending_requests(&mut self, now: Instant) {
1344        // A bit ugly: we use a separate Vec here to avoid borrowing issues,
1345        // and put it back when we're done.
1346        let mut waiting = Vec::new();
1347        std::mem::swap(&mut waiting, &mut self.waiting);
1348
1349        waiting.retain_mut(|pending| {
1350            let expired = pending
1351                .waiting_since()
1352                .and_then(|w| now.checked_duration_since(w))
1353                .map(|d| d >= self.params.np_idle_timeout)
1354                == Some(true);
1355            if expired {
1356                trace!(?pending, "Pending request expired");
1357                pending.reply(false);
1358                return false;
1359            }
1360
1361            // TODO-SPEC: guard_usability_status isn't what the spec says.  It
1362            // says instead that we should look at _circuit_ status, saying:
1363            //  "   Definition: In the algorithm above, C2 "blocks" C1 if:
1364            // * C2 obeys all the restrictions that C1 had to obey, AND
1365            // * C2 has higher priority than C1, AND
1366            // * Either C2 is <complete>, or C2 is <waiting_for_better_guard>,
1367            // or C2 has been <usable_if_no_better_guard> for no more than
1368            // {NONPRIMARY_GUARD_CONNECT_TIMEOUT} seconds."
1369            //
1370            // See comments in sample::GuardSet::circ_usability_status.
1371
1372            if let Some(answer) = self.guard_usability_status(pending, now) {
1373                trace!(?pending, answer, "Pending request now ready");
1374                pending.reply(answer);
1375                return false;
1376            }
1377            true
1378        });
1379
1380        // Put the waiting list back.
1381        std::mem::swap(&mut waiting, &mut self.waiting);
1382    }
1383
1384    /// Return every currently extant FirstHopId for a guard or fallback
1385    /// directory matching (or possibly matching) the provided keys.
1386    ///
1387    /// An identity is _possibly matching_ if it contains some of the IDs in the
1388    /// provided identity, and it has no _contradictory_ identities, but it does
1389    /// not necessarily contain _all_ of those identities.
1390    ///
1391    /// # TODO
1392    ///
1393    /// This function should probably not exist; it's only used so that dirmgr
1394    /// can report successes or failures, since by the time it observes them it
1395    /// doesn't know whether its circuit came from a guard or a fallback.  To
1396    /// solve that, we'll need CircMgr to record and report which one it was
1397    /// using, which will take some more plumbing.
1398    ///
1399    /// TODO relay: we will have to make the change above when we implement
1400    /// relays; otherwise, it would be possible for an attacker to exploit it to
1401    /// mislead us about our guard status.
1402    fn lookup_ids<T>(&self, identity: &T) -> Vec<FirstHopId>
1403    where
1404        T: tor_linkspec::HasRelayIds + ?Sized,
1405    {
1406        use strum::IntoEnumIterator;
1407        let mut vec = Vec::with_capacity(2);
1408
1409        let id = ids::GuardId::from_relay_ids(identity);
1410        for sample in GuardSetSelector::iter() {
1411            let guard_id = match self.guards.guards(&sample).contains(&id) {
1412                Ok(true) => &id,
1413                Err(other) => other,
1414                Ok(false) => continue,
1415            };
1416            vec.push(FirstHopId(FirstHopIdInner::Guard(sample, guard_id.clone())));
1417        }
1418
1419        let id = ids::FallbackId::from_relay_ids(identity);
1420        if self.fallbacks.contains(&id) {
1421            vec.push(id.into());
1422        }
1423
1424        vec
1425    }
1426
1427    /// Run any periodic events that update guard status, and return a
1428    /// duration after which periodic events should next be run.
1429    pub(crate) fn run_periodic_events(&mut self, wallclock: SystemTime, now: Instant) -> Duration {
1430        self.update(wallclock, now);
1431        self.expire_and_answer_pending_requests(now);
1432        Duration::from_secs(1) // TODO: Too aggressive.
1433    }
1434
1435    /// Try to select a guard, expanding the sample if the first attempt fails.
1436    fn select_guard_with_expand(
1437        &mut self,
1438        usage: &GuardUsage,
1439        now: Instant,
1440        wallclock: SystemTime,
1441    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1442        // Try to find a guard.
1443        let first_error = match self.select_guard_once(usage, now) {
1444            Ok(res1) => return Ok(res1),
1445            Err(e) => {
1446                trace!("Couldn't select guard on first attempt: {}", e);
1447                e
1448            }
1449        };
1450
1451        // That didn't work. If we have a netdir, expand the sample and try again.
1452        let res = self.with_opt_universe(|this, univ| {
1453            let univ = univ?;
1454            trace!("No guards available, trying to extend the sample.");
1455            // Make sure that the status on all of our guards are accurate, and
1456            // expand the sample if we can.
1457            //
1458            // Our parameters and configuration did not change, so we do not
1459            // need to call update() or update_active_set_and_filter(). This
1460            // call is sufficient to  extend the sample and recompute primary
1461            // guards.
1462            let extended = Self::update_guardset_internal(
1463                &this.params,
1464                wallclock,
1465                this.guards.active_set.universe_type(),
1466                this.guards.active_guards_mut(),
1467                Some(univ),
1468            );
1469            if extended == ExtendedStatus::Yes {
1470                match this.select_guard_once(usage, now) {
1471                    Ok(res) => return Some(res),
1472                    Err(e) => {
1473                        trace!("Couldn't select guard after update: {}", e);
1474                    }
1475                }
1476            }
1477            None
1478        });
1479        if let Some(res) = res {
1480            return Ok(res);
1481        }
1482
1483        // Okay, that didn't work either.  If we were asked for a directory
1484        // guard, and we aren't using bridges, then we may be able to use a
1485        // fallback.
1486        if usage.kind == GuardUsageKind::OneHopDirectory
1487            && self.guards.active_set.universe_type() == UniverseType::NetDir
1488        {
1489            return self.select_fallback(now);
1490        }
1491
1492        // Couldn't extend the sample or use a fallback; return the original error.
1493        Err(first_error)
1494    }
1495
1496    /// Helper: try to pick a single guard, without retrying on failure.
1497    fn select_guard_once(
1498        &self,
1499        usage: &GuardUsage,
1500        now: Instant,
1501    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1502        let active_set = &self.guards.active_set;
1503        #[cfg_attr(not(feature = "bridge-client"), allow(unused_mut))]
1504        let (list_kind, mut first_hop) =
1505            self.guards
1506                .guards(active_set)
1507                .pick_guard(active_set, usage, &self.params, now)?;
1508        #[cfg(feature = "bridge-client")]
1509        if self.guards.active_set.universe_type() == UniverseType::BridgeSet {
1510            // See if we can promote first_hop to a viable CircTarget.
1511            let bridges = self.latest_bridge_set().ok_or_else(|| {
1512                PickGuardError::Internal(internal!(
1513                    "No bridge set available, even though this is the Bridges sample"
1514                ))
1515            })?;
1516            first_hop.lookup_bridge_circ_target(&bridges);
1517
1518            if usage.kind == GuardUsageKind::Data && !first_hop.contains_circ_target() {
1519                return Err(PickGuardError::Internal(internal!(
1520                    "Tried to return a non-circtarget guard with Data usage!"
1521                )));
1522            }
1523        }
1524        Ok((list_kind, first_hop))
1525    }
1526
1527    /// Helper: Select a fallback directory.
1528    ///
1529    /// Called when we have no guard information to use. Return values are as
1530    /// for [`GuardMgr::select_guard()`]
1531    fn select_fallback(
1532        &self,
1533        now: Instant,
1534    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1535        let filt = self.guards.active_guards().filter();
1536
1537        let fallback = self
1538            .fallbacks
1539            .choose(&mut rand::rng(), now, filt)?
1540            .as_guard();
1541        let fallback = filt.modify_hop(fallback)?;
1542        Ok((sample::ListKind::Fallback, fallback))
1543    }
1544}
1545
/// A possible outcome of trying to extend a guard sample.
///
/// This is a two-valued enum rather than a `bool` so that call sites can
/// compare against a named outcome.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum ExtendedStatus {
    /// The guard sample was extended. (At least one guard was added to it.)
    Yes,
    /// The guard sample was not extended.
    No,
}
1554
/// A set of parameters, derived from the consensus document, controlling
/// the behavior of a guard manager.
///
/// Values are obtained either from the `Default` implementation or by
/// converting a `NetParameters` with `TryFrom`.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
struct GuardParams {
    /// How long should a sampled, un-confirmed guard be kept in the sample before it expires?
    lifetime_unconfirmed: Duration,
    /// How long should a confirmed guard be kept in the sample before
    /// it expires?
    lifetime_confirmed: Duration,
    /// How long may a guard be unlisted before we remove it from the sample?
    lifetime_unlisted: Duration,
    /// Largest number of guards we're willing to add to the sample.
    max_sample_size: usize,
    /// Largest fraction of the network's guard bandwidth that we're
    /// willing to add to the sample.
    max_sample_bw_fraction: f64,
    /// Smallest number of guards that we're willing to have in the
    /// sample, after applying a [`GuardFilter`].
    min_filtered_sample_size: usize,
    /// How many guards are considered "Primary"?
    n_primary: usize,
    /// When making a regular circuit, how many primary guards should we
    /// be willing to try?
    data_parallelism: usize,
    /// When making a one-hop directory circuit, how many primary
    /// guards should we be willing to try?
    dir_parallelism: usize,
    /// For how long does a pending attempt to connect to a guard
    /// block an attempt to use a less-favored non-primary guard?
    np_connect_timeout: Duration,
    /// How long do we allow a circuit to a successful but unfavored
    /// non-primary guard to sit around before deciding not to use it?
    np_idle_timeout: Duration,
    /// After how much time without successful activity does a
    /// successful circuit indicate that we should retry our primary
    /// guards?
    internet_down_timeout: Duration,
    /// What fraction of the guards can be filtered out before we
    /// decide that our filter is "very restrictive"?
    ///
    /// (Taken from the `guard_meaningful_restriction` network parameter.)
    filter_threshold: f64,
    /// What fraction of the guards determine that our filter is "very
    /// restrictive"?
    ///
    /// (Taken from the `guard_extreme_restriction` network parameter.)
    extreme_threshold: f64,
}
1600
1601impl Default for GuardParams {
1602    fn default() -> Self {
1603        let one_day = Duration::from_secs(86400);
1604        GuardParams {
1605            lifetime_unconfirmed: one_day * 120,
1606            lifetime_confirmed: one_day * 60,
1607            lifetime_unlisted: one_day * 20,
1608            max_sample_size: 60,
1609            max_sample_bw_fraction: 0.2,
1610            min_filtered_sample_size: 20,
1611            n_primary: 3,
1612            data_parallelism: 1,
1613            dir_parallelism: 3,
1614            np_connect_timeout: Duration::from_secs(15),
1615            np_idle_timeout: Duration::from_secs(600),
1616            internet_down_timeout: Duration::from_secs(600),
1617            filter_threshold: 0.2,
1618            extreme_threshold: 0.01,
1619        }
1620    }
1621}
1622
1623impl TryFrom<&NetParameters> for GuardParams {
1624    type Error = tor_units::Error;
1625    fn try_from(p: &NetParameters) -> Result<GuardParams, Self::Error> {
1626        Ok(GuardParams {
1627            lifetime_unconfirmed: p.guard_lifetime_unconfirmed.try_into()?,
1628            lifetime_confirmed: p.guard_lifetime_confirmed.try_into()?,
1629            lifetime_unlisted: p.guard_remove_unlisted_after.try_into()?,
1630            max_sample_size: p.guard_max_sample_size.try_into()?,
1631            max_sample_bw_fraction: p.guard_max_sample_threshold.as_fraction(),
1632            min_filtered_sample_size: p.guard_filtered_min_sample_size.try_into()?,
1633            n_primary: p.guard_n_primary.try_into()?,
1634            data_parallelism: p.guard_use_parallelism.try_into()?,
1635            dir_parallelism: p.guard_dir_use_parallelism.try_into()?,
1636            np_connect_timeout: p.guard_nonprimary_connect_timeout.try_into()?,
1637            np_idle_timeout: p.guard_nonprimary_idle_timeout.try_into()?,
1638            internet_down_timeout: p.guard_internet_likely_down.try_into()?,
1639            filter_threshold: p.guard_meaningful_restriction.as_fraction(),
1640            extreme_threshold: p.guard_extreme_restriction.as_fraction(),
1641        })
1642    }
1643}
1644
/// Representation of a guard or fallback, as returned by [`GuardMgr::select_guard()`].
///
/// A `FirstHop` always carries enough information to open a channel; it may
/// additionally carry what is needed to build circuits through it (see
/// [`FirstHop::as_circ_target`]).
#[derive(Debug, Clone)]
pub struct FirstHop {
    /// The sample from which this guard was taken, or `None` if this is a fallback.
    sample: Option<GuardSetSelector>,
    /// Information about connecting to (or through) this guard.
    inner: FirstHopInner,
}
/// The enumeration inside a FirstHop that holds information about how to
/// connect to (and possibly through) a guard or fallback.
#[derive(Debug, Clone)]
enum FirstHopInner {
    /// We have enough information to connect to a guard.
    Chan(OwnedChanTarget),
    /// We have enough information to connect to a guard _and_ to build
    /// multihop circuits through it.
    //
    // Within this file the variant is only constructed by
    // `lookup_bridge_circ_target`, which exists only with the
    // `bridge-client` feature; hence the conditional `dead_code` allowance.
    #[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
    Circ(OwnedCircTarget),
}
1664
1665impl FirstHop {
1666    /// Return a new [`FirstHopId`] for this `FirstHop`.
1667    fn first_hop_id(&self) -> FirstHopId {
1668        match &self.sample {
1669            Some(sample) => {
1670                let guard_id = GuardId::from_relay_ids(self);
1671                FirstHopId::in_sample(sample.clone(), guard_id)
1672            }
1673            None => {
1674                let fallback_id = crate::ids::FallbackId::from_relay_ids(self);
1675                FirstHopId::from(fallback_id)
1676            }
1677        }
1678    }
1679
1680    /// Look up this guard in `netdir`.
1681    pub fn get_relay<'a>(&self, netdir: &'a NetDir) -> Option<Relay<'a>> {
1682        match &self.sample {
1683            #[cfg(feature = "bridge-client")]
1684            // Always return "None" for anything that isn't in the netdir.
1685            Some(s) if s.universe_type() == UniverseType::BridgeSet => None,
1686            // Otherwise ask the netdir.
1687            _ => netdir.by_ids(self),
1688        }
1689    }
1690
1691    /// Return true if this guard is a bridge.
1692    pub fn is_bridge(&self) -> bool {
1693        match &self.sample {
1694            #[cfg(feature = "bridge-client")]
1695            Some(s) if s.universe_type() == UniverseType::BridgeSet => true,
1696            _ => false,
1697        }
1698    }
1699
1700    /// If possible, return a view of this object that can be used to build a circuit.
1701    pub fn as_circ_target(&self) -> Option<&OwnedCircTarget> {
1702        match &self.inner {
1703            FirstHopInner::Chan(_) => None,
1704            FirstHopInner::Circ(ct) => Some(ct),
1705        }
1706    }
1707
1708    /// Return a view of this as an OwnedChanTarget.
1709    fn chan_target_mut(&mut self) -> &mut OwnedChanTarget {
1710        match &mut self.inner {
1711            FirstHopInner::Chan(ct) => ct,
1712            FirstHopInner::Circ(ct) => ct.chan_target_mut(),
1713        }
1714    }
1715
1716    /// If possible and appropriate, find a circuit target in `bridges` for this
1717    /// `FirstHop`, and make this `FirstHop` a viable circuit target.
1718    ///
1719    /// (By default, any `FirstHop` that a `GuardSet` returns will have enough
1720    /// information to be a `ChanTarget`, but it will be lacking the additional
1721    /// network information in `CircTarget`[^1] necessary for us to build a
1722    /// multi-hop circuit through it.  If this FirstHop is a regular non-bridge
1723    /// `Relay`, then the `CircMgr` will later look up that circuit information
1724    /// itself from the network directory. But if this `FirstHop` *is* a bridge,
1725    /// then we need to find that information in the `BridgeSet`, since the
1726    /// CircMgr does not keep track of the `BridgeSet`.)
1727    ///
1728    /// [^1]: For example, supported protocol versions and ntor keys.
1729    #[cfg(feature = "bridge-client")]
1730    fn lookup_bridge_circ_target(&mut self, bridges: &bridge::BridgeSet) {
1731        use crate::sample::CandidateStatus::Present;
1732        if self.sample.as_ref().map(|s| s.universe_type()) == Some(UniverseType::BridgeSet)
1733            && matches!(self.inner, FirstHopInner::Chan(_))
1734        {
1735            if let Present(bridge_relay) = bridges.bridge_relay_by_guard(self) {
1736                if let Some(circ_target) = bridge_relay.as_relay_with_desc() {
1737                    self.inner =
1738                        FirstHopInner::Circ(OwnedCircTarget::from_circ_target(&circ_target));
1739                }
1740            }
1741        }
1742    }
1743
1744    /// Return true if this `FirstHop` contains circuit target information.
1745    ///
1746    /// This is true if `lookup_bridge_circ_target()` has been called, and it
1747    /// successfully found the circuit target information.
1748    #[cfg(feature = "bridge-client")]
1749    fn contains_circ_target(&self) -> bool {
1750        matches!(self.inner, FirstHopInner::Circ(_))
1751    }
1752}
1753
1754// This is somewhat redundant with the implementations in crate::guard::Guard.
1755impl tor_linkspec::HasAddrs for FirstHop {
1756    fn addrs(&self) -> &[SocketAddr] {
1757        match &self.inner {
1758            FirstHopInner::Chan(ct) => ct.addrs(),
1759            FirstHopInner::Circ(ct) => ct.addrs(),
1760        }
1761    }
1762}
1763impl tor_linkspec::HasRelayIds for FirstHop {
1764    fn identity(
1765        &self,
1766        key_type: tor_linkspec::RelayIdType,
1767    ) -> Option<tor_linkspec::RelayIdRef<'_>> {
1768        match &self.inner {
1769            FirstHopInner::Chan(ct) => ct.identity(key_type),
1770            FirstHopInner::Circ(ct) => ct.identity(key_type),
1771        }
1772    }
1773}
1774impl tor_linkspec::HasChanMethod for FirstHop {
1775    fn chan_method(&self) -> tor_linkspec::ChannelMethod {
1776        match &self.inner {
1777            FirstHopInner::Chan(ct) => ct.chan_method(),
1778            FirstHopInner::Circ(ct) => ct.chan_method(),
1779        }
1780    }
1781}
// No items are defined here.  Presumably `ChanTarget` only requires the
// `HasAddrs`, `HasRelayIds` and `HasChanMethod` implementations above
// (verify against `tor_linkspec`).
impl tor_linkspec::ChanTarget for FirstHop {}
1783
/// The purpose for which we plan to use a guard.
///
/// This can affect the guard selection algorithm.
///
/// The default is [`GuardUsageKind::Data`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub enum GuardUsageKind {
    /// We want to use this guard for a data circuit.
    ///
    /// (This encompasses everything except the `OneHopDirectory` case.)
    #[default]
    Data,
    /// We want to use this guard for a one-hop, non-anonymous
    /// directory request.
    ///
    /// (Our algorithm allows more parallelism for the guards that we use
    /// for these circuits.)
    OneHopDirectory,
}
1802
/// A set of parameters describing how a single guard should be selected.
///
/// Used as an argument to [`GuardMgr::select_guard`].
///
/// Values are assembled with the derived [`GuardUsageBuilder`].
#[derive(Clone, Debug, derive_builder::Builder)]
#[builder(build_fn(error = "tor_config::ConfigBuildError"))]
pub struct GuardUsage {
    /// The purpose for which this guard will be used.
    ///
    /// If unset in the builder, this takes the default [`GuardUsageKind`].
    #[builder(default)]
    kind: GuardUsageKind,
    /// A list of restrictions on which guard may be used.
    ///
    /// The default is the empty list.
    #[builder(sub_builder, setter(custom))]
    restrictions: GuardRestrictionList,
}

// NOTE(review): presumably this supplies the usual builder-related impls for
// `GuardUsage` (the tests in this file rely on `GuardUsage::default()`), minus
// `Deserialize` support for the builder -- confirm against the macro's docs.
impl_standard_builder! { GuardUsage: !Deserialize }
1820
/// List of guard restrictions, as configured
pub type GuardRestrictionList = Vec<GuardRestriction>;
1823
// Declare `GuardRestrictionListBuilder`, the builder for a
// `GuardRestrictionList`.  The list defaults to empty, and each item is
// "built" simply by cloning it.
define_list_builder_helper! {
    pub struct GuardRestrictionListBuilder {
        restrictions: [GuardRestriction],
    }
    built: GuardRestrictionList = restrictions;
    default = vec![];
    item_build: |restriction| Ok(restriction.clone());
}
1832
// Declare the accessors for the `restrictions` list on `GuardUsageBuilder`.
// (That field's setter is marked `custom` on `GuardUsage`, so derive_builder
// does not generate one.)
define_list_builder_accessors! {
    struct GuardUsageBuilder {
        pub restrictions: [GuardRestriction],
    }
}
1838
1839impl GuardUsageBuilder {
1840    /// Create a new empty [`GuardUsageBuilder`].
1841    pub fn new() -> Self {
1842        Self::default()
1843    }
1844}
1845
/// A restriction that applies to a single request for a guard.
///
/// Restrictions differ from filters (see [`GuardFilter`]) in that
/// they apply to single requests, not to our entire set of guards.
/// They're suitable for things like making sure that we don't start
/// and end a circuit at the same relay, or requiring a specific
/// subprotocol version for certain kinds of requests.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub enum GuardRestriction {
    /// Don't pick a guard with the provided identity.
    AvoidId(RelayId),
    /// Don't pick a guard with any of the provided Ed25519 identities.
    //
    // NOTE(review): nothing visible here limits `RelayIdSet` to Ed25519
    // identities; confirm whether the doc comment above should simply say
    // "identities".
    AvoidAllIds(RelayIdSet),
}
1861
/// The kind of vanguards to use.
///
/// The default is [`VanguardMode::Lite`].
//
// The explicit discriminants are significant: the derived ordering compares
// them, so `Disabled < Lite < Full` even though `Disabled` is declared last.
// They also agree with the integer values accepted by
// `VanguardMode::from_net_parameter`.
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
#[derive(Serialize, Deserialize)] //
#[derive(derive_more::Display)] //
#[serde(rename_all = "lowercase")]
#[cfg(feature = "vanguards")]
#[non_exhaustive]
pub enum VanguardMode {
    /// "Lite" vanguards.
    #[default]
    #[display("lite")]
    Lite = 1,
    /// "Full" vanguards.
    #[display("full")]
    Full = 2,
    /// Vanguards are disabled.
    #[display("disabled")]
    Disabled = 0,
}
1881
#[cfg(feature = "vanguards")]
impl VanguardMode {
    /// Build a `VanguardMode` from a [`NetParameters`] parameter.
    ///
    /// Used for converting [`vanguards_enabled`](NetParameters::vanguards_enabled)
    /// or [`vanguards_hs_service`](NetParameters::vanguards_hs_service)
    /// to the corresponding `VanguardMode`.
    ///
    /// The mapping is 0 => `Disabled`, 1 => `Lite`, 2 => `Full`.
    pub(crate) fn from_net_parameter(val: BoundedInt32<0, 2>) -> Self {
        let raw = val.get();
        match raw {
            2 => Self::Full,
            1 => Self::Lite,
            0 => Self::Disabled,
            // The parameter type bounds the value to 0..=2.
            _ => unreachable!("BoundedInt32 was not bounded?!"),
        }
    }
}
1898
// NOTE(review): presumably required so that `VanguardMode` can be wrapped in
// `ExplicitOrAuto` (as `VanguardConfig::mode` does) -- confirm against the
// macro's documentation.
impl_not_auto_value!(VanguardMode);
1900
/// Vanguards configuration.
///
/// Use [`VanguardConfig::mode`] to obtain the effective [`VanguardMode`].
#[derive(Debug, Default, Clone, Eq, PartialEq, derive_builder::Builder)]
#[builder(build_fn(error = "ConfigBuildError"))]
#[builder(derive(Debug, Serialize, Deserialize))]
pub struct VanguardConfig {
    /// The kind of vanguards to use.
    ///
    /// Either an explicit mode, or "auto" (which resolves to the default
    /// `VanguardMode`).
    #[builder_field_attr(serde(default))]
    #[builder(default)]
    mode: ExplicitOrAuto<VanguardMode>,
}
1911
1912impl VanguardConfig {
1913    /// Return the configured [`VanguardMode`].
1914    ///
1915    /// Returns the [`Default`] `VanguardMode`
1916    /// if the mode is [`Auto`](ExplicitOrAuto) or unspecified.
1917    pub fn mode(&self) -> VanguardMode {
1918        match self.mode {
1919            ExplicitOrAuto::Auto => Default::default(),
1920            ExplicitOrAuto::Explicit(mode) => mode,
1921        }
1922    }
1923}
1924
/// The kind of vanguards to use.
///
/// This definition is used when the `vanguards` feature is disabled;
/// `Disabled` is then the only variant, and the default.
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
#[derive(Serialize, Deserialize)] //
#[derive(derive_more::Display)] //
#[serde(rename_all = "lowercase")]
#[cfg(not(feature = "vanguards"))]
#[non_exhaustive]
pub enum VanguardMode {
    /// Vanguards are disabled.
    #[default]
    #[display("disabled")]
    Disabled = 0,
}
1938
1939#[cfg(test)]
1940mod test {
1941    // @@ begin test lint list maintained by maint/add_warning @@
1942    #![allow(clippy::bool_assert_comparison)]
1943    #![allow(clippy::clone_on_copy)]
1944    #![allow(clippy::dbg_macro)]
1945    #![allow(clippy::mixed_attributes_style)]
1946    #![allow(clippy::print_stderr)]
1947    #![allow(clippy::print_stdout)]
1948    #![allow(clippy::single_char_pattern)]
1949    #![allow(clippy::unwrap_used)]
1950    #![allow(clippy::unchecked_duration_subtraction)]
1951    #![allow(clippy::useless_vec)]
1952    #![allow(clippy::needless_pass_by_value)]
1953    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
1954    use super::*;
1955    use tor_linkspec::{HasAddrs, HasRelayIds};
1956    use tor_persist::TestingStateMgr;
1957    use tor_rtcompat::test_with_all_runtimes;
1958
1959    #[test]
1960    fn guard_param_defaults() {
1961        let p1 = GuardParams::default();
1962        let p2: GuardParams = (&NetParameters::default()).try_into().unwrap();
1963        assert_eq!(p1, p2);
1964    }
1965
1966    fn init<R: Runtime>(rt: R) -> (GuardMgr<R>, TestingStateMgr, NetDir) {
1967        use tor_netdir::{testnet, MdReceiver, PartialNetDir};
1968        let statemgr = TestingStateMgr::new();
1969        let have_lock = statemgr.try_lock().unwrap();
1970        assert!(have_lock.held());
1971        let guardmgr = GuardMgr::new(rt, statemgr.clone(), &TestConfig::default()).unwrap();
1972        let (con, mds) = testnet::construct_network().unwrap();
1973        let param_overrides = vec![
1974            // We make the sample size smaller than usual to compensate for the
1975            // small testing network.  (Otherwise, we'd sample the whole network,
1976            // and not be able to observe guards in the tests.)
1977            "guard-min-filtered-sample-size=5",
1978            // We choose only two primary guards, to make the tests easier to write.
1979            "guard-n-primary-guards=2",
1980            // We define any restriction that allows 75% or fewer of relays as "meaningful",
1981            // so that we can test the "restrictive" guard sample behavior, and to avoid
1982            "guard-meaningful-restriction-percent=75",
1983        ];
1984        let param_overrides: String =
1985            itertools::Itertools::intersperse(param_overrides.into_iter(), " ").collect();
1986        let override_p = param_overrides.parse().unwrap();
1987        let mut netdir = PartialNetDir::new(con, Some(&override_p));
1988        for md in mds {
1989            netdir.add_microdesc(md);
1990        }
1991        let netdir = netdir.unwrap_if_sufficient().unwrap();
1992
1993        (guardmgr, statemgr, netdir)
1994    }
1995
    /// Select a guard, confirm it, persist the state, and check that a new
    /// `GuardMgr` loaded from that state selects the same guard.
    #[test]
    #[allow(clippy::clone_on_copy)]
    fn simple_case() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, statemgr, netdir) = init(rt.clone());
            let usage = GuardUsage::default();
            guardmgr.install_test_netdir(&netdir);

            let (id, mon, usable) = guardmgr.select_guard(usage).unwrap();
            // Report that the circuit succeeded.
            mon.succeeded();

            // May we use the circuit?
            let usable = usable.await.unwrap();
            assert!(usable);

            // Save the state...
            guardmgr.flush_msg_queue().await;
            guardmgr.store_persistent_state().unwrap();
            drop(guardmgr);

            // Try reloading from the state...
            let guardmgr2 =
                GuardMgr::new(rt.clone(), statemgr.clone(), &TestConfig::default()).unwrap();
            guardmgr2.install_test_netdir(&netdir);

            // Since the guard was confirmed, we should get the same one this time!
            let usage = GuardUsage::default();
            let (id2, _mon, _usable) = guardmgr2.select_guard(usage).unwrap();
            assert!(id2.same_relay_ids(&id));
        });
    }
2028
    /// Fail both primary guards, then request two more guards and check that
    /// only the one whose circuit succeeds is reported as usable.
    #[test]
    fn simple_waiting() {
        // TODO(nickm): This test fails in rare cases; I suspect a
        // race condition somewhere.
        //
        // I've doubled up on the queue flushing in order to try to make the
        // race less likely, but we should investigate.
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let u = GuardUsage::default();
            guardmgr.install_test_netdir(&netdir);

            // We'll have the first two guards fail, which should make us
            // try a non-primary guard.
            let (id1, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
            mon.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race
            let (id2, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
            mon.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race

            assert!(!id1.same_relay_ids(&id2));

            // Now we should get two sampled guards. They should be different.
            let (id3, mon3, usable3) = guardmgr.select_guard(u.clone()).unwrap();
            let (id4, mon4, usable4) = guardmgr.select_guard(u.clone()).unwrap();
            assert!(!id3.same_relay_ids(&id4));

            // Report a failure for the third guard and a success for the
            // fourth, and wait for both usability verdicts.
            let (u3, u4) = futures::join!(
                async {
                    mon3.failed();
                    guardmgr.flush_msg_queue().await; // avoid race
                    usable3.await.unwrap()
                },
                async {
                    mon4.succeeded();
                    usable4.await.unwrap()
                }
            );

            assert_eq!((u3, u4), (false, true));
        });
    }
2074
    /// Install an address filter and check that the selected guard obeys it.
    #[test]
    fn filtering_basics() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let u = GuardUsage::default();
            let filter = {
                let mut f = GuardFilter::default();
                // All the addresses in the test network are {0,1,2,3,4}.0.0.3:9001.
                // Limit to only 2.0.0.0/8
                f.push_reachable_addresses(vec!["2.0.0.0/8:9001".parse().unwrap()]);
                f
            };
            guardmgr.set_filter(filter);
            guardmgr.install_test_netdir(&netdir);
            let (guard, _mon, _usable) = guardmgr.select_guard(u).unwrap();
            // Make sure that the filter worked.
            let addr = guard.addrs()[0];
            assert_eq!(addr, "2.0.0.3:9001".parse().unwrap());
        });
    }
2095
    /// Check that externally reported directory-cache failures and successes
    /// affect guard selection for directory usage, but not for data usage.
    #[test]
    fn external_status() {
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let data_usage = GuardUsage::default();
            let dir_usage = GuardUsageBuilder::new()
                .kind(GuardUsageKind::OneHopDirectory)
                .build()
                .unwrap();
            guardmgr.install_test_netdir(&netdir);
            {
                // Override this parameter, so that we can get deterministic results below.
                let mut inner = guardmgr.inner.lock().unwrap();
                inner.params.dir_parallelism = 1;
            }

            let (guard, mon, _usable) = guardmgr.select_guard(data_usage.clone()).unwrap();
            mon.succeeded();

            // Record that this guard gave us a bad directory object.
            guardmgr.note_external_failure(&guard, ExternalActivity::DirCache);

            // We ask for another guard, for data usage.  We should get the same
            // one as last time, since the directory failure doesn't mean this
            // guard is useless as a primary guard.
            let (g2, mon, _usable) = guardmgr.select_guard(data_usage).unwrap();
            assert_eq!(g2.ed_identity(), guard.ed_identity());
            mon.succeeded();

            // But if we ask for a guard for directory usage, we should get a
            // different one, since the last guard we gave out failed.
            let (g3, mon, _usable) = guardmgr.select_guard(dir_usage.clone()).unwrap();
            assert_ne!(g3.ed_identity(), guard.ed_identity());
            mon.succeeded();

            // Now record a success for directory usage.
            guardmgr.note_external_success(&guard, ExternalActivity::DirCache);

            // Now that the guard is working as a cache, asking for it should get us the same guard.
            let (g4, _mon, _usable) = guardmgr.select_guard(dir_usage).unwrap();
            assert_eq!(g4.ed_identity(), guard.ed_identity());
        });
    }
2139
2140    #[cfg(feature = "vanguards")]
2141    #[test]
2142    fn vanguard_mode_ord() {
2143        assert!(VanguardMode::Disabled < VanguardMode::Lite);
2144        assert!(VanguardMode::Disabled < VanguardMode::Full);
2145        assert!(VanguardMode::Lite < VanguardMode::Full);
2146    }
2147}