tor_guardmgr/
lib.rs

1#![cfg_attr(docsrs, feature(doc_auto_cfg, doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_duration_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
46
47// TODO #1645 (either remove this, or decide to have it everywhere)
48#![cfg_attr(not(all(feature = "full", feature = "experimental")), allow(unused))]
49
50// Glossary:
51//     Primary guard
52//     Sample
53//     confirmed
54//     filtered
55
56use futures::channel::mpsc;
57use futures::task::SpawnExt;
58use serde::{Deserialize, Serialize};
59use std::collections::HashMap;
60use std::net::SocketAddr;
61use std::sync::{Arc, Mutex, Weak};
62use std::time::{Duration, Instant, SystemTime};
63#[cfg(feature = "bridge-client")]
64use tor_error::internal;
65use tor_linkspec::{OwnedChanTarget, OwnedCircTarget, RelayId, RelayIdSet};
66use tor_netdir::NetDirProvider;
67use tor_proto::ClockSkew;
68use tor_units::BoundedInt32;
69use tracing::{debug, info, trace, warn};
70
71use tor_config::{define_list_builder_accessors, define_list_builder_helper};
72use tor_config::{impl_not_auto_value, ReconfigureError};
73use tor_config::{impl_standard_builder, ExplicitOrAuto};
74use tor_netdir::{params::NetParameters, NetDir, Relay};
75use tor_persist::{DynStorageHandle, StateMgr};
76use tor_rtcompat::Runtime;
77
78#[cfg(feature = "bridge-client")]
79pub mod bridge;
80mod config;
81mod daemon;
82mod dirstatus;
83mod err;
84mod events;
85pub mod fallback;
86mod filter;
87mod guard;
88mod ids;
89mod pending;
90mod sample;
91mod skew;
92mod util;
93#[cfg(feature = "vanguards")]
94pub mod vanguards;
95
96#[cfg(not(feature = "bridge-client"))]
97#[path = "bridge_disabled.rs"]
98pub mod bridge;
99
100#[cfg(any(test, feature = "testing"))]
101pub use config::testing::TestConfig;
102
103#[cfg(test)]
104use oneshot_fused_workaround as oneshot;
105
106pub use config::GuardMgrConfig;
107pub use err::{GuardMgrConfigError, GuardMgrError, PickGuardError};
108pub use events::ClockSkewEvents;
109pub use filter::GuardFilter;
110pub use ids::FirstHopId;
111pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
112pub use skew::SkewEstimate;
113
114#[cfg(feature = "vanguards")]
115#[cfg_attr(docsrs, doc(cfg(feature = "vanguards")))]
116pub use vanguards::VanguardMgrError;
117
118use pending::{PendingRequest, RequestId};
119use sample::{GuardSet, Universe, UniverseRef};
120
121use crate::ids::{FirstHopIdInner, GuardId};
122
123use tor_config::ConfigBuildError;
124
/// A "guard manager" that selects and remembers a persistent set of
/// guard nodes.
///
/// This is a "handle"; clones of it share state, since every clone holds
/// the same `Arc<Mutex<GuardMgrInner>>`.
#[derive(Clone)]
pub struct GuardMgr<R: Runtime> {
    /// An asynchronous runtime object.
    ///
    /// GuardMgr uses this runtime for timing, timeouts, and spawning
    /// tasks.
    runtime: R,

    /// Internal state for the guard manager.
    ///
    /// The background tasks spawned by this manager are handed `Weak`
    /// references to this same state.
    inner: Arc<Mutex<GuardMgrInner>>,
}
140
/// Helper type that holds the data used by a [`GuardMgr`].
///
/// This would just be a [`GuardMgr`], except that it needs to sit inside
/// a `Mutex` and get accessed by daemon tasks.
struct GuardMgrInner {
    /// Last time when we marked all of our primary guards as retriable.
    ///
    /// We keep track of this time so that we can rate-limit
    /// these attempts.
    last_primary_retry_time: Instant,

    /// Persistent guard manager state.
    ///
    /// This object remembers one or more persistent sets of guards that we can
    /// use, along with their relative priorities and statuses.
    guards: GuardSets,

    /// The current filter that we're using to decide which guards are
    /// supported.
    //
    // TODO: This field is duplicated in the current active [`GuardSet`]; we
    // should fix that.
    filter: GuardFilter,

    /// Configuration values derived from the consensus parameters.
    ///
    /// This is updated whenever the consensus parameters change.
    params: GuardParams,

    /// A mpsc channel, used to tell the task running in
    /// [`daemon::report_status_events`] about a new event to monitor.
    ///
    /// This uses an `UnboundedSender` so that we don't have to await
    /// while sending the message, which in turn allows the GuardMgr
    /// API to be simpler.  The risk, however, is that there's no
    /// backpressure in the event that the task running
    /// [`daemon::report_status_events`] fails to read from this
    /// channel.
    ctrl: mpsc::UnboundedSender<daemon::Msg>,

    /// Information about guards that we've given out, but where we have
    /// not yet heard whether the guard was successful.
    ///
    /// Upon learning whether the guard was successful, the pending
    /// requests in this map may be either moved to `waiting`, or
    /// discarded.
    ///
    /// There can be multiple pending requests corresponding to the
    /// same guard.
    pending: HashMap<RequestId, PendingRequest>,

    /// A list of pending requests for which we have heard that the
    /// guard was successful, but we have not yet decided whether the
    /// circuit may be used.
    ///
    /// There can be multiple waiting requests corresponding to the
    /// same guard.
    waiting: Vec<PendingRequest>,

    /// A list of fallback directories used to access the directory system
    /// when no other directory information is yet known.
    fallbacks: fallback::FallbackState,

    /// Location in which to store persistent state.
    storage: DynStorageHandle<GuardSets>,

    /// A sender object to publish changes in our estimated clock skew.
    send_skew: postage::watch::Sender<Option<SkewEstimate>>,

    /// A receiver object to hand out to observers who want to know about
    /// changes in our estimated clock skew.
    recv_skew: events::ClockSkewEvents,

    /// A netdir provider that we can use for adding new guards when
    /// insufficient guards are available.
    ///
    /// This has to be an Option so it can be initialized from None: at the
    /// time a GuardMgr is created, there is no NetDirProvider for it to use.
    netdir_provider: Option<Weak<dyn NetDirProvider>>,

    /// A bridge descriptor provider that we can use for discovering bridge
    /// descriptors.
    ///
    /// This has to be an Option so it can be initialized from None: at the time
    /// a GuardMgr is created, there is no BridgeDescProvider for it to use.
    #[cfg(feature = "bridge-client")]
    bridge_desc_provider: Option<Weak<dyn bridge::BridgeDescProvider>>,

    /// A list of the bridges that we are configured to use, or "None" if we are
    /// not configured to use bridges.
    #[cfg(feature = "bridge-client")]
    configured_bridges: Option<Arc<[bridge::BridgeConfig]>>,
}
233
/// A selector that tells us which [`GuardSet`] of several is currently in use.
///
/// The derived `strum::EnumIter` lets callers visit every selector in
/// declaration order.
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, strum::EnumIter)]
enum GuardSetSelector {
    /// The default guard set is currently in use: that's the one that we use
    /// when we have no filter installed, or the filter permits most of the
    /// guards on the network.
    #[default]
    Default,
    /// A "restrictive" guard set is currently in use: that's the one that we
    /// use when we have a filter that excludes a large fraction of the guards
    /// on the network.
    Restricted,
    /// The "bridges" guard set is currently in use: we are selecting our guards
    /// from among the universe of configured bridges.
    ///
    /// Only present when the `bridge-client` feature is enabled.
    #[cfg(feature = "bridge-client")]
    Bridges,
}
251
/// Describes the [`Universe`] that a guard sample should take its guards from.
///
/// Each [`GuardSetSelector`] maps onto exactly one of these values.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum UniverseType {
    /// Take information from the network directory.
    NetDir,
    /// Take information from the configured bridges.
    ///
    /// Only present when the `bridge-client` feature is enabled.
    #[cfg(feature = "bridge-client")]
    BridgeSet,
}
261
262impl GuardSetSelector {
263    /// Return a description of which [`Universe`] this guard sample should take
264    /// its guards from.
265    fn universe_type(&self) -> UniverseType {
266        match self {
267            GuardSetSelector::Default | GuardSetSelector::Restricted => UniverseType::NetDir,
268            #[cfg(feature = "bridge-client")]
269            GuardSetSelector::Bridges => UniverseType::BridgeSet,
270        }
271    }
272}
273
/// Persistent state for a guard manager, as serialized to disk.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct GuardSets {
    /// Which set of guards is currently in use?
    ///
    /// This field is not part of the serialized form (`#[serde(skip)]`).
    #[serde(skip)]
    active_set: GuardSetSelector,

    /// The default set of guards to use.
    ///
    /// We use this one when there is no filter, or the filter permits most of the
    /// guards on the network.
    default: GuardSet,

    /// A guard set to use when we have a restrictive filter.
    ///
    /// Falls back to an empty default when absent from the stored state.
    #[serde(default)]
    restricted: GuardSet,

    /// A guard set sampled from our configured bridges.
    ///
    /// Falls back to an empty default when absent from the stored state.
    #[serde(default)]
    #[cfg(feature = "bridge-client")]
    bridges: GuardSet,

    /// Unrecognized fields, including (possibly) other guard sets.
    ///
    /// Collected via `#[serde(flatten)]`, so that they are written back out
    /// when this object is serialized again.
    #[serde(flatten)]
    remaining: HashMap<String, tor_persist::JsonValue>,
}
300
/// The key (filename) we use for storing our persistent guard state in the
/// `StateMgr`.
///
/// `GuardMgr::new` passes this key to `StateMgr::create_handle` to obtain the
/// storage handle for our [`GuardSets`].
///
/// We used to store this in a different format in a filename called
/// "default_guards" (before Arti 0.1.0).
const STORAGE_KEY: &str = "guards";
307
/// A description of which circuits to retire because of a configuration change.
///
/// This type is `#[must_use]`: a caller that receives it is expected to act
/// on it (or to discard it explicitly).
///
/// TODO(nickm): Eventually we will want to add a "Some" here, to support
/// removing only those circuits that correspond to no-longer-usable guards.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
#[non_exhaustive]
pub enum RetireCircuits {
    /// There's no need to retire any circuits.
    None,
    /// All circuits should be retired.
    All,
}
321
322impl<R: Runtime> GuardMgr<R> {
323    /// Create a new "empty" guard manager and launch its background tasks.
324    ///
325    /// It won't be able to hand out any guards until a [`NetDirProvider`] has
326    /// been installed.
327    pub fn new<S>(
328        runtime: R,
329        state_mgr: S,
330        config: &impl GuardMgrConfig,
331    ) -> Result<Self, GuardMgrError>
332    where
333        S: StateMgr + Send + Sync + 'static,
334    {
335        let (ctrl, rcv) = mpsc::unbounded();
336        let storage: DynStorageHandle<GuardSets> = state_mgr.create_handle(STORAGE_KEY);
337        // TODO(nickm): We should do something about the old state in
338        // `default_guards`.  Probably it would be best to delete it.  We could
339        // try to migrate it instead, but that's beyond the stability guarantee
340        // that we're getting at this stage of our (pre-0.1) development.
341        let state = storage.load()?.unwrap_or_default();
342
343        let (send_skew, recv_skew) = postage::watch::channel();
344        let recv_skew = ClockSkewEvents { inner: recv_skew };
345
346        let inner = Arc::new(Mutex::new(GuardMgrInner {
347            guards: state,
348            filter: GuardFilter::unfiltered(),
349            last_primary_retry_time: runtime.now(),
350            params: GuardParams::default(),
351            ctrl,
352            pending: HashMap::new(),
353            waiting: Vec::new(),
354            fallbacks: config.fallbacks().into(),
355            storage,
356            send_skew,
357            recv_skew,
358            netdir_provider: None,
359            #[cfg(feature = "bridge-client")]
360            bridge_desc_provider: None,
361            #[cfg(feature = "bridge-client")]
362            configured_bridges: None,
363        }));
364        #[cfg(feature = "bridge-client")]
365        {
366            let mut inner = inner.lock().expect("lock poisoned");
367            // TODO(nickm): This calls `GuardMgrInner::update`. Will we mind doing so before any
368            // providers are configured? I think not, but we should make sure.
369            let _: RetireCircuits =
370                inner.replace_bridge_config(config, runtime.wallclock(), runtime.now())?;
371        }
372        {
373            let weak_inner = Arc::downgrade(&inner);
374            let rt_clone = runtime.clone();
375            runtime
376                .spawn(daemon::report_status_events(rt_clone, weak_inner, rcv))
377                .map_err(|e| GuardMgrError::from_spawn("guard status event reporter", e))?;
378        }
379        {
380            let rt_clone = runtime.clone();
381            let weak_inner = Arc::downgrade(&inner);
382            runtime
383                .spawn(daemon::run_periodic(rt_clone, weak_inner))
384                .map_err(|e| GuardMgrError::from_spawn("periodic guard updater", e))?;
385        }
386        Ok(GuardMgr { runtime, inner })
387    }
388
389    /// Install a [`NetDirProvider`] for use by this guard manager.
390    ///
391    /// It will be used to keep the guards up-to-date with changes from the
392    /// network directory, and to find new guards when no NetDir is provided to
393    /// select_guard().
394    ///
395    /// TODO: we should eventually return some kind of a task handle from this
396    /// task, even though it is not strictly speaking periodic.
397    ///
398    /// The guardmgr retains only a `Weak` reference to `provider`,
399    /// `install_netdir_provider` downgrades it on entry,
400    // TODO add ref to document when https://gitlab.torproject.org/tpo/core/arti/-/issues/624
401    // is fixed.  Also, maybe take an owned `Weak` to start with.
402    //
403    /// # Panics
404    ///
405    /// Panics if a [`NetDirProvider`] is already installed.
406    pub fn install_netdir_provider(
407        &self,
408        provider: &Arc<dyn NetDirProvider>,
409    ) -> Result<(), GuardMgrError> {
410        let weak_provider = Arc::downgrade(provider);
411        {
412            let mut inner = self.inner.lock().expect("Poisoned lock");
413            assert!(inner.netdir_provider.is_none());
414            inner.netdir_provider = Some(weak_provider.clone());
415        }
416        let weak_inner = Arc::downgrade(&self.inner);
417        let rt_clone = self.runtime.clone();
418        self.runtime
419            .spawn(daemon::keep_netdir_updated(
420                rt_clone,
421                weak_inner,
422                weak_provider,
423            ))
424            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
425        Ok(())
426    }
427
428    /// Configure a new [`bridge::BridgeDescProvider`] for this [`GuardMgr`].
429    ///
430    /// It will be used to learn about changes in the set of available bridge
431    /// descriptors; we'll inform it whenever our desired set of bridge
432    /// descriptors changes.
433    ///
434    /// TODO: Same todo as in `install_netdir_provider` about task handles.
435    ///
436    /// # Panics
437    ///
438    /// Panics if a [`bridge::BridgeDescProvider`] is already installed.
439    #[cfg(feature = "bridge-client")]
440    pub fn install_bridge_desc_provider(
441        &self,
442        provider: &Arc<dyn bridge::BridgeDescProvider>,
443    ) -> Result<(), GuardMgrError> {
444        let weak_provider = Arc::downgrade(provider);
445        {
446            let mut inner = self.inner.lock().expect("Poisoned lock");
447            assert!(inner.bridge_desc_provider.is_none());
448            inner.bridge_desc_provider = Some(weak_provider.clone());
449        }
450
451        let weak_inner = Arc::downgrade(&self.inner);
452        let rt_clone = self.runtime.clone();
453        self.runtime
454            .spawn(daemon::keep_bridge_descs_updated(
455                rt_clone,
456                weak_inner,
457                weak_provider,
458            ))
459            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
460
461        Ok(())
462    }
463
464    /// Flush our current guard state to the state manager, if there
465    /// is any unsaved state.
466    pub fn store_persistent_state(&self) -> Result<(), GuardMgrError> {
467        let inner = self.inner.lock().expect("Poisoned lock");
468        trace!("Flushing guard state to disk.");
469        inner.storage.store(&inner.guards)?;
470        Ok(())
471    }
472
473    /// Reload state from the state manager.
474    ///
475    /// We only call this method if we _don't_ have the lock on the state
476    /// files.  If we have the lock, we only want to save.
477    pub fn reload_persistent_state(&self) -> Result<(), GuardMgrError> {
478        let mut inner = self.inner.lock().expect("Poisoned lock");
479        if let Some(new_guards) = inner.storage.load()? {
480            inner.replace_guards_with(new_guards, self.runtime.wallclock(), self.runtime.now());
481        }
482        Ok(())
483    }
484
485    /// Switch from having an unowned persistent state to having an owned one.
486    ///
487    /// Requires that we hold the lock on the state files.
488    pub fn upgrade_to_owned_persistent_state(&self) -> Result<(), GuardMgrError> {
489        let mut inner = self.inner.lock().expect("Poisoned lock");
490        debug_assert!(inner.storage.can_store());
491        let new_guards = inner.storage.load()?.unwrap_or_default();
492        let wallclock = self.runtime.wallclock();
493        let now = self.runtime.now();
494        inner.replace_guards_with(new_guards, wallclock, now);
495        Ok(())
496    }
497
498    /// Return true if `netdir` has enough information to safely become our new netdir.
499    pub fn netdir_is_sufficient(&self, netdir: &NetDir) -> bool {
500        let mut inner = self.inner.lock().expect("Poisoned lock");
501        if inner.guards.active_set.universe_type() != UniverseType::NetDir {
502            // If we aren't using the netdir, this isn't something we want to look at.
503            return true;
504        }
505        inner
506            .guards
507            .active_guards_mut()
508            .n_primary_without_id_info_in(netdir)
509            == 0
510    }
511
512    /// Mark every guard as potentially retriable, regardless of how recently we
513    /// failed to connect to it.
514    pub fn mark_all_guards_retriable(&self) {
515        let mut inner = self.inner.lock().expect("Poisoned lock");
516        inner.guards.active_guards_mut().mark_all_guards_retriable();
517    }
518
519    /// Configure this guardmgr to use a fixed [`NetDir`] instead of a provider.
520    ///
521    /// This function is for testing only, and is exclusive with
522    /// `install_netdir_provider`.
523    ///
524    /// # Panics
525    ///
526    /// Panics if any [`NetDirProvider`] has already been installed.
527    #[cfg(any(test, feature = "testing"))]
528    pub fn install_test_netdir(&self, netdir: &NetDir) {
529        use tor_netdir::testprovider::TestNetDirProvider;
530        let wallclock = self.runtime.wallclock();
531        let now = self.runtime.now();
532        let netdir_provider: Arc<dyn NetDirProvider> =
533            Arc::new(TestNetDirProvider::from(netdir.clone()));
534        self.install_netdir_provider(&netdir_provider)
535            .expect("Couldn't install testing network provider");
536
537        let mut inner = self.inner.lock().expect("Poisoned lock");
538        inner.update(wallclock, now);
539    }
540
541    /// Replace the configuration in this `GuardMgr` with `config`.
542    pub fn reconfigure(
543        &self,
544        config: &impl GuardMgrConfig,
545    ) -> Result<RetireCircuits, ReconfigureError> {
546        let mut inner = self.inner.lock().expect("Poisoned lock");
547        // Change the set of configured fallbacks.
548        {
549            let mut fallbacks: fallback::FallbackState = config.fallbacks().into();
550            std::mem::swap(&mut inner.fallbacks, &mut fallbacks);
551            inner.fallbacks.take_status_from(fallbacks);
552        }
553        // If we are built to use bridges, change the bridge configuration.
554        #[cfg(feature = "bridge-client")]
555        {
556            let wallclock = self.runtime.wallclock();
557            let now = self.runtime.now();
558            Ok(inner.replace_bridge_config(config, wallclock, now)?)
559        }
560        // If we are built to use bridges, change the bridge configuration.
561        #[cfg(not(feature = "bridge-client"))]
562        {
563            Ok(RetireCircuits::None)
564        }
565    }
566
567    /// Replace the current [`GuardFilter`] used by this `GuardMgr`.
568    // TODO should this be part of the config?
569    pub fn set_filter(&self, filter: GuardFilter) {
570        let wallclock = self.runtime.wallclock();
571        let now = self.runtime.now();
572        let mut inner = self.inner.lock().expect("Poisoned lock");
573        inner.set_filter(filter, wallclock, now);
574    }
575
576    /// Select a guard for a given [`GuardUsage`].
577    ///
578    /// On success, we return a [`FirstHop`] object to identify which
579    /// guard we have picked, a [`GuardMonitor`] object that the
580    /// caller can use to report whether its attempt to use the guard
581    /// succeeded or failed, and a [`GuardUsable`] future that the
582    /// caller can use to decide whether a circuit built through the
583    /// guard is actually safe to use.
584    ///
585    /// That last point is important: It's okay to build a circuit
586    /// through the guard returned by this function, but you can't
587    /// actually use it for traffic unless the [`GuardUsable`] future
588    /// yields "true".
589    pub fn select_guard(
590        &self,
591        usage: GuardUsage,
592    ) -> Result<(FirstHop, GuardMonitor, GuardUsable), PickGuardError> {
593        let now = self.runtime.now();
594        let wallclock = self.runtime.wallclock();
595
596        let mut inner = self.inner.lock().expect("Poisoned lock");
597
598        // (I am not 100% sure that we need to consider_all_retries here, but
599        // it should _probably_ not hurt.)
600        inner.guards.active_guards_mut().consider_all_retries(now);
601
602        let (origin, guard) = inner.select_guard_with_expand(&usage, now, wallclock)?;
603        trace!(?guard, ?usage, "Guard selected");
604
605        let (usable, usable_sender) = if origin.usable_immediately() {
606            (GuardUsable::new_usable_immediately(), None)
607        } else {
608            let (u, snd) = GuardUsable::new_uncertain();
609            (u, Some(snd))
610        };
611        let request_id = pending::RequestId::next();
612        let ctrl = inner.ctrl.clone();
613        let monitor = GuardMonitor::new(request_id, ctrl);
614
615        // Note that the network can be down even if all the primary guards
616        // are not yet marked as unreachable.  But according to guard-spec we
617        // don't want to acknowledge the net as down before that point, since
618        // we don't mark all the primary guards as retriable unless
619        // we've been forced to non-primary guards.
620        let net_has_been_down =
621            if let Some(duration) = tor_proto::time_since_last_incoming_traffic() {
622                inner
623                    .guards
624                    .active_guards_mut()
625                    .all_primary_guards_are_unreachable()
626                    && duration >= inner.params.internet_down_timeout
627            } else {
628                // TODO: Is this the correct behavior in this case?
629                false
630            };
631
632        let pending_request = pending::PendingRequest::new(
633            guard.first_hop_id(),
634            usage,
635            usable_sender,
636            net_has_been_down,
637        );
638        inner.pending.insert(request_id, pending_request);
639
640        match &guard.sample {
641            Some(sample) => {
642                let guard_id = GuardId::from_relay_ids(&guard);
643                inner
644                    .guards
645                    .guards_mut(sample)
646                    .record_attempt(&guard_id, now);
647            }
648            None => {
649                // We don't record attempts for fallbacks; we only care when
650                // they have failed.
651            }
652        }
653
654        Ok((guard, monitor, usable))
655    }
656
657    /// Record that _after_ we built a circuit with a guard, something described
658    /// in `external_failure` went wrong with it.
659    pub fn note_external_failure<T>(&self, identity: &T, external_failure: ExternalActivity)
660    where
661        T: tor_linkspec::HasRelayIds + ?Sized,
662    {
663        let now = self.runtime.now();
664        let mut inner = self.inner.lock().expect("Poisoned lock");
665        let ids = inner.lookup_ids(identity);
666        for id in ids {
667            match &id.0 {
668                FirstHopIdInner::Guard(sample, id) => {
669                    inner
670                        .guards
671                        .guards_mut(sample)
672                        .record_failure(id, Some(external_failure), now);
673                }
674                FirstHopIdInner::Fallback(id) => {
675                    if external_failure == ExternalActivity::DirCache {
676                        inner.fallbacks.note_failure(id, now);
677                    }
678                }
679            }
680        }
681    }
682
683    /// Record that _after_ we built a circuit with a guard, some activity
684    /// described in `external_activity` was successful with it.
685    pub fn note_external_success<T>(&self, identity: &T, external_activity: ExternalActivity)
686    where
687        T: tor_linkspec::HasRelayIds + ?Sized,
688    {
689        let mut inner = self.inner.lock().expect("Poisoned lock");
690
691        inner.record_external_success(identity, external_activity, self.runtime.wallclock());
692    }
693
694    /// Return a stream of events about our estimated clock skew; these events
695    /// are `None` when we don't have enough information to make an estimate,
696    /// and `Some(`[`SkewEstimate`]`)` otherwise.
697    ///
698    /// Note that this stream can be lossy: if the estimate changes more than
699    /// one before you read from the stream, you might only get the most recent
700    /// update.
701    pub fn skew_events(&self) -> ClockSkewEvents {
702        let inner = self.inner.lock().expect("Poisoned lock");
703        inner.recv_skew.clone()
704    }
705
706    /// Ensure that the message queue is flushed before proceeding to
707    /// the next step.  Used for testing.
708    #[cfg(test)]
709    async fn flush_msg_queue(&self) {
710        let (snd, rcv) = oneshot::channel();
711        let pingmsg = daemon::Msg::Ping(snd);
712        {
713            let inner = self.inner.lock().expect("Poisoned lock");
714            inner
715                .ctrl
716                .unbounded_send(pingmsg)
717                .expect("Guard observer task exited prematurely.");
718        }
719        let _ = rcv.await;
720    }
721}
722
/// An activity that can succeed or fail, and whose success or failure can be
/// attributed to a guard.
///
/// Callers report outcomes of these activities with
/// `GuardMgr::note_external_success` and `GuardMgr::note_external_failure`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ExternalActivity {
    /// The activity of using the guard as a directory cache.
    DirCache,
}
731
732impl GuardSets {
733    /// Return a reference to the currently active set of guards.
734    ///
735    /// (That's easy enough for now, since there is never more than one set of
736    /// guards.  But eventually that will change, as we add support for more
737    /// complex filter types, and for bridge relays. Those will use separate
738    /// `GuardSet` instances, and this accessor will choose the right one.)
739    fn active_guards(&self) -> &GuardSet {
740        self.guards(&self.active_set)
741    }
742
743    /// Return the set of guards corresponding to the provided selector.
744    fn guards(&self, selector: &GuardSetSelector) -> &GuardSet {
745        match selector {
746            GuardSetSelector::Default => &self.default,
747            GuardSetSelector::Restricted => &self.restricted,
748            #[cfg(feature = "bridge-client")]
749            GuardSetSelector::Bridges => &self.bridges,
750        }
751    }
752
753    /// Return a mutable reference to the currently active set of guards.
754    fn active_guards_mut(&mut self) -> &mut GuardSet {
755        self.guards_mut(&self.active_set.clone())
756    }
757
758    /// Return a mutable reference to the set of guards corresponding to the
759    /// provided selector.
760    fn guards_mut(&mut self, selector: &GuardSetSelector) -> &mut GuardSet {
761        match selector {
762            GuardSetSelector::Default => &mut self.default,
763            GuardSetSelector::Restricted => &mut self.restricted,
764            #[cfg(feature = "bridge-client")]
765            GuardSetSelector::Bridges => &mut self.bridges,
766        }
767    }
768
769    /// Update all non-persistent state for the guards in this object with the
770    /// state in `other`.
771    fn copy_status_from(&mut self, mut other: GuardSets) {
772        use strum::IntoEnumIterator;
773        for sample in GuardSetSelector::iter() {
774            self.guards_mut(&sample)
775                .copy_ephemeral_status_into_newly_loaded_state(std::mem::take(
776                    other.guards_mut(&sample),
777                ));
778        }
779        self.active_set = other.active_set;
780    }
781}
782
783impl GuardMgrInner {
784    /// Look up the latest [`NetDir`] (if there is one) from our
785    /// [`NetDirProvider`] (if we have one).
786    fn timely_netdir(&self) -> Option<Arc<NetDir>> {
787        self.netdir_provider
788            .as_ref()
789            .and_then(Weak::upgrade)
790            .and_then(|np| np.timely_netdir().ok())
791    }
792
793    /// Look up the latest [`BridgeDescList`](bridge::BridgeDescList) (if there
794    /// is one) from our [`BridgeDescProvider`](bridge::BridgeDescProvider) (if
795    /// we have one).
796    #[cfg(feature = "bridge-client")]
797    fn latest_bridge_desc_list(&self) -> Option<Arc<bridge::BridgeDescList>> {
798        self.bridge_desc_provider
799            .as_ref()
800            .and_then(Weak::upgrade)
801            .map(|bp| bp.bridges())
802    }
803
804    /// Run a function that takes `&mut self` and an optional NetDir.
805    ///
806    /// We try to use the netdir from our [`NetDirProvider`] (if we have one).
807    /// Therefore, although its _parameters_ are suitable for every
808    /// [`GuardSet`], its _contents_ might not be. For those, call
809    /// [`with_opt_universe`](Self::with_opt_universe) instead.
810    //
811    // This function exists to handle the lifetime mess where sometimes the
812    // resulting NetDir will borrow from `netdir`, and sometimes it will borrow
813    // from an Arc returned by `self.latest_netdir()`.
814    fn with_opt_netdir<F, T>(&mut self, func: F) -> T
815    where
816        F: FnOnce(&mut Self, Option<&NetDir>) -> T,
817    {
818        if let Some(nd) = self.timely_netdir() {
819            func(self, Some(nd.as_ref()))
820        } else {
821            func(self, None)
822        }
823    }
824
825    /// Return the latest `BridgeSet` based on our `BridgeDescProvider` and our
826    /// configured bridges.
827    ///
828    /// Returns `None` if we are not configured to use bridges.
829    #[cfg(feature = "bridge-client")]
830    fn latest_bridge_set(&self) -> Option<bridge::BridgeSet> {
831        let bridge_config = self.configured_bridges.as_ref()?.clone();
832        let bridge_descs = self.latest_bridge_desc_list();
833        Some(bridge::BridgeSet::new(bridge_config, bridge_descs))
834    }
835
836    /// Run a function that takes `&mut self` and an optional [`UniverseRef`].
837    ///
838    /// We try to get a universe from the appropriate source for the current
839    /// active guard set.
840    fn with_opt_universe<F, T>(&mut self, func: F) -> T
841    where
842        F: FnOnce(&mut Self, Option<&UniverseRef>) -> T,
843    {
844        // TODO: it might be nice to make `func` take an GuardSet and a set of
845        // parameters, so we can't get the active set wrong. Doing that will
846        // require a fair amount of refactoring so that the borrow checker is
847        // happy, however.
848        match self.guards.active_set.universe_type() {
849            UniverseType::NetDir => {
850                if let Some(nd) = self.timely_netdir() {
851                    func(self, Some(&UniverseRef::NetDir(nd)))
852                } else {
853                    func(self, None)
854                }
855            }
856            #[cfg(feature = "bridge-client")]
857            UniverseType::BridgeSet => func(
858                self,
859                self.latest_bridge_set()
860                    .map(UniverseRef::BridgeSet)
861                    .as_ref(),
862            ),
863        }
864    }
865
866    /// Update the status of all guards in the active set, based on the passage
867    /// of time, our configuration, and the relevant Universe for our active
868    /// set.
869    fn update(&mut self, wallclock: SystemTime, now: Instant) {
870        self.with_opt_netdir(|this, netdir| {
871            // Here we update our parameters from the latest NetDir, and check
872            // whether we need to change to a (non)-restrictive GuardSet based
873            // on those parameters and our configured filter.
874            //
875            // This uses a NetDir unconditionally, since we always want to take
876            // the network parameters our parameters from the consensus even if
877            // the guards themselves are from a BridgeSet.
878            this.update_active_set_params_and_filter(netdir);
879        });
880        self.with_opt_universe(|this, univ| {
881            // Now we update the set of guards themselves based on the
882            // Universe, which is either the latest NetDir, or the latest
883            // BridgeSet—depending on what the GuardSet wants.
884            Self::update_guardset_internal(
885                &this.params,
886                wallclock,
887                this.guards.active_set.universe_type(),
888                this.guards.active_guards_mut(),
889                univ,
890            );
891            #[cfg(feature = "bridge-client")]
892            this.update_desired_descriptors(now);
893            #[cfg(not(feature = "bridge-client"))]
894            let _ = now;
895        });
896    }
897
    /// Replace our bridge configuration with the one from `new_config`.
    ///
    /// Depending on the old and new configuration, this either does nothing,
    /// installs a new list of bridges and makes the bridge guard set active,
    /// or clears the bridge list and makes the default guard set active.
    /// After any change, `update` is called so that the active `GuardSet`
    /// reflects the new configuration.
    ///
    /// Returns `RetireCircuits::None` if nothing changed, and
    /// `RetireCircuits::All` otherwise.
    ///
    /// # Errors
    ///
    /// Returns `GuardMgrConfigError::NoLock` if `new_config` enables bridges
    /// but `self.storage.can_store()` is false.
    ///
    /// # Panics
    ///
    /// Panics if, in either of the "nothing to do" cases, the active set's
    /// universe type does not agree with whether bridges were configured.
    #[cfg(feature = "bridge-client")]
    fn replace_bridge_config(
        &mut self,
        new_config: &impl GuardMgrConfig,
        wallclock: SystemTime,
        now: Instant,
    ) -> Result<RetireCircuits, GuardMgrConfigError> {
        // NOTE: the order of these arms matters; the earlier, more specific
        // arms shadow the catch-all `(_, true)` and `(_, false)` arms below.
        match (&self.configured_bridges, new_config.bridges_enabled()) {
            // Bridges were off, and they stay off.
            (None, false) => {
                assert_ne!(
                    self.guards.active_set.universe_type(),
                    UniverseType::BridgeSet
                );
                return Ok(RetireCircuits::None); // nothing to do
            }
            // Bridges are wanted, but our storage cannot be written to.
            (_, true) if !self.storage.can_store() => {
                // TODO: Ideally we would try to upgrade, obtaining an exclusive lock,
                // but `StorageHandle` currently lacks a method for that.
                return Err(GuardMgrConfigError::NoLock("bridges configured".into()));
            }
            // Bridges were on, stay on, and the list is unchanged.
            (Some(current_bridges), true) if new_config.bridges() == current_bridges.as_ref() => {
                assert_eq!(
                    self.guards.active_set.universe_type(),
                    UniverseType::BridgeSet
                );
                return Ok(RetireCircuits::None); // nothing to do.
            }
            // Bridges are on, with a new or changed list.
            (_, true) => {
                self.configured_bridges = Some(new_config.bridges().into());
                self.guards.active_set = GuardSetSelector::Bridges;
            }
            // Bridges were on, and are now turned off.
            (_, false) => {
                self.configured_bridges = None;
                self.guards.active_set = GuardSetSelector::Default;
            }
        }

        // If we have gotten here, we have changed the set of bridges, changed
        // which set is active, or changed them both.  We need to make sure that
        // our `GuardSet` object is up-to-date with our configuration.
        self.update(wallclock, now);

        // We also need to tell the caller that its circuits are no good any
        // more.
        //
        // TODO(nickm): Someday we can do this more judiciously by returning
        // "Some" in the case where we're still using bridges but our new bridge
        // set contains different elements; see comment on RetireCircuits.
        //
        // TODO(nickm): We could also safely return RetireCircuits::None if we
        // are using bridges, and our new bridge list is a superset of the older
        // one.
        Ok(RetireCircuits::All)
    }
953
954    /// Update our parameters, our selection (based on network parameters and
955    /// configuration), and make sure the active GuardSet has the right
956    /// configuration itself.
957    ///
958    /// We should call this whenever the NetDir's parameters change, or whenever
959    /// our filter changes.  We do not need to call it for new elements arriving
960    /// in our Universe, since those do not affect anything here.
961    ///
962    /// We should also call this whenever a new GuardSet becomes active for any
963    /// reason _other_ than just having called this function.
964    ///
965    /// (This function is only invoked from `update`, which should be called
966    /// under the above circumstances.)
967    fn update_active_set_params_and_filter(&mut self, netdir: Option<&NetDir>) {
968        // Set the parameters.  These always come from the NetDir, even if this
969        // is a bridge set.
970        if let Some(netdir) = netdir {
971            match GuardParams::try_from(netdir.params()) {
972                Ok(params) => self.params = params,
973                Err(e) => warn!("Unusable guard parameters from consensus: {}", e),
974            }
975
976            self.select_guard_set_based_on_filter(netdir);
977        }
978
979        // Change the filter, if it doesn't match what the guards have.
980        //
981        // TODO(nickm): We could use a "dirty" flag or something to decide
982        // whether we need to call set_filter, if this comparison starts to show
983        // up in profiles.
984        if self.guards.active_guards().filter() != &self.filter {
985            let restrictive = self.guards.active_set == GuardSetSelector::Restricted;
986            self.guards
987                .active_guards_mut()
988                .set_filter(self.filter.clone(), restrictive);
989        }
990    }
991
992    /// Update the status of every guard in `active_guards`, and expand it as
993    /// needed.
994    ///
995    /// This function doesn't take `&self`, to make sure that we are only
996    /// affecting a single `GuardSet`, and to avoid confusing the borrow
997    /// checker.
998    ///
999    /// We should call this whenever the contents of the universe have changed.
1000    ///
1001    /// We should also call this whenever a new GuardSet becomes active.
1002    fn update_guardset_internal<U: Universe>(
1003        params: &GuardParams,
1004        now: SystemTime,
1005        universe_type: UniverseType,
1006        active_guards: &mut GuardSet,
1007        universe: Option<&U>,
1008    ) -> ExtendedStatus {
1009        // Expire guards.  Do that early, in case doing so makes it clear that
1010        // we need to grab more guards or mark others as primary.
1011        active_guards.expire_old_guards(params, now);
1012
1013        let extended = if let Some(universe) = universe {
1014            // TODO: This check here may be completely unnecessary. I inserted
1015            // it back in 5ac0fcb7ef603e0d14 because I was originally concerned
1016            // it might be undesirable to list a primary guard as "missing dir
1017            // info" (and therefore unusable) if we were expecting to get its
1018            // microdescriptor "very soon."
1019            //
1020            // But due to the other check in `netdir_is_sufficient`, we
1021            // shouldn't be installing a netdir until it has microdescs for all
1022            // of the (non-bridge) primary guards that it lists. - nickm
1023            if active_guards.n_primary_without_id_info_in(universe) > 0
1024                && universe_type == UniverseType::NetDir
1025            {
1026                // We are missing the information from a NetDir needed to see
1027                // whether our primary guards are listed, so we shouldn't update
1028                // our guard status.
1029                //
1030                // We don't want to do this check if we are using bridges, since
1031                // a missing bridge descriptor is not guaranteed to temporary
1032                // problem in the same way that a missing microdescriptor is.
1033                // (When a bridge desc is missing, the bridge could be down or
1034                // unreachable, and nobody else can help us. But if a microdesc
1035                // is missing, we just need to find a cache that has it.)
1036                return ExtendedStatus::No;
1037            }
1038            active_guards.update_status_from_dir(universe);
1039            active_guards.extend_sample_as_needed(now, params, universe)
1040        } else {
1041            ExtendedStatus::No
1042        };
1043
1044        active_guards.select_primary_guards(params);
1045
1046        extended
1047    }
1048
1049    /// If using bridges, tell the BridgeDescProvider which descriptors we want.
1050    /// We need to check this *after* we select our primary guards.
1051    #[cfg(feature = "bridge-client")]
1052    fn update_desired_descriptors(&mut self, now: Instant) {
1053        if self.guards.active_set.universe_type() != UniverseType::BridgeSet {
1054            return;
1055        }
1056
1057        let provider = self.bridge_desc_provider.as_ref().and_then(Weak::upgrade);
1058        let bridge_set = self.latest_bridge_set();
1059        if let (Some(provider), Some(bridge_set)) = (provider, bridge_set) {
1060            let desired: Vec<_> = self
1061                .guards
1062                .active_guards()
1063                .descriptors_to_request(now, &self.params)
1064                .into_iter()
1065                .flat_map(|guard| bridge_set.bridge_by_guard(guard))
1066                .cloned()
1067                .collect();
1068
1069            provider.set_bridges(&desired);
1070        }
1071    }
1072
1073    /// Replace the active guard state with `new_state`, preserving
1074    /// non-persistent state for any guards that are retained.
1075    fn replace_guards_with(
1076        &mut self,
1077        mut new_guards: GuardSets,
1078        wallclock: SystemTime,
1079        now: Instant,
1080    ) {
1081        std::mem::swap(&mut self.guards, &mut new_guards);
1082        self.guards.copy_status_from(new_guards);
1083        self.update(wallclock, now);
1084    }
1085
1086    /// Update which guard set is active based on the current filter and the
1087    /// provided netdir.
1088    ///
1089    /// After calling this function, the new guard set's filter may be
1090    /// out-of-date: be sure to call `set_filter` as appropriate.
1091    fn select_guard_set_based_on_filter(&mut self, netdir: &NetDir) {
1092        // In general, we'd like to use the restricted set if we're under the
1093        // threshold, and the default set if we're over the threshold.  But if
1094        // we're sitting close to the threshold, we want to avoid flapping back
1095        // and forth, so we only change when we're more than 5% "off" from
1096        // whatever our current setting is.
1097        //
1098        // (See guard-spec section 2 for more information.)
1099        let offset = match self.guards.active_set {
1100            GuardSetSelector::Default => -0.05,
1101            GuardSetSelector::Restricted => 0.05,
1102            // If we're using bridges, then we don't switch between the other guard sets based on on the filter at all.
1103            #[cfg(feature = "bridge-client")]
1104            GuardSetSelector::Bridges => return,
1105        };
1106        let frac_permitted = self.filter.frac_bw_permitted(netdir);
1107        let threshold = self.params.filter_threshold + offset;
1108        let new_choice = if frac_permitted < threshold {
1109            GuardSetSelector::Restricted
1110        } else {
1111            GuardSetSelector::Default
1112        };
1113
1114        if new_choice != self.guards.active_set {
1115            info!(
1116                "Guard selection changed; we are now using the {:?} guard set",
1117                &new_choice
1118            );
1119
1120            self.guards.active_set = new_choice;
1121
1122            if frac_permitted < self.params.extreme_threshold {
1123                warn!(
1124                      "The number of guards permitted is smaller than the recommended minimum of {:.0}%.",
1125                      self.params.extreme_threshold * 100.0,
1126                );
1127            }
1128        }
1129    }
1130
1131    /// Mark all of our primary guards as retriable, if we haven't done
1132    /// so since long enough before `now`.
1133    ///
1134    /// We want to call this function whenever a guard attempt succeeds,
1135    /// if the internet seemed to be down when the guard attempt was
1136    /// first launched.
1137    fn maybe_retry_primary_guards(&mut self, now: Instant) {
1138        // We don't actually want to mark our primary guards as
1139        // retriable more than once per internet_down_timeout: after
1140        // the first time, we would just be noticing the same "coming
1141        // back online" event more than once.
1142        let interval = self.params.internet_down_timeout;
1143        if self.last_primary_retry_time + interval <= now {
1144            debug!("Successfully reached a guard after a while off the internet; marking all primary guards retriable.");
1145            self.guards
1146                .active_guards_mut()
1147                .mark_primary_guards_retriable();
1148            self.last_primary_retry_time = now;
1149        }
1150    }
1151
1152    /// Replace the current GuardFilter with `filter`.
1153    fn set_filter(&mut self, filter: GuardFilter, wallclock: SystemTime, now: Instant) {
1154        self.filter = filter;
1155        self.update(wallclock, now);
1156    }
1157
    /// Called when the circuit manager reports (via [`GuardMonitor`]) that
    /// a guard succeeded or failed.
    ///
    /// Changes the guard's status as appropriate, and updates the pending
    /// request as needed.
    ///
    /// If `request_id` does not match any pending request, a warning is logged
    /// and no guard status is changed.  In every case, the primary guards of
    /// the active set are re-selected afterwards, and waiting requests are
    /// expired or answered.
    ///
    /// If `skew` is provided, it is recorded as a clock skew observation for
    /// the guard or fallback in question, and the skew estimate is
    /// recalculated.
    #[allow(clippy::cognitive_complexity)]
    pub(crate) fn handle_msg(
        &mut self,
        request_id: RequestId,
        status: GuardStatus,
        skew: Option<ClockSkew>,
        runtime: &impl tor_rtcompat::SleepProvider,
    ) {
        // Removing the entry means that each request is handled at most once.
        if let Some(mut pending) = self.pending.remove(&request_id) {
            // If there was a pending request matching this RequestId, great!
            let guard_id = pending.guard_id();
            trace!(?guard_id, ?status, "Received report of guard status");

            // First, handle the skew report (if any)
            if let Some(skew) = skew {
                let now = runtime.now();
                let observation = skew::SkewObservation { skew, when: now };

                match &guard_id.0 {
                    // Note that guard skew is recorded in the *active* set,
                    // whichever sample the guard was handed out from.
                    FirstHopIdInner::Guard(_, id) => {
                        self.guards.active_guards_mut().record_skew(id, observation);
                    }
                    FirstHopIdInner::Fallback(id) => {
                        self.fallbacks.note_skew(id, observation);
                    }
                }
                // TODO: We call this whenever we receive an observed clock
                // skew. That's not the perfect timing for two reasons.  First
                // off, it might be too frequent: it does an O(n) calculation,
                // which isn't ideal.  Second, it might be too infrequent: after
                // an hour has passed, a given observation won't be up-to-date
                // any more, and we might want to recalculate the skew
                // accordingly.
                self.update_skew(now);
            }

            // Next, record the outcome itself.  Every guard arm below replies
            // to the pending request, except for the success case where no
            // usability answer is available yet.
            match (status, &guard_id.0) {
                (GuardStatus::Failure, FirstHopIdInner::Fallback(id)) => {
                    // We used a fallback, and we weren't able to build a circuit through it.
                    self.fallbacks.note_failure(id, runtime.now());
                }
                (_, FirstHopIdInner::Fallback(_)) => {
                    // We don't record any other kind of circuit activity if we
                    // took the entry from the fallback list.
                }
                (GuardStatus::Success, FirstHopIdInner::Guard(sample, id)) => {
                    // If we had gone too long without any net activity when we
                    // gave out this guard, and now we're seeing a circuit
                    // succeed, tell the primary guards that they might be
                    // retriable.
                    if pending.net_has_been_down() {
                        self.maybe_retry_primary_guards(runtime.now());
                    }

                    // The guard succeeded.  Tell the GuardSet.
                    self.guards.guards_mut(sample).record_success(
                        id,
                        &self.params,
                        None,
                        runtime.wallclock(),
                    );
                    // Either tell the request whether the guard is
                    // usable, or schedule it as a "waiting" request.
                    if let Some(usable) = self.guard_usability_status(&pending, runtime.now()) {
                        trace!(?guard_id, usable, "Known usability status");
                        pending.reply(usable);
                    } else {
                        // This is the one case where we can't use the
                        // guard yet.
                        trace!(?guard_id, "Not able to answer right now");
                        pending.mark_waiting(runtime.now());
                        self.waiting.push(pending);
                    }
                }
                (GuardStatus::Failure, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards
                        .guards_mut(sample)
                        .record_failure(id, None, runtime.now());
                    pending.reply(false);
                }
                (GuardStatus::AttemptAbandoned, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards.guards_mut(sample).record_attempt_abandoned(id);
                    pending.reply(false);
                }
                (GuardStatus::Indeterminate, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards
                        .guards_mut(sample)
                        .record_indeterminate_result(id);
                    pending.reply(false);
                }
            };
        } else {
            warn!(
                "Got a status {:?} for a request {:?} that wasn't pending",
                status, request_id
            );
        }

        // We might need to update the primary guards based on changes in the
        // status of guards above.
        self.guards
            .active_guards_mut()
            .select_primary_guards(&self.params);

        // Some waiting request may just have become ready (usable or
        // not); we need to give them the information they're waiting
        // for.
        self.expire_and_answer_pending_requests(runtime.now());
    }
1272
1273    /// Helper to implement `GuardMgr::note_external_success()`.
1274    ///
1275    /// (This has to be a separate function so that we can borrow params while
1276    /// we have `mut self` borrowed.)
1277    fn record_external_success<T>(
1278        &mut self,
1279        identity: &T,
1280        external_activity: ExternalActivity,
1281        now: SystemTime,
1282    ) where
1283        T: tor_linkspec::HasRelayIds + ?Sized,
1284    {
1285        for id in self.lookup_ids(identity) {
1286            match &id.0 {
1287                FirstHopIdInner::Guard(sample, id) => {
1288                    self.guards.guards_mut(sample).record_success(
1289                        id,
1290                        &self.params,
1291                        Some(external_activity),
1292                        now,
1293                    );
1294                }
1295                FirstHopIdInner::Fallback(id) => {
1296                    if external_activity == ExternalActivity::DirCache {
1297                        self.fallbacks.note_success(id);
1298                    }
1299                }
1300            }
1301        }
1302    }
1303
1304    /// Return an iterator over all of the clock skew observations we've made
1305    /// for guards or fallbacks.
1306    fn skew_observations(&self) -> impl Iterator<Item = &skew::SkewObservation> {
1307        self.fallbacks
1308            .skew_observations()
1309            .chain(self.guards.active_guards().skew_observations())
1310    }
1311
1312    /// Recalculate our estimated clock skew, and publish it to anybody who
1313    /// cares.
1314    fn update_skew(&mut self, now: Instant) {
1315        let estimate = skew::SkewEstimate::estimate_skew(self.skew_observations(), now);
1316        // TODO: we might want to do this only conditionally, when the skew
1317        // estimate changes.
1318        *self.send_skew.borrow_mut() = estimate;
1319    }
1320
1321    /// If the circuit built because of a given [`PendingRequest`] may
1322    /// now be used (or discarded), return `Some(true)` or
1323    /// `Some(false)` respectively.
1324    ///
1325    /// Return None if we can't yet give an answer about whether such
1326    /// a circuit is usable.
1327    fn guard_usability_status(&self, pending: &PendingRequest, now: Instant) -> Option<bool> {
1328        match &pending.guard_id().0 {
1329            FirstHopIdInner::Guard(sample, id) => self.guards.guards(sample).circ_usability_status(
1330                id,
1331                pending.usage(),
1332                &self.params,
1333                now,
1334            ),
1335            // Fallback circuits are usable immediately, since we don't have to wait to
1336            // see whether any _other_ circuit succeeds or fails.
1337            FirstHopIdInner::Fallback(_) => Some(true),
1338        }
1339    }
1340
1341    /// For requests that have been "waiting" for an answer for too long,
1342    /// expire them and tell the circuit manager that their circuits
1343    /// are unusable.
1344    fn expire_and_answer_pending_requests(&mut self, now: Instant) {
1345        // A bit ugly: we use a separate Vec here to avoid borrowing issues,
1346        // and put it back when we're done.
1347        let mut waiting = Vec::new();
1348        std::mem::swap(&mut waiting, &mut self.waiting);
1349
1350        waiting.retain_mut(|pending| {
1351            let expired = pending
1352                .waiting_since()
1353                .and_then(|w| now.checked_duration_since(w))
1354                .map(|d| d >= self.params.np_idle_timeout)
1355                == Some(true);
1356            if expired {
1357                trace!(?pending, "Pending request expired");
1358                pending.reply(false);
1359                return false;
1360            }
1361
1362            // TODO-SPEC: guard_usability_status isn't what the spec says.  It
1363            // says instead that we should look at _circuit_ status, saying:
1364            //  "   Definition: In the algorithm above, C2 "blocks" C1 if:
1365            // * C2 obeys all the restrictions that C1 had to obey, AND
1366            // * C2 has higher priority than C1, AND
1367            // * Either C2 is <complete>, or C2 is <waiting_for_better_guard>,
1368            // or C2 has been <usable_if_no_better_guard> for no more than
1369            // {NONPRIMARY_GUARD_CONNECT_TIMEOUT} seconds."
1370            //
1371            // See comments in sample::GuardSet::circ_usability_status.
1372
1373            if let Some(answer) = self.guard_usability_status(pending, now) {
1374                trace!(?pending, answer, "Pending request now ready");
1375                pending.reply(answer);
1376                return false;
1377            }
1378            true
1379        });
1380
1381        // Put the waiting list back.
1382        std::mem::swap(&mut waiting, &mut self.waiting);
1383    }
1384
1385    /// Return every currently extant FirstHopId for a guard or fallback
1386    /// directory matching (or possibly matching) the provided keys.
1387    ///
1388    /// An identity is _possibly matching_ if it contains some of the IDs in the
1389    /// provided identity, and it has no _contradictory_ identities, but it does
1390    /// not necessarily contain _all_ of those identities.
1391    ///
1392    /// # TODO
1393    ///
1394    /// This function should probably not exist; it's only used so that dirmgr
1395    /// can report successes or failures, since by the time it observes them it
1396    /// doesn't know whether its circuit came from a guard or a fallback.  To
1397    /// solve that, we'll need CircMgr to record and report which one it was
1398    /// using, which will take some more plumbing.
1399    ///
1400    /// TODO relay: we will have to make the change above when we implement
1401    /// relays; otherwise, it would be possible for an attacker to exploit it to
1402    /// mislead us about our guard status.
1403    fn lookup_ids<T>(&self, identity: &T) -> Vec<FirstHopId>
1404    where
1405        T: tor_linkspec::HasRelayIds + ?Sized,
1406    {
1407        use strum::IntoEnumIterator;
1408        let mut vec = Vec::with_capacity(2);
1409
1410        let id = ids::GuardId::from_relay_ids(identity);
1411        for sample in GuardSetSelector::iter() {
1412            let guard_id = match self.guards.guards(&sample).contains(&id) {
1413                Ok(true) => &id,
1414                Err(other) => other,
1415                Ok(false) => continue,
1416            };
1417            vec.push(FirstHopId(FirstHopIdInner::Guard(sample, guard_id.clone())));
1418        }
1419
1420        let id = ids::FallbackId::from_relay_ids(identity);
1421        if self.fallbacks.contains(&id) {
1422            vec.push(id.into());
1423        }
1424
1425        vec
1426    }
1427
1428    /// Run any periodic events that update guard status, and return a
1429    /// duration after which periodic events should next be run.
1430    pub(crate) fn run_periodic_events(&mut self, wallclock: SystemTime, now: Instant) -> Duration {
1431        self.update(wallclock, now);
1432        self.expire_and_answer_pending_requests(now);
1433        Duration::from_secs(1) // TODO: Too aggressive.
1434    }
1435
1436    /// Try to select a guard, expanding the sample if the first attempt fails.
1437    fn select_guard_with_expand(
1438        &mut self,
1439        usage: &GuardUsage,
1440        now: Instant,
1441        wallclock: SystemTime,
1442    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1443        // Try to find a guard.
1444        let first_error = match self.select_guard_once(usage, now) {
1445            Ok(res1) => return Ok(res1),
1446            Err(e) => {
1447                trace!("Couldn't select guard on first attempt: {}", e);
1448                e
1449            }
1450        };
1451
1452        // That didn't work. If we have a netdir, expand the sample and try again.
1453        let res = self.with_opt_universe(|this, univ| {
1454            let univ = univ?;
1455            trace!("No guards available, trying to extend the sample.");
1456            // Make sure that the status on all of our guards are accurate, and
1457            // expand the sample if we can.
1458            //
1459            // Our parameters and configuration did not change, so we do not
1460            // need to call update() or update_active_set_and_filter(). This
1461            // call is sufficient to  extend the sample and recompute primary
1462            // guards.
1463            let extended = Self::update_guardset_internal(
1464                &this.params,
1465                wallclock,
1466                this.guards.active_set.universe_type(),
1467                this.guards.active_guards_mut(),
1468                Some(univ),
1469            );
1470            if extended == ExtendedStatus::Yes {
1471                match this.select_guard_once(usage, now) {
1472                    Ok(res) => return Some(res),
1473                    Err(e) => {
1474                        trace!("Couldn't select guard after update: {}", e);
1475                    }
1476                }
1477            }
1478            None
1479        });
1480        if let Some(res) = res {
1481            return Ok(res);
1482        }
1483
1484        // Okay, that didn't work either.  If we were asked for a directory
1485        // guard, and we aren't using bridges, then we may be able to use a
1486        // fallback.
1487        if usage.kind == GuardUsageKind::OneHopDirectory
1488            && self.guards.active_set.universe_type() == UniverseType::NetDir
1489        {
1490            return self.select_fallback(now);
1491        }
1492
1493        // Couldn't extend the sample or use a fallback; return the original error.
1494        Err(first_error)
1495    }
1496
1497    /// Helper: try to pick a single guard, without retrying on failure.
1498    fn select_guard_once(
1499        &self,
1500        usage: &GuardUsage,
1501        now: Instant,
1502    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1503        let active_set = &self.guards.active_set;
1504        #[cfg_attr(not(feature = "bridge-client"), allow(unused_mut))]
1505        let (list_kind, mut first_hop) =
1506            self.guards
1507                .guards(active_set)
1508                .pick_guard(active_set, usage, &self.params, now)?;
1509        #[cfg(feature = "bridge-client")]
1510        if self.guards.active_set.universe_type() == UniverseType::BridgeSet {
1511            // See if we can promote first_hop to a viable CircTarget.
1512            let bridges = self.latest_bridge_set().ok_or_else(|| {
1513                PickGuardError::Internal(internal!(
1514                    "No bridge set available, even though this is the Bridges sample"
1515                ))
1516            })?;
1517            first_hop.lookup_bridge_circ_target(&bridges);
1518
1519            if usage.kind == GuardUsageKind::Data && !first_hop.contains_circ_target() {
1520                return Err(PickGuardError::Internal(internal!(
1521                    "Tried to return a non-circtarget guard with Data usage!"
1522                )));
1523            }
1524        }
1525        Ok((list_kind, first_hop))
1526    }
1527
1528    /// Helper: Select a fallback directory.
1529    ///
1530    /// Called when we have no guard information to use. Return values are as
1531    /// for [`GuardMgr::select_guard()`]
1532    fn select_fallback(
1533        &self,
1534        now: Instant,
1535    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1536        let filt = self.guards.active_guards().filter();
1537
1538        let fallback = self
1539            .fallbacks
1540            .choose(&mut rand::rng(), now, filt)?
1541            .as_guard();
1542        let fallback = filt.modify_hop(fallback)?;
1543        Ok((sample::ListKind::Fallback, fallback))
1544    }
1545}
1546
/// A possible outcome of trying to extend a guard sample.
///
/// Callers compare the result against [`ExtendedStatus::Yes`] to decide
/// whether retrying a guard selection is worthwhile.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum ExtendedStatus {
    /// The guard sample was extended. (At least one guard was added to it.)
    Yes,
    /// The guard sample was not extended.
    No,
}
1555
/// A set of parameters, derived from the consensus document, controlling
/// the behavior of a guard manager.
///
/// Built-in values come from the `Default` implementation; values taken
/// from the network come from the `TryFrom<&NetParameters>` implementation.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
struct GuardParams {
    /// How long should a sampled, un-confirmed guard be kept in the sample
    /// before it expires?
    lifetime_unconfirmed: Duration,
    /// How long should a confirmed guard be kept in the sample before
    /// it expires?
    lifetime_confirmed: Duration,
    /// How long may a guard be unlisted before we remove it from the sample?
    lifetime_unlisted: Duration,
    /// Largest number of guards we're willing to add to the sample.
    max_sample_size: usize,
    /// Largest fraction of the network's guard bandwidth that we're
    /// willing to add to the sample.
    max_sample_bw_fraction: f64,
    /// Smallest number of guards that we're willing to have in the
    /// sample, after applying a [`GuardFilter`].
    min_filtered_sample_size: usize,
    /// How many guards are considered "Primary"?
    n_primary: usize,
    /// When making a regular circuit, how many primary guards should we
    /// be willing to try?
    data_parallelism: usize,
    /// When making a one-hop directory circuit, how many primary
    /// guards should we be willing to try?
    dir_parallelism: usize,
    /// For how long does a pending attempt to connect to a guard
    /// block an attempt to use a less-favored non-primary guard?
    np_connect_timeout: Duration,
    /// How long do we allow a circuit to a successful but unfavored
    /// non-primary guard to sit around before deciding not to use it?
    np_idle_timeout: Duration,
    /// After how much time without successful activity does a
    /// successful circuit indicate that we should retry our primary
    /// guards?
    internet_down_timeout: Duration,
    // NOTE(review): both threshold docs below say "very restrictive".
    // Going by the network parameter names, `filter_threshold` is presumably
    // the "meaningful restriction" threshold and `extreme_threshold` the
    // "extreme restriction" one -- confirm and reword.
    /// What fraction of the guards can be filtered out before we
    /// decide that our filter is "very restrictive"?
    ///
    /// (Taken from the `guard_meaningful_restriction` network parameter.)
    filter_threshold: f64,
    /// What fraction of the guards determine that our filter is "very
    /// restrictive"?
    ///
    /// (Taken from the `guard_extreme_restriction` network parameter.)
    extreme_threshold: f64,
}

impl Default for GuardParams {
    /// Return the built-in parameter values.
    ///
    /// (The `guard_param_defaults` test checks that these agree with the
    /// values derived from `NetParameters::default()`.)
    fn default() -> Self {
        /// Number of seconds in one day.
        const SECS_PER_DAY: u64 = 86400;
        let days = |n: u64| Duration::from_secs(n * SECS_PER_DAY);
        let secs = Duration::from_secs;

        Self {
            // Sample lifetimes.
            lifetime_unconfirmed: days(120),
            lifetime_confirmed: days(60),
            lifetime_unlisted: days(20),
            // Sample sizing.
            max_sample_size: 60,
            max_sample_bw_fraction: 0.2,
            min_filtered_sample_size: 20,
            // Primary guards, and how many of them to try at once.
            n_primary: 3,
            data_parallelism: 1,
            dir_parallelism: 3,
            // Timeouts.
            np_connect_timeout: secs(15),
            np_idle_timeout: secs(600),
            internet_down_timeout: secs(600),
            // Filter restrictiveness thresholds.
            filter_threshold: 0.2,
            extreme_threshold: 0.01,
        }
    }
}
1623
1624impl TryFrom<&NetParameters> for GuardParams {
1625    type Error = tor_units::Error;
1626    fn try_from(p: &NetParameters) -> Result<GuardParams, Self::Error> {
1627        Ok(GuardParams {
1628            lifetime_unconfirmed: p.guard_lifetime_unconfirmed.try_into()?,
1629            lifetime_confirmed: p.guard_lifetime_confirmed.try_into()?,
1630            lifetime_unlisted: p.guard_remove_unlisted_after.try_into()?,
1631            max_sample_size: p.guard_max_sample_size.try_into()?,
1632            max_sample_bw_fraction: p.guard_max_sample_threshold.as_fraction(),
1633            min_filtered_sample_size: p.guard_filtered_min_sample_size.try_into()?,
1634            n_primary: p.guard_n_primary.try_into()?,
1635            data_parallelism: p.guard_use_parallelism.try_into()?,
1636            dir_parallelism: p.guard_dir_use_parallelism.try_into()?,
1637            np_connect_timeout: p.guard_nonprimary_connect_timeout.try_into()?,
1638            np_idle_timeout: p.guard_nonprimary_idle_timeout.try_into()?,
1639            internet_down_timeout: p.guard_internet_likely_down.try_into()?,
1640            filter_threshold: p.guard_meaningful_restriction.as_fraction(),
1641            extreme_threshold: p.guard_extreme_restriction.as_fraction(),
1642        })
1643    }
1644}
1645
/// Representation of a guard or fallback, as returned by [`GuardMgr::select_guard()`].
///
/// A `FirstHop` always carries enough information to open a channel to
/// the relay.  It may additionally carry what is needed to build circuits
/// through it; see [`FirstHop::as_circ_target`].
#[derive(Debug, Clone)]
pub struct FirstHop {
    /// The sample from which this guard was taken, or `None` if this is a fallback.
    sample: Option<GuardSetSelector>,
    /// Information about connecting to (or through) this guard.
    inner: FirstHopInner,
}
/// The enumeration inside a FirstHop that holds information about how to
/// connect to (and possibly through) a guard or fallback.
#[derive(Debug, Clone)]
enum FirstHopInner {
    /// We have enough information to connect to a guard.
    Chan(OwnedChanTarget),
    /// We have enough information to connect to a guard _and_ to build
    /// multihop circuits through it.
    // Without the `bridge-client` feature the code that constructs this
    // variant (`lookup_bridge_circ_target`) is compiled out, hence the
    // conditional `dead_code` allowance.
    #[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
    Circ(OwnedCircTarget),
}
1665
1666impl FirstHop {
1667    /// Return a new [`FirstHopId`] for this `FirstHop`.
1668    fn first_hop_id(&self) -> FirstHopId {
1669        match &self.sample {
1670            Some(sample) => {
1671                let guard_id = GuardId::from_relay_ids(self);
1672                FirstHopId::in_sample(sample.clone(), guard_id)
1673            }
1674            None => {
1675                let fallback_id = crate::ids::FallbackId::from_relay_ids(self);
1676                FirstHopId::from(fallback_id)
1677            }
1678        }
1679    }
1680
1681    /// Look up this guard in `netdir`.
1682    pub fn get_relay<'a>(&self, netdir: &'a NetDir) -> Option<Relay<'a>> {
1683        match &self.sample {
1684            #[cfg(feature = "bridge-client")]
1685            // Always return "None" for anything that isn't in the netdir.
1686            Some(s) if s.universe_type() == UniverseType::BridgeSet => None,
1687            // Otherwise ask the netdir.
1688            _ => netdir.by_ids(self),
1689        }
1690    }
1691
1692    /// Return true if this guard is a bridge.
1693    pub fn is_bridge(&self) -> bool {
1694        match &self.sample {
1695            #[cfg(feature = "bridge-client")]
1696            Some(s) if s.universe_type() == UniverseType::BridgeSet => true,
1697            _ => false,
1698        }
1699    }
1700
1701    /// If possible, return a view of this object that can be used to build a circuit.
1702    pub fn as_circ_target(&self) -> Option<&OwnedCircTarget> {
1703        match &self.inner {
1704            FirstHopInner::Chan(_) => None,
1705            FirstHopInner::Circ(ct) => Some(ct),
1706        }
1707    }
1708
1709    /// Return a view of this as an OwnedChanTarget.
1710    fn chan_target_mut(&mut self) -> &mut OwnedChanTarget {
1711        match &mut self.inner {
1712            FirstHopInner::Chan(ct) => ct,
1713            FirstHopInner::Circ(ct) => ct.chan_target_mut(),
1714        }
1715    }
1716
1717    /// If possible and appropriate, find a circuit target in `bridges` for this
1718    /// `FirstHop`, and make this `FirstHop` a viable circuit target.
1719    ///
1720    /// (By default, any `FirstHop` that a `GuardSet` returns will have enough
1721    /// information to be a `ChanTarget`, but it will be lacking the additional
1722    /// network information in `CircTarget`[^1] necessary for us to build a
1723    /// multi-hop circuit through it.  If this FirstHop is a regular non-bridge
1724    /// `Relay`, then the `CircMgr` will later look up that circuit information
1725    /// itself from the network directory. But if this `FirstHop` *is* a bridge,
1726    /// then we need to find that information in the `BridgeSet`, since the
1727    /// CircMgr does not keep track of the `BridgeSet`.)
1728    ///
1729    /// [^1]: For example, supported protocol versions and ntor keys.
1730    #[cfg(feature = "bridge-client")]
1731    fn lookup_bridge_circ_target(&mut self, bridges: &bridge::BridgeSet) {
1732        use crate::sample::CandidateStatus::Present;
1733        if self.sample.as_ref().map(|s| s.universe_type()) == Some(UniverseType::BridgeSet)
1734            && matches!(self.inner, FirstHopInner::Chan(_))
1735        {
1736            if let Present(bridge_relay) = bridges.bridge_relay_by_guard(self) {
1737                if let Some(circ_target) = bridge_relay.as_relay_with_desc() {
1738                    self.inner =
1739                        FirstHopInner::Circ(OwnedCircTarget::from_circ_target(&circ_target));
1740                }
1741            }
1742        }
1743    }
1744
1745    /// Return true if this `FirstHop` contains circuit target information.
1746    ///
1747    /// This is true if `lookup_bridge_circ_target()` has been called, and it
1748    /// successfully found the circuit target information.
1749    #[cfg(feature = "bridge-client")]
1750    fn contains_circ_target(&self) -> bool {
1751        matches!(self.inner, FirstHopInner::Circ(_))
1752    }
1753}
1754
1755// This is somewhat redundant with the implementations in crate::guard::Guard.
1756impl tor_linkspec::HasAddrs for FirstHop {
1757    fn addrs(&self) -> &[SocketAddr] {
1758        match &self.inner {
1759            FirstHopInner::Chan(ct) => ct.addrs(),
1760            FirstHopInner::Circ(ct) => ct.addrs(),
1761        }
1762    }
1763}
1764impl tor_linkspec::HasRelayIds for FirstHop {
1765    fn identity(
1766        &self,
1767        key_type: tor_linkspec::RelayIdType,
1768    ) -> Option<tor_linkspec::RelayIdRef<'_>> {
1769        match &self.inner {
1770            FirstHopInner::Chan(ct) => ct.identity(key_type),
1771            FirstHopInner::Circ(ct) => ct.identity(key_type),
1772        }
1773    }
1774}
1775impl tor_linkspec::HasChanMethod for FirstHop {
1776    fn chan_method(&self) -> tor_linkspec::ChannelMethod {
1777        match &self.inner {
1778            FirstHopInner::Chan(ct) => ct.chan_method(),
1779            FirstHopInner::Circ(ct) => ct.chan_method(),
1780        }
1781    }
1782}
// No method bodies are needed here: the accessors come from FirstHop's
// `HasAddrs`, `HasRelayIds`, and `HasChanMethod` implementations.
impl tor_linkspec::ChanTarget for FirstHop {}
1784
/// The purpose for which we plan to use a guard.
///
/// This can affect the guard selection algorithm.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub enum GuardUsageKind {
    /// We want to use this guard for a data circuit.
    ///
    /// (This encompasses everything except the `OneHopDirectory` case.)
    ///
    /// This is the default kind.
    #[default]
    Data,
    /// We want to use this guard for a one-hop, non-anonymous
    /// directory request.
    ///
    /// (Our algorithm allows more parallelism for the guards that we use
    /// for these circuits.)
    ///
    /// If no guard can be selected for this kind of request, and the active
    /// sample is drawn from a network directory, a fallback directory may
    /// be selected instead.
    OneHopDirectory,
}
1803
/// A set of parameters describing how a single guard should be selected.
///
/// Used as an argument to [`GuardMgr::select_guard`].
///
/// Construct one with [`GuardUsageBuilder`].
#[derive(Clone, Debug, derive_builder::Builder)]
#[builder(build_fn(error = "tor_config::ConfigBuildError"))]
pub struct GuardUsage {
    /// The purpose for which this guard will be used.
    ///
    /// If unset, this is the default [`GuardUsageKind`], which is
    /// [`GuardUsageKind::Data`].
    #[builder(default)]
    kind: GuardUsageKind,
    /// A list of restrictions on which guard may be used.
    ///
    /// The default is the empty list.
    #[builder(sub_builder, setter(custom))]
    restrictions: GuardRestrictionList,
}
1819
// NOTE(review): `GuardUsage` does not derive `Default`, yet the tests in
// this file call `GuardUsage::default()`; presumably this macro invocation
// is what provides it -- confirm against the macro's documentation.
impl_standard_builder! { GuardUsage: !Deserialize }
1821
/// List of guard restrictions, as configured in a [`GuardUsage`].
pub type GuardRestrictionList = Vec<GuardRestriction>;
1824
// Declares `GuardRestrictionListBuilder`, the list builder for
// `GuardRestrictionList`.  The list defaults to empty, and each item is
// built by cloning the `GuardRestriction` as-is.
define_list_builder_helper! {
    pub struct GuardRestrictionListBuilder {
        restrictions: [GuardRestriction],
    }
    built: GuardRestrictionList = restrictions;
    default = vec![];
    item_build: |restriction| Ok(restriction.clone());
}
1833
// Declares the accessors for the `restrictions` list on
// `GuardUsageBuilder`.  (The `restrictions` field of `GuardUsage` is marked
// `setter(custom)`, so the derived builder does not generate a setter.)
define_list_builder_accessors! {
    struct GuardUsageBuilder {
        pub restrictions: [GuardRestriction],
    }
}
1839
1840impl GuardUsageBuilder {
1841    /// Create a new empty [`GuardUsageBuilder`].
1842    pub fn new() -> Self {
1843        Self::default()
1844    }
1845}
1846
/// A restriction that applies to a single request for a guard.
///
/// Restrictions differ from filters (see [`GuardFilter`]) in that
/// they apply to single requests, not to our entire set of guards.
/// They're suitable for things like making sure that we don't start
/// and end a circuit at the same relay, or requiring a specific
/// subprotocol version for certain kinds of requests.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub enum GuardRestriction {
    /// Don't pick a guard with the provided identity.
    AvoidId(RelayId),
    /// Don't pick a guard with any of the identities in the provided
    /// [`RelayIdSet`].
    AvoidAllIds(RelayIdSet),
}
1862
/// The kind of vanguards to use.
///
/// The variants carry explicit discriminants, so the derived ordering is
/// `Disabled < Lite < Full` even though `Disabled` is declared last.
///
/// This definition is used when the `vanguards` feature is enabled.
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
#[derive(Serialize, Deserialize)] //
#[derive(derive_more::Display)] //
#[serde(rename_all = "lowercase")]
#[cfg(feature = "vanguards")]
#[non_exhaustive]
pub enum VanguardMode {
    /// "Lite" vanguards.
    ///
    /// This is the default mode.
    #[default]
    #[display("lite")]
    Lite = 1,
    /// "Full" vanguards.
    #[display("full")]
    Full = 2,
    /// Vanguards are disabled.
    #[display("disabled")]
    Disabled = 0,
}
1882
#[cfg(feature = "vanguards")]
impl VanguardMode {
    /// Build a `VanguardMode` from a [`NetParameters`] parameter.
    ///
    /// Used for converting [`vanguards_enabled`](NetParameters::vanguards_enabled)
    /// or [`vanguards_hs_service`](NetParameters::vanguards_hs_service)
    /// to the corresponding `VanguardMode`.
    ///
    /// The numeric values map as `0` => disabled, `1` => lite, `2` => full.
    pub(crate) fn from_net_parameter(val: BoundedInt32<0, 2>) -> Self {
        let raw = val.get();
        match raw {
            2 => Self::Full,
            1 => Self::Lite,
            0 => Self::Disabled,
            // The parameter type is bounded to 0..=2, so nothing else
            // should ever be seen here.
            _ => unreachable!("BoundedInt32 was not bounded?!"),
        }
    }
}
1899
// Applied unconditionally: a `VanguardMode` type exists whether or not the
// `vanguards` feature is enabled.
// NOTE(review): presumably this is what allows `VanguardMode` to be used
// inside `ExplicitOrAuto` (see `VanguardConfig::mode`) -- confirm against
// the macro's documentation.
impl_not_auto_value!(VanguardMode);
1901
/// Vanguards configuration.
///
/// Use [`VanguardConfig::mode()`] to find the effective [`VanguardMode`].
#[derive(Debug, Default, Clone, Eq, PartialEq, derive_builder::Builder)]
#[builder(build_fn(error = "ConfigBuildError"))]
#[builder(derive(Debug, Serialize, Deserialize))]
pub struct VanguardConfig {
    /// The kind of vanguards to use.
    ///
    /// This may be left as "auto", in which case the default
    /// `VanguardMode` is used.
    #[builder_field_attr(serde(default))]
    #[builder(default)]
    mode: ExplicitOrAuto<VanguardMode>,
}
1912
1913impl VanguardConfig {
1914    /// Return the configured [`VanguardMode`].
1915    ///
1916    /// Returns the [`Default`] `VanguardMode`
1917    /// if the mode is [`Auto`](ExplicitOrAuto) or unspecified.
1918    pub fn mode(&self) -> VanguardMode {
1919        match self.mode {
1920            ExplicitOrAuto::Auto => Default::default(),
1921            ExplicitOrAuto::Explicit(mode) => mode,
1922        }
1923    }
1924}
1925
/// The kind of vanguards to use.
///
/// This definition is used when the `vanguards` feature is disabled; the
/// only mode it offers is [`VanguardMode::Disabled`].
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
#[derive(Serialize, Deserialize)] //
#[derive(derive_more::Display)] //
#[serde(rename_all = "lowercase")]
#[cfg(not(feature = "vanguards"))]
#[non_exhaustive]
pub enum VanguardMode {
    /// Vanguards are disabled.
    #[default]
    #[display("disabled")]
    Disabled = 0,
}
1939
1940#[cfg(test)]
1941mod test {
1942    // @@ begin test lint list maintained by maint/add_warning @@
1943    #![allow(clippy::bool_assert_comparison)]
1944    #![allow(clippy::clone_on_copy)]
1945    #![allow(clippy::dbg_macro)]
1946    #![allow(clippy::mixed_attributes_style)]
1947    #![allow(clippy::print_stderr)]
1948    #![allow(clippy::print_stdout)]
1949    #![allow(clippy::single_char_pattern)]
1950    #![allow(clippy::unwrap_used)]
1951    #![allow(clippy::unchecked_duration_subtraction)]
1952    #![allow(clippy::useless_vec)]
1953    #![allow(clippy::needless_pass_by_value)]
1954    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
1955    use super::*;
1956    use tor_linkspec::{HasAddrs, HasRelayIds};
1957    use tor_persist::TestingStateMgr;
1958    use tor_rtcompat::test_with_all_runtimes;
1959
1960    #[test]
1961    fn guard_param_defaults() {
1962        let p1 = GuardParams::default();
1963        let p2: GuardParams = (&NetParameters::default()).try_into().unwrap();
1964        assert_eq!(p1, p2);
1965    }
1966
1967    fn init<R: Runtime>(rt: R) -> (GuardMgr<R>, TestingStateMgr, NetDir) {
1968        use tor_netdir::{testnet, MdReceiver, PartialNetDir};
1969        let statemgr = TestingStateMgr::new();
1970        let have_lock = statemgr.try_lock().unwrap();
1971        assert!(have_lock.held());
1972        let guardmgr = GuardMgr::new(rt, statemgr.clone(), &TestConfig::default()).unwrap();
1973        let (con, mds) = testnet::construct_network().unwrap();
1974        let param_overrides = vec![
1975            // We make the sample size smaller than usual to compensate for the
1976            // small testing network.  (Otherwise, we'd sample the whole network,
1977            // and not be able to observe guards in the tests.)
1978            "guard-min-filtered-sample-size=5",
1979            // We choose only two primary guards, to make the tests easier to write.
1980            "guard-n-primary-guards=2",
1981            // We define any restriction that allows 75% or fewer of relays as "meaningful",
1982            // so that we can test the "restrictive" guard sample behavior, and to avoid
1983            "guard-meaningful-restriction-percent=75",
1984        ];
1985        let param_overrides: String =
1986            itertools::Itertools::intersperse(param_overrides.into_iter(), " ").collect();
1987        let override_p = param_overrides.parse().unwrap();
1988        let mut netdir = PartialNetDir::new(con, Some(&override_p));
1989        for md in mds {
1990            netdir.add_microdesc(md);
1991        }
1992        let netdir = netdir.unwrap_if_sufficient().unwrap();
1993
1994        (guardmgr, statemgr, netdir)
1995    }
1996
1997    #[test]
1998    #[allow(clippy::clone_on_copy)]
1999    fn simple_case() {
2000        test_with_all_runtimes!(|rt| async move {
2001            let (guardmgr, statemgr, netdir) = init(rt.clone());
2002            let usage = GuardUsage::default();
2003            guardmgr.install_test_netdir(&netdir);
2004
2005            let (id, mon, usable) = guardmgr.select_guard(usage).unwrap();
2006            // Report that the circuit succeeded.
2007            mon.succeeded();
2008
2009            // May we use the circuit?
2010            let usable = usable.await.unwrap();
2011            assert!(usable);
2012
2013            // Save the state...
2014            guardmgr.flush_msg_queue().await;
2015            guardmgr.store_persistent_state().unwrap();
2016            drop(guardmgr);
2017
2018            // Try reloading from the state...
2019            let guardmgr2 =
2020                GuardMgr::new(rt.clone(), statemgr.clone(), &TestConfig::default()).unwrap();
2021            guardmgr2.install_test_netdir(&netdir);
2022
2023            // Since the guard was confirmed, we should get the same one this time!
2024            let usage = GuardUsage::default();
2025            let (id2, _mon, _usable) = guardmgr2.select_guard(usage).unwrap();
2026            assert!(id2.same_relay_ids(&id));
2027        });
2028    }
2029
    #[test]
    fn simple_waiting() {
        // TODO(nickm): This test fails in rare cases; I suspect a
        // race condition somewhere.
        //
        // I've doubled up on the queue flushing in order to try to make the
        // race less likely, but we should investigate.
        test_with_all_runtimes!(|rt| async move {
            let (guardmgr, _statemgr, netdir) = init(rt);
            let u = GuardUsage::default();
            guardmgr.install_test_netdir(&netdir);

            // We'll have the first two guards fail, which should make us
            // try a non-primary guard.  (init() configures exactly two
            // primary guards.)
            let (id1, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
            mon.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race
            let (id2, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
            mon.failed();
            guardmgr.flush_msg_queue().await; // avoid race
            guardmgr.flush_msg_queue().await; // avoid race

            assert!(!id1.same_relay_ids(&id2));

            // Now we should get two sampled guards. They should be different.
            let (id3, mon3, usable3) = guardmgr.select_guard(u.clone()).unwrap();
            let (id4, mon4, usable4) = guardmgr.select_guard(u.clone()).unwrap();
            assert!(!id3.same_relay_ids(&id4));

            // Fail the first of them and succeed with the second, then wait
            // to learn the usability of both.
            let (u3, u4) = futures::join!(
                async {
                    mon3.failed();
                    guardmgr.flush_msg_queue().await; // avoid race
                    usable3.await.unwrap()
                },
                async {
                    mon4.succeeded();
                    usable4.await.unwrap()
                }
            );

            // The guard that failed must be reported as unusable; the one
            // that succeeded, as usable.
            assert_eq!((u3, u4), (false, true));
        });
    }
2075
2076    #[test]
2077    fn filtering_basics() {
2078        test_with_all_runtimes!(|rt| async move {
2079            let (guardmgr, _statemgr, netdir) = init(rt);
2080            let u = GuardUsage::default();
2081            let filter = {
2082                let mut f = GuardFilter::default();
2083                // All the addresses in the test network are {0,1,2,3,4}.0.0.3:9001.
2084                // Limit to only 2.0.0.0/8
2085                f.push_reachable_addresses(vec!["2.0.0.0/8:9001".parse().unwrap()]);
2086                f
2087            };
2088            guardmgr.set_filter(filter);
2089            guardmgr.install_test_netdir(&netdir);
2090            let (guard, _mon, _usable) = guardmgr.select_guard(u).unwrap();
2091            // Make sure that the filter worked.
2092            let addr = guard.addrs()[0];
2093            assert_eq!(addr, "2.0.0.3:9001".parse().unwrap());
2094        });
2095    }
2096
2097    #[test]
2098    fn external_status() {
2099        test_with_all_runtimes!(|rt| async move {
2100            let (guardmgr, _statemgr, netdir) = init(rt);
2101            let data_usage = GuardUsage::default();
2102            let dir_usage = GuardUsageBuilder::new()
2103                .kind(GuardUsageKind::OneHopDirectory)
2104                .build()
2105                .unwrap();
2106            guardmgr.install_test_netdir(&netdir);
2107            {
2108                // Override this parameter, so that we can get deterministic results below.
2109                let mut inner = guardmgr.inner.lock().unwrap();
2110                inner.params.dir_parallelism = 1;
2111            }
2112
2113            let (guard, mon, _usable) = guardmgr.select_guard(data_usage.clone()).unwrap();
2114            mon.succeeded();
2115
2116            // Record that this guard gave us a bad directory object.
2117            guardmgr.note_external_failure(&guard, ExternalActivity::DirCache);
2118
2119            // We ask for another guard, for data usage.  We should get the same
2120            // one as last time, since the director failure doesn't mean this
2121            // guard is useless as a primary guard.
2122            let (g2, mon, _usable) = guardmgr.select_guard(data_usage).unwrap();
2123            assert_eq!(g2.ed_identity(), guard.ed_identity());
2124            mon.succeeded();
2125
2126            // But if we ask for a guard for directory usage, we should get a
2127            // different one, since the last guard we gave out failed.
2128            let (g3, mon, _usable) = guardmgr.select_guard(dir_usage.clone()).unwrap();
2129            assert_ne!(g3.ed_identity(), guard.ed_identity());
2130            mon.succeeded();
2131
2132            // Now record a success for for directory usage.
2133            guardmgr.note_external_success(&guard, ExternalActivity::DirCache);
2134
2135            // Now that the guard is working as a cache, asking for it should get us the same guard.
2136            let (g4, _mon, _usable) = guardmgr.select_guard(dir_usage).unwrap();
2137            assert_eq!(g4.ed_identity(), guard.ed_identity());
2138        });
2139    }
2140
2141    #[cfg(feature = "vanguards")]
2142    #[test]
2143    fn vanguard_mode_ord() {
2144        assert!(VanguardMode::Disabled < VanguardMode::Lite);
2145        assert!(VanguardMode::Disabled < VanguardMode::Full);
2146        assert!(VanguardMode::Lite < VanguardMode::Full);
2147    }
2148}