tor_guardmgr/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_duration_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
46
47// TODO #1645 (either remove this, or decide to have it everywhere)
48#![cfg_attr(not(all(feature = "full", feature = "experimental")), allow(unused))]
49
50// Glossary:
51//     Primary guard
52//     Sample
53//     confirmed
54//     filtered
55
56use futures::channel::mpsc;
57use futures::task::SpawnExt;
58use itertools::Either;
59use serde::{Deserialize, Serialize};
60use std::collections::HashMap;
61use std::net::SocketAddr;
62use std::sync::{Arc, Mutex, Weak};
63use std::time::{Duration, Instant, SystemTime};
64#[cfg(feature = "bridge-client")]
65use tor_error::internal;
66use tor_linkspec::{OwnedChanTarget, OwnedCircTarget, RelayId, RelayIdSet};
67use tor_netdir::NetDirProvider;
68use tor_proto::ClockSkew;
69use tor_units::BoundedInt32;
70use tracing::{debug, info, instrument, trace, warn};
71
72use tor_config::{ExplicitOrAuto, impl_standard_builder};
73use tor_config::{ReconfigureError, impl_not_auto_value};
74use tor_config::{define_list_builder_accessors, define_list_builder_helper};
75use tor_netdir::{NetDir, Relay, params::NetParameters};
76use tor_persist::{DynStorageHandle, StateMgr};
77use tor_rtcompat::Runtime;
78
79#[cfg(feature = "bridge-client")]
80pub mod bridge;
81mod config;
82mod daemon;
83mod dirstatus;
84mod err;
85mod events;
86pub mod fallback;
87mod filter;
88mod guard;
89mod ids;
90mod pending;
91mod sample;
92mod skew;
93mod util;
94#[cfg(feature = "vanguards")]
95pub mod vanguards;
96
97#[cfg(not(feature = "bridge-client"))]
98#[path = "bridge_disabled.rs"]
99pub mod bridge;
100
101#[cfg(any(test, feature = "testing"))]
102pub use config::testing::TestConfig;
103
104#[cfg(test)]
105use oneshot_fused_workaround as oneshot;
106
107pub use config::GuardMgrConfig;
108pub use err::{GuardMgrConfigError, GuardMgrError, PickGuardError};
109pub use events::ClockSkewEvents;
110pub use filter::GuardFilter;
111pub use ids::FirstHopId;
112pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
113pub use skew::SkewEstimate;
114
115#[cfg(feature = "vanguards")]
116#[cfg_attr(docsrs, doc(cfg(feature = "vanguards")))]
117pub use vanguards::VanguardMgrError;
118
119use pending::{PendingRequest, RequestId};
120use sample::{GuardSet, Universe, UniverseRef};
121
122use crate::ids::{FirstHopIdInner, GuardId};
123
124use tor_config::ConfigBuildError;
125
/// A "guard manager" that selects and remembers a persistent set of
/// guard nodes.
///
/// This is a "handle"; clones of it share state.
#[derive(Clone)]
pub struct GuardMgr<R: Runtime> {
    /// An asynchronous runtime object.
    ///
    /// GuardMgr uses this runtime for timing, timeouts, and spawning
    /// tasks.
    runtime: R,

    /// Internal state for the guard manager.
    ///
    /// Every clone of this handle points at the same `GuardMgrInner`; all
    /// access to it goes through the `Mutex`.
    inner: Arc<Mutex<GuardMgrInner>>,
}
141
/// Helper type that holds the data used by a [`GuardMgr`].
///
/// This would just be a [`GuardMgr`], except that it needs to sit inside
/// a `Mutex` and get accessed by daemon tasks.
struct GuardMgrInner {
    /// The last time when we marked all of our primary guards as retriable.
    ///
    /// We keep track of this time so that we can rate-limit
    /// these attempts.
    last_primary_retry_time: Instant,

    /// Persistent guard manager state.
    ///
    /// This object remembers one or more persistent sets of guards that we can
    /// use, along with their relative priorities and statuses.
    guards: GuardSets,

    /// The current filter that we're using to decide which guards are
    /// supported.
    //
    // TODO: This field is duplicated in the current active [`GuardSet`]; we
    // should fix that.
    filter: GuardFilter,

    /// Configuration values derived from the consensus parameters.
    ///
    /// This is updated whenever the consensus parameters change.
    params: GuardParams,

    /// An mpsc channel, used to tell the task running in
    /// [`daemon::report_status_events`] about a new event to monitor.
    ///
    /// This uses an `UnboundedSender` so that we don't have to await
    /// while sending the message, which in turn allows the GuardMgr
    /// API to be simpler.  The risk, however, is that there's no
    /// backpressure in the event that the task running
    /// [`daemon::report_status_events`] fails to read from this
    /// channel.
    ctrl: mpsc::UnboundedSender<daemon::Msg>,

    /// Information about guards that we've given out, but where we have
    /// not yet heard whether the guard was successful.
    ///
    /// Upon learning whether the guard was successful, the pending
    /// requests in this map may be either moved to `waiting`, or
    /// discarded.
    ///
    /// There can be multiple pending requests corresponding to the
    /// same guard.
    pending: HashMap<RequestId, PendingRequest>,

    /// A list of pending requests for which we have heard that the
    /// guard was successful, but we have not yet decided whether the
    /// circuit may be used.
    ///
    /// There can be multiple waiting requests corresponding to the
    /// same guard.
    waiting: Vec<PendingRequest>,

    /// A list of fallback directories used to access the directory system
    /// when no other directory information is yet known.
    fallbacks: fallback::FallbackState,

    /// Location in which to store persistent state.
    storage: DynStorageHandle<GuardSets>,

    /// A sender object to publish changes in our estimated clock skew.
    send_skew: postage::watch::Sender<Option<SkewEstimate>>,

    /// A receiver object to hand out to observers who want to know about
    /// changes in our estimated clock skew.
    recv_skew: events::ClockSkewEvents,

    /// A netdir provider that we can use for adding new guards when
    /// insufficient guards are available.
    ///
    /// This has to be an Option so it can be initialized from None: at the
    /// time a GuardMgr is created, there is no NetDirProvider for it to use.
    netdir_provider: Option<Weak<dyn NetDirProvider>>,

    /// A bridge descriptor provider that we can use for discovering bridge
    /// descriptors.
    ///
    /// This has to be an Option so it can be initialized from None: at the time
    /// a GuardMgr is created, there is no BridgeDescProvider for it to use.
    #[cfg(feature = "bridge-client")]
    bridge_desc_provider: Option<Weak<dyn bridge::BridgeDescProvider>>,

    /// A list of the bridges that we are configured to use, or "None" if we are
    /// not configured to use bridges.
    #[cfg(feature = "bridge-client")]
    configured_bridges: Option<Arc<[bridge::BridgeConfig]>>,
}
234
/// A selector that tells us which [`GuardSet`] of several is currently in use.
///
/// Each variant names one of the guard sets held in a [`GuardSets`] object.
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, strum::EnumIter)]
enum GuardSetSelector {
    /// The default guard set is currently in use: that's the one that we use
    /// when we have no filter installed, or the filter permits most of the
    /// guards on the network.
    ///
    /// This is also the `Default` value of the selector.
    #[default]
    Default,
    /// A "restrictive" guard set is currently in use: that's the one that we
    /// use when we have a filter that excludes a large fraction of the guards
    /// on the network.
    Restricted,
    /// The "bridges" guard set is currently in use: we are selecting our guards
    /// from among the universe of configured bridges.
    #[cfg(feature = "bridge-client")]
    Bridges,
}
252
/// Describes the [`Universe`] that a guard sample should take its guards from.
///
/// See [`GuardSetSelector::universe_type`] for the mapping from guard sets
/// to universe types.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum UniverseType {
    /// Take information from the network directory.
    NetDir,
    /// Take information from the configured bridges.
    #[cfg(feature = "bridge-client")]
    BridgeSet,
}
262
263impl GuardSetSelector {
264    /// Return a description of which [`Universe`] this guard sample should take
265    /// its guards from.
266    fn universe_type(&self) -> UniverseType {
267        match self {
268            GuardSetSelector::Default | GuardSetSelector::Restricted => UniverseType::NetDir,
269            #[cfg(feature = "bridge-client")]
270            GuardSetSelector::Bridges => UniverseType::BridgeSet,
271        }
272    }
273}
274
/// Persistent state for a guard manager, as serialized to disk.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct GuardSets {
    /// Which set of guards is currently in use?
    ///
    /// This field is marked `#[serde(skip)]`, so it is not part of the
    /// serialized form.
    #[serde(skip)]
    active_set: GuardSetSelector,

    /// The default set of guards to use.
    ///
    /// We use this one when there is no filter, or the filter permits most of the
    /// guards on the network.
    default: GuardSet,

    /// A guard set to use when we have a restrictive filter.
    ///
    /// If absent from the serialized state, this starts out as the default
    /// (`#[serde(default)]`) value.
    #[serde(default)]
    restricted: GuardSet,

    /// A guard set sampled from our configured bridges.
    ///
    /// If absent from the serialized state, this starts out as the default
    /// (`#[serde(default)]`) value.
    #[serde(default)]
    #[cfg(feature = "bridge-client")]
    bridges: GuardSet,

    /// Unrecognized fields, including (possibly) other guard sets.
    ///
    /// These are captured via `#[serde(flatten)]` so that fields this build
    /// does not know about are kept alongside the known ones.
    #[serde(flatten)]
    remaining: HashMap<String, tor_persist::JsonValue>,
}
301
/// The key (filename) we use for storing our persistent guard state in the
/// `StateMgr`.
///
/// This is the key passed to `StateMgr::create_handle` in [`GuardMgr::new`].
///
/// We used to store this in a different format in a filename called
/// "default_guards" (before Arti 0.1.0).
const STORAGE_KEY: &str = "guards";
308
/// A description of which circuits to retire because of a configuration change.
///
/// This is `#[must_use]`: a caller that receives one is expected to act on it
/// (or explicitly discard it).
///
/// TODO(nickm): Eventually we will want to add a "Some" here, to support
/// removing only those circuits that correspond to no-longer-usable guards.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
#[non_exhaustive]
pub enum RetireCircuits {
    /// There's no need to retire any circuits.
    None,
    /// All circuits should be retired.
    All,
}
322
323impl<R: Runtime> GuardMgr<R> {
324    /// Create a new "empty" guard manager and launch its background tasks.
325    ///
326    /// It won't be able to hand out any guards until a [`NetDirProvider`] has
327    /// been installed.
328    pub fn new<S>(
329        runtime: R,
330        state_mgr: S,
331        config: &impl GuardMgrConfig,
332    ) -> Result<Self, GuardMgrError>
333    where
334        S: StateMgr + Send + Sync + 'static,
335    {
336        let (ctrl, rcv) = mpsc::unbounded();
337        let storage: DynStorageHandle<GuardSets> = state_mgr.create_handle(STORAGE_KEY);
338        // TODO(nickm): We should do something about the old state in
339        // `default_guards`.  Probably it would be best to delete it.  We could
340        // try to migrate it instead, but that's beyond the stability guarantee
341        // that we're getting at this stage of our (pre-0.1) development.
342        let state = storage.load()?.unwrap_or_default();
343
344        let (send_skew, recv_skew) = postage::watch::channel();
345        let recv_skew = ClockSkewEvents { inner: recv_skew };
346
347        let inner = Arc::new(Mutex::new(GuardMgrInner {
348            guards: state,
349            filter: GuardFilter::unfiltered(),
350            last_primary_retry_time: runtime.now(),
351            params: GuardParams::default(),
352            ctrl,
353            pending: HashMap::new(),
354            waiting: Vec::new(),
355            fallbacks: config.fallbacks().into(),
356            storage,
357            send_skew,
358            recv_skew,
359            netdir_provider: None,
360            #[cfg(feature = "bridge-client")]
361            bridge_desc_provider: None,
362            #[cfg(feature = "bridge-client")]
363            configured_bridges: None,
364        }));
365        #[cfg(feature = "bridge-client")]
366        {
367            let mut inner = inner.lock().expect("lock poisoned");
368            // TODO(nickm): This calls `GuardMgrInner::update`. Will we mind doing so before any
369            // providers are configured? I think not, but we should make sure.
370            let _: RetireCircuits =
371                inner.replace_bridge_config(config, runtime.wallclock(), runtime.now())?;
372        }
373        {
374            let weak_inner = Arc::downgrade(&inner);
375            let rt_clone = runtime.clone();
376            runtime
377                .spawn(daemon::report_status_events(rt_clone, weak_inner, rcv))
378                .map_err(|e| GuardMgrError::from_spawn("guard status event reporter", e))?;
379        }
380        {
381            let rt_clone = runtime.clone();
382            let weak_inner = Arc::downgrade(&inner);
383            runtime
384                .spawn(daemon::run_periodic(rt_clone, weak_inner))
385                .map_err(|e| GuardMgrError::from_spawn("periodic guard updater", e))?;
386        }
387        Ok(GuardMgr { runtime, inner })
388    }
389
390    /// Install a [`NetDirProvider`] for use by this guard manager.
391    ///
392    /// It will be used to keep the guards up-to-date with changes from the
393    /// network directory, and to find new guards when no NetDir is provided to
394    /// select_guard().
395    ///
396    /// TODO: we should eventually return some kind of a task handle from this
397    /// task, even though it is not strictly speaking periodic.
398    ///
399    /// The guardmgr retains only a `Weak` reference to `provider`,
400    /// `install_netdir_provider` downgrades it on entry,
401    // TODO add ref to document when https://gitlab.torproject.org/tpo/core/arti/-/issues/624
402    // is fixed.  Also, maybe take an owned `Weak` to start with.
403    //
404    /// # Panics
405    ///
406    /// Panics if a [`NetDirProvider`] is already installed.
407    pub fn install_netdir_provider(
408        &self,
409        provider: &Arc<dyn NetDirProvider>,
410    ) -> Result<(), GuardMgrError> {
411        let weak_provider = Arc::downgrade(provider);
412        {
413            let mut inner = self.inner.lock().expect("Poisoned lock");
414            assert!(inner.netdir_provider.is_none());
415            inner.netdir_provider = Some(weak_provider.clone());
416        }
417        let weak_inner = Arc::downgrade(&self.inner);
418        let rt_clone = self.runtime.clone();
419        self.runtime
420            .spawn(daemon::keep_netdir_updated(
421                rt_clone,
422                weak_inner,
423                weak_provider,
424            ))
425            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
426        Ok(())
427    }
428
429    /// Configure a new [`bridge::BridgeDescProvider`] for this [`GuardMgr`].
430    ///
431    /// It will be used to learn about changes in the set of available bridge
432    /// descriptors; we'll inform it whenever our desired set of bridge
433    /// descriptors changes.
434    ///
435    /// TODO: Same todo as in `install_netdir_provider` about task handles.
436    ///
437    /// # Panics
438    ///
439    /// Panics if a [`bridge::BridgeDescProvider`] is already installed.
440    #[cfg(feature = "bridge-client")]
441    pub fn install_bridge_desc_provider(
442        &self,
443        provider: &Arc<dyn bridge::BridgeDescProvider>,
444    ) -> Result<(), GuardMgrError> {
445        let weak_provider = Arc::downgrade(provider);
446        {
447            let mut inner = self.inner.lock().expect("Poisoned lock");
448            assert!(inner.bridge_desc_provider.is_none());
449            inner.bridge_desc_provider = Some(weak_provider.clone());
450        }
451
452        let weak_inner = Arc::downgrade(&self.inner);
453        let rt_clone = self.runtime.clone();
454        self.runtime
455            .spawn(daemon::keep_bridge_descs_updated(
456                rt_clone,
457                weak_inner,
458                weak_provider,
459            ))
460            .map_err(|e| GuardMgrError::from_spawn("periodic guard netdir updater", e))?;
461
462        Ok(())
463    }
464
465    /// Flush our current guard state to the state manager, if there
466    /// is any unsaved state.
467    pub fn store_persistent_state(&self) -> Result<(), GuardMgrError> {
468        let inner = self.inner.lock().expect("Poisoned lock");
469        trace!("Flushing guard state to disk.");
470        inner.storage.store(&inner.guards)?;
471        Ok(())
472    }
473
474    /// Reload state from the state manager.
475    ///
476    /// We only call this method if we _don't_ have the lock on the state
477    /// files.  If we have the lock, we only want to save.
478    pub fn reload_persistent_state(&self) -> Result<(), GuardMgrError> {
479        let mut inner = self.inner.lock().expect("Poisoned lock");
480        if let Some(new_guards) = inner.storage.load()? {
481            inner.replace_guards_with(new_guards, self.runtime.wallclock(), self.runtime.now());
482        }
483        Ok(())
484    }
485
486    /// Switch from having an unowned persistent state to having an owned one.
487    ///
488    /// Requires that we hold the lock on the state files.
489    pub fn upgrade_to_owned_persistent_state(&self) -> Result<(), GuardMgrError> {
490        let mut inner = self.inner.lock().expect("Poisoned lock");
491        debug_assert!(inner.storage.can_store());
492        let new_guards = inner.storage.load()?.unwrap_or_default();
493        let wallclock = self.runtime.wallclock();
494        let now = self.runtime.now();
495        inner.replace_guards_with(new_guards, wallclock, now);
496        Ok(())
497    }
498
499    /// Return true if `netdir` has enough information to safely become our new netdir.
500    pub fn netdir_is_sufficient(&self, netdir: &NetDir) -> bool {
501        let mut inner = self.inner.lock().expect("Poisoned lock");
502        if inner.guards.active_set.universe_type() != UniverseType::NetDir {
503            // If we aren't using the netdir, this isn't something we want to look at.
504            return true;
505        }
506        inner
507            .guards
508            .active_guards_mut()
509            .n_primary_without_id_info_in(netdir)
510            == 0
511    }
512
513    /// Mark every guard as potentially retriable, regardless of how recently we
514    /// failed to connect to it.
515    pub fn mark_all_guards_retriable(&self) {
516        let mut inner = self.inner.lock().expect("Poisoned lock");
517        inner.guards.active_guards_mut().mark_all_guards_retriable();
518    }
519
520    /// Configure this guardmgr to use a fixed [`NetDir`] instead of a provider.
521    ///
522    /// This function is for testing only, and is exclusive with
523    /// `install_netdir_provider`.
524    ///
525    /// # Panics
526    ///
527    /// Panics if any [`NetDirProvider`] has already been installed.
528    #[cfg(any(test, feature = "testing"))]
529    pub fn install_test_netdir(&self, netdir: &NetDir) {
530        use tor_netdir::testprovider::TestNetDirProvider;
531        let wallclock = self.runtime.wallclock();
532        let now = self.runtime.now();
533        let netdir_provider: Arc<dyn NetDirProvider> =
534            Arc::new(TestNetDirProvider::from(netdir.clone()));
535        self.install_netdir_provider(&netdir_provider)
536            .expect("Couldn't install testing network provider");
537
538        let mut inner = self.inner.lock().expect("Poisoned lock");
539        inner.update(wallclock, now);
540    }
541
542    /// Replace the configuration in this `GuardMgr` with `config`.
543    pub fn reconfigure(
544        &self,
545        config: &impl GuardMgrConfig,
546    ) -> Result<RetireCircuits, ReconfigureError> {
547        let mut inner = self.inner.lock().expect("Poisoned lock");
548        // Change the set of configured fallbacks.
549        {
550            let mut fallbacks: fallback::FallbackState = config.fallbacks().into();
551            std::mem::swap(&mut inner.fallbacks, &mut fallbacks);
552            inner.fallbacks.take_status_from(fallbacks);
553        }
554        // If we are built to use bridges, change the bridge configuration.
555        #[cfg(feature = "bridge-client")]
556        {
557            let wallclock = self.runtime.wallclock();
558            let now = self.runtime.now();
559            Ok(inner.replace_bridge_config(config, wallclock, now)?)
560        }
561        // If we are built to use bridges, change the bridge configuration.
562        #[cfg(not(feature = "bridge-client"))]
563        {
564            Ok(RetireCircuits::None)
565        }
566    }
567
568    /// Replace the current [`GuardFilter`] used by this `GuardMgr`.
569    // TODO should this be part of the config?
570    pub fn set_filter(&self, filter: GuardFilter) {
571        let wallclock = self.runtime.wallclock();
572        let now = self.runtime.now();
573        let mut inner = self.inner.lock().expect("Poisoned lock");
574        inner.set_filter(filter, wallclock, now);
575    }
576
577    /// Select a guard for a given [`GuardUsage`].
578    ///
579    /// On success, we return a [`FirstHop`] object to identify which
580    /// guard we have picked, a [`GuardMonitor`] object that the
581    /// caller can use to report whether its attempt to use the guard
582    /// succeeded or failed, and a [`GuardUsable`] future that the
583    /// caller can use to decide whether a circuit built through the
584    /// guard is actually safe to use.
585    ///
586    /// That last point is important: It's okay to build a circuit
587    /// through the guard returned by this function, but you can't
588    /// actually use it for traffic unless the [`GuardUsable`] future
589    /// yields "true".
590    pub fn select_guard(
591        &self,
592        usage: GuardUsage,
593    ) -> Result<(FirstHop, GuardMonitor, GuardUsable), PickGuardError> {
594        let now = self.runtime.now();
595        let wallclock = self.runtime.wallclock();
596
597        let mut inner = self.inner.lock().expect("Poisoned lock");
598
599        // (I am not 100% sure that we need to consider_all_retries here, but
600        // it should _probably_ not hurt.)
601        inner.guards.active_guards_mut().consider_all_retries(now);
602
603        let (origin, guard) = inner.select_guard_with_expand(&usage, now, wallclock)?;
604        trace!(?guard, ?usage, "Guard selected");
605
606        let (usable, usable_sender) = if origin.usable_immediately() {
607            (GuardUsable::new_usable_immediately(), None)
608        } else {
609            let (u, snd) = GuardUsable::new_uncertain();
610            (u, Some(snd))
611        };
612        let request_id = pending::RequestId::next();
613        let ctrl = inner.ctrl.clone();
614        let monitor = GuardMonitor::new(request_id, ctrl);
615
616        // Note that the network can be down even if all the primary guards
617        // are not yet marked as unreachable.  But according to guard-spec we
618        // don't want to acknowledge the net as down before that point, since
619        // we don't mark all the primary guards as retriable unless
620        // we've been forced to non-primary guards.
621        let net_has_been_down =
622            if let Some(duration) = tor_proto::time_since_last_incoming_traffic() {
623                inner
624                    .guards
625                    .active_guards_mut()
626                    .all_primary_guards_are_unreachable()
627                    && duration >= inner.params.internet_down_timeout
628            } else {
629                // TODO: Is this the correct behavior in this case?
630                false
631            };
632
633        let pending_request = pending::PendingRequest::new(
634            guard.first_hop_id(),
635            usage,
636            usable_sender,
637            net_has_been_down,
638        );
639        inner.pending.insert(request_id, pending_request);
640
641        match &guard.sample {
642            Some(sample) => {
643                let guard_id = GuardId::from_relay_ids(&guard);
644                inner
645                    .guards
646                    .guards_mut(sample)
647                    .record_attempt(&guard_id, now);
648            }
649            None => {
650                // We don't record attempts for fallbacks; we only care when
651                // they have failed.
652            }
653        }
654
655        Ok((guard, monitor, usable))
656    }
657
658    /// Record that _after_ we built a circuit with a guard, something described
659    /// in `external_failure` went wrong with it.
660    pub fn note_external_failure<T>(&self, identity: &T, external_failure: ExternalActivity)
661    where
662        T: tor_linkspec::HasRelayIds + ?Sized,
663    {
664        let now = self.runtime.now();
665        let mut inner = self.inner.lock().expect("Poisoned lock");
666        let ids = inner.lookup_ids(identity);
667        for id in ids {
668            match &id.0 {
669                FirstHopIdInner::Guard(sample, id) => {
670                    inner
671                        .guards
672                        .guards_mut(sample)
673                        .record_failure(id, Some(external_failure), now);
674                }
675                FirstHopIdInner::Fallback(id) => {
676                    if external_failure == ExternalActivity::DirCache {
677                        inner.fallbacks.note_failure(id, now);
678                    }
679                }
680            }
681        }
682    }
683
684    /// Record that _after_ we built a circuit with a guard, some activity
685    /// described in `external_activity` was successful with it.
686    pub fn note_external_success<T>(&self, identity: &T, external_activity: ExternalActivity)
687    where
688        T: tor_linkspec::HasRelayIds + ?Sized,
689    {
690        let mut inner = self.inner.lock().expect("Poisoned lock");
691
692        inner.record_external_success(identity, external_activity, self.runtime.wallclock());
693    }
694
695    /// Return a stream of events about our estimated clock skew; these events
696    /// are `None` when we don't have enough information to make an estimate,
697    /// and `Some(`[`SkewEstimate`]`)` otherwise.
698    ///
699    /// Note that this stream can be lossy: if the estimate changes more than
700    /// one before you read from the stream, you might only get the most recent
701    /// update.
702    pub fn skew_events(&self) -> ClockSkewEvents {
703        let inner = self.inner.lock().expect("Poisoned lock");
704        inner.recv_skew.clone()
705    }
706
707    /// Ensure that the message queue is flushed before proceeding to
708    /// the next step.  Used for testing.
709    #[cfg(test)]
710    async fn flush_msg_queue(&self) {
711        let (snd, rcv) = oneshot::channel();
712        let pingmsg = daemon::Msg::Ping(snd);
713        {
714            let inner = self.inner.lock().expect("Poisoned lock");
715            inner
716                .ctrl
717                .unbounded_send(pingmsg)
718                .expect("Guard observer task exited prematurely.");
719        }
720        let _ = rcv.await;
721    }
722}
723
/// An activity that can succeed or fail, and whose success or failure can be
/// attributed to a guard.
///
/// Passed to [`GuardMgr::note_external_failure`] and
/// [`GuardMgr::note_external_success`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ExternalActivity {
    /// The activity of using the guard as a directory cache.
    DirCache,
}
732
733impl GuardSets {
734    /// Return a reference to the currently active set of guards.
735    ///
736    /// (That's easy enough for now, since there is never more than one set of
737    /// guards.  But eventually that will change, as we add support for more
738    /// complex filter types, and for bridge relays. Those will use separate
739    /// `GuardSet` instances, and this accessor will choose the right one.)
740    fn active_guards(&self) -> &GuardSet {
741        self.guards(&self.active_set)
742    }
743
744    /// Return the set of guards corresponding to the provided selector.
745    fn guards(&self, selector: &GuardSetSelector) -> &GuardSet {
746        match selector {
747            GuardSetSelector::Default => &self.default,
748            GuardSetSelector::Restricted => &self.restricted,
749            #[cfg(feature = "bridge-client")]
750            GuardSetSelector::Bridges => &self.bridges,
751        }
752    }
753
754    /// Return a mutable reference to the currently active set of guards.
755    fn active_guards_mut(&mut self) -> &mut GuardSet {
756        self.guards_mut(&self.active_set.clone())
757    }
758
759    /// Return a mutable reference to the set of guards corresponding to the
760    /// provided selector.
761    fn guards_mut(&mut self, selector: &GuardSetSelector) -> &mut GuardSet {
762        match selector {
763            GuardSetSelector::Default => &mut self.default,
764            GuardSetSelector::Restricted => &mut self.restricted,
765            #[cfg(feature = "bridge-client")]
766            GuardSetSelector::Bridges => &mut self.bridges,
767        }
768    }
769
770    /// Update all non-persistent state for the guards in this object with the
771    /// state in `other`.
772    fn copy_status_from(&mut self, mut other: GuardSets) {
773        use strum::IntoEnumIterator;
774        for sample in GuardSetSelector::iter() {
775            self.guards_mut(&sample)
776                .copy_ephemeral_status_into_newly_loaded_state(std::mem::take(
777                    other.guards_mut(&sample),
778                ));
779        }
780        self.active_set = other.active_set;
781    }
782}
783
784impl GuardMgrInner {
785    /// Look up the latest [`NetDir`] (if there is one) from our
786    /// [`NetDirProvider`] (if we have one).
787    fn timely_netdir(&self) -> Option<Arc<NetDir>> {
788        self.netdir_provider
789            .as_ref()
790            .and_then(Weak::upgrade)
791            .and_then(|np| np.timely_netdir().ok())
792    }
793
794    /// Look up the latest [`BridgeDescList`](bridge::BridgeDescList) (if there
795    /// is one) from our [`BridgeDescProvider`](bridge::BridgeDescProvider) (if
796    /// we have one).
797    #[cfg(feature = "bridge-client")]
798    fn latest_bridge_desc_list(&self) -> Option<Arc<bridge::BridgeDescList>> {
799        self.bridge_desc_provider
800            .as_ref()
801            .and_then(Weak::upgrade)
802            .map(|bp| bp.bridges())
803    }
804
805    /// Run a function that takes `&mut self` and an optional NetDir.
806    ///
807    /// We try to use the netdir from our [`NetDirProvider`] (if we have one).
808    /// Therefore, although its _parameters_ are suitable for every
809    /// [`GuardSet`], its _contents_ might not be. For those, call
810    /// [`with_opt_universe`](Self::with_opt_universe) instead.
811    //
812    // This function exists to handle the lifetime mess where sometimes the
813    // resulting NetDir will borrow from `netdir`, and sometimes it will borrow
814    // from an Arc returned by `self.latest_netdir()`.
815    fn with_opt_netdir<F, T>(&mut self, func: F) -> T
816    where
817        F: FnOnce(&mut Self, Option<&NetDir>) -> T,
818    {
819        if let Some(nd) = self.timely_netdir() {
820            func(self, Some(nd.as_ref()))
821        } else {
822            func(self, None)
823        }
824    }
825
826    /// Return the latest `BridgeSet` based on our `BridgeDescProvider` and our
827    /// configured bridges.
828    ///
829    /// Returns `None` if we are not configured to use bridges.
830    #[cfg(feature = "bridge-client")]
831    fn latest_bridge_set(&self) -> Option<bridge::BridgeSet> {
832        let bridge_config = self.configured_bridges.as_ref()?.clone();
833        let bridge_descs = self.latest_bridge_desc_list();
834        Some(bridge::BridgeSet::new(bridge_config, bridge_descs))
835    }
836
837    /// Run a function that takes `&mut self` and an optional [`UniverseRef`].
838    ///
839    /// We try to get a universe from the appropriate source for the current
840    /// active guard set.
841    fn with_opt_universe<F, T>(&mut self, func: F) -> T
842    where
843        F: FnOnce(&mut Self, Option<&UniverseRef>) -> T,
844    {
845        // TODO: it might be nice to make `func` take an GuardSet and a set of
846        // parameters, so we can't get the active set wrong. Doing that will
847        // require a fair amount of refactoring so that the borrow checker is
848        // happy, however.
849        match self.guards.active_set.universe_type() {
850            UniverseType::NetDir => {
851                if let Some(nd) = self.timely_netdir() {
852                    func(self, Some(&UniverseRef::NetDir(nd)))
853                } else {
854                    func(self, None)
855                }
856            }
857            #[cfg(feature = "bridge-client")]
858            UniverseType::BridgeSet => func(
859                self,
860                self.latest_bridge_set()
861                    .map(UniverseRef::BridgeSet)
862                    .as_ref(),
863            ),
864        }
865    }
866
867    /// Update the status of all guards in the active set, based on the passage
868    /// of time, our configuration, and the relevant Universe for our active
869    /// set.
870    #[instrument(skip_all, level = "trace")]
871    fn update(&mut self, wallclock: SystemTime, now: Instant) {
872        self.with_opt_netdir(|this, netdir| {
873            // Here we update our parameters from the latest NetDir, and check
874            // whether we need to change to a (non)-restrictive GuardSet based
875            // on those parameters and our configured filter.
876            //
877            // This uses a NetDir unconditionally, since we always want to take
878            // the network parameters our parameters from the consensus even if
879            // the guards themselves are from a BridgeSet.
880            this.update_active_set_params_and_filter(netdir);
881        });
882        self.with_opt_universe(|this, univ| {
883            // Now we update the set of guards themselves based on the
884            // Universe, which is either the latest NetDir, or the latest
885            // BridgeSet—depending on what the GuardSet wants.
886            Self::update_guardset_internal(
887                &this.params,
888                wallclock,
889                this.guards.active_set.universe_type(),
890                this.guards.active_guards_mut(),
891                univ,
892            );
893            #[cfg(feature = "bridge-client")]
894            this.update_desired_descriptors(now);
895            #[cfg(not(feature = "bridge-client"))]
896            let _ = now;
897        });
898    }
899
900    /// Replace our bridge configuration with the one from `new_config`.
901    #[cfg(feature = "bridge-client")]
902    fn replace_bridge_config(
903        &mut self,
904        new_config: &impl GuardMgrConfig,
905        wallclock: SystemTime,
906        now: Instant,
907    ) -> Result<RetireCircuits, GuardMgrConfigError> {
908        match (&self.configured_bridges, new_config.bridges_enabled()) {
909            (None, false) => {
910                assert_ne!(
911                    self.guards.active_set.universe_type(),
912                    UniverseType::BridgeSet
913                );
914                return Ok(RetireCircuits::None); // nothing to do
915            }
916            (_, true) if !self.storage.can_store() => {
917                // TODO: Ideally we would try to upgrade, obtaining an exclusive lock,
918                // but `StorageHandle` currently lacks a method for that.
919                return Err(GuardMgrConfigError::NoLock("bridges configured".into()));
920            }
921            (Some(current_bridges), true) if new_config.bridges() == current_bridges.as_ref() => {
922                assert_eq!(
923                    self.guards.active_set.universe_type(),
924                    UniverseType::BridgeSet
925                );
926                return Ok(RetireCircuits::None); // nothing to do.
927            }
928            (_, true) => {
929                self.configured_bridges = Some(new_config.bridges().into());
930                self.guards.active_set = GuardSetSelector::Bridges;
931            }
932            (_, false) => {
933                self.configured_bridges = None;
934                self.guards.active_set = GuardSetSelector::Default;
935            }
936        }
937
938        // If we have gotten here, we have changed the set of bridges, changed
939        // which set is active, or changed them both.  We need to make sure that
940        // our `GuardSet` object is up-to-date with our configuration.
941        self.update(wallclock, now);
942
943        // We also need to tell the caller that its circuits are no good any
944        // more.
945        //
946        // TODO(nickm): Someday we can do this more judiciously by retuning
947        // "Some" in the case where we're still using bridges but our new bridge
948        // set contains different elements; see comment on RetireCircuits.
949        //
950        // TODO(nickm): We could also safely return RetireCircuits::None if we
951        // are using bridges, and our new bridge list is a superset of the older
952        // one.
953        Ok(RetireCircuits::All)
954    }
955
956    /// Update our parameters, our selection (based on network parameters and
957    /// configuration), and make sure the active GuardSet has the right
958    /// configuration itself.
959    ///
960    /// We should call this whenever the NetDir's parameters change, or whenever
961    /// our filter changes.  We do not need to call it for new elements arriving
962    /// in our Universe, since those do not affect anything here.
963    ///
964    /// We should also call this whenever a new GuardSet becomes active for any
965    /// reason _other_ than just having called this function.
966    ///
967    /// (This function is only invoked from `update`, which should be called
968    /// under the above circumstances.)
969    fn update_active_set_params_and_filter(&mut self, netdir: Option<&NetDir>) {
970        // Set the parameters.  These always come from the NetDir, even if this
971        // is a bridge set.
972        if let Some(netdir) = netdir {
973            match GuardParams::try_from(netdir.params()) {
974                Ok(params) => self.params = params,
975                Err(e) => warn!("Unusable guard parameters from consensus: {}", e),
976            }
977
978            self.select_guard_set_based_on_filter(netdir);
979        }
980
981        // Change the filter, if it doesn't match what the guards have.
982        //
983        // TODO(nickm): We could use a "dirty" flag or something to decide
984        // whether we need to call set_filter, if this comparison starts to show
985        // up in profiles.
986        if self.guards.active_guards().filter() != &self.filter {
987            let restrictive = self.guards.active_set == GuardSetSelector::Restricted;
988            self.guards
989                .active_guards_mut()
990                .set_filter(self.filter.clone(), restrictive);
991        }
992    }
993
994    /// Update the status of every guard in `active_guards`, and expand it as
995    /// needed.
996    ///
997    /// This function doesn't take `&self`, to make sure that we are only
998    /// affecting a single `GuardSet`, and to avoid confusing the borrow
999    /// checker.
1000    ///
1001    /// We should call this whenever the contents of the universe have changed.
1002    ///
1003    /// We should also call this whenever a new GuardSet becomes active.
1004    fn update_guardset_internal<U: Universe>(
1005        params: &GuardParams,
1006        now: SystemTime,
1007        universe_type: UniverseType,
1008        active_guards: &mut GuardSet,
1009        universe: Option<&U>,
1010    ) -> ExtendedStatus {
1011        // Expire guards.  Do that early, in case doing so makes it clear that
1012        // we need to grab more guards or mark others as primary.
1013        active_guards.expire_old_guards(params, now);
1014
1015        let extended = if let Some(universe) = universe {
1016            // TODO: This check here may be completely unnecessary. I inserted
1017            // it back in 5ac0fcb7ef603e0d14 because I was originally concerned
1018            // it might be undesirable to list a primary guard as "missing dir
1019            // info" (and therefore unusable) if we were expecting to get its
1020            // microdescriptor "very soon."
1021            //
1022            // But due to the other check in `netdir_is_sufficient`, we
1023            // shouldn't be installing a netdir until it has microdescs for all
1024            // of the (non-bridge) primary guards that it lists. - nickm
1025            let n = active_guards.n_primary_without_id_info_in(universe);
1026            if n > 0 && universe_type == UniverseType::NetDir {
1027                // We are missing the information from a NetDir needed to see
1028                // whether our primary guards are listed, so we shouldn't update
1029                // our guard status.
1030                //
1031                // We don't want to do this check if we are using bridges, since
1032                // a missing bridge descriptor is not guaranteed to temporary
1033                // problem in the same way that a missing microdescriptor is.
1034                // (When a bridge desc is missing, the bridge could be down or
1035                // unreachable, and nobody else can help us. But if a microdesc
1036                // is missing, we just need to find a cache that has it.)
1037                trace!(
1038                    n_primary_without_id_info = n,
1039                    "Not extending guardset, missing information."
1040                );
1041                return ExtendedStatus::No;
1042            }
1043            active_guards.update_status_from_dir(universe);
1044            active_guards.extend_sample_as_needed(now, params, universe)
1045        } else {
1046            trace!("Not extending guardset, no universe given.");
1047            ExtendedStatus::No
1048        };
1049
1050        active_guards.select_primary_guards(params);
1051
1052        extended
1053    }
1054
1055    /// If using bridges, tell the BridgeDescProvider which descriptors we want.
1056    /// We need to check this *after* we select our primary guards.
1057    #[cfg(feature = "bridge-client")]
1058    fn update_desired_descriptors(&mut self, now: Instant) {
1059        if self.guards.active_set.universe_type() != UniverseType::BridgeSet {
1060            return;
1061        }
1062
1063        let provider = self.bridge_desc_provider.as_ref().and_then(Weak::upgrade);
1064        let bridge_set = self.latest_bridge_set();
1065        if let (Some(provider), Some(bridge_set)) = (provider, bridge_set) {
1066            let desired: Vec<_> = self
1067                .guards
1068                .active_guards()
1069                .descriptors_to_request(now, &self.params)
1070                .into_iter()
1071                .flat_map(|guard| bridge_set.bridge_by_guard(guard))
1072                .cloned()
1073                .collect();
1074
1075            provider.set_bridges(&desired);
1076        }
1077    }
1078
1079    /// Replace the active guard state with `new_state`, preserving
1080    /// non-persistent state for any guards that are retained.
1081    fn replace_guards_with(
1082        &mut self,
1083        mut new_guards: GuardSets,
1084        wallclock: SystemTime,
1085        now: Instant,
1086    ) {
1087        std::mem::swap(&mut self.guards, &mut new_guards);
1088        self.guards.copy_status_from(new_guards);
1089        self.update(wallclock, now);
1090    }
1091
1092    /// Update which guard set is active based on the current filter and the
1093    /// provided netdir.
1094    ///
1095    /// After calling this function, the new guard set's filter may be
1096    /// out-of-date: be sure to call `set_filter` as appropriate.
1097    fn select_guard_set_based_on_filter(&mut self, netdir: &NetDir) {
1098        // In general, we'd like to use the restricted set if we're under the
1099        // threshold, and the default set if we're over the threshold.  But if
1100        // we're sitting close to the threshold, we want to avoid flapping back
1101        // and forth, so we only change when we're more than 5% "off" from
1102        // whatever our current setting is.
1103        //
1104        // (See guard-spec section 2 for more information.)
1105        let offset = match self.guards.active_set {
1106            GuardSetSelector::Default => -0.05,
1107            GuardSetSelector::Restricted => 0.05,
1108            // If we're using bridges, then we don't switch between the other guard sets based on on the filter at all.
1109            #[cfg(feature = "bridge-client")]
1110            GuardSetSelector::Bridges => return,
1111        };
1112        let frac_permitted = self.filter.frac_bw_permitted(netdir);
1113        let threshold = self.params.filter_threshold + offset;
1114        let new_choice = if frac_permitted < threshold {
1115            GuardSetSelector::Restricted
1116        } else {
1117            GuardSetSelector::Default
1118        };
1119
1120        if new_choice != self.guards.active_set {
1121            info!(
1122                "Guard selection changed; we are now using the {:?} guard set",
1123                &new_choice
1124            );
1125
1126            self.guards.active_set = new_choice;
1127
1128            if frac_permitted < self.params.extreme_threshold {
1129                warn!(
1130                    "The number of guards permitted is smaller than the recommended minimum of {:.0}%.",
1131                    self.params.extreme_threshold * 100.0,
1132                );
1133            }
1134        }
1135    }
1136
1137    /// Mark all of our primary guards as retriable, if we haven't done
1138    /// so since long enough before `now`.
1139    ///
1140    /// We want to call this function whenever a guard attempt succeeds,
1141    /// if the internet seemed to be down when the guard attempt was
1142    /// first launched.
1143    fn maybe_retry_primary_guards(&mut self, now: Instant) {
1144        // We don't actually want to mark our primary guards as
1145        // retriable more than once per internet_down_timeout: after
1146        // the first time, we would just be noticing the same "coming
1147        // back online" event more than once.
1148        let interval = self.params.internet_down_timeout;
1149        if self.last_primary_retry_time + interval <= now {
1150            debug!(
1151                "Successfully reached a guard after a while off the internet; marking all primary guards retriable."
1152            );
1153            self.guards
1154                .active_guards_mut()
1155                .mark_primary_guards_retriable();
1156            self.last_primary_retry_time = now;
1157        }
1158    }
1159
    /// Replace the current GuardFilter with `filter`.
    ///
    /// After storing the new filter, this calls [`update`](Self::update) with
    /// the provided `wallclock` and `now`.
    fn set_filter(&mut self, filter: GuardFilter, wallclock: SystemTime, now: Instant) {
        self.filter = filter;
        // `update` is what compares `self.filter` with the filter on the
        // active guard set, and installs it there if the two differ.
        self.update(wallclock, now);
    }
1165
    /// Called when the circuit manager reports (via [`GuardMonitor`]) that
    /// a guard succeeded or failed.
    ///
    /// Changes the guard's status as appropriate, and updates the pending
    /// request as needed.
    ///
    /// `request_id` identifies the pending request being reported on; if no
    /// such request is pending, a warning is logged and the report itself is
    /// otherwise ignored.  `skew`, if present, is recorded as a clock skew
    /// observation for the first hop that was used.  `runtime` supplies the
    /// current time.
    ///
    /// Whether or not the request was found, primary guards are reselected
    /// and waiting requests are re-examined before returning.
    #[allow(clippy::cognitive_complexity)]
    pub(crate) fn handle_msg(
        &mut self,
        request_id: RequestId,
        status: GuardStatus,
        skew: Option<ClockSkew>,
        runtime: &impl tor_rtcompat::SleepProvider,
    ) {
        if let Some(mut pending) = self.pending.remove(&request_id) {
            // If there was a pending request matching this RequestId, great!
            let guard_id = pending.guard_id();
            trace!(?guard_id, ?status, "Received report of guard status");

            // First, handle the skew report (if any)
            if let Some(skew) = skew {
                let now = runtime.now();
                let observation = skew::SkewObservation { skew, when: now };

                match &guard_id.0 {
                    // NOTE(review): this records the skew on the *active*
                    // guard set and ignores the sample stored in the ID,
                    // whereas the status arms below all use
                    // `guards_mut(sample)`.  Confirm whether that difference
                    // is intentional.
                    FirstHopIdInner::Guard(_, id) => {
                        self.guards.active_guards_mut().record_skew(id, observation);
                    }
                    FirstHopIdInner::Fallback(id) => {
                        self.fallbacks.note_skew(id, observation);
                    }
                }
                // TODO: We call this whenever we receive an observed clock
                // skew. That's not the perfect timing for two reasons.  First
                // off, it might be too frequent: it does an O(n) calculation,
                // which isn't ideal.  Second, it might be too infrequent: after
                // an hour has passed, a given observation won't be up-to-date
                // any more, and we might want to recalculate the skew
                // accordingly.
                self.update_skew(now);
            }

            // Next, act on the reported status.  Fallbacks and guards are
            // handled separately.
            match (status, &guard_id.0) {
                (GuardStatus::Failure, FirstHopIdInner::Fallback(id)) => {
                    // We used a fallback, and we weren't able to build a circuit through it.
                    self.fallbacks.note_failure(id, runtime.now());
                }
                (_, FirstHopIdInner::Fallback(_)) => {
                    // We don't record any other kind of circuit activity if we
                    // took the entry from the fallback list.
                }
                (GuardStatus::Success, FirstHopIdInner::Guard(sample, id)) => {
                    // If we had gone too long without any net activity when we
                    // gave out this guard, and now we're seeing a circuit
                    // succeed, tell the primary guards that they might be
                    // retriable.
                    if pending.net_has_been_down() {
                        self.maybe_retry_primary_guards(runtime.now());
                    }

                    // The guard succeeded.  Tell the GuardSet.
                    self.guards.guards_mut(sample).record_success(
                        id,
                        &self.params,
                        None,
                        runtime.wallclock(),
                    );
                    // Either tell the request whether the guard is
                    // usable, or schedule it as a "waiting" request.
                    if let Some(usable) = self.guard_usability_status(&pending, runtime.now()) {
                        trace!(?guard_id, usable, "Known usability status");
                        pending.reply(usable);
                    } else {
                        // This is the one case where we can't use the
                        // guard yet.
                        trace!(?guard_id, "Not able to answer right now");
                        pending.mark_waiting(runtime.now());
                        self.waiting.push(pending);
                    }
                }
                // In each of the remaining cases the guard set that issued
                // the guard is told what happened, and the request is
                // answered with `false`.
                (GuardStatus::Failure, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards
                        .guards_mut(sample)
                        .record_failure(id, None, runtime.now());
                    pending.reply(false);
                }
                (GuardStatus::AttemptAbandoned, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards.guards_mut(sample).record_attempt_abandoned(id);
                    pending.reply(false);
                }
                (GuardStatus::Indeterminate, FirstHopIdInner::Guard(sample, id)) => {
                    self.guards
                        .guards_mut(sample)
                        .record_indeterminate_result(id);
                    pending.reply(false);
                }
            };
        } else {
            warn!(
                "Got a status {:?} for a request {:?} that wasn't pending",
                status, request_id
            );
        }

        // We might need to update the primary guards based on changes in the
        // status of guards above.
        self.guards
            .active_guards_mut()
            .select_primary_guards(&self.params);

        // Some waiting request may just have become ready (usable or
        // not); we need to give them the information they're waiting
        // for.
        self.expire_and_answer_pending_requests(runtime.now());
    }
1280
1281    /// Helper to implement `GuardMgr::note_external_success()`.
1282    ///
1283    /// (This has to be a separate function so that we can borrow params while
1284    /// we have `mut self` borrowed.)
1285    fn record_external_success<T>(
1286        &mut self,
1287        identity: &T,
1288        external_activity: ExternalActivity,
1289        now: SystemTime,
1290    ) where
1291        T: tor_linkspec::HasRelayIds + ?Sized,
1292    {
1293        for id in self.lookup_ids(identity) {
1294            match &id.0 {
1295                FirstHopIdInner::Guard(sample, id) => {
1296                    self.guards.guards_mut(sample).record_success(
1297                        id,
1298                        &self.params,
1299                        Some(external_activity),
1300                        now,
1301                    );
1302                }
1303                FirstHopIdInner::Fallback(id) => {
1304                    if external_activity == ExternalActivity::DirCache {
1305                        self.fallbacks.note_success(id);
1306                    }
1307                }
1308            }
1309        }
1310    }
1311
1312    /// Return an iterator over all of the clock skew observations we've made
1313    /// for guards or fallbacks.
1314    fn skew_observations(&self) -> impl Iterator<Item = &skew::SkewObservation> {
1315        self.fallbacks
1316            .skew_observations()
1317            .chain(self.guards.active_guards().skew_observations())
1318    }
1319
1320    /// Recalculate our estimated clock skew, and publish it to anybody who
1321    /// cares.
1322    fn update_skew(&mut self, now: Instant) {
1323        let estimate = skew::SkewEstimate::estimate_skew(self.skew_observations(), now);
1324        // TODO: we might want to do this only conditionally, when the skew
1325        // estimate changes.
1326        *self.send_skew.borrow_mut() = estimate;
1327    }
1328
1329    /// If the circuit built because of a given [`PendingRequest`] may
1330    /// now be used (or discarded), return `Some(true)` or
1331    /// `Some(false)` respectively.
1332    ///
1333    /// Return None if we can't yet give an answer about whether such
1334    /// a circuit is usable.
1335    fn guard_usability_status(&self, pending: &PendingRequest, now: Instant) -> Option<bool> {
1336        match &pending.guard_id().0 {
1337            FirstHopIdInner::Guard(sample, id) => self.guards.guards(sample).circ_usability_status(
1338                id,
1339                pending.usage(),
1340                &self.params,
1341                now,
1342            ),
1343            // Fallback circuits are usable immediately, since we don't have to wait to
1344            // see whether any _other_ circuit succeeds or fails.
1345            FirstHopIdInner::Fallback(_) => Some(true),
1346        }
1347    }
1348
1349    /// For requests that have been "waiting" for an answer for too long,
1350    /// expire them and tell the circuit manager that their circuits
1351    /// are unusable.
1352    fn expire_and_answer_pending_requests(&mut self, now: Instant) {
1353        // A bit ugly: we use a separate Vec here to avoid borrowing issues,
1354        // and put it back when we're done.
1355        let mut waiting = Vec::new();
1356        std::mem::swap(&mut waiting, &mut self.waiting);
1357
1358        waiting.retain_mut(|pending| {
1359            let expired = pending
1360                .waiting_since()
1361                .and_then(|w| now.checked_duration_since(w))
1362                .map(|d| d >= self.params.np_idle_timeout)
1363                == Some(true);
1364            if expired {
1365                trace!(?pending, "Pending request expired");
1366                pending.reply(false);
1367                return false;
1368            }
1369
1370            // TODO-SPEC: guard_usability_status isn't what the spec says.  It
1371            // says instead that we should look at _circuit_ status, saying:
1372            //  "   Definition: In the algorithm above, C2 "blocks" C1 if:
1373            // * C2 obeys all the restrictions that C1 had to obey, AND
1374            // * C2 has higher priority than C1, AND
1375            // * Either C2 is <complete>, or C2 is <waiting_for_better_guard>,
1376            // or C2 has been <usable_if_no_better_guard> for no more than
1377            // {NONPRIMARY_GUARD_CONNECT_TIMEOUT} seconds."
1378            //
1379            // See comments in sample::GuardSet::circ_usability_status.
1380
1381            if let Some(answer) = self.guard_usability_status(pending, now) {
1382                trace!(?pending, answer, "Pending request now ready");
1383                pending.reply(answer);
1384                return false;
1385            }
1386            true
1387        });
1388
1389        // Put the waiting list back.
1390        std::mem::swap(&mut waiting, &mut self.waiting);
1391    }
1392
1393    /// Return every currently extant FirstHopId for a guard or fallback
1394    /// directory matching (or possibly matching) the provided keys.
1395    ///
1396    /// An identity is _possibly matching_ if it contains some of the IDs in the
1397    /// provided identity, and it has no _contradictory_ identities, but it does
1398    /// not necessarily contain _all_ of those identities.
1399    ///
1400    /// # TODO
1401    ///
1402    /// This function should probably not exist; it's only used so that dirmgr
1403    /// can report successes or failures, since by the time it observes them it
1404    /// doesn't know whether its circuit came from a guard or a fallback.  To
1405    /// solve that, we'll need CircMgr to record and report which one it was
1406    /// using, which will take some more plumbing.
1407    ///
1408    /// TODO relay: we will have to make the change above when we implement
1409    /// relays; otherwise, it would be possible for an attacker to exploit it to
1410    /// mislead us about our guard status.
1411    fn lookup_ids<T>(&self, identity: &T) -> Vec<FirstHopId>
1412    where
1413        T: tor_linkspec::HasRelayIds + ?Sized,
1414    {
1415        use strum::IntoEnumIterator;
1416        let mut vec = Vec::with_capacity(2);
1417
1418        let id = ids::GuardId::from_relay_ids(identity);
1419        for sample in GuardSetSelector::iter() {
1420            let guard_id = match self.guards.guards(&sample).contains(&id) {
1421                Ok(true) => &id,
1422                Err(other) => other,
1423                Ok(false) => continue,
1424            };
1425            vec.push(FirstHopId(FirstHopIdInner::Guard(sample, guard_id.clone())));
1426        }
1427
1428        let id = ids::FallbackId::from_relay_ids(identity);
1429        if self.fallbacks.contains(&id) {
1430            vec.push(id.into());
1431        }
1432
1433        vec
1434    }
1435
    /// Run any periodic events that update guard status, and return a
    /// duration after which periodic events should next be run.
    ///
    /// Currently this calls [`update`](Self::update) and then re-examines the
    /// waiting requests; the returned duration is always one second.
    #[instrument(skip_all, level = "trace")]
    pub(crate) fn run_periodic_events(&mut self, wallclock: SystemTime, now: Instant) -> Duration {
        self.update(wallclock, now);
        self.expire_and_answer_pending_requests(now);
        Duration::from_secs(1) // TODO: Too aggressive.
    }
1444
1445    /// Try to select a guard, expanding the sample if the first attempt fails.
1446    fn select_guard_with_expand(
1447        &mut self,
1448        usage: &GuardUsage,
1449        now: Instant,
1450        wallclock: SystemTime,
1451    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1452        // Try to find a guard.
1453        let first_error = match self.select_guard_once(usage, now) {
1454            Ok(res1) => return Ok(res1),
1455            Err(e) => {
1456                trace!("Couldn't select guard on first attempt: {}", e);
1457                e
1458            }
1459        };
1460
1461        // That didn't work. If we have a netdir, expand the sample and try again.
1462        let res = self.with_opt_universe(|this, univ| {
1463            let univ = univ?;
1464            trace!("No guards available, trying to extend the sample.");
1465            // Make sure that the status on all of our guards are accurate, and
1466            // expand the sample if we can.
1467            //
1468            // Our parameters and configuration did not change, so we do not
1469            // need to call update() or update_active_set_and_filter(). This
1470            // call is sufficient to  extend the sample and recompute primary
1471            // guards.
1472            let extended = Self::update_guardset_internal(
1473                &this.params,
1474                wallclock,
1475                this.guards.active_set.universe_type(),
1476                this.guards.active_guards_mut(),
1477                Some(univ),
1478            );
1479            if extended == ExtendedStatus::Yes {
1480                match this.select_guard_once(usage, now) {
1481                    Ok(res) => return Some(res),
1482                    Err(e) => {
1483                        trace!("Couldn't select guard after update: {}", e);
1484                    }
1485                }
1486            }
1487            None
1488        });
1489        if let Some(res) = res {
1490            return Ok(res);
1491        }
1492
1493        // Okay, that didn't work either.  If we were asked for a directory
1494        // guard, and we aren't using bridges, then we may be able to use a
1495        // fallback.
1496        if usage.kind == GuardUsageKind::OneHopDirectory
1497            && self.guards.active_set.universe_type() == UniverseType::NetDir
1498        {
1499            return self.select_fallback(now);
1500        }
1501
1502        // Couldn't extend the sample or use a fallback; return the original error.
1503        Err(first_error)
1504    }
1505
1506    /// Helper: try to pick a single guard, without retrying on failure.
1507    fn select_guard_once(
1508        &self,
1509        usage: &GuardUsage,
1510        now: Instant,
1511    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1512        let active_set = &self.guards.active_set;
1513        #[cfg_attr(not(feature = "bridge-client"), allow(unused_mut))]
1514        let (list_kind, mut first_hop) =
1515            self.guards
1516                .guards(active_set)
1517                .pick_guard(active_set, usage, &self.params, now)?;
1518        #[cfg(feature = "bridge-client")]
1519        if self.guards.active_set.universe_type() == UniverseType::BridgeSet {
1520            // See if we can promote first_hop to a viable CircTarget.
1521            let bridges = self.latest_bridge_set().ok_or_else(|| {
1522                PickGuardError::Internal(internal!(
1523                    "No bridge set available, even though this is the Bridges sample"
1524                ))
1525            })?;
1526            first_hop.lookup_bridge_circ_target(&bridges);
1527
1528            if usage.kind == GuardUsageKind::Data && !first_hop.contains_circ_target() {
1529                return Err(PickGuardError::Internal(internal!(
1530                    "Tried to return a non-circtarget guard with Data usage!"
1531                )));
1532            }
1533        }
1534        Ok((list_kind, first_hop))
1535    }
1536
1537    /// Helper: Select a fallback directory.
1538    ///
1539    /// Called when we have no guard information to use. Return values are as
1540    /// for [`GuardMgr::select_guard()`]
1541    fn select_fallback(
1542        &self,
1543        now: Instant,
1544    ) -> Result<(sample::ListKind, FirstHop), PickGuardError> {
1545        let filt = self.guards.active_guards().filter();
1546
1547        let fallback = crate::FirstHop {
1548            sample: None,
1549            inner: crate::FirstHopInner::Chan(OwnedChanTarget::from_chan_target(
1550                self.fallbacks.choose(&mut rand::rng(), now, filt)?,
1551            )),
1552        };
1553        let fallback = filt.modify_hop(fallback)?;
1554        Ok((sample::ListKind::Fallback, fallback))
1555    }
1556}
1557
1558/// A possible outcome of trying to extend a guard sample.
///
/// `select_guard_with_expand()` only retries guard selection when
/// `update_guardset_internal()` reports [`ExtendedStatus::Yes`].
1559#[derive(Copy, Clone, Debug, Eq, PartialEq)]
1560enum ExtendedStatus {
1561    /// The guard sample was extended. (At least one guard was added to it.)
1562    Yes,
1563    /// The guard sample was not extended.
1564    No,
1565}
1566
1567/// A set of parameters, derived from the consensus document, controlling
1568/// the behavior of a guard manager.
///
/// [`Default`] supplies hard-coded values, and `TryFrom<&NetParameters>`
/// extracts the same fields from a set of network parameters.
1569#[derive(Debug, Clone)]
1570#[cfg_attr(test, derive(PartialEq))]
1571struct GuardParams {
1572    /// How long should a sampled, un-confirmed guard be kept in the sample before it expires?
1573    lifetime_unconfirmed: Duration,
1574    /// How long should a confirmed guard be kept in the sample before
1575    /// it expires?
1576    lifetime_confirmed: Duration,
1577    /// How long may a guard be unlisted before we remove it from the sample?
1578    lifetime_unlisted: Duration,
1579    /// Largest number of guards we're willing to add to the sample.
1580    max_sample_size: usize,
1581    /// Largest fraction of the network's guard bandwidth that we're
1582    /// willing to add to the sample.
1583    max_sample_bw_fraction: f64,
1584    /// Smallest number of guards that we're willing to have in the
1585    /// sample, after applying a [`GuardFilter`].
1586    min_filtered_sample_size: usize,
1587    /// How many guards are considered "Primary"?
1588    n_primary: usize,
1589    /// When making a regular circuit, how many primary guards should we
1590    /// be willing to try?
1591    data_parallelism: usize,
1592    /// When making a one-hop directory circuit, how many primary
1593    /// guards should we be willing to try?
1594    dir_parallelism: usize,
1595    /// For how long does a pending attempt to connect to a guard
1596    /// block an attempt to use a less-favored non-primary guard?
1597    np_connect_timeout: Duration,
1598    /// How long do we allow a circuit to a successful but unfavored
1599    /// non-primary guard to sit around before deciding not to use it?
1600    np_idle_timeout: Duration,
1601    /// After how much time without successful activity does a
1602    /// successful circuit indicate that we should retry our primary
1603    /// guards?
1604    internet_down_timeout: Duration,
1605    /// What fraction of the guards can be filtered out before we
1606    /// decide that our filter is "very restrictive"?
    ///
    /// (Taken from the `guard_meaningful_restriction` network parameter.)
1607    filter_threshold: f64,
1608    /// What fraction of the guards determine that our filter is "very
1609    /// restrictive"?
    ///
    /// (Taken from the `guard_extreme_restriction` network parameter.)
    // NOTE(review): this and `filter_threshold` are both described as the
    // "very restrictive" cutoff; presumably one of them is the "meaningful"
    // restriction cutoff instead. Confirm and reword.
1610    extreme_threshold: f64,
1611}
1612
1613impl Default for GuardParams {
1614    fn default() -> Self {
1615        let one_day = Duration::from_secs(86400);
1616        GuardParams {
1617            lifetime_unconfirmed: one_day * 120,
1618            lifetime_confirmed: one_day * 60,
1619            lifetime_unlisted: one_day * 20,
1620            max_sample_size: 60,
1621            max_sample_bw_fraction: 0.2,
1622            min_filtered_sample_size: 20,
1623            n_primary: 3,
1624            data_parallelism: 1,
1625            dir_parallelism: 3,
1626            np_connect_timeout: Duration::from_secs(15),
1627            np_idle_timeout: Duration::from_secs(600),
1628            internet_down_timeout: Duration::from_secs(600),
1629            filter_threshold: 0.2,
1630            extreme_threshold: 0.01,
1631        }
1632    }
1633}
1634
1635impl TryFrom<&NetParameters> for GuardParams {
1636    type Error = tor_units::Error;
1637    fn try_from(p: &NetParameters) -> Result<GuardParams, Self::Error> {
1638        Ok(GuardParams {
1639            lifetime_unconfirmed: p.guard_lifetime_unconfirmed.try_into()?,
1640            lifetime_confirmed: p.guard_lifetime_confirmed.try_into()?,
1641            lifetime_unlisted: p.guard_remove_unlisted_after.try_into()?,
1642            max_sample_size: p.guard_max_sample_size.try_into()?,
1643            max_sample_bw_fraction: p.guard_max_sample_threshold.as_fraction(),
1644            min_filtered_sample_size: p.guard_filtered_min_sample_size.try_into()?,
1645            n_primary: p.guard_n_primary.try_into()?,
1646            data_parallelism: p.guard_use_parallelism.try_into()?,
1647            dir_parallelism: p.guard_dir_use_parallelism.try_into()?,
1648            np_connect_timeout: p.guard_nonprimary_connect_timeout.try_into()?,
1649            np_idle_timeout: p.guard_nonprimary_idle_timeout.try_into()?,
1650            internet_down_timeout: p.guard_internet_likely_down.try_into()?,
1651            filter_threshold: p.guard_meaningful_restriction.as_fraction(),
1652            extreme_threshold: p.guard_extreme_restriction.as_fraction(),
1653        })
1654    }
1655}
1656
1657/// Representation of a guard or fallback, as returned by [`GuardMgr::select_guard()`].
///
/// Use [`FirstHop::as_circ_target()`] to find out whether this object also
/// carries the information needed to build a circuit through it.
1658#[derive(Debug, Clone)]
1659pub struct FirstHop {
1660    /// The sample from which this guard was taken, or `None` if this is a fallback.
1661    sample: Option<GuardSetSelector>,
1662    /// Information about connecting to (or through) this guard.
1663    inner: FirstHopInner,
1664}
1665/// The enumeration inside a FirstHop that holds information about how to
1666/// connect to (and possibly through) a guard or fallback.
1667#[derive(Debug, Clone)]
1668enum FirstHopInner {
1669    /// We have enough information to connect to a guard.
1670    Chan(OwnedChanTarget),
1671    /// We have enough information to connect to a guard _and_ to build
1672    /// multihop circuits through it.
    //
    // Within this file, this variant is only constructed by the
    // `bridge-client`-gated `lookup_bridge_circ_target()`, hence the
    // conditional `dead_code` allowance.
1673    #[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
1674    Circ(OwnedCircTarget),
1675}
1676
1677impl FirstHop {
1678    /// Return a new [`FirstHopId`] for this `FirstHop`.
1679    fn first_hop_id(&self) -> FirstHopId {
1680        match &self.sample {
1681            Some(sample) => {
1682                let guard_id = GuardId::from_relay_ids(self);
1683                FirstHopId::in_sample(sample.clone(), guard_id)
1684            }
1685            None => {
1686                let fallback_id = crate::ids::FallbackId::from_relay_ids(self);
1687                FirstHopId::from(fallback_id)
1688            }
1689        }
1690    }
1691
1692    /// Look up this guard in `netdir`.
1693    pub fn get_relay<'a>(&self, netdir: &'a NetDir) -> Option<Relay<'a>> {
1694        match &self.sample {
1695            #[cfg(feature = "bridge-client")]
1696            // Always return "None" for anything that isn't in the netdir.
1697            Some(s) if s.universe_type() == UniverseType::BridgeSet => None,
1698            // Otherwise ask the netdir.
1699            _ => netdir.by_ids(self),
1700        }
1701    }
1702
1703    /// Return true if this guard is a bridge.
1704    pub fn is_bridge(&self) -> bool {
1705        match &self.sample {
1706            #[cfg(feature = "bridge-client")]
1707            Some(s) if s.universe_type() == UniverseType::BridgeSet => true,
1708            _ => false,
1709        }
1710    }
1711
1712    /// If possible, return a view of this object that can be used to build a circuit.
1713    pub fn as_circ_target(&self) -> Option<&OwnedCircTarget> {
1714        match &self.inner {
1715            FirstHopInner::Chan(_) => None,
1716            FirstHopInner::Circ(ct) => Some(ct),
1717        }
1718    }
1719
1720    /// Return a view of this as an OwnedChanTarget.
1721    fn chan_target_mut(&mut self) -> &mut OwnedChanTarget {
1722        match &mut self.inner {
1723            FirstHopInner::Chan(ct) => ct,
1724            FirstHopInner::Circ(ct) => ct.chan_target_mut(),
1725        }
1726    }
1727
1728    /// If possible and appropriate, find a circuit target in `bridges` for this
1729    /// `FirstHop`, and make this `FirstHop` a viable circuit target.
1730    ///
1731    /// (By default, any `FirstHop` that a `GuardSet` returns will have enough
1732    /// information to be a `ChanTarget`, but it will be lacking the additional
1733    /// network information in `CircTarget`[^1] necessary for us to build a
1734    /// multi-hop circuit through it.  If this FirstHop is a regular non-bridge
1735    /// `Relay`, then the `CircMgr` will later look up that circuit information
1736    /// itself from the network directory. But if this `FirstHop` *is* a bridge,
1737    /// then we need to find that information in the `BridgeSet`, since the
1738    /// CircMgr does not keep track of the `BridgeSet`.)
1739    ///
1740    /// [^1]: For example, supported protocol versions and ntor keys.
1741    #[cfg(feature = "bridge-client")]
1742    fn lookup_bridge_circ_target(&mut self, bridges: &bridge::BridgeSet) {
1743        use crate::sample::CandidateStatus::Present;
1744        if self.sample.as_ref().map(|s| s.universe_type()) == Some(UniverseType::BridgeSet)
1745            && matches!(self.inner, FirstHopInner::Chan(_))
1746        {
1747            if let Present(bridge_relay) = bridges.bridge_relay_by_guard(self) {
1748                if let Some(circ_target) = bridge_relay.as_relay_with_desc() {
1749                    self.inner =
1750                        FirstHopInner::Circ(OwnedCircTarget::from_circ_target(&circ_target));
1751                }
1752            }
1753        }
1754    }
1755
1756    /// Return true if this `FirstHop` contains circuit target information.
1757    ///
1758    /// This is true if `lookup_bridge_circ_target()` has been called, and it
1759    /// successfully found the circuit target information.
1760    #[cfg(feature = "bridge-client")]
1761    fn contains_circ_target(&self) -> bool {
1762        matches!(self.inner, FirstHopInner::Circ(_))
1763    }
1764}
1765
1766// This is somewhat redundant with the implementations in crate::guard::Guard.
1767impl tor_linkspec::HasAddrs for FirstHop {
1768    fn addrs(&self) -> impl Iterator<Item = SocketAddr> {
1769        match &self.inner {
1770            FirstHopInner::Chan(ct) => Either::Left(ct.addrs()),
1771            FirstHopInner::Circ(ct) => Either::Right(ct.addrs()),
1772        }
1773    }
1774}
1775impl tor_linkspec::HasRelayIds for FirstHop {
1776    fn identity(
1777        &self,
1778        key_type: tor_linkspec::RelayIdType,
1779    ) -> Option<tor_linkspec::RelayIdRef<'_>> {
1780        match &self.inner {
1781            FirstHopInner::Chan(ct) => ct.identity(key_type),
1782            FirstHopInner::Circ(ct) => ct.identity(key_type),
1783        }
1784    }
1785}
1786impl tor_linkspec::HasChanMethod for FirstHop {
1787    fn chan_method(&self) -> tor_linkspec::ChannelMethod {
1788        match &self.inner {
1789            FirstHopInner::Chan(ct) => ct.chan_method(),
1790            FirstHopInner::Circ(ct) => ct.chan_method(),
1791        }
1792    }
1793}
// `ChanTarget` is implemented with an empty body: the address, identity and
// channel-method accessors come from the three impls above.
1794impl tor_linkspec::ChanTarget for FirstHop {}
1795
1796/// The purpose for which we plan to use a guard.
1797///
1798/// This can affect the guard selection algorithm.
///
/// The default kind is [`GuardUsageKind::Data`].
1799#[derive(Clone, Debug, Default, Eq, PartialEq)]
1800#[non_exhaustive]
1801pub enum GuardUsageKind {
1802    /// We want to use this guard for a data circuit.
1803    ///
1804    /// (This encompasses everything except the `OneHopDirectory` case.)
1805    #[default]
1806    Data,
1807    /// We want to use this guard for a one-hop, non-anonymous
1808    /// directory request.
1809    ///
1810    /// (Our algorithm allows more parallelism for the guards that we use
1811    /// for these circuits.)
1812    OneHopDirectory,
1813}
1814
1815/// A set of parameters describing how a single guard should be selected.
1816///
1817/// Used as an argument to [`GuardMgr::select_guard`].
///
/// Instances are assembled with [`GuardUsageBuilder`].
1818#[derive(Clone, Debug, derive_builder::Builder)]
1819#[builder(build_fn(error = "tor_config::ConfigBuildError"))]
1820pub struct GuardUsage {
1821    /// The purpose for which this guard will be used.
    ///
    /// When the builder leaves this unset, the default
    /// [`GuardUsageKind`] is used.
1822    #[builder(default)]
1823    kind: GuardUsageKind,
1824    /// A list of restrictions on which guard may be used.
1825    ///
1826    /// The default is the empty list.
1827    #[builder(sub_builder, setter(custom))]
1828    restrictions: GuardRestrictionList,
1829}
1830
// NOTE(review): `GuardUsage` derives no `Default`, yet the tests in this file
// call `GuardUsage::default()`, so presumably this macro is what provides it.
// `!Deserialize` looks like an opt-out from serde support for the builder;
// confirm against the macro's documentation.
1831impl_standard_builder! { GuardUsage: !Deserialize }
1832
1833/// List of guard restrictions, as held in a [`GuardUsage`].
1834pub type GuardRestrictionList = Vec<GuardRestriction>;
1835
// Declares `GuardRestrictionListBuilder`, the list builder that produces a
// `GuardRestrictionList`: the default list is empty, and each item is cloned
// unchanged when the list is built.
1836define_list_builder_helper! {
1837    pub struct GuardRestrictionListBuilder {
1838        restrictions: [GuardRestriction],
1839    }
1840    built: GuardRestrictionList = restrictions;
1841    default = vec![];
1842    item_build: |restriction| Ok(restriction.clone());
1843}
1844
// NOTE(review): presumably this generates the `restrictions` list accessors on
// `GuardUsageBuilder`, standing in for the setter that the struct marks as
// `setter(custom)`; confirm against the macro's documentation.
1845define_list_builder_accessors! {
1846    struct GuardUsageBuilder {
1847        pub restrictions: [GuardRestriction],
1848    }
1849}
1850
1851impl GuardUsageBuilder {
1852    /// Create a new empty [`GuardUsageBuilder`].
1853    pub fn new() -> Self {
1854        Self::default()
1855    }
1856}
1857
1858/// A restriction that applies to a single request for a guard.
1859///
1860/// Restrictions differ from filters (see [`GuardFilter`]) in that
1861/// they apply to single requests, not to our entire set of guards.
1862/// They're suitable for things like making sure that we don't start
1863/// and end a circuit at the same relay, or requiring a specific
1864/// subprotocol version for certain kinds of requests.
1865#[derive(Clone, Debug, Serialize, Deserialize)]
1866#[non_exhaustive]
1867pub enum GuardRestriction {
1868    /// Don't pick a guard with the provided identity.
1869    AvoidId(RelayId),
1870    /// Don't pick a guard with any of the provided identities.
    //
    // NOTE(review): this used to say "Ed25519 identities", but the payload is
    // a general `RelayIdSet`; confirm that other identity types are honored.
1871    AvoidAllIds(RelayIdSet),
1872}
1873
1874/// The kind of vanguards to use.
///
/// This is the definition used when the `vanguards` feature is enabled.
///
/// The derived ordering follows the explicit discriminants, so
/// `Disabled < Lite < Full` even though `Disabled` is declared last.
1875#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
1876#[derive(Serialize, Deserialize)] //
1877#[derive(derive_more::Display)] //
1878#[serde(rename_all = "lowercase")]
1879#[cfg(feature = "vanguards")]
1880#[non_exhaustive]
1881pub enum VanguardMode {
1882    /// "Lite" vanguards.
    ///
    /// This is the default mode.
1883    #[default]
1884    #[display("lite")]
1885    Lite = 1,
1886    /// "Full" vanguards.
1887    #[display("full")]
1888    Full = 2,
1889    /// Vanguards are disabled.
1890    #[display("disabled")]
1891    Disabled = 0,
1892}
1893
#[cfg(feature = "vanguards")]
impl VanguardMode {
    /// Translate a [`NetParameters`] value into the matching `VanguardMode`.
    ///
    /// Intended for the [`vanguards_enabled`](NetParameters::vanguards_enabled)
    /// and [`vanguards_hs_service`](NetParameters::vanguards_hs_service)
    /// parameters: `0` means disabled, `1` means lite, and `2` means full.
    pub(crate) fn from_net_parameter(val: BoundedInt32<0, 2>) -> Self {
        let raw = val.get();
        match raw {
            2 => Self::Full,
            1 => Self::Lite,
            0 => Self::Disabled,
            // The type bounds the value to 0..=2, so nothing else can get here.
            _ => unreachable!("BoundedInt32 was not bounded?!"),
        }
    }
}
1910
// NOTE(review): presumably this is what allows `VanguardMode` to be wrapped in
// `ExplicitOrAuto` (as `VanguardConfig::mode` does); confirm against the
// macro's documentation.
1911impl_not_auto_value!(VanguardMode);
1912
1913/// Vanguards configuration.
1914#[derive(Debug, Default, Clone, Eq, PartialEq, derive_builder::Builder)]
1915#[builder(build_fn(error = "ConfigBuildError"))]
1916#[builder(derive(Debug, Serialize, Deserialize))]
1917pub struct VanguardConfig {
1918    /// The kind of vanguards to use.
    ///
    /// Read this through [`VanguardConfig::mode()`], which resolves an
    /// `Auto` setting to the default [`VanguardMode`].
1919    #[builder_field_attr(serde(default))]
1920    #[builder(default)]
1921    mode: ExplicitOrAuto<VanguardMode>,
1922}
1923
1924impl VanguardConfig {
1925    /// Return the configured [`VanguardMode`].
1926    ///
1927    /// Returns the [`Default`] `VanguardMode`
1928    /// if the mode is [`Auto`](ExplicitOrAuto) or unspecified.
1929    pub fn mode(&self) -> VanguardMode {
1930        match self.mode {
1931            ExplicitOrAuto::Auto => Default::default(),
1932            ExplicitOrAuto::Explicit(mode) => mode,
1933        }
1934    }
1935}
1936
1937/// The kind of vanguards to use.
///
/// This is the definition used when the `vanguards` feature is disabled:
/// `Disabled` is then the only mode, and also the default.
1938#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] //
1939#[derive(Serialize, Deserialize)] //
1940#[derive(derive_more::Display)] //
1941#[serde(rename_all = "lowercase")]
1942#[cfg(not(feature = "vanguards"))]
1943#[non_exhaustive]
1944pub enum VanguardMode {
1945    /// Vanguards are disabled.
1946    #[default]
1947    #[display("disabled")]
1948    Disabled = 0,
1949}
1950
1951#[cfg(test)]
1952mod test {
1953    // @@ begin test lint list maintained by maint/add_warning @@
1954    #![allow(clippy::bool_assert_comparison)]
1955    #![allow(clippy::clone_on_copy)]
1956    #![allow(clippy::dbg_macro)]
1957    #![allow(clippy::mixed_attributes_style)]
1958    #![allow(clippy::print_stderr)]
1959    #![allow(clippy::print_stdout)]
1960    #![allow(clippy::single_char_pattern)]
1961    #![allow(clippy::unwrap_used)]
1962    #![allow(clippy::unchecked_duration_subtraction)]
1963    #![allow(clippy::useless_vec)]
1964    #![allow(clippy::needless_pass_by_value)]
1965    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
1966    use super::*;
1967    use tor_linkspec::{HasAddrs, HasRelayIds};
1968    use tor_persist::TestingStateMgr;
1969    use tor_rtcompat::test_with_all_runtimes;
1970
#[test]
fn guard_param_defaults() {
    // The hard-coded defaults must agree with whatever the default network
    // parameters convert to.
    let from_net_defaults = GuardParams::try_from(&NetParameters::default()).unwrap();
    assert_eq!(GuardParams::default(), from_net_defaults);
}
1977
1978    fn init<R: Runtime>(rt: R) -> (GuardMgr<R>, TestingStateMgr, NetDir) {
1979        use tor_netdir::{MdReceiver, PartialNetDir, testnet};
1980        let statemgr = TestingStateMgr::new();
1981        let have_lock = statemgr.try_lock().unwrap();
1982        assert!(have_lock.held());
1983        let guardmgr = GuardMgr::new(rt, statemgr.clone(), &TestConfig::default()).unwrap();
1984        let (con, mds) = testnet::construct_network().unwrap();
1985        let param_overrides = vec![
1986            // We make the sample size smaller than usual to compensate for the
1987            // small testing network.  (Otherwise, we'd sample the whole network,
1988            // and not be able to observe guards in the tests.)
1989            "guard-min-filtered-sample-size=5",
1990            // We choose only two primary guards, to make the tests easier to write.
1991            "guard-n-primary-guards=2",
1992            // We define any restriction that allows 75% or fewer of relays as "meaningful",
1993            // so that we can test the "restrictive" guard sample behavior, and to avoid
1994            "guard-meaningful-restriction-percent=75",
1995        ];
1996        let param_overrides: String =
1997            itertools::Itertools::intersperse(param_overrides.into_iter(), " ").collect();
1998        let override_p = param_overrides.parse().unwrap();
1999        let mut netdir = PartialNetDir::new(con, Some(&override_p));
2000        for md in mds {
2001            netdir.add_microdesc(md);
2002        }
2003        let netdir = netdir.unwrap_if_sufficient().unwrap();
2004
2005        (guardmgr, statemgr, netdir)
2006    }
2007
    // Pick a guard, confirm it, persist the state, then reload the state into
    // a second GuardMgr and check that it hands out the same guard.
2008    #[test]
2009    #[allow(clippy::clone_on_copy)]
2010    fn simple_case() {
2011        test_with_all_runtimes!(|rt| async move {
2012            let (guardmgr, statemgr, netdir) = init(rt.clone());
2013            let usage = GuardUsage::default();
2014            guardmgr.install_test_netdir(&netdir);
2015
2016            let (id, mon, usable) = guardmgr.select_guard(usage).unwrap();
2017            // Report that the circuit succeeded.
2018            mon.succeeded();
2019
2020            // May we use the circuit?
2021            let usable = usable.await.unwrap();
2022            assert!(usable);
2023
2024            // Save the state...
2025            guardmgr.flush_msg_queue().await;
2026            guardmgr.store_persistent_state().unwrap();
2027            drop(guardmgr);
2028
2029            // Try reloading from the state...
2030            let guardmgr2 =
2031                GuardMgr::new(rt.clone(), statemgr.clone(), &TestConfig::default()).unwrap();
2032            guardmgr2.install_test_netdir(&netdir);
2033
2034            // Since the guard was confirmed, we should get the same one this time!
2035            let usage = GuardUsage::default();
2036            let (id2, _mon, _usable) = guardmgr2.select_guard(usage).unwrap();
2037            assert!(id2.same_relay_ids(&id));
2038        });
2039    }
2040
    // Fail both primary guards, then request two more guards at once: the one
    // whose circuit fails must be reported unusable, the other one usable.
2041    #[test]
2042    fn simple_waiting() {
2043        // TODO(nickm): This test fails in rare cases; I suspect a
2044        // race condition somewhere.
2045        //
2046        // I've doubled up on the queue flushing in order to try to make the
2047        // race less likely, but we should investigate.
2048        test_with_all_runtimes!(|rt| async move {
2049            let (guardmgr, _statemgr, netdir) = init(rt);
2050            let u = GuardUsage::default();
2051            guardmgr.install_test_netdir(&netdir);
2052
2053            // We'll have the first two guards fail, which should make us
2054            // try a non-primary guard.
2055            let (id1, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
2056            mon.failed();
2057            guardmgr.flush_msg_queue().await; // avoid race
2058            guardmgr.flush_msg_queue().await; // avoid race
2059            let (id2, mon, _usable) = guardmgr.select_guard(u.clone()).unwrap();
2060            mon.failed();
2061            guardmgr.flush_msg_queue().await; // avoid race
2062            guardmgr.flush_msg_queue().await; // avoid race
2063
2064            assert!(!id1.same_relay_ids(&id2));
2065
2066            // Now we should get two sampled guards. They should be different.
2067            let (id3, mon3, usable3) = guardmgr.select_guard(u.clone()).unwrap();
2068            let (id4, mon4, usable4) = guardmgr.select_guard(u.clone()).unwrap();
2069            assert!(!id3.same_relay_ids(&id4));
2070
            // Report one failure and one success concurrently, and collect
            // the usability verdict for each of the two pending circuits.
2071            let (u3, u4) = futures::join!(
2072                async {
2073                    mon3.failed();
2074                    guardmgr.flush_msg_queue().await; // avoid race
2075                    usable3.await.unwrap()
2076                },
2077                async {
2078                    mon4.succeeded();
2079                    usable4.await.unwrap()
2080                }
2081            );
2082
2083            assert_eq!((u3, u4), (false, true));
2084        });
2085    }
2086
    // Install an address filter before the netdir, then check that the
    // selected guard has an address that the filter permits.
2087    #[test]
2088    fn filtering_basics() {
2089        test_with_all_runtimes!(|rt| async move {
2090            let (guardmgr, _statemgr, netdir) = init(rt);
2091            let u = GuardUsage::default();
2092            let filter = {
2093                let mut f = GuardFilter::default();
2094                // All the addresses in the test network are {0,1,2,3,4}.0.0.3:9001.
2095                // Limit to only 2.0.0.0/8
2096                f.push_reachable_addresses(vec!["2.0.0.0/8:9001".parse().unwrap()]);
2097                f
2098            };
2099            guardmgr.set_filter(filter);
2100            guardmgr.install_test_netdir(&netdir);
2101            let (guard, _mon, _usable) = guardmgr.select_guard(u).unwrap();
2102            // Make sure that the filter worked.
2103            let addr = guard.addrs().next().unwrap();
2104            assert_eq!(addr, "2.0.0.3:9001".parse().unwrap());
2105        });
2106    }
2107
    // Check that an externally reported directory-cache failure steers
    // directory requests (but not data requests) away from a guard, and that
    // a later reported success brings the guard back for directory use.
2108    #[test]
2109    fn external_status() {
2110        test_with_all_runtimes!(|rt| async move {
2111            let (guardmgr, _statemgr, netdir) = init(rt);
2112            let data_usage = GuardUsage::default();
2113            let dir_usage = GuardUsageBuilder::new()
2114                .kind(GuardUsageKind::OneHopDirectory)
2115                .build()
2116                .unwrap();
2117            guardmgr.install_test_netdir(&netdir);
2118            {
2119                // Override this parameter, so that we can get deterministic results below.
2120                let mut inner = guardmgr.inner.lock().unwrap();
2121                inner.params.dir_parallelism = 1;
2122            }
2123
2124            let (guard, mon, _usable) = guardmgr.select_guard(data_usage.clone()).unwrap();
2125            mon.succeeded();
2126
2127            // Record that this guard gave us a bad directory object.
2128            guardmgr.note_external_failure(&guard, ExternalActivity::DirCache);
2129
2130            // We ask for another guard, for data usage.  We should get the same
2131            // one as last time, since the directory failure doesn't mean this
2132            // guard is useless as a primary guard.
2133            let (g2, mon, _usable) = guardmgr.select_guard(data_usage).unwrap();
2134            assert_eq!(g2.ed_identity(), guard.ed_identity());
2135            mon.succeeded();
2136
2137            // But if we ask for a guard for directory usage, we should get a
2138            // different one, since the last guard we gave out failed.
2139            let (g3, mon, _usable) = guardmgr.select_guard(dir_usage.clone()).unwrap();
2140            assert_ne!(g3.ed_identity(), guard.ed_identity());
2141            mon.succeeded();
2142
2143            // Now record a success for directory usage.
2144            guardmgr.note_external_success(&guard, ExternalActivity::DirCache);
2145
2146            // Now that the guard is working as a cache, asking for it should get us the same guard.
2147            let (g4, _mon, _usable) = guardmgr.select_guard(dir_usage).unwrap();
2148            assert_eq!(g4.ed_identity(), guard.ed_identity());
2149        });
2150    }
2151
#[cfg(feature = "vanguards")]
#[test]
fn vanguard_mode_ord() {
    use super::VanguardMode::{Disabled, Full, Lite};
    // Each pair is (smaller, larger) under the derived ordering.
    for (smaller, larger) in [(Disabled, Lite), (Disabled, Full), (Lite, Full)] {
        assert!(smaller < larger);
    }
}
2159}