tor_error/
retriable.rs

1//! Declare the `RetryTime` enumeration and related code.
2
3use derive_more::{From, Into};
4use std::{
5    cmp::Ordering,
6    time::{Duration, Instant},
7};
8use strum::EnumDiscriminants;
9
/// A description of when an operation may be retried.
///
/// # Retry time values are contextual.
///
/// Note that retrying is necessarily contextual, depending on what exactly
/// we're talking about retrying.
///
/// For an example of how context matters:  suppose that we try to build a
/// circuit, and encounter a failure extending to the second hop.  If we try to
/// build a circuit _through the same path_ immediately, it's likely to fail
/// again.  But if we try to build a circuit through a different path, then
/// there's no reason to expect that same kind of error.
///
/// Thus, the same inner error condition ("failed to extend to the nth hop") can
/// indicate either a "Retry after waiting for a while" or "Retry immediately."
///
/// # Retry times depend on what we think might change.
///
/// Whether retrying will help depends on what we think is likely to change in
/// the near term.
///
/// For example, we generally assume an unreachable relay has some likelihood of
/// becoming reachable in the near future, and therefore connecting to such a
/// relay is worth retrying.
///
/// On the other hand, we _don't_ assume that the network is changing wildly
/// over time.  Thus, if there is currently no relay that supports delivering
/// traffic to port 23 (telnet), we say that building a request for such a relay
/// is not retriable, even though technically such a relay might appear in the
/// next consensus.
#[derive(Copy, Clone, Debug, Eq, PartialEq, EnumDiscriminants)]
#[non_exhaustive]
// We define a discriminant type so we can simplify loose_cmp.
//
// The derived `Ord` on the discriminants follows the declaration order of the
// variants below, so reordering the variants changes the result of
// `loose_cmp` whenever it compares two different kinds of `RetryTime`.
#[strum_discriminants(derive(Ord, PartialOrd))]
// We don't want to expose RetryTimeDiscriminants.
#[strum_discriminants(vis())]
pub enum RetryTime {
    /// The operation can be retried immediately, and no delay is needed.
    ///
    /// The recipient of this `RetryTime` variant may retry the operation
    /// immediately without waiting.
    ///
    /// This case should be used cautiously: it risks making code retry in a
    /// loop without delay.  It should only be used for error conditions that
    /// are necessarily produced via a process that itself introduces a delay.
    /// (For example, this case is suitable for errors caused by a remote
    /// timeout.)
    Immediate,

    /// The operation can be retried after a short delay, to prevent overloading
    /// the network.
    ///
    /// The recipient of this `RetryTime` variant should delay a short amount of
    /// time before retrying.  The amount of time to delay should be randomized,
    /// and should tend to grow larger the more failures there have been
    /// recently for the given operation.  (The `RetryDelay` type from
    /// `tor-basic-utils` is suitable for managing this calculation.)
    ///
    /// This case should be used for problems that tend to be "self correcting",
    /// such as remote server failures (the server might come back up).
    AfterWaiting,

    /// The operation can be retried after a particular delay.
    ///
    /// The recipient of this `RetryTime` variant should wait for at least the
    /// given duration before retrying the operation.
    ///
    /// This case should only be used if there is some reason not to return
    /// `AfterWaiting`: for example, if the implementor is providing their own
    /// back-off algorithm instead of using `RetryDelay`.
    ///
    /// (This is a separate variant from `At`, since the constructor may not
    /// have convenient access to (a mocked view of) the current time.  If you
    /// know that the current time is `now`, then `After(d)` is equivalent to
    /// `At(now + d)`.)
    After(Duration),

    /// The operation can be retried at some particular time in the future.
    ///
    /// The recipient of this `RetryTime` variant should wait until the
    /// current time (as returned by `Instant::now` or `SleepProvider::now` as
    /// appropriate) is at least this given instant.
    ///
    /// This case is appropriate for when we have a failure condition caused by
    /// waiting for multiple other timeouts.  (For example, if we believe that
    /// all our guards are down, then we won't be able to try getting a guard
    /// until the next time a guard is scheduled to be marked as retriable.)
    At(Instant),

    /// Retrying is unlikely to make this operation succeed, unless something
    /// else is fixed first.
    ///
    /// The recipient of this `RetryTime` variant should generally give up, and
    /// stop retrying the given operation.
    ///
    /// We don't mean "literally" that the operation will never succeed: only
    /// that retrying it in the near future without fixing the underlying cause
    /// is unlikely to help.
    ///
    /// This case is appropriate for issues like misconfiguration, internal
    /// errors, and requests for operations that the network doesn't support.
    ///
    /// This case is also appropriate for a problem that is "technically"
    /// retriable, but where any resolution is likelier to take days or weeks
    /// instead of minutes or hours.
    Never,
}
117
/// A `RetryTime` wrapped so that it compares according to [`RetryTime::loose_cmp`].
///
/// `RetryTime` itself has no `Ord` implementation; wrap a value in this type
/// when an ordered key is needed (for example, for sorting or for use with
/// `Iterator::min`).  The `From` and `Into` derives provide the conversions
/// to and from the wrapped `RetryTime`.
#[derive(From, Into, Copy, Clone, Debug, Eq, PartialEq)]
pub struct LooseCmpRetryTime(RetryTime);
121
122/// Trait for an error object that can tell us when the operation which
123/// generated it can be retried.
124pub trait HasRetryTime {
125    /// Return the time when the operation that gave this error can be retried.
126    ///
127    /// See all caveats and explanations on [`RetryTime`].
128    fn retry_time(&self) -> RetryTime;
129
130    /// Return an absolute retry when the operation that gave this error can be
131    /// retried.
132    ///
133    /// Requires that `now` is the current time, and `choose_delay` is a
134    /// function to choose a delay for [`RetryTime::AfterWaiting`].
135    fn abs_retry_time<F>(&self, now: Instant, choose_delay: F) -> AbsRetryTime
136    where
137        F: FnOnce() -> Duration,
138        Self: Sized,
139    {
140        self.retry_time().absolute(now, choose_delay)
141    }
142}
143
/// An absolute [`RetryTime`].
///
/// Unlike `RetryTime`, this type always denotes a particular instant in time.
/// You can derive it using [`RetryTime::absolute`].
///
/// The derived `Ord` follows the declaration order of the variants, so
/// `Immediate` sorts before every `At(_)`, which in turn sorts before `Never`;
/// two `At` values are ordered by their instants.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
// NOTE(review): presumably this enum is deliberately exhaustive (unlike
// `RetryTime`, which is `#[non_exhaustive]`) -- confirm before adding variants.
#[allow(clippy::exhaustive_enums)]
pub enum AbsRetryTime {
    /// See [`RetryTime::Immediate`].
    Immediate,
    /// See [`RetryTime::At`].
    At(Instant),
    /// See [`RetryTime::Never`].
    Never,
}
158
159impl AbsRetryTime {
160    /// Construct an AbsRetryTime representing `base` + `plus`.
161    fn from_sum(base: Instant, plus: Duration) -> Self {
162        match base.checked_add(plus) {
163            Some(t) => AbsRetryTime::At(t),
164            None => AbsRetryTime::Never,
165        }
166    }
167}
168
169impl RetryTime {
170    /// Convert this [`RetryTime`] in to an absolute time.
171    ///
172    /// Requires that `now` is the current time, and `choose_delay` is a
173    /// function to choose a delay for [`RetryTime::AfterWaiting`].
174    pub fn absolute<F>(self, now: Instant, choose_delay: F) -> AbsRetryTime
175    where
176        F: FnOnce() -> Duration,
177    {
178        match self {
179            RetryTime::Immediate => AbsRetryTime::Immediate,
180            RetryTime::AfterWaiting => AbsRetryTime::from_sum(now, choose_delay()),
181            RetryTime::After(d) => AbsRetryTime::from_sum(now, d),
182            RetryTime::At(t) => AbsRetryTime::At(t),
183            RetryTime::Never => AbsRetryTime::Never,
184        }
185    }
186
187    /// Convert all the provided `items` into [`AbsRetryTime`] values, and
188    /// return the earliest one.
189    ///
190    /// Requires that `now` is the current time, and `choose_delay` is a
191    /// function to choose a delay for [`RetryTime::AfterWaiting`].
192    ///
193    /// Differs from `items.map(AbsRetryTime::absolute(now,
194    /// choose_delay)).min()` in that it calls `choose_delay` at most once.
195    pub fn earliest_absolute<I, F>(items: I, now: Instant, choose_delay: F) -> Option<AbsRetryTime>
196    where
197        I: Iterator<Item = RetryTime>,
198        F: FnOnce() -> Duration,
199    {
200        let chosen_delay =
201            once_cell::unsync::Lazy::new(|| AbsRetryTime::from_sum(now, choose_delay()));
202
203        items
204            .map(|item| match item {
205                RetryTime::AfterWaiting => *chosen_delay,
206                other => other.absolute(now, || unreachable!()),
207            })
208            .min()
209    }
210
211    /// Return the "approximately earliest" item for an iterator of retry times.
212    ///
213    /// This is necessarily an approximation, since we can't be sure what time
214    /// will be chosen if the retry is supposed to happen at a random time, and
215    /// therefore cannot tell whether `AfterWaiting` comes before or after
216    /// particular `At` and `After` instances.
217    ///
218    /// If you need an exact answer, use earliest_absolute.
219    pub fn earliest_approx<I>(items: I) -> Option<RetryTime>
220    where
221        I: Iterator<Item = RetryTime>,
222    {
223        items.min_by(|a, b| a.loose_cmp(b))
224    }
225
226    /// A loose-but-total comparison operator, suitable for choosing a retry
227    /// time when multiple attempts have failed.
228    ///
229    /// If you need an absolute comparison operator, convert to [`AbsRetryTime`] first.
230    ///
231    /// See also:
232    /// [`LooseCmpRetryTime`], a wrapper for `RetryTime` that uses this comparison.
233    pub fn loose_cmp(&self, other: &Self) -> Ordering {
234        use RetryTime as RT;
235
236        match (self, other) {
237            // When we have the same type with an internal embedded duration or time,
238            // we compare based on the duration or time.
239            (RT::After(d1), RetryTime::After(d2)) => d1.cmp(d2),
240            (RT::At(t1), RetryTime::At(t2)) => t1.cmp(t2),
241
242            // Otherwise, we compare based on discriminant type.
243            //
244            // This can't do a perfect "apples-to-apples" comparison for
245            // `AfterWaiting` vs `At` vs `After`, but at least it imposes a
246            // total order.
247            (a, b) => RetryTimeDiscriminants::from(a).cmp(&RetryTimeDiscriminants::from(b)),
248        }
249    }
250}
251
252impl Ord for LooseCmpRetryTime {
253    fn cmp(&self, other: &Self) -> Ordering {
254        self.0.loose_cmp(&other.0)
255    }
256}
impl PartialOrd for LooseCmpRetryTime {
    /// Always returns `Some`: this simply defers to the total order given by
    /// the `Ord` implementation, so the two can never disagree.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
262
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;

    // `RetryTime::loose_cmp` must order values by variant kind first, and by
    // payload within the `After` and `At` kinds.
    #[test]
    fn comparison() {
        use RetryTime as RT;
        let sec = Duration::from_secs(1);
        let now = Instant::now();

        let sorted = vec![
            RT::Immediate,
            RT::AfterWaiting,
            RT::After(sec * 10),
            RT::After(sec * 20),
            RT::At(now),
            RT::At(now + sec * 30),
            RT::Never,
        ];

        // Verify that these objects are actually in loose-cmp sorted order:
        // comparing any two entries must agree with comparing their indices.
        for (i, a) in sorted.iter().enumerate() {
            for (j, b) in sorted.iter().enumerate() {
                assert_eq!(a.loose_cmp(b), i.cmp(&j));
            }
        }
    }

    // The derived `Ord` on `AbsRetryTime` must put `Immediate` first, then
    // `At` values by instant, then `Never`.
    #[test]
    fn abs_comparison() {
        use AbsRetryTime as ART;
        let sec = Duration::from_secs(1);
        let now = Instant::now();

        let sorted = vec![
            ART::Immediate,
            ART::At(now),
            ART::At(now + sec * 30),
            ART::Never,
        ];

        // Verify that these objects are actually in sorted order according to
        // the derived `Ord` implementation (not `loose_cmp`).
        for (i, a) in sorted.iter().enumerate() {
            for (j, b) in sorted.iter().enumerate() {
                assert_eq!(a.cmp(b), i.cmp(&j));
            }
        }
    }

    // `AfterWaiting` resolves to `now` plus the chosen delay, which is earlier
    // than `Never`.
    #[test]
    fn earliest_absolute() {
        let sec = Duration::from_secs(1);
        let now = Instant::now();

        let times = vec![RetryTime::AfterWaiting, RetryTime::Never];

        let earliest = RetryTime::earliest_absolute(times.into_iter(), now, || sec);
        assert_eq!(
            earliest.expect("no absolute time"),
            AbsRetryTime::At(now + sec)
        );
    }

    // `from_sum` yields `At(base + delta)` normally, and `Never` when the sum
    // is not representable.
    #[test]
    fn abs_from_sum() {
        let base = Instant::now();
        let delta = Duration::from_secs(1);
        assert_eq!(
            AbsRetryTime::from_sum(base, delta),
            AbsRetryTime::At(base + delta)
        );

        assert_eq!(
            AbsRetryTime::from_sum(base, Duration::MAX),
            AbsRetryTime::Never
        );
    }
}
353}