1
//! Code for creating and manipulating observations about clock skew.
2

            
3
// TODO:
4
//   - See if we can safely report a "no-confidence" value with even fewer
5
//     observations than we currently collect.
6
//   - If the universe of fallbacks and/or the guard sample size and/or the list
7
//     of bridges is very small, see if we can still use that to make a
8
//     low-confidence value.
9

            
10
use std::time::{Duration, Instant};
11

            
12
use tor_proto::ClockSkew;
13

            
14
/// A single observation related to reported clock skew.
15
#[derive(Debug, Clone)]
16
pub(crate) struct SkewObservation {
17
    /// The reported clock skew
18
    pub(crate) skew: ClockSkew,
19
    /// The time when we added this observation.
20
    pub(crate) when: Instant,
21
}
22

            
23
impl SkewObservation {
24
    /// Return true if this observation has been made more recently than
25
    /// `cutoff`. If cutoff is None, consider it's very far in the past.
26
248
    pub(crate) fn more_recent_than(&self, cutoff: Option<Instant>) -> bool {
27
372
        cutoff.is_none_or(|cutoff| self.when > cutoff)
28
248
    }
29
}
30

            
31
/// An estimate of how skewed our clock is, plus a summary of why we think so.
32
//
33
// SEMVER NOTE: this type is re-exported from tor-circmgr.
34
#[derive(Clone, Debug)]
35
pub struct SkewEstimate {
36
    /// Our best guess for the magnitude of the skew.
37
    estimate: ClockSkew,
38
    /// The number of observations leading to this estimate.
39
    n_observations: usize,
40
    /// A description of how confident we are.
41
    confidence: Confidence,
42
}
43

            
44
/// Subjective description of how sure we are that our clock is/isn't skewed.
45
#[derive(Clone, Debug)]
46
enum Confidence {
47
    /// We aren't very sure about our estimate.
48
    None,
49
    /// It seems plausible that our clock is skewed
50
    Low,
51
    /// We are pretty confident that our clock is skewed.
52
    High,
53
}
54

            
55
/// How bad does clock skew need to be before we'll tell the user that it's a
56
/// problem?
57
const SIGNIFICANCE_THRESHOLD: Duration = Duration::from_secs(15 * 60);
58

            
59
impl std::fmt::Display for SkewEstimate {
60
4
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61
        /// Format the whole-second part of `d`.
62
        ///
63
        /// We don't care about fractions here, since skew only mattes if it's
64
        /// on the order of many minutes.
65
4
        fn fmt_secs(d: Duration) -> humantime::FormattedDuration {
66
4
            humantime::format_duration(Duration::from_secs(d.as_secs()))
67
4
        }
68

            
69
4
        match self.estimate {
70
2
            ClockSkew::Slow(d) => write!(f, "slow by around {}", fmt_secs(d)),
71
2
            ClockSkew::None => write!(
72
2
                f,
73
2
                "not skewed by more than {}",
74
2
                fmt_secs(SIGNIFICANCE_THRESHOLD)
75
2
            ),
76
            ClockSkew::Fast(d) => write!(f, "fast by around {}", fmt_secs(d)),
77
        }?;
78

            
79
4
        let confidence = match self.confidence {
80
            Confidence::None => "very little confidence",
81
2
            Confidence::Low => "some confidence",
82
2
            Confidence::High => "high confidence",
83
        };
84

            
85
4
        write!(
86
4
            f,
87
4
            " (based on {} recent observations, with {})",
88
4
            self.n_observations, confidence
89
4
        )
90
4
    }
91
}
92

            
93
impl SkewEstimate {
94
    /// Return our best estimate for the current clock skew.
95
    pub fn skew(&self) -> ClockSkew {
96
        self.estimate
97
    }
98

            
99
    /// Return true if this estimate is worth telling the user about.
100
    pub fn noteworthy(&self) -> bool {
101
        !matches!(self.estimate, ClockSkew::None) && !matches!(self.confidence, Confidence::None)
102
    }
103

            
104
    /// Compute an estimate of how skewed we think our clock is, based on the
105
    /// reports in `skews`.
106
10
    pub(crate) fn estimate_skew<'a>(
107
10
        skews: impl Iterator<Item = &'a SkewObservation>,
108
10
        now: Instant,
109
10
    ) -> Option<Self> {
110
10
        // Only consider skew observations reported at least this recently.
111
10
        let cutoff = now.checked_sub(Duration::from_secs(3600));
112
10

            
113
10
        // Don't even look at our observations unless we  have at least this
114
10
        // many. (This value is chosen somewhat arbitrarily.)
115
10
        //
116
10
        // Note that under normal client operation, we won't connect to this
117
10
        // many guards or fallbacks.  That's fine: clock skew is only a problem
118
10
        // when it keeps us from bootstrapping, and when we are having
119
10
        // bootstrapping problems, we _will_ connect to many guards or
120
10
        // fallbacks.
121
10
        let min_observations = 8;
122
10

            
123
10
        let skews: Vec<_> = skews
124
248
            .filter_map(|obs| obs.more_recent_than(cutoff).then_some(obs.skew))
125
10
            .collect();
126
10
        if skews.len() < min_observations {
127
6
            return None;
128
4
        }
129
4

            
130
4
        // Throw away all the members of `skews` that are too eccentric, and
131
4
        // convert the rest to f64s.
132
4
        let skews: Vec<f64> = discard_outliers(skews);
133
4
        let n_observations = skews.len();
134
4
        debug_assert!(n_observations >= 3);
135

            
136
        // Use the mean of the remaining observations to determine our estimate.
137
4
        let (mean, standard_deviation) = mean_and_standard_deviation(&skews[..]);
138
4
        let estimate = ClockSkew::from_secs_f64(mean)
139
4
            .expect("Somehow generated NaN clock skew‽")
140
4
            .if_above(SIGNIFICANCE_THRESHOLD);
141

            
142
        // Use the standard deviation to determine how confident we should be in
143
        // our estimate.
144
        //
145
        // TODO: probably we should be using a real statistical test instead,
146
        // but that seems like overkill.
147
4
        let confidence = if standard_deviation < 1.0 {
148
            // Avoid divide-by-zero below: if the standard deviation is less
149
            // than 1 second then the mean is probably right.
150
            Confidence::High
151
        } else {
152
4
            let distance = if estimate.is_skewed() {
153
                // If we're saying that we are skewed, look at how many standard
154
                // deviations we are from zero.
155
2
                estimate.magnitude().as_secs_f64() / standard_deviation
156
            } else {
157
                // If we're saying that we're not skewed, look at how many
158
                // standard deviations zero is from "skewed".
159
2
                SIGNIFICANCE_THRESHOLD.as_secs_f64() / standard_deviation
160
            };
161
4
            if distance >= 3.0 {
162
2
                Confidence::High
163
2
            } else if distance >= 2.0 {
164
2
                Confidence::Low
165
            } else {
166
                Confidence::None
167
            }
168
        };
169

            
170
4
        Some(SkewEstimate {
171
4
            estimate: estimate.if_above(SIGNIFICANCE_THRESHOLD),
172
4
            n_observations,
173
4
            confidence,
174
4
        })
175
10
    }
176
}
177

            
178
/// Remove all outliers from `values`, and convert the resulting times into
179
/// `f64`s.
180
///
181
/// We guarantee that no more than 1/2 of the input will be discarded.
182
///
183
/// # Panics
184
///
185
/// Panics if values is empty.
186
8
fn discard_outliers(mut values: Vec<ClockSkew>) -> Vec<f64> {
187
    // Compute the quartiles  of our observations.
188
8
    let (q1, q3) = {
189
8
        let n = values.len();
190
8
        let (low, _median, high) = values.select_nth_unstable(n / 2);
191
8
        let n_low = low.len();
192
8
        let n_high = high.len();
193
8
        debug_assert!(n_low >= 1);
194
8
        debug_assert!(n_high >= 1);
195
8
        let (_, q1, _) = low.select_nth_unstable(n_low / 2);
196
8
        let (_, q3, _) = high.select_nth_unstable(n_high / 2);
197
8

            
198
8
        (q1, q3)
199
8
    };
200
8

            
201
8
    // Compute the inter-quartile range (IQR) and use this to discard outliers.
202
8
    //
203
8
    // (Define IRQ = Q3-Q1. We'll allow all values that are no more than 1.5 IQR
204
8
    // outside the quartiles.)
205
8
    let iqr = (q1.as_secs_f64() - q3.as_secs_f64()).abs();
206
8
    let permissible_range = (q1.as_secs_f64() - iqr * 1.5)..=(q3.as_secs_f64() + iqr * 1.5);
207
8
    values
208
8
        .into_iter()
209
70
        .filter_map(|skew| Some(skew.as_secs_f64()).filter(|v| permissible_range.contains(v)))
210
8
        .collect()
211
8
}
212

            
213
/// Compute and return the mean and standard deviation of `values`.
214
///
215
/// Returns `(Nan,Nan)` if `values` is empty.
216
10
fn mean_and_standard_deviation(values: &[f64]) -> (f64, f64) {
217
10
    let n = values.len() as f64;
218
10
    let mean = values.iter().sum::<f64>() / n;
219
10
    let variance = values
220
10
        .iter()
221
67
        .map(|v| {
222
62
            let diff = v - mean;
223
62
            diff * diff
224
67
        })
225
10
        .sum::<f64>()
226
10
        / n;
227
10

            
228
10
    (mean, variance.sqrt())
229
10
}
230

            
231
#[cfg(test)]
232
mod test {
233
    // @@ begin test lint list maintained by maint/add_warning @@
234
    #![allow(clippy::bool_assert_comparison)]
235
    #![allow(clippy::clone_on_copy)]
236
    #![allow(clippy::dbg_macro)]
237
    #![allow(clippy::mixed_attributes_style)]
238
    #![allow(clippy::print_stderr)]
239
    #![allow(clippy::print_stdout)]
240
    #![allow(clippy::single_char_pattern)]
241
    #![allow(clippy::unwrap_used)]
242
    #![allow(clippy::unchecked_duration_subtraction)]
243
    #![allow(clippy::useless_vec)]
244
    #![allow(clippy::needless_pass_by_value)]
245
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
246
    use super::*;
247
    use float_eq::assert_float_eq;
248

            
249
    /// Tolerance for float comparison.
250
    const TOL: f64 = 0.00001;
251

            
252
    #[test]
253
    fn mean_stddev() {
254
        // This case is trivial.
255
        let a = [17.0];
256
        let (m, s) = mean_and_standard_deviation(&a[..]);
257
        assert_float_eq!(m, 17.0, abs <= TOL);
258
        assert_float_eq!(s, 0.0, abs <= TOL);
259

            
260
        // Computed these by hand using a calculator.
261
        let a = [1.0, 2.0, 3.0, 4.0];
262
        let (m, s) = mean_and_standard_deviation(&a[..]);
263
        assert_float_eq!(m, 2.5, abs <= TOL);
264
        assert_float_eq!(s, 1.11803398, abs <= TOL);
265

            
266
        // Generated these using numpy from normal distribution with stddev=1,
267
        // mean=0.
268
        let a = [
269
            1.34528777,
270
            0.17855632,
271
            -0.08147599,
272
            0.14845672,
273
            0.6838537,
274
            -1.59034826,
275
            0.06777352,
276
            -2.42469117,
277
            -0.12017179,
278
            0.47098959,
279
        ];
280
        let (m, s) = mean_and_standard_deviation(&a[..]);
281
        assert_float_eq!(m, -0.132176959, abs <= TOL);
282
        assert_float_eq!(s, 1.0398321132, abs <= TOL);
283
    }
284

            
285
    #[test]
286
    fn outliers() {
287
        use ClockSkew::{Fast, Slow};
288
        let hour = Duration::from_secs(3600);
289
        // median will be 0. quartiles will ± 2 hours.  That makes
290
        // the IQR 4 hours, so nothing will be discarded.
291
        let a = vec![
292
            Slow(hour * 3),
293
            Slow(hour * 2),
294
            Slow(hour),
295
            ClockSkew::None,
296
            Fast(hour),
297
            Fast(hour * 2),
298
            Fast(hour * 3),
299
        ];
300
        let mut b = discard_outliers(a.clone());
301
        b.sort_by(|a, b| a.partial_cmp(b).unwrap());
302
        assert_eq!(b.len(), 7);
303
        for (ai, bi) in a.iter().zip(b.iter()) {
304
            assert_float_eq!(ai.as_secs_f64(), bi, abs <= TOL);
305
        }
306

            
307
        // Now try with a case that does have some outliers. This time, the IQR
308
        // will be 1 hour, so the first and last times will be discarded as
309
        // outliers.
310
        let a = vec![
311
            Slow(hour * 4),
312
            Slow(hour / 2),
313
            Slow(hour / 3),
314
            ClockSkew::None,
315
            Fast(hour / 3),
316
            Fast(hour / 2),
317
            Fast(hour * 4),
318
        ];
319
        let mut b = discard_outliers(a.clone());
320
        b.sort_by(|a, b| a.partial_cmp(b).unwrap());
321
        assert_eq!(b.len(), 5);
322
        for (ai, bi) in a[1..=5].iter().zip(b.iter()) {
323
            assert_float_eq!(ai.as_secs_f64(), bi, abs <= TOL);
324
        }
325
    }
326

            
327
    #[test]
328
    fn estimate_with_no_data() {
329
        // zero inputs -> output is none.
330
        let now = Instant::now();
331
        let est = SkewEstimate::estimate_skew([].iter(), now);
332
        assert!(est.is_none());
333

            
334
        // Same with fewer than min_observations.
335
        let year = Duration::from_secs(365 * 24 * 60 * 60);
336
        let obs = vec![
337
            SkewObservation {
338
                skew: ClockSkew::Fast(year),
339
                when: now
340
            };
341
            5
342
        ];
343
        let est = SkewEstimate::estimate_skew(obs.iter(), now);
344
        assert!(est.is_none());
345

            
346
        // Same with many observations all of which are obsolete.
347
        //
348
        // (advance the clock: not all Instant implementations let you go back a long time
349
        // before startup.)
350
        let now = now + year;
351
        let obs = vec![
352
            SkewObservation {
353
                skew: ClockSkew::Fast(year),
354
                when: now - year
355
            };
356
            100
357
        ];
358
        let est = SkewEstimate::estimate_skew(obs.iter(), now);
359
        assert!(est.is_none());
360
    }
361

            
362
    /// Construct a vector of SkewObservations from a slice of skew magnitudes
363
    /// expressed in minutes.
364
    fn from_minutes(mins: &[f64]) -> Vec<SkewObservation> {
365
        mins.iter()
366
            .map(|m| SkewObservation {
367
                skew: ClockSkew::from_secs_f64(m * 60.0).unwrap(),
368
                when: Instant::now(),
369
            })
370
            .collect()
371
    }
372

            
373
    #[test]
374
    fn estimate_skewed() {
375
        // The quartiles here are -22 and -10.  The IQR is therefore 12, so we
376
        // won't discard anything.
377
        //
378
        // The mean is -17.125: That's more than 15 minutes from zero, so we'll
379
        // say we're slow.
380
        //
381
        // The standard deviation is 7.67: that puts the mean between 2 and 3
382
        // standard deviations from zero, so we'll say we're skewed with "low"
383
        // confidence.
384
        let obs = from_minutes(&[-20.0, -10.0, -20.0, -25.0, 0.0, -18.0, -22.0, -22.0]);
385

            
386
        let est = SkewEstimate::estimate_skew(obs.iter(), Instant::now()).unwrap();
387
        assert_eq!(
388
            est.to_string(),
389
            "slow by around 17m 7s (based on 8 recent observations, with some confidence)"
390
        );
391
    }
392

            
393
    #[test]
394
    fn estimate_not_skewed() {
395
        // The quartiles here are -2 and 6: IRQ is 8, so we'll discard all the
396
        // huge values, leaving 8.
397
        //
398
        // Mean of the remaining items is 0.75 and standard deviation is 2.62:
399
        // we'll be pretty sure we're not much skewed.
400
        let obs = from_minutes(&[
401
            -100.0, 100.0, -3.0, -2.0, 0.0, 1.0, 0.5, 6.0, 3.0, 0.5, 99.0,
402
        ]);
403

            
404
        let est = SkewEstimate::estimate_skew(obs.iter(), Instant::now()).unwrap();
405
        assert_eq!(
406
            est.to_string(),
407
            "not skewed by more than 15m (based on 8 recent observations, with high confidence)"
408
        );
409
    }
410
}