//! Support for reporting the status of an onion service.

use crate::internal_prelude::*;
4

            
5
/// The current reported status of an onion service.
6
#[derive(Debug, Clone, Eq, PartialEq)]
7
pub struct OnionServiceStatus {
8
    /// The current high-level state for the IPT manager.
9
    ipt_mgr: ComponentStatus,
10

            
11
    /// The current high-level state for the descriptor publisher.
12
    publisher: ComponentStatus,
13

            
14
    /// The current high-level state for the PoW manager.
15
    #[cfg(feature = "hs-pow-full")]
16
    pow_manager: ComponentStatus,
17
    // TODO (#1194): Add key expiration
18
    //
19
    // NOTE: Do _not_ add general metrics (like failure/success rates , number
20
    // of intro points, etc) here.
21
}
22

            
23
/// The current reported status of an onion service subsystem.
24
#[derive(Debug, Clone)]
25
pub(crate) struct ComponentStatus {
26
    /// The current high-level state.
27
    state: State,
28

            
29
    /// The last error we have seen.
30
    latest_error: Option<Problem>,
31
}
32

            
33
impl ComponentStatus {
34
    /// Create a new ComponentStatus for a component that has not been bootstrapped.
35
108
    fn new_shutdown() -> Self {
36
108
        Self {
37
108
            state: State::Shutdown,
38
108
            latest_error: None,
39
108
        }
40
108
    }
41
}
42

            
43
impl PartialEq for ComponentStatus {
44
272
    fn eq(&self, other: &Self) -> bool {
45
        let Self {
46
272
            state,
47
272
            latest_error,
48
272
        } = self;
49
        let Self {
50
272
            state: state_other,
51
272
            latest_error: lastest_error_other,
52
272
        } = other;
53

            
54
        // NOTE: Errors are never equal. We _could_ add half-baked PartialEq implementations for
55
        // all of our error types, but it doesn't seem worth it. If there is a state change, or if
56
        // we've encountered an error (even if it's the same as the previous one), we'll notify the
57
        // watchers.
58
272
        state == state_other && latest_error.is_none() && lastest_error_other.is_none()
59
272
    }
60
}
61

            
62
// NOTE(review): `Eq` is declared even though the hand-written `PartialEq` for this type
// never reports two statuses as equal when either one carries an error, so a status with
// an error does not compare equal to itself. This looks deliberate (statuses are compared
// to decide whether watchers need to be notified) -- confirm before relying on `Eq` here
// for anything else.
impl Eq for ComponentStatus {}
63

            
64
/// The high-level state of an onion service.
///
/// This type summarizes the most basic information about an onion service's
/// status.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum State {
    /// The service is not launched.
    ///
    /// Either [`OnionService::launch`](crate::OnionService::launch) has not
    /// been called, or the service has been shut down.
    ///
    /// ## Reachability
    ///
    /// The service is not reachable.
    Shutdown,
    /// The service is bootstrapping.
    ///
    /// Specifically, we have been offline, or we just initialized:
    /// We are trying to build introduction points and publish a descriptor,
    /// and haven't hit any significant problems yet.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable, but may be reachable by some clients.
    Bootstrapping,
    /// The service is running in a degraded state.
    ///
    /// Specifically, we are not satisfied with our introduction points, but
    /// we do have a number of working introduction points,
    /// and our descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is reachable.
    ///
    // TODO: this variant is only used by the IptManager.
    // We should split this enum into IptManagerState and PublisherState.
    DegradedReachable,
    /// The service is running in a degraded state.
    ///
    /// Specifically, we have a number of working introduction points,
    /// but we have failed to upload the descriptor to one or more HsDirs.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    ///
    DegradedUnreachable,
    /// The service is running.
    ///
    /// Specifically, we are satisfied with our introduction points, and our
    /// descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is believed to be fully reachable.
    Running,
    /// The service is trying to recover from a minor interruption.
    ///
    /// Specifically:
    ///   * We have encountered a problem (like a dead intro point or an
    ///     intermittent failure to upload a descriptor)
    ///   * We are trying to recover from the problem.
    ///   * We have not yet failed.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    ///
    //
    // NOTE: this status is currently only set by `IptManager` whenever:
    //   * there are no good IPTs (so the service will be unreachable); or
    //   * there aren't enough good IPTs to publish (AFAICT in this case the service
    //   may be reachable, if the IPTs we _do_ have have previously been published).
    //
    // TODO (#1270): split this state into 2 different states (one for the "service is
    // still reachable" case, and another for the "unreachable" one).
    Recovering,
    /// The service is not working.
    ///
    /// Specifically, there is a problem with this onion service, and either it
    /// is one we cannot recover from, or we have tried for a while to recover
    /// and have failed.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable. It may temporarily be reachable by some clients.
    Broken,
}
154

            
155
impl State {
156
    /// Check whether the service is *believed* to be fully reachable.
157
    ///
158
    /// This is at best an implication in one direction, even if this returns
159
    /// `false`, the service may still be reachable.
160
    pub fn is_fully_reachable(&self) -> bool {
161
        matches!(self, Self::Running | Self::DegradedReachable)
162
    }
163
}
164

            
165
/// An error type for descriptor upload failures with retries.
166
#[derive(Clone, Debug, thiserror::Error)]
167
#[non_exhaustive]
168
pub enum DescUploadRetryError {
169
    /// A fatal (non-transient) error occurred.
170
    #[error("A fatal (non-transient) error occurred")]
171
    FatalError(RetryError<DescUploadError>),
172

            
173
    /// Ran out of retries.
174
    #[error("Ran out of retries")]
175
    MaxRetryCountExceeded(RetryError<DescUploadError>),
176

            
177
    /// Exceeded the maximum allowed time.
178
    #[error("Timeout exceeded")]
179
    Timeout(RetryError<DescUploadError>),
180

            
181
    /// Encountered an internal error.
182
    #[error("Internal error")]
183
    Bug(#[from] Bug),
184
}
185

            
186
/// A problem encountered by an onion service.
187
#[derive(Clone, Debug, derive_more::From)]
188
#[non_exhaustive]
189
pub enum Problem {
190
    /// A fatal error occurred.
191
    Runtime(FatalError),
192

            
193
    /// One or more descriptor uploads failed.
194
    DescriptorUpload(Vec<DescUploadRetryError>),
195

            
196
    /// We failed to establish one or more introduction points.
197
    Ipt(Vec<IptError>),
198

            
199
    /// Error in the PowManager subsystem
200
    // TODO: add variants for other transient errors?
201
    #[cfg(feature = "hs-pow-full")]
202
    Pow(crate::pow::v1::PowError),
203
}
204

            
205
impl OnionServiceStatus {
206
    /// Create a new OnionServiceStatus for a service that has not been bootstrapped.
207
36
    pub(crate) fn new_shutdown() -> Self {
208
36
        Self {
209
36
            ipt_mgr: ComponentStatus::new_shutdown(),
210
36
            publisher: ComponentStatus::new_shutdown(),
211
36
            #[cfg(feature = "hs-pow-full")]
212
36
            pow_manager: ComponentStatus::new_shutdown(),
213
36
        }
214
36
    }
215

            
216
    /// Return the current high-level state of this onion service.
217
    ///
218
    /// The overall state is derived from the `State`s of its underlying components
219
    /// (i.e. the IPT manager and descriptor publisher).
220
    pub fn state(&self) -> State {
221
        use State::*;
222

            
223
        cfg_if::cfg_if! {
224
            if #[cfg(feature = "hs-pow-full")] {
225
                let pow_manager_state = self.pow_manager.state;
226
            } else {
227
                // This is slightly janky, but should give correct results.
228
                let pow_manager_state = Running;
229
            }
230
        }
231

            
232
        match (self.ipt_mgr.state, self.publisher.state, pow_manager_state) {
233
            (Shutdown, _, _) | (_, Shutdown, _) => Shutdown,
234
            (Bootstrapping, _, _) | (_, Bootstrapping, _) => Bootstrapping,
235
            (Running, Running, Running) => Running,
236
            (Recovering, _, _) | (_, Recovering, _) | (_, _, Recovering) => Recovering,
237
            (Broken, _, _) | (_, Broken, _) => Broken,
238
            (DegradedUnreachable, _, _) | (_, DegradedUnreachable, _) => DegradedUnreachable,
239
            (DegradedReachable, Running, _)
240
            | (Running, DegradedReachable, _)
241
            | (DegradedReachable, DegradedReachable, _)
242
            | (Running, Running, _) => DegradedReachable,
243
        }
244
    }
245

            
246
    /// Return the most severe current problem
247
    pub fn current_problem(&self) -> Option<&Problem> {
248
        cfg_if::cfg_if! {
249
            if #[cfg(feature = "hs-pow-full")] {
250
                let pow_manager_error = &self.pow_manager.latest_error;
251
            } else {
252
                let pow_manager_error = &None;
253
            }
254
        }
255

            
256
        match (
257
            &self.ipt_mgr.latest_error,
258
            &self.publisher.latest_error,
259
            pow_manager_error,
260
        ) {
261
            (None, None, None) => None,
262
            (Some(e), Some(_), _) => {
263
                // For now, assume IPT manager errors are always more severe
264
                // TODO: decide which error is the more severe (or return both)
265
                Some(e)
266
            }
267
            (_, Some(e), _) | (Some(e), _, _) => Some(e),
268
            (_, _, Some(e)) => Some(e),
269
        }
270
    }
271

            
272
    /// Return a time before which the user must re-provision this onion service
273
    /// with new keys.
274
    ///
275
    /// Returns `None` if the onion service is able to generate and sign new
276
    /// keys as needed.
277
    pub fn provisioned_key_expiration(&self) -> Option<SystemTime> {
278
        None // TODO (#1194): Implement
279
    }
280
}
281

            
282
/// A stream of OnionServiceStatus events, returned by an onion service.
283
///
284
/// Note that multiple status change events may be coalesced into one if the
285
/// receiver does not read them as fast as they are generated.  Note also
286
/// that it's possible for an item to arise in this stream without an underlying
287
/// change having occurred.
288
///
289
//
290
// We define this so that we aren't exposing postage in our public API.
291
#[derive(Clone)]
292
pub struct OnionServiceStatusStream(postage::watch::Receiver<OnionServiceStatus>);
293

            
294
impl futures::Stream for OnionServiceStatusStream {
295
    type Item = OnionServiceStatus;
296

            
297
16
    fn poll_next(
298
16
        mut self: std::pin::Pin<&mut Self>,
299
16
        cx: &mut std::task::Context<'_>,
300
16
    ) -> std::task::Poll<Option<Self::Item>> {
301
16
        self.0.poll_next_unpin(cx)
302
16
    }
303
}
304

            
305
/// A shared handle to a postage::watch::Sender that we can use to update an OnionServiceStatus.
306
#[derive(Clone)]
307
pub(crate) struct StatusSender(Arc<Mutex<postage::watch::Sender<OnionServiceStatus>>>);
308

            
309
/// A handle that can be used by the [`IptManager`]
310
/// to update the [`OnionServiceStatus`].
311
#[derive(Clone, derive_more::From)]
312
pub(crate) struct IptMgrStatusSender(StatusSender);
313

            
314
/// A handle that can be used by the [`Publisher`]
315
/// to update the [`OnionServiceStatus`].
316
#[derive(Clone, derive_more::From)]
317
pub(crate) struct PublisherStatusSender(StatusSender);
318

            
319
/// A handle that can be used by the `PowManager`
/// to update the [`OnionServiceStatus`].
///
/// It can be built from a [`StatusSender`] with `From`/`Into`.
#[derive(Clone, derive_more::From)]
#[cfg(feature = "hs-pow-full")]
pub(crate) struct PowManagerStatusSender(StatusSender);
324

            
325
/// A helper for implementing [`PublisherStatusSender`], [`IptMgrStatusSender`], etc.
///
/// `$sender` is the newtype around [`StatusSender`] to add the methods to, and
/// `$field` is the [`OnionServiceStatus`] field holding that component's status.
///
/// TODO: this macro is a bit repetitive, it would be nice if we could reduce duplication even
/// further (and auto-generate a `note_<state>` function for every `State` variant).
macro_rules! impl_status_sender {
    ($sender:ident, $field:ident) => {
        impl $sender {
            /// Update `latest_error` and set the underlying state to `Broken`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send_broken(&self, err: impl Into<Problem>) {
                self.send(State::Broken, Some(err.into()));
            }

            /// Update `latest_error` and set the underlying state to `Recovering`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            #[allow(dead_code)] // NOTE: this is dead code in PublisherStatusSender
            pub(crate) fn send_recovering(&self, err: impl Into<Problem>) {
                self.send(State::Recovering, Some(err.into()));
            }

            /// Set `latest_error` to `None` and the underlying state to `Shutdown`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send_shutdown(&self) {
                self.send(State::Shutdown, None);
            }

            /// Update the underlying state and latest_error.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            ///
            /// # Panics
            ///
            /// Panics if the lock protecting the shared sender is poisoned.
            pub(crate) fn send(&self, state: State, err: Option<Problem>) {
                let sender = &self.0;
                // The lock is held for the whole read-modify-write, so concurrent updates
                // from other handles to the same sender cannot interleave with this one.
                let mut tx = sender.0.lock().expect("Poisoned lock");
                // Start from the current overall status and overwrite only this
                // component's part of it.
                let mut svc_status = tx.borrow().clone();
                svc_status.$field.state = state;
                svc_status.$field.latest_error = err;
                tx.maybe_send(|_| svc_status);
            }
        }
    };
}
372

            
373
impl_status_sender!(IptMgrStatusSender, ipt_mgr);
374
impl_status_sender!(PublisherStatusSender, publisher);
375
#[cfg(feature = "hs-pow-full")]
376
impl_status_sender!(PowManagerStatusSender, pow_manager);
377

            
378
impl StatusSender {
379
    /// Create a new StatusSender with a given initial status.
380
36
    pub(crate) fn new(initial_status: OnionServiceStatus) -> Self {
381
36
        let (tx, _) = postage::watch::channel_with(initial_status);
382
36
        StatusSender(Arc::new(Mutex::new(tx)))
383
36
    }
384

            
385
    /// Return a copy of the current status.
386
    pub(crate) fn get(&self) -> OnionServiceStatus {
387
        self.0.lock().expect("Poisoned lock").borrow().clone()
388
    }
389

            
390
    /// Return a new OnionServiceStatusStream to return events from this StatusSender.
391
8
    pub(crate) fn subscribe(&self) -> OnionServiceStatusStream {
392
8
        OnionServiceStatusStream(self.0.lock().expect("Poisoned lock").subscribe())
393
8
    }
394
}
395

            
396
#[cfg(test)]
impl OnionServiceStatus {
    /// Return a copy of the current status of the publisher.
    pub(crate) fn publisher_status(&self) -> ComponentStatus {
        self.publisher.clone()
    }
}
403

            
404
#[cfg(test)]
impl ComponentStatus {
    /// The current `State` of this component.
    pub(crate) fn state(&self) -> State {
        self.state
    }

    /// The current error of this component.
    ///
    /// Returns `None` if no error is recorded for this component.
    pub(crate) fn current_problem(&self) -> Option<&Problem> {
        self.latest_error.as_ref()
    }
}